agent-apprenticeship 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +217 -0
- package/bin/agent-apprenticeship.js +131 -0
- package/package.json +30 -0
- package/pyproject.toml +23 -0
- package/src/agent_apprenticeship_trace/__init__.py +2 -0
- package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
- package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
- package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
- package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
- package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
- package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
- package/src/agent_apprenticeship_trace/certification.py +580 -0
- package/src/agent_apprenticeship_trace/cli.py +2979 -0
- package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
- package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
- package/src/agent_apprenticeship_trace/config.py +609 -0
- package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
- package/src/agent_apprenticeship_trace/env.py +46 -0
- package/src/agent_apprenticeship_trace/evaluator.py +64 -0
- package/src/agent_apprenticeship_trace/grader.py +194 -0
- package/src/agent_apprenticeship_trace/integration_status.py +193 -0
- package/src/agent_apprenticeship_trace/io.py +20 -0
- package/src/agent_apprenticeship_trace/learning.py +627 -0
- package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
- package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
- package/src/agent_apprenticeship_trace/loop.py +111 -0
- package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
- package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
- package/src/agent_apprenticeship_trace/progress.py +223 -0
- package/src/agent_apprenticeship_trace/public_run.py +1109 -0
- package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
- package/src/agent_apprenticeship_trace/recipes.py +129 -0
- package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
- package/src/agent_apprenticeship_trace/revision.py +21 -0
- package/src/agent_apprenticeship_trace/role_runners.py +7 -0
- package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
- package/src/agent_apprenticeship_trace/schemas.py +273 -0
- package/src/agent_apprenticeship_trace/session_events.py +99 -0
- package/src/agent_apprenticeship_trace/task_intake.py +112 -0
- package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
- package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
- package/src/agent_apprenticeship_trace/training_signals.py +30 -0
- package/src/agent_apprenticeship_trace/validation.py +210 -0
- package/src/agent_apprenticeship_trace/verifier.py +55 -0
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .config import Settings, get_settings
|
|
12
|
+
from .env import contains_secret
|
|
13
|
+
from .io import read_json, read_jsonl, write_json
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Lifecycle states an Experience Pack may be given; update_pack_status rejects any other value.
PACK_STATUSES = {"draft", "active", "reverted", "removed"}
# Outcome labels for a before/after replay comparison (the values compare_replay puts in "result").
RESULT_LABELS = {"improved", "no_observed_change", "regressed", "inconclusive"}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def utc_now() -> str:
    """Return the current time in UTC as a timezone-aware ISO 8601 string."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def slugify(value: str, fallback: str = "experience") -> str:
    """Convert *value* into a lowercase, hyphen-separated slug of at most 72 characters.

    Alphanumeric characters are kept (lowercased); every run of other
    characters collapses into a single "-". Leading and trailing hyphens are
    removed. When nothing alphanumeric remains, *fallback* (truncated to 72
    characters) is returned instead.
    """
    chars: list[str] = []
    for char in value.lower():
        if char.isalnum():
            chars.append(char)
        elif chars and chars[-1] != "-":
            chars.append("-")
    slug = "".join(chars).strip("-")
    if not slug:
        return fallback[:72]
    # Truncation may cut right after a separator; strip again so the slug
    # never ends with "-" (which would produce "--" inside generated ids).
    return slug[:72].rstrip("-")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def repo_root() -> Path:
    """Locate the ecosystem repository root, i.e. a directory containing ``seed_dataset``.

    Candidates are tried in this order: the ``AA_ECOSYSTEM_REPO_PATH``
    environment variable, ``ecosystem_repo_path`` from the settings, the
    current working directory, and the directory two levels above this file.
    The current working directory is returned when none of them qualifies.
    """
    env_value = os.getenv("AA_ECOSYSTEM_REPO_PATH")
    if env_value:
        env_path = Path(env_value).expanduser()
        if (env_path / "seed_dataset").exists():
            return env_path

    # Settings lookup is best-effort: any failure falls through to the defaults.
    try:
        configured_path = get_settings().ecosystem_repo_path
        if configured_path and (configured_path / "seed_dataset").exists():
            return configured_path
    except Exception:
        pass

    fallbacks = (Path.cwd(), Path(__file__).resolve().parents[2])
    for location in fallbacks:
        if (location / "seed_dataset").exists():
            return location
    return Path.cwd()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def seed_dataset_root() -> Path:
    """Return the ``seed_dataset`` directory under the resolved repository root."""
    return repo_root().joinpath("seed_dataset")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def seed_registry_path() -> Path:
    """Return the seed registry file, preferring the ``.jsonl`` variant when it exists.

    The ``.json`` path is returned otherwise, whether or not that file exists.
    """
    dataset_dir = seed_dataset_root()
    jsonl_registry = dataset_dir / "ecosystem_registry.jsonl"
    if jsonl_registry.exists():
        return jsonl_registry
    return dataset_dir / "ecosystem_registry.json"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def experience_root(settings: Settings | None = None) -> Path:
    """Return the ``experience_packs`` directory under the app home, creating it if needed.

    Falls back to ``get_settings()`` when *settings* is not supplied.
    """
    active_settings = settings if settings else get_settings()
    packs_dir = active_settings.app_home / "experience_packs"
    packs_dir.mkdir(parents=True, exist_ok=True)
    return packs_dir
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _load_json_or_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load the dict rows stored in a ``.jsonl`` or JSON file.

    A missing file yields an empty list. A JSON document may be a list of rows
    or an object wrapping the rows under ``entries``, ``bundles``, ``items`` or
    ``contributions`` (first non-empty one wins). Non-dict rows are dropped.
    """
    if not path.exists():
        return []
    if path.suffix == ".jsonl":
        candidates = read_jsonl(path)
    else:
        payload = read_json(path)
        if isinstance(payload, dict):
            payload = (
                payload.get("entries")
                or payload.get("bundles")
                or payload.get("items")
                or payload.get("contributions")
            )
        candidates = payload if isinstance(payload, list) else []
    return [row for row in candidates if isinstance(row, dict)]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def load_seed_registry() -> list[dict[str, Any]]:
    """Return the rows of the seed ecosystem registry (empty when the file is missing)."""
    registry_file = seed_registry_path()
    return _load_json_or_jsonl(registry_file)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def load_contribution_registry() -> list[dict[str, Any]]:
    """Load and normalize the contribution index rows of the ecosystem repository.

    Rows come from ``ecosystem/contributions/index.json`` and ``index.jsonl``
    under the repository root; rows without a ``bundle_id`` are skipped. Each
    returned row is a copy with default ``source_kind`` and
    ``experience_source_type`` values, and with its bundle path fields made
    absolute relative to the repository root. When no path field is set and
    the conventional ``bundles/<bundle_id>`` directory exists, it is recorded
    as ``local_bundle_path``.
    """
    root = repo_root()
    contributions_dir = root / "ecosystem" / "contributions"
    path_keys = ("local_bundle_path", "bundle_path", "bundle_path_or_url")

    rows: list[dict[str, Any]] = []
    for index_path in (contributions_dir / "index.json", contributions_dir / "index.jsonl"):
        rows.extend(_load_json_or_jsonl(index_path))

    normalized: list[dict[str, Any]] = []
    for row in rows:
        bundle_id = row.get("bundle_id")
        if not bundle_id:
            continue
        out = dict(row)
        out.setdefault("source_kind", "contribution_bundle")
        out.setdefault("experience_source_type", "contribution_bundle")
        for key in path_keys:
            value = out.get(key)
            if not value:
                continue
            text = str(value)
            if "://" in text:
                # A URL is not a filesystem path: joining it onto the repo
                # root would corrupt it, so leave it exactly as written.
                continue
            candidate = Path(text).expanduser()
            out[key] = str(candidate if candidate.is_absolute() else root / candidate)
        if not any(out.get(key) for key in path_keys):
            bundle_path = contributions_dir / "bundles" / str(bundle_id)
            if bundle_path.exists():
                out["local_bundle_path"] = str(bundle_path)
        normalized.append(out)
    return normalized
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def search_learning_sources(query: str | None = None) -> list[dict[str, Any]]:
    """Return registry rows (seed first, then contributions) matching *query*.

    An empty or missing query returns every row. Otherwise the query is split
    on whitespace and a row matches when every term occurs, case-insensitively,
    in the text built from its ids, title, domains, subdomains and role.
    """

    def searchable_text(row: dict[str, Any]) -> str:
        """Build the lowercase text a row is matched against."""
        parts = [str(row.get(field) or "") for field in ("bundle_id", "seed_task_id", "task_id", "title")]
        parts.append(" ".join(str(item) for item in row.get("domains") or []))
        parts.append(" ".join(str(item) for item in row.get("subdomains") or []))
        parts.append(str(row.get("agent_apprentice_role") or ""))
        return " ".join(parts).lower()

    needle = (query or "").strip().lower()
    catalog = load_seed_registry() + load_contribution_registry()
    if not needle:
        return catalog

    terms = needle.split()
    hits: list[dict[str, Any]] = []
    for row in catalog:
        text = searchable_text(row)
        if all(term in text for term in terms):
            hits.append(row)
    return hits
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _resolve_relative(path_value: str | None) -> Path | None:
    """Resolve *path_value* against the repository root.

    Returns ``None`` for an empty or missing value, the path unchanged when it
    is absolute, and ``repo_root() / path`` otherwise.
    """
    if not path_value:
        return None
    candidate = Path(path_value)
    return candidate if candidate.is_absolute() else repo_root() / candidate
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _source_id(row: dict[str, Any]) -> str:
    """Return the first truthy of ``bundle_id``, ``seed_task_id``, ``task_id`` as a string.

    Falls back to the literal ``"source"`` when none of them is set.
    """
    for field in ("bundle_id", "seed_task_id", "task_id"):
        identifier = row.get(field)
        if identifier:
            return str(identifier)
    return "source"
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _bundle_source(path: Path) -> dict[str, Any]:
    """Describe the contribution bundle stored in directory *path*.

    Metadata is read from ``contribution_manifest.json`` when present; missing
    fields fall back to the directory name, empty lists or zero. The artifact
    count is the length of ``outputs/artifacts_index.json`` when that file
    holds a list, and 0 when it is absent, unreadable or of another shape.
    """
    manifest_path = path / "contribution_manifest.json"
    manifest = read_json(manifest_path) if manifest_path.exists() else {}
    if not isinstance(manifest, dict):
        # A manifest of another JSON shape carries no usable metadata; treat it
        # as empty rather than failing on ``.get`` below.
        manifest = {}
    bundle_id = str(manifest.get("bundle_id") or path.name)

    artifacts_index = path / "outputs" / "artifacts_index.json"
    artifact_count = 0
    if artifacts_index.exists():
        # The count is informational only, so an unreadable index counts as 0.
        try:
            data = read_json(artifacts_index)
            if isinstance(data, list):
                artifact_count = len(data)
        except Exception:
            artifact_count = 0

    return {
        "source_kind": "contribution_bundle",
        "bundle_id": bundle_id,
        "title": manifest.get("title") or bundle_id,
        "domains": manifest.get("domains") or [],
        "subdomains": manifest.get("subdomains") or [],
        "agent_apprentice_role": manifest.get("agent_apprentice_role"),
        "task_status": manifest.get("task_status"),
        "run_status": manifest.get("run_status"),
        "attempt_count": manifest.get("attempts") or 0,
        "trace_count": manifest.get("traced_steps") or 0,
        "artifact_count": artifact_count,
        "bundle_path": str(path),
        "manifest_path": str(manifest_path),
        "artifact_index_path": str(artifacts_index) if artifacts_index.exists() else None,
    }
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def resolve_learning_source(source: str, settings: Settings | None = None) -> dict[str, Any]:
    """Resolve *source* (a path or a registry id) into a learning-source record.

    Lookup order:
      1. an existing filesystem path (a file resolves to its parent directory),
         described via ``_bundle_source``;
      2. a seed or contribution registry row whose ``bundle_id``,
         ``seed_task_id`` or ``task_id`` equals *source*; contribution rows
         with an existing local bundle directory are described from disk;
      3. a bundle directory named *source* under ``<app_home>/ecosystem/bundles``.

    Raises:
        FileNotFoundError: if *source* is blank or nothing matches.
    """
    text = source.strip()
    if not text:
        # ``Path("")`` is the current directory, so a blank source would
        # otherwise be mistaken for a bundle located at the working directory.
        raise FileNotFoundError(f"Ecosystem experience source not found: {source}")

    path = Path(text).expanduser()
    if path.exists():
        if path.is_file():
            path = path.parent
        return _bundle_source(path)

    rows = load_seed_registry()
    rows.extend(load_contribution_registry())
    for row in rows:
        keys = {
            str(row.get("bundle_id") or ""),
            str(row.get("seed_task_id") or ""),
            str(row.get("task_id") or ""),
        }
        if text not in keys:
            continue
        resolved = dict(row)
        resolved["source_kind"] = resolved.get("source_kind") or (
            "seed_task" if resolved.get("seed_task_id") or resolved.get("task_packet_path") else "contribution_bundle"
        )
        if resolved["source_kind"] == "contribution_bundle":
            bundle_path = (
                resolved.get("local_bundle_path")
                or resolved.get("bundle_path")
                or resolved.get("bundle_path_or_url")
            )
            if bundle_path:
                bundle_dir = Path(str(bundle_path)).expanduser()
                if bundle_dir.exists():
                    return _bundle_source(bundle_dir)
        return resolved

    s = settings or get_settings()
    pulled = s.app_home / "ecosystem" / "bundles" / text
    if pulled.exists():
        return _bundle_source(pulled)
    raise FileNotFoundError(f"Ecosystem experience source not found: {source}")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _safe_read(path: Path | None, limit: int = 12000) -> str:
    """Return at most *limit* characters from the start of a text file.

    Returns ``""`` when *path* is ``None``, missing or not a regular file.
    Undecodable bytes are replaced rather than raising.
    """
    if not path or not path.exists() or not path.is_file():
        return ""
    # Decode as UTF-8 explicitly so the result does not depend on the platform
    # locale, and read only the prefix we need instead of the whole file.
    with path.open(encoding="utf-8", errors="replace") as handle:
        if limit < 0:
            # Keep slice semantics for a negative limit (drop the tail).
            return handle.read()[:limit]
        return handle.read(limit)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _read_task_packet(source: dict[str, Any]) -> dict[str, Any]:
    """Load the task packet referenced by ``source["task_packet_path"]``.

    Returns an empty dict when the path is unset or missing, when reading
    fails, or when the file does not contain a JSON object.
    """
    packet_path = _resolve_relative(source.get("task_packet_path"))
    if not packet_path or not packet_path.exists():
        return {}
    try:
        packet = read_json(packet_path)
    except Exception:
        return {}
    if isinstance(packet, dict):
        return packet
    return {}
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _read_learning_lessons(source: dict[str, Any]) -> list[dict[str, Any]]:
    """Collect the dict rows of every ``*.jsonl`` file in the source's learning-signals directory.

    Files are read in sorted order. Returns an empty list when
    ``learning_signals_path`` is unset or does not exist.
    """
    signals_dir = _resolve_relative(source.get("learning_signals_path"))
    if not (signals_dir and signals_dir.exists()):
        return []
    collected: list[dict[str, Any]] = []
    for jsonl_file in sorted(signals_dir.glob("*.jsonl")):
        for row in read_jsonl(jsonl_file):
            if isinstance(row, dict):
                collected.append(row)
    return collected
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _read_rubric_reminders(source: dict[str, Any]) -> list[str]:
    """Extract up to eight short reminder lines from the worker-visible rubric.

    Only the first 6000 characters of the rubric are considered. Each line has
    leading/trailing ``-``, ``*``, ``#`` and spaces removed; empty lines and
    lines longer than 220 characters are skipped.
    """
    rubric_path = _resolve_relative(source.get("worker_visible_rubric_path"))
    rubric_text = _safe_read(rubric_path, limit=6000)
    picked: list[str] = []
    for raw_line in rubric_text.splitlines():
        if len(picked) >= 8:
            break
        cleaned = raw_line.strip("-*# ").strip()
        if not cleaned or len(cleaned) > 220:
            continue
        picked.append(cleaned)
    return picked
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _dedupe(values: list[str]) -> list[str]:
    """Return the stripped, non-empty values with duplicates removed, keeping first-seen order."""
    # A dict keeps insertion order, so its keys are the unique values in order.
    unique: dict[str, None] = {}
    for raw in values:
        text = str(raw).strip()
        if text:
            unique.setdefault(text, None)
    return list(unique)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _extend_from_lessons(lessons: list[dict[str, Any]], key: str) -> list[str]:
    """Gather the values stored under *key* across *lessons* as a flat list of strings.

    A list value contributes each of its items; any other truthy value
    contributes itself. Falsy values are ignored.
    """
    collected: list[str] = []
    for lesson in lessons:
        entry = lesson.get(key)
        if isinstance(entry, list):
            collected += [str(member) for member in entry]
        elif entry:
            collected.append(str(entry))
    return collected
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _trace_refs(source: dict[str, Any]) -> list[dict[str, Any]]:
    """List the source's ``trace_paths`` together with whether each one currently exists."""

    def describe(raw_path: Any) -> dict[str, Any]:
        """Build one ``{"path", "exists"}`` entry for a trace path."""
        located = _resolve_relative(str(raw_path))
        return {"path": str(raw_path), "exists": located is not None and located.exists()}

    return [describe(raw_path) for raw_path in source.get("trace_paths") or []]
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _source_ref(source: dict[str, Any]) -> dict[str, Any]:
    """Build the compact reference record stored for a source in ``source_refs.json``."""
    ref: dict[str, Any] = {"source_id": _source_id(source)}
    ref["experience_source_type"] = source.get("source_kind") or "seed_task"
    ref["title"] = source.get("title")
    for list_field in ("domains", "subdomains"):
        ref[list_field] = source.get(list_field) or []
    for path_field in ("task_packet_path", "worker_visible_rubric_path", "learning_signals_path"):
        ref[path_field] = source.get(path_field)
    ref["trace_refs"] = _trace_refs(source)
    ref["bundle_path"] = source.get("bundle_path")
    return ref
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def compile_experience_pack(
    sources: list[dict[str, Any]],
    *,
    title: str | None = None,
    settings: Settings | None = None,
) -> Path:
    """Compile *sources* into a new draft Experience Pack directory and return its path.

    The pack id is derived from the slugified title plus a short hash of the
    source ids and title; a numeric suffix is appended when that directory
    already exists. Lessons, rubric reminders and requirements are gathered
    from each source's task packet, learning signals and worker-visible
    rubric, de-duplicated and capped at 12 entries per category.

    Written files: ``experience_pack.json``, ``source_refs.json``,
    ``skill.md`` and ``SUMMARY.md``.

    Args:
        sources: Resolved learning-source records; must not be empty.
        title: Optional pack title; defaults to the first source's title
            (with a "+ N more" suffix for multiple sources).
        settings: Settings to use; defaults to ``get_settings()``.

    Raises:
        ValueError: if *sources* is empty, or if any file about to be written
            appears to contain a secret (nothing is written in that case).
    """
    if not sources:
        raise ValueError("At least one ecosystem experience source is required.")
    s = settings or get_settings()
    source_ids = [_source_id(source) for source in sources]
    first_title = str(sources[0].get("title") or source_ids[0])
    pack_title = title or (first_title if len(sources) == 1 else f"{first_title} + {len(sources) - 1} more")
    digest = hashlib.sha256("|".join(source_ids + [pack_title]).encode()).hexdigest()[:10]
    pack_id = f"aa-exp-{slugify(pack_title)}-{digest}"

    # Pick the first free directory name: <pack_id>, <pack_id>-2, <pack_id>-3, ...
    packs_root = experience_root(s)
    pack_path = packs_root / pack_id
    suffix = 2
    while pack_path.exists():
        pack_path = packs_root / f"{pack_id}-{suffix}"
        suffix += 1
    pack_id = pack_path.name

    domains: list[str] = []
    subdomains: list[str] = []
    task_patterns: list[str] = []
    artifact_requirements: list[str] = []
    rubric_reminders: list[str] = []
    common_failure_modes: list[str] = []
    strategy_lessons: list[str] = []
    revision_lessons: list[str] = []
    verifier_lessons: list[str] = []
    good_bad_signals: list[str] = []
    original_rubric_refs: list[str] = []
    original_learning_signal_refs: list[str] = []
    source_refs: list[dict[str, Any]] = []
    for source in sources:
        domains.extend(map(str, source.get("domains") or []))
        subdomains.extend(map(str, source.get("subdomains") or []))

        packet = _read_task_packet(source)
        if packet.get("task_family"):
            task_patterns.append(str(packet["task_family"]))
        if packet.get("expected_deliverable"):
            artifact_requirements.append(str(packet["expected_deliverable"]))
        artifact_requirements.extend(map(str, packet.get("deliverables") or []))

        lessons = _read_learning_lessons(source)
        artifact_requirements.extend(_extend_from_lessons(lessons, "artifact_requirements"))
        common_failure_modes.extend(_extend_from_lessons(lessons, "common_failure_modes"))
        strategy_lessons.extend(_extend_from_lessons(lessons, "strategy_lessons"))
        revision_lessons.extend(_extend_from_lessons(lessons, "revision_lessons"))
        verifier_lessons.extend(_extend_from_lessons(lessons, "verifier_feedback_summary"))
        for lesson in lessons:
            if lesson.get("lesson_summary"):
                good_bad_signals.append(str(lesson["lesson_summary"]))
            if lesson.get("rubric_reminders"):
                item = lesson["rubric_reminders"]
                if isinstance(item, list):
                    rubric_reminders.extend(map(str, item))
        rubric_reminders.extend(_read_rubric_reminders(source))

        if source.get("worker_visible_rubric_path"):
            original_rubric_refs.append(str(source["worker_visible_rubric_path"]))
        if source.get("learning_signals_path"):
            original_learning_signal_refs.append(str(source["learning_signals_path"]))
        source_refs.append(_source_ref(source))

    domains = _dedupe(domains)
    subdomains = _dedupe(subdomains)
    task_patterns = _dedupe(task_patterns)
    artifact_requirements = _dedupe(artifact_requirements)[:12]
    rubric_reminders = _dedupe(rubric_reminders)[:12]
    common_failure_modes = _dedupe(common_failure_modes)[:12]
    strategy_lessons = _dedupe(strategy_lessons)[:12]
    revision_lessons = _dedupe(revision_lessons)[:12]
    verifier_lessons = _dedupe(verifier_lessons)[:12]
    good_bad_signals = _dedupe(good_bad_signals)[:12]

    pack = {
        "pack_id": pack_id,
        "title": pack_title,
        "source_ecosystem_ids": source_ids,
        "experience_source_types": _dedupe([str(source.get("source_kind") or "seed_task") for source in sources]),
        "domains": domains,
        "subdomains": subdomains,
        "task_patterns": task_patterns,
        "artifact_requirements": artifact_requirements,
        "rubric_reminders": rubric_reminders,
        "common_failure_modes": common_failure_modes,
        "strategy_lessons": strategy_lessons,
        "revision_lessons": revision_lessons,
        "verifier_evaluator_lessons": verifier_lessons,
        "examples_of_good_bad_attempt_signals": good_bad_signals,
        "original_trace_refs": [ref for source in sources for ref in _trace_refs(source)],
        "original_rubric_refs": original_rubric_refs,
        "original_learning_signal_refs": original_learning_signal_refs,
        "created_at": utc_now(),
        "status": "draft",
    }
    source_refs_payload = {"sources": source_refs}
    skill = render_skill_markdown(pack)
    summary = render_pack_summary(pack)

    # Scan every file about to be written. The markdown renders only a subset
    # of the pack (first 8 items of some categories), so the JSON payloads are
    # checked too; otherwise a secret in an unrendered field would be persisted.
    scanned = {
        "skill.md": skill,
        "SUMMARY.md": summary,
        "experience_pack.json": json.dumps(pack, ensure_ascii=False, default=str),
        "source_refs.json": json.dumps(source_refs_payload, ensure_ascii=False, default=str),
    }
    for name, text in scanned.items():
        if contains_secret(text):
            raise ValueError(f"Experience Pack blocked because {name} appears to contain a secret.")

    pack_path.mkdir(parents=True)
    write_json(pack_path / "experience_pack.json", pack)
    write_json(pack_path / "source_refs.json", source_refs_payload)
    (pack_path / "skill.md").write_text(skill)
    (pack_path / "SUMMARY.md").write_text(summary)
    return pack_path
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def render_skill_markdown(pack: dict[str, Any]) -> str:
    """Render the ``skill.md`` guidance text for *pack*.

    Each lesson section lists at most eight entries, or a "None captured."
    placeholder when the pack has none. The final section lists up to twelve
    source ids.
    """

    def bullets(key: str, fallback: str = "None captured.") -> str:
        """Format ``pack[key]`` as a markdown bullet list (first eight items)."""
        values = pack.get(key) or []
        if not values:
            return f"- {fallback}"
        return "\n".join(f"- {value}" for value in values[:8])

    # ``or []`` also covers a key that is present but null, which would
    # otherwise fail when sliced.
    source_ids = pack.get("source_ecosystem_ids") or []
    source_lines = "\n".join(f"- {source_id}" for source_id in source_ids[:12])

    return (
        f"# Experience Pack: {pack.get('title')}\n\n"
        "Use these concise lessons when working on similar tasks. Do not treat this as hidden answer key content.\n\n"
        "## Key Lessons\n"
        f"{bullets('strategy_lessons')}\n\n"
        "## Artifact Requirements\n"
        f"{bullets('artifact_requirements')}\n\n"
        "## Rubric Reminders\n"
        f"{bullets('rubric_reminders')}\n\n"
        "## Common Failure Modes To Avoid\n"
        f"{bullets('common_failure_modes')}\n\n"
        "## Revision Lessons\n"
        f"{bullets('revision_lessons')}\n\n"
        "## Source Refs\n"
        f"{source_lines}\n"
    )
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def render_pack_summary(pack: dict[str, Any]) -> str:
    """Render the short ``SUMMARY.md`` text for *pack* (title, id, status, sources, domains)."""
    source_list = ", ".join(pack.get("source_ecosystem_ids") or [])
    domain_list = ", ".join(pack.get("domains") or []) or "not specified"
    paragraphs = [
        f"# {pack.get('title')}",
        f"Pack ID: {pack.get('pack_id')}",
        f"Status: {pack.get('status')}",
        f"Sources: {source_list}",
        f"Domains: {domain_list}",
        "This Experience Pack contains compact Apprentice Agent guidance plus source references for inspection.\n",
    ]
    # Paragraphs are separated by one blank line; the last one ends with a single newline.
    return "\n\n".join(paragraphs)
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def load_pack(pack_id_or_path: str, settings: Settings | None = None) -> tuple[Path, dict[str, Any]]:
    """Locate an Experience Pack and return ``(pack_directory, pack_data)``.

    *pack_id_or_path* is tried as a filesystem path first, then as a pack id
    under the experience root, then under its ``_removed`` archive.

    Raises:
        FileNotFoundError: if no ``experience_pack.json`` is found.
        ValueError: if the file does not contain a JSON object.
    """
    active_settings = settings or get_settings()
    pack_dir = Path(pack_id_or_path).expanduser()
    if not pack_dir.exists():
        pack_dir = experience_root(active_settings) / pack_id_or_path
    if not pack_dir.exists():
        archived = experience_root(active_settings) / "_removed" / pack_id_or_path
        if archived.exists():
            pack_dir = archived

    manifest = pack_dir / "experience_pack.json"
    if not manifest.exists():
        raise FileNotFoundError(f"Experience Pack not found: {pack_id_or_path}")
    payload = read_json(manifest)
    if not isinstance(payload, dict):
        raise ValueError(f"Invalid Experience Pack: {pack_id_or_path}")
    return pack_dir, payload
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def list_packs(settings: Settings | None = None, include_removed: bool = False) -> list[dict[str, Any]]:
    """Return the data of every readable Experience Pack, each with a ``path`` entry added.

    Packs are listed in sorted directory order. Archived packs under
    ``_removed`` are included only when *include_removed* is true. Directories
    without a readable JSON-object ``experience_pack.json`` are skipped.
    """
    root = experience_root(settings)
    archive = root / "_removed"
    pack_dirs = [entry for entry in root.iterdir() if entry.is_dir() and entry.name != "_removed"]
    if include_removed and archive.exists():
        pack_dirs += [entry for entry in archive.iterdir() if entry.is_dir()]

    found: list[dict[str, Any]] = []
    for pack_dir in sorted(pack_dirs):
        manifest = pack_dir / "experience_pack.json"
        if not manifest.exists():
            continue
        # Listing is best-effort: an unreadable pack is skipped, not fatal.
        try:
            payload = read_json(manifest)
        except Exception:
            continue
        if isinstance(payload, dict):
            found.append({**payload, "path": str(pack_dir)})
    return found
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def update_pack_status(pack_id: str, status: str, settings: Settings | None = None) -> Path:
    """Set a pack's status, stamp ``updated_at``, rewrite its JSON and SUMMARY.md, and return its directory.

    Raises:
        ValueError: if *status* is not one of ``PACK_STATUSES``.
    """
    if status not in PACK_STATUSES:
        raise ValueError(f"Unsupported Experience Pack status: {status}")
    pack_dir, record = load_pack(pack_id, settings)
    record.update(status=status, updated_at=utc_now())
    write_json(pack_dir / "experience_pack.json", record)
    summary_file = pack_dir / "SUMMARY.md"
    summary_file.write_text(render_pack_summary(record))
    return pack_dir
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def remove_pack(pack_id: str, settings: Settings | None = None) -> Path:
    """Mark a pack as removed and move it into the ``_removed`` archive.

    The pack's JSON and SUMMARY.md are rewritten with the ``removed`` status
    before the move. An older archived pack with the same directory name is
    replaced. Calling this on a pack that is already archived only refreshes
    its metadata.

    Returns:
        The pack's directory inside ``_removed``.
    """
    path, data = load_pack(pack_id, settings)
    data["status"] = "removed"
    data["updated_at"] = utc_now()
    write_json(path / "experience_pack.json", data)
    # Keep SUMMARY.md in step with the JSON, as update_pack_status does.
    (path / "SUMMARY.md").write_text(render_pack_summary(data))

    removed_root = experience_root(settings) / "_removed"
    removed_root.mkdir(exist_ok=True)
    target = removed_root / path.name
    if target.resolve() == path.resolve():
        # load_pack falls back to the archive, so the pack may already live at
        # the target. Deleting the target first would destroy the pack itself.
        return target
    if target.exists():
        shutil.rmtree(target)
    shutil.move(str(path), str(target))
    return target
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def active_packs(settings: Settings | None = None) -> list[dict[str, Any]]:
    """Return the non-archived packs whose status is ``"active"``."""
    selected: list[dict[str, Any]] = []
    for pack in list_packs(settings):
        if pack.get("status") == "active":
            selected.append(pack)
    return selected
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def pack_prompt_section(pack: dict[str, Any]) -> str:
    """Wrap the pack's skill markdown (first 80 lines) in a delimited prompt section."""
    skill_lines = render_skill_markdown(pack).splitlines()[:80]
    body = "\n".join(skill_lines)
    pieces = [
        "\n\n---\n",
        f"Experience Pack: {pack.get('title')}\n",
        "Apply these lessons when useful. Keep the task's original requirements authoritative.\n\n",
        f"{body}\n",
        "---\n",
    ]
    return "".join(pieces)
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def resolve_packs_for_run(
    pack_ids: list[str],
    *,
    use_active: bool = False,
    no_packs: bool = False,
    settings: Settings | None = None,
) -> tuple[list[dict[str, Any]], str]:
    """Select the packs to apply to a run and build their combined prompt guidance.

    Explicitly requested packs come first (reverted or removed ones are
    skipped); with *use_active*, all active packs are appended. A pack id is
    selected at most once. *no_packs* short-circuits to ``([], "")``.

    Returns:
        ``(selected_packs, guidance_text)``; each explicitly loaded pack
        carries a ``path`` entry.
    """
    if no_packs:
        return [], ""

    chosen: list[dict[str, Any]] = []
    seen_ids: set[str] = set()
    active_settings = settings or get_settings()

    def select(candidate: dict[str, Any]) -> None:
        """Add *candidate* unless its pack id was already selected."""
        if candidate.get("pack_id") not in seen_ids:
            seen_ids.add(str(candidate.get("pack_id")))
            chosen.append(candidate)

    for requested in pack_ids:
        pack_dir, loaded = load_pack(requested, active_settings)
        if loaded.get("status") in {"reverted", "removed"}:
            continue
        select({**loaded, "path": str(pack_dir)})

    if use_active:
        for candidate in active_packs(active_settings):
            select(candidate)

    guidance = "".join(pack_prompt_section(entry) for entry in chosen)
    return chosen, guidance
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def pack_run_refs(packs: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Reduce each pack to the id, title, source ids and status recorded for a run."""
    return [
        {
            "pack_id": pack.get("pack_id"),
            "title": pack.get("title"),
            "source_refs": pack.get("source_ecosystem_ids") or [],
            "status": pack.get("status"),
        }
        for pack in packs
    ]
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def replay_instruction_for_pack(pack: dict[str, Any], settings: Settings | None = None) -> str:
    """Return the task instruction to replay for *pack*.

    The instruction comes from the task packet of the pack's first source,
    with its deliverables appended when listed. If that lookup fails or the
    packet has no instruction, a generic instruction built from the pack's
    title and artifact requirements is returned.
    """
    source_ids = pack.get("source_ecosystem_ids") or []
    if source_ids:
        # Best-effort: any failure resolving the source falls back to the generic text.
        try:
            packet = _read_task_packet(resolve_learning_source(str(source_ids[0]), settings))
            instruction = None
            for field in ("instruction", "task_instruction", "normalized_instruction"):
                instruction = packet.get(field)
                if instruction:
                    break
            if instruction:
                deliverables = packet.get("deliverables") or []
                if deliverables:
                    listed = ", ".join(map(str, deliverables))
                    return str(instruction) + f"\n\nRequired deliverables: {listed}"
                return str(instruction) + ""
        except Exception:
            pass
    artifacts = ", ".join(pack.get("artifact_requirements") or [])
    if not artifacts:
        artifacts = "a concise artifact summary"
    return f"Complete a small task inspired by {pack.get('title')}. Produce {artifacts}."
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def compare_replay(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]:
    """Compare two run records and label the change between them.

    Status is ``task_status`` (falling back to ``run_status``). Reaching
    ``completed`` is an improvement and losing it a regression; when both runs
    completed, the artifact counts decide. Equal non-completed statuses give
    ``no_observed_change``; anything else is ``inconclusive``.
    """
    before_status = before.get("task_status") or before.get("run_status")
    after_status = after.get("task_status") or after.get("run_status")
    before_artifacts = int(before.get("artifact_count") or 0)
    after_artifacts = int(after.get("artifact_count") or 0)

    was_done = before_status == "completed"
    is_done = after_status == "completed"
    if was_done and is_done:
        if after_artifacts > before_artifacts:
            label = "improved"
        elif after_artifacts < before_artifacts:
            label = "regressed"
        else:
            label = "no_observed_change"
    elif is_done:
        label = "improved"
    elif was_done:
        label = "regressed"
    elif before_status == after_status:
        label = "no_observed_change"
    else:
        label = "inconclusive"

    return {
        "result": label,
        "before_status": before_status,
        "after_status": after_status,
        "before_artifact_count": before_artifacts,
        "after_artifact_count": after_artifacts,
        "score_available": False,
        "score_note": "No comparable evaluator score was available; comparison used status and artifact completeness.",
    }
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def write_before_after_result(pack_path: Path, result: dict[str, Any]) -> Path:
    """Write *result*, stamped with ``created_at``, to ``before_after_result.json`` in the pack directory.

    The caller's dict is not modified. Returns the path of the written file.
    """
    destination = pack_path / "before_after_result.json"
    stamped = {**result, "created_at": utc_now()}
    write_json(destination, stamped)
    return destination
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from .schemas import LessonPack, HillclimbResult
|
|
3
|
+
|
|
4
|
+
def extract_lesson(h: HillclimbResult) -> LessonPack:
    """Build a LessonPack from a hill-climb result.

    Failed criteria before the revision become the common failure modes and
    those still failing afterwards become the rubric reminders; the remaining
    lesson texts are fixed strings.
    """
    attempt_ids = [h.baseline_attempt_id, h.revised_attempt_id]
    return LessonPack(
        lesson_id=f'lesson_{h.task_id}',
        task_id=h.task_id,
        source_attempt_ids=attempt_ids,
        lesson_summary='Use verifier and grader feedback to repair missing artifacts before revision.',
        strategy_lessons=['Map every output requirement to a concrete file.'],
        common_failure_modes=h.failed_criteria_before,
        rubric_reminders=h.failed_criteria_after,
        artifact_requirements=['Preserve package-relative artifact paths.'],
        verifier_feedback_summary=h.hillclimb_evidence_strength,
        hidden_reference_leaked=False,
        metadata_json={},
    )
|