agent-apprenticeship 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +217 -0
  3. package/bin/agent-apprenticeship.js +131 -0
  4. package/package.json +30 -0
  5. package/pyproject.toml +23 -0
  6. package/src/agent_apprenticeship_trace/__init__.py +2 -0
  7. package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
  8. package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
  9. package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
  10. package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
  11. package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
  12. package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
  13. package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
  14. package/src/agent_apprenticeship_trace/certification.py +580 -0
  15. package/src/agent_apprenticeship_trace/cli.py +2979 -0
  16. package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
  17. package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
  18. package/src/agent_apprenticeship_trace/config.py +609 -0
  19. package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
  20. package/src/agent_apprenticeship_trace/env.py +46 -0
  21. package/src/agent_apprenticeship_trace/evaluator.py +64 -0
  22. package/src/agent_apprenticeship_trace/grader.py +194 -0
  23. package/src/agent_apprenticeship_trace/integration_status.py +193 -0
  24. package/src/agent_apprenticeship_trace/io.py +20 -0
  25. package/src/agent_apprenticeship_trace/learning.py +627 -0
  26. package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
  27. package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
  28. package/src/agent_apprenticeship_trace/loop.py +111 -0
  29. package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
  30. package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
  31. package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
  32. package/src/agent_apprenticeship_trace/progress.py +223 -0
  33. package/src/agent_apprenticeship_trace/public_run.py +1109 -0
  34. package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
  35. package/src/agent_apprenticeship_trace/recipes.py +129 -0
  36. package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
  37. package/src/agent_apprenticeship_trace/revision.py +21 -0
  38. package/src/agent_apprenticeship_trace/role_runners.py +7 -0
  39. package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
  40. package/src/agent_apprenticeship_trace/schemas.py +273 -0
  41. package/src/agent_apprenticeship_trace/session_events.py +99 -0
  42. package/src/agent_apprenticeship_trace/task_intake.py +112 -0
  43. package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
  44. package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
  45. package/src/agent_apprenticeship_trace/training_signals.py +30 -0
  46. package/src/agent_apprenticeship_trace/validation.py +210 -0
  47. package/src/agent_apprenticeship_trace/verifier.py +55 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Agent Apprenticeship
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,217 @@
1
+ # Agent Apprenticeship
2
+
3
+ The living ecosystem where AI agents learn from real-world work through iterative loops, reusable experience, and training-signal exchange.
4
+
5
+ As agents move into long-horizon, economically valuable work, Agent Apprenticeship creates the open infrastructure where useful work generates reusable learning signals and challenging tasks improve through automated iterative loops.
6
+
7
+ Agent Apprenticeship is designed for an infinite exchange of work experience between agents: useful work creates training signals, signals improve future work, and future work creates new signals for the ecosystem.
8
+
9
+ Agent Apprenticeship is built for loop iterations across domains, from simple tasks to complex specialized workflows. Apprentice agents can work with mentor agents to accomplish long-horizon, real-world tasks across model-assisted, expert-led, and hybrid modes, generating learning signals throughout the process.
10
+
11
+ The first seed dataset includes:
12
+
13
+ * 500+ curated seed tasks sourced from and grounded in real-world work
14
+ * 495 reusable agent lessons
15
+ * 1000+ full agent execution traces
16
+ * 1000+ agent work episodes / task rollouts
17
+
18
+ The seed dataset spans specialized economically valuable tasks across domains and forms the first layer of the Agent Apprenticeship ecosystem.
19
+
20
+ Agent Apprenticeship is now available for anyone to start using with local agents including Codex, Cursor, Claude Code, OpenClaw, OpenCode, Hermes Agent, and custom agents, alongside different model providers. Users can experience automated iterative loops locally, contribute agent learning signals back to the ecosystem, and access ecosystem learning signals to improve their own agents.
21
+
22
+ Agent Apprenticeship is also about the future of work and the economic value of agents. For every task executed through Agent Apprenticeship, the system can estimate task-level economic value, especially across specialized domains. It is built for everyday use to improve agent performance and outcome quality, while also enabling users to exchange agent work experience with each other and with domain-expert-led agents in one living ecosystem.
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ npx agent-apprenticeship init
28
+ ```
29
+
30
+ ```bash
31
+ npm install -g agent-apprenticeship
32
+ apprentice init
33
+ ```
34
+
35
+ The installed command is:
36
+
37
+ ```bash
38
+ apprentice
39
+ ```
40
+
41
+ The long-form command also remains available:
42
+
43
+ ```bash
44
+ agent-apprenticeship
45
+ ```
46
+
47
+ ## Quickstart
48
+
49
+ ```bash
50
+ apprentice init
51
+ apprentice settings
52
+ apprentice run "Create a short market map for AI procurement tools."
53
+ ```
54
+
55
+ Each run prints its artifacts path and its Contribution Bundle path.
56
+
57
+ ```bash
58
+ apprentice ecosystem contribute <bundle_path>
59
+ ```
60
+
61
+ Public ecosystem:
62
+
63
+ https://github.com/Forsy-AI/agent-apprenticeship
64
+
65
+ ## Apprentice Agents
66
+
67
+ Selected v0 Apprentice Agents:
68
+
69
+ * Codex
70
+ * Cursor
71
+ * Claude Code
72
+ * OpenClaw
73
+ * OpenCode
74
+ * Hermes Agent
75
+ * Custom
76
+
77
+ Agent Apprenticeship auto-detects installed CLIs. If multiple are detected, choose one during setup.
78
+
79
+ Custom lets you provide a command template:
80
+
81
+ ```bash
82
+ apprentice configure agent custom --command-template "my-agent run --workspace {workspace} --prompt-file {prompt_file}"
83
+ ```
84
+
85
+ ## Mentor Model Providers
86
+
87
+ Store local keys in:
88
+
89
+ ```text
90
+ ~/.agent-apprenticeship/.env.local
91
+ ```
92
+
93
+ Example:
94
+
95
+ ```bash
96
+ OPENAI_API_KEY=""
97
+ ANTHROPIC_API_KEY=""
98
+ GEMINI_API_KEY=""
99
+ OPENROUTER_API_KEY=""
100
+ ```
101
+
102
+ Configure:
103
+
104
+ ```bash
105
+ apprentice configure model
106
+ apprentice doctor
107
+ ```
108
+
109
+ ## Mentor Modes
110
+
111
+ ```bash
112
+ apprentice run "..." --mentor-mode model-assisted
113
+ apprentice run "..." --mentor-mode expert-led
114
+ apprentice run "..." --mentor-mode hybrid
115
+ ```
116
+
117
+ * `model-assisted`: Mentor Model Provider handles the mentor loop.
118
+ * `expert-led`: human expert checkpoints guide the mentor loop.
119
+ * `hybrid`: Mentor Model Provider drafts and human expert checkpoints approve or edit.
120
+
121
+ ## Seed Dataset
122
+
123
+ The seed dataset is included under:
124
+
125
+ ```text
126
+ seed_dataset/
127
+ ```
128
+
129
+ Explore seed tasks:
130
+
131
+ ```bash
132
+ apprentice ecosystem list
133
+ apprentice ecosystem search cloud
134
+ apprentice ecosystem inspect aa-seed-task-501
135
+ apprentice ecosystem pull aa-seed-task-501
136
+ ```
137
+
138
+ ## Ecosystem Learning
139
+
140
+ Create Experience Packs from ecosystem experience:
141
+
142
+ ```bash
143
+ apprentice learn create aa-seed-task-501
144
+ apprentice learn preview <pack_id>
145
+ apprentice learn replay <pack_id>
146
+ apprentice learn keep <pack_id>
147
+ apprentice run "Create a related incident response checklist." --experience-pack <pack_id>
148
+ apprentice learn revert <pack_id>
149
+ ```
150
+
151
+ Use active packs explicitly:
152
+
153
+ ```bash
154
+ apprentice run "..." --use-active-experience-packs
155
+ apprentice run "..." --no-experience-packs
156
+ ```
157
+
158
+ ## Contribution Bundles
159
+
160
+ Runs produce Contribution Bundles.
161
+
162
+ Contribute one to the public ecosystem:
163
+
164
+ ```bash
165
+ apprentice ecosystem contribute <bundle_path>
166
+ apprentice bundle contribute <bundle_path>
167
+ ```
168
+
169
+ Public ecosystem:
170
+
171
+ https://github.com/Forsy-AI/agent-apprenticeship
172
+
173
+ ## Ecosystem Auto-Share
174
+
175
+ The default auto-share mode is `manual`.
176
+
177
+ ```bash
178
+ apprentice ecosystem configure --repo Forsy-AI/agent-apprenticeship
179
+ apprentice ecosystem configure --auto-share manual
180
+ apprentice ecosystem configure --auto-share ask
181
+ apprentice ecosystem configure --auto-share automatic
182
+ apprentice ecosystem status
183
+ ```
184
+
185
+ Requirements:
186
+
187
+ * GitHub CLI installed
188
+ * `gh` authenticated
189
+ * ecosystem repo configured
190
+
191
+ ## Search, Inspect, Pull
192
+
193
+ Discover and export ecosystem experience:
194
+
195
+ ```bash
196
+ apprentice ecosystem search <query>
197
+ apprentice ecosystem inspect <id>
198
+ apprentice ecosystem pull <id>
199
+ ```
200
+
201
+ ## Public Repo Structure
202
+
203
+ ```text
204
+ seed_dataset/
205
+ ecosystem/
206
+ ecosystem/contributions/
207
+ schemas/
208
+ examples/
209
+ ```
210
+
211
+ ## Development Commands
212
+
213
+ ```bash
214
+ .venv/bin/python -m pytest -q tests
215
+ PYTHONPATH=src .venv/bin/python -m compileall -q src tests scripts examples
216
+ bash scripts/export_public_repo.sh
217
+ ```
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+
4
+ const fs = require("fs");
5
+ const os = require("os");
6
+ const path = require("path");
7
+ const { spawnSync } = require("child_process");
8
+
9
// Root of the installed npm package: the parent of the directory that holds this script.
const packageRoot = path.resolve(__dirname, "..");
// Parsed package.json; its name and version are read when preparing the Python runtime.
const packageJson = JSON.parse(fs.readFileSync(path.join(packageRoot, "package.json"), "utf8"));
11
+
12
/**
 * Resolve the current user's home directory.
 *
 * The HOME environment variable wins, then USERPROFILE; when neither is set
 * (or both are empty) the value of os.homedir() is returned.
 */
function userHome() {
  const { HOME, USERPROFILE } = process.env;
  if (HOME) return HOME;
  if (USERPROFILE) return USERPROFILE;
  return os.homedir();
}
15
+
16
/**
 * Expand a leading "~" in a path to the user's home directory.
 *
 * Falsy values are returned untouched, a bare "~" becomes the home directory,
 * and "~/..." or "~\..." is joined onto the home directory. Any other value
 * (including "~name/...") is returned unchanged.
 */
function expandHome(value) {
  if (!value) return value;
  if (value === "~") return userHome();
  const hasHomePrefix = value.startsWith("~/") || value.startsWith("~\\");
  return hasHomePrefix ? path.join(userHome(), value.slice(2)) : value;
}
24
+
25
/**
 * Absolute path of the Agent Apprenticeship home directory.
 *
 * Uses AA_HOME when it is set, otherwise ".agent-apprenticeship" under the
 * user's home directory; a leading "~" is expanded before resolving.
 */
function appHome() {
  const location = process.env.AA_HOME || path.join(userHome(), ".agent-apprenticeship");
  const expanded = expandHome(location);
  return path.resolve(expanded);
}
28
+
29
/**
 * Run a command synchronously with captured output.
 *
 * stdin is ignored, stdout/stderr are captured as UTF-8 strings and the
 * current process environment is passed through. Returns the spawnSync result.
 */
function runCandidate(command, args) {
  const options = {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
    env: process.env
  };
  return spawnSync(command, args, options);
}
36
+
37
/**
 * Report whether `command` launches a Python interpreter of version 3.11 or newer.
 *
 * The interpreter is asked to exit 0 when its version is new enough and 1
 * otherwise; a spawn error or any non-zero exit status counts as unusable.
 */
function pythonIsUsable(command) {
  const versionProbe = "import sys; raise SystemExit(0 if sys.version_info >= (3, 11) else 1)";
  const { error, status } = runCandidate(command, ["-c", versionProbe]);
  if (error) return false;
  return status === 0;
}
44
+
45
/**
 * Pick the first usable Python command.
 *
 * AA_PYTHON (when set) is tried first, followed by "python3" and "python".
 * Returns the command string, or null when none of them is usable.
 */
function findPython() {
  const override = process.env.AA_PYTHON;
  const candidates = override
    ? [override, "python3", "python"]
    : ["python3", "python"];
  const usable = candidates.find((candidate) => pythonIsUsable(candidate));
  return usable || null;
}
54
+
55
/**
 * Path of the Python executable inside a virtual environment directory.
 *
 * On win32 this is <venv>/Scripts/python.exe, elsewhere <venv>/bin/python.
 */
function venvPython(venvDir) {
  if (process.platform === "win32") {
    return path.join(venvDir, "Scripts", "python.exe");
  }
  return path.join(venvDir, "bin", "python");
}
60
+
61
/**
 * Run a setup command silently and terminate the process if it fails.
 *
 * Output is captured rather than shown. On a spawn error or non-zero exit the
 * failure (described by `label`), the error message and any captured output
 * are written to stderr, then the process exits with the command's status
 * (or 1 when no non-zero status is available).
 */
function runQuiet(command, args, label) {
  const result = spawnSync(command, args, {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
    env: process.env
  });
  const succeeded = !result.error && result.status === 0;
  if (succeeded) return;

  console.error(`Agent Apprenticeship runtime setup failed while trying to ${label}.`);
  if (result.error) {
    console.error(String(result.error.message || result.error));
  }
  const captured = [result.stdout, result.stderr].filter(Boolean).join("\n").trim();
  if (captured) {
    console.error(captured);
  }
  process.exit(result.status || 1);
}
75
+
76
/**
 * Make sure a virtual environment with this package installed exists and
 * return the path of its Python executable.
 *
 * The environment lives in AA_NPM_VENV, or in <appHome>/npm-venv/<version>.
 * A marker file records the installed package version; when it matches and
 * the interpreter exists the environment is reused. Otherwise the directory
 * is removed and rebuilt with `python -m venv` followed by `pip install`.
 */
function ensureRuntime(python) {
  const venvDir = path.resolve(
    process.env.AA_NPM_VENV || path.join(appHome(), "npm-venv", packageJson.version)
  );
  const py = venvPython(venvDir);
  const markerPath = path.join(venvDir, ".agent-apprenticeship-npm.json");

  let reusable = false;
  try {
    const marker = JSON.parse(fs.readFileSync(markerPath, "utf8"));
    reusable = marker.packageVersion === packageJson.version && fs.existsSync(py);
  } catch (_) {
    // Missing or unreadable marker: rebuild the runtime below.
    reusable = false;
  }
  if (reusable) {
    return py;
  }

  console.error("Installing Agent Apprenticeship runtime...");
  // Start from a clean directory so a half-built environment is never reused.
  fs.rmSync(venvDir, { recursive: true, force: true });
  fs.mkdirSync(path.dirname(venvDir), { recursive: true });
  runQuiet(python, ["-m", "venv", venvDir], "create the Python environment");
  const pipArgs = ["-m", "pip", "install", "--disable-pip-version-check", "--no-input", "--quiet", packageRoot];
  runQuiet(py, pipArgs, "install the Python package");

  // The marker is written last, so it only exists after a successful install.
  const markerContents = {
    packageName: packageJson.name,
    packageVersion: packageJson.version,
    installedAt: new Date().toISOString()
  };
  fs.writeFileSync(markerPath, JSON.stringify(markerContents, null, 2) + "\n");
  console.error("Done.");
  return py;
}
102
+
103
/**
 * Run the Python CLI module with inherited stdio and exit with its status.
 *
 * When `usePackagedSource` is truthy the package's src/ directory is put at
 * the front of PYTHONPATH. This function never returns: it exits with 1 when
 * the interpreter cannot be started or no exit status is available, and with
 * the child's status otherwise.
 */
function runCli(python, args, usePackagedSource) {
  const env = { ...process.env };
  if (usePackagedSource) {
    const srcPath = path.join(packageRoot, "src");
    const inherited = env.PYTHONPATH;
    env.PYTHONPATH = inherited ? `${srcPath}${path.delimiter}${inherited}` : srcPath;
  }

  const cliArgs = ["-m", "agent_apprenticeship_trace.cli", ...args];
  const result = spawnSync(python, cliArgs, { stdio: "inherit", env });
  if (result.error) {
    console.error(`Failed to start Agent Apprenticeship: ${result.error.message || result.error}`);
    process.exit(1);
  }

  const exitCode = result.status;
  process.exit(exitCode === null ? 1 : exitCode);
}
119
+
120
// Entry point: locate a Python >= 3.11, then hand the CLI arguments to it.
const python = findPython();
if (!python) {
  console.error("Agent Apprenticeship requires Python 3.11 or newer.");
  console.error("Install python3 or set AA_PYTHON=/path/to/python.");
  process.exit(1);
}

const cliArguments = process.argv.slice(2);

// AA_NPM_USE_SYSTEM_PYTHON=1 runs the packaged sources with the discovered
// interpreter directly. runCli() exits the process, so the managed-runtime
// call below is only reached when this branch is skipped.
if (process.env.AA_NPM_USE_SYSTEM_PYTHON === "1") {
  runCli(python, cliArguments, true);
}

runCli(ensureRuntime(python), cliArguments, false);
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "agent-apprenticeship",
3
+ "version": "0.1.0",
4
+ "description": "The living ecosystem for AI agents learning from real-world work through iterative loops and training-signal exchange.",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/Forsy-AI/agent-apprenticeship.git"
9
+ },
10
+ "bin": {
11
+ "agent-apprenticeship": "bin/agent-apprenticeship.js",
12
+ "apprentice": "bin/agent-apprenticeship.js"
13
+ },
14
+ "files": [
15
+ "README.md",
16
+ "LICENSE",
17
+ "pyproject.toml",
18
+ "src/agent_apprenticeship_trace",
19
+ "bin"
20
+ ],
21
+ "scripts": {
22
+ "test": ".venv/bin/python -m pytest -q tests",
23
+ "pack": "npm pack --dry-run",
24
+ "prepack": "find src bin -name __pycache__ -type d -prune -exec rm -rf {} + && find src bin -name '*.pyc' -delete",
25
+ "smoke": "PATH=./.venv/bin:$PATH AA_NPM_USE_SYSTEM_PYTHON=1 node ./bin/agent-apprenticeship.js --help"
26
+ },
27
+ "engines": {
28
+ "node": ">=18"
29
+ }
30
+ }
package/pyproject.toml ADDED
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "agent-apprenticeship"
7
+ version = "0.1.0"
8
+ description = "Open framework for turning real agent work into transferable agent experience"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ dependencies = ["pydantic>=2", "typer>=0.12", "openai>=1.0"]
12
+
13
+ [project.optional-dependencies]
14
+ llm = ["openai>=1.0"]
15
+ test = ["pytest>=8"]
16
+
17
+ [project.scripts]
18
+ apprentice = "agent_apprenticeship_trace.cli:main"
19
+ agent-apprenticeship = "agent_apprenticeship_trace.cli:main"
20
+ aa-trace = "agent_apprenticeship_trace.cli:main"
21
+
22
+ [tool.setuptools.packages.find]
23
+ where = ["src"]
@@ -0,0 +1,2 @@
1
+ """Agent Apprenticeship framework."""
2
+ __version__ = "0.1.0"
@@ -0,0 +1,240 @@
1
+ from __future__ import annotations
2
+ import json
3
+ from pathlib import Path
4
+ from typing import Any
5
+ from pydantic import BaseModel, Field
6
+ from .schemas import ActualOutputs
7
+ from .io import write_json
8
+
9
# Field names declared on the ActualOutputs schema. normalize_actual_outputs
# treats raw keys outside this set as non-canonical "original fields".
CANONICAL_ACTUAL_FIELDS = set(ActualOutputs.model_fields.keys())
10
+
11
# Identifies one attempt and tells the normalizer where its files live.
class ActualOutputsNormalizationContext(BaseModel):
    task_id: str
    attempt_id: str
    # Used as the path segment in 'attempts/<attempt_kind>/...' references.
    attempt_kind: str
    # Directory under which 'attempts/<attempt_kind>/artifacts' is looked up.
    package_root: Path
    # Expected deliverables; only their basenames are compared with files on disk.
    required_artifacts: list[str] = Field(default_factory=list)
17
+
18
# Record of what happened while normalizing one attempt's actual outputs.
class ActualOutputsNormalizationReport(BaseModel):
    task_id: str
    attempt_id: str
    attempt_kind: str
    # File names of the copies written next to actual_outputs.json (None when not written).
    raw_outputs_ref: str | None = None
    invalid_outputs_ref: str | None = None
    normalized_outputs_ref: str | None = None
    canonical_outputs_ref: str | None = None
    # Outcome flags set by the normalization / repair functions in this module.
    actual_outputs_schema_valid: bool = False
    actual_outputs_normalized: bool = False
    actual_outputs_fallback: bool = False
    # Counters; within this module each is only ever set to 0 or 1, except the
    # inferred-artifact count, which holds the number of references found.
    actual_outputs_raw_count: int = 0
    actual_outputs_normalized_count: int = 0
    actual_outputs_schema_valid_count: int = 0
    actual_outputs_fallback_count: int = 0
    actual_outputs_inferred_artifact_count: int = 0
    actual_outputs_discarded_field_count: int = 0
    # Human-readable parse/validation failures collected along the way.
    validation_errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    metadata_json: dict[str, Any] = Field(default_factory=dict)
38
+
39
# Return value of the normalization / repair functions in this module.
class ActualOutputsNormalizationResult(BaseModel):
    # Normalized payload, or None when nothing usable could be produced.
    actual_outputs: dict[str, Any] | None = None
    report: ActualOutputsNormalizationReport
    # True when the payload is missing or did not validate against the schema.
    fallback_required: bool = False
    # Message describing why the source file could not be read as a JSON object.
    parse_error: str | None = None
44
+
45
+
46
+ def _as_list(v: Any) -> list[Any]:
47
+ if v is None:
48
+ return []
49
+ if isinstance(v, list):
50
+ return v
51
+ return [v]
52
+
53
+
54
+ def _compact(v: Any, max_len: int=2000) -> str:
55
+ if v is None:
56
+ return ''
57
+ if isinstance(v, str):
58
+ return v[:max_len]
59
+ try:
60
+ return json.dumps(v, sort_keys=True)[:max_len]
61
+ except Exception:
62
+ return str(v)[:max_len]
63
+
64
+
65
+ def _validation_errors(exc: Exception) -> list[str]:
66
+ if hasattr(exc, 'errors'):
67
+ try:
68
+ return [_compact(e, 2000) for e in exc.errors()]
69
+ except Exception:
70
+ pass
71
+ return [str(exc)]
72
+
73
+
74
+ def _norm_ref(ref: str, attempt_kind: str) -> str:
75
+ ref=str(ref)
76
+ if ref.startswith(f'attempts/{attempt_kind}/'):
77
+ return ref
78
+ if ref.startswith('artifacts/'):
79
+ return f'attempts/{attempt_kind}/{ref}'
80
+ if '/' not in ref:
81
+ return f'attempts/{attempt_kind}/artifacts/{ref}'
82
+ return ref
83
+
84
+
85
+ def _artifact_paths_from_raw(raw: dict[str, Any]) -> dict[str, Any]:
86
+ found={}
87
+ for k,v in raw.items():
88
+ if isinstance(k, str) and (k.startswith('artifacts/') or '/artifacts/' in k):
89
+ found[k]=v
90
+ return found
91
+
92
+
93
+ def _scan_existing_artifacts(ctx: ActualOutputsNormalizationContext) -> list[str]:
94
+ artifact_dir=ctx.package_root/'attempts'/ctx.attempt_kind/'artifacts'
95
+ refs=[]
96
+ if artifact_dir.exists():
97
+ required_names={Path(x).name for x in ctx.required_artifacts if x}
98
+ files=[p for p in artifact_dir.iterdir() if p.is_file()]
99
+ if required_names:
100
+ files=[p for p in files if p.name in required_names] or files
101
+ refs=[f'attempts/{ctx.attempt_kind}/artifacts/{p.name}' for p in files]
102
+ return refs
103
+
104
+
105
def _extract_refs(raw: dict[str, Any], ctx: ActualOutputsNormalizationContext) -> tuple[list[str], dict[str, Any], list[str]]:
    """Collect every artifact reference that can be found for an attempt.

    References come from, in order: top-level keys of *raw* that look like
    artifact paths, the well-known list/dict fields of *raw*, and files already
    present in the attempt's artifacts directory. Each is normalized with
    ``_norm_ref``; empty values and duplicates are dropped, keeping first
    occurrence order.

    Returns:
        ``(references, path_like_raw_fields, warnings)``; the warnings list is
        currently always empty.
    """
    kind = ctx.attempt_kind
    original_path_fields = _artifact_paths_from_raw(raw)
    collected = [_norm_ref(field, kind) for field in original_path_fields]

    for key in ('deliverable_refs', 'artifact_refs', 'files_created', 'outputs', 'artifacts', 'output_files'):
        value = raw.get(key)
        if isinstance(value, dict):
            # For mappings only the keys are considered, and only when they
            # look like a path or a file name with an extension.
            collected.extend(
                _norm_ref(name, kind)
                for name in value
                if isinstance(name, str)
                and (name.startswith('artifacts/') or '/artifacts/' in name or '.' in Path(name).name)
            )
        else:
            collected.extend(_norm_ref(item, kind) for item in _as_list(value) if isinstance(item, str))

    collected.extend(_scan_existing_artifacts(ctx))

    # dict.fromkeys keeps the first occurrence of each reference, in order.
    unique = list(dict.fromkeys(ref for ref in collected if ref))
    warnings: list[str] = []
    return unique, original_path_fields, warnings
128
+
129
+
130
def _string_list_field(raw: dict[str, Any], key: str) -> list[str]:
    """Return ``raw[key]`` with every item stringified, or ``[]`` when it is not a list."""
    value = raw.get(key)
    return [str(item) for item in value] if isinstance(value, list) else []


def normalize_actual_outputs(raw: dict[str, Any] | None, ctx: ActualOutputsNormalizationContext) -> ActualOutputsNormalizationResult:
    """Build a canonical actual-outputs payload from a raw (possibly malformed) dict.

    Missing fields are filled from *ctx* and from artifact references found in
    *raw* or on disk. The original payload and any non-canonical fields are
    kept under ``metadata_json``. The candidate is then validated against
    ``ActualOutputs``.

    Args:
        raw: Raw payload; ``None`` is treated as an empty dict. The input
            mapping itself is not modified.
        ctx: Attempt identity and file locations.

    Returns:
        A result whose ``actual_outputs`` is the validated dump when validation
        succeeds, or the unvalidated candidate otherwise (in which case
        ``fallback_required`` is True and the errors are in the report).
    """
    raw = dict(raw or {})
    report = ActualOutputsNormalizationReport(task_id=ctx.task_id, attempt_id=ctx.attempt_id, attempt_kind=ctx.attempt_kind, actual_outputs_raw_count=1)
    refs, original_path_fields, warnings = _extract_refs(raw, ctx)
    original_fields = {k: v for k, v in raw.items() if k not in CANONICAL_ACTUAL_FIELDS}

    raw_status_value = raw.get('status')
    failure_status = str(raw_status_value or '').lower() in {'failed', 'error', 'timeout'}
    # The isinstance guard keeps an unhashable status (e.g. a dict or list)
    # from raising TypeError in the set-membership test.
    if isinstance(raw_status_value, str) and raw_status_value in {'success', 'partial', 'failed', 'timeout', 'error'}:
        status = raw_status_value
    elif refs and not failure_status:
        # No usable status was reported, but artifacts exist and nothing signalled failure.
        status = 'success'
    else:
        status = 'failed'

    summary = (
        raw.get('output_summary')
        or raw.get('summary')
        or raw.get('final_summary')
        or ('Normalized actual outputs from artifact files.' if refs else 'No canonical actual outputs were produced.')
    )

    raw_metadata = raw.get('metadata_json')
    md = dict(raw_metadata) if isinstance(raw_metadata, dict) else {}
    if original_fields:
        md['original_fields'] = original_fields
    if original_path_fields:
        md['original_artifact_path_fields'] = original_path_fields
    md['raw_actual_outputs'] = raw
    md['actual_outputs_discarded_field_count'] = 0
    md['actual_outputs_normalized'] = True
    md['expected_deliverable_items'] = [Path(str(x)).name for x in ctx.required_artifacts if x]
    md['produced_deliverable_items'] = [Path(str(x)).name for x in refs]

    attempt_root = f'attempts/{ctx.attempt_kind}'
    is_failure = status in {'failed', 'timeout', 'error'}
    actual = {
        'task_id': str(raw.get('task_id') or ctx.task_id),
        'attempt_id': str(raw.get('attempt_id') or ctx.attempt_id),
        'attempt_kind': str(raw.get('attempt_kind') or ctx.attempt_kind),
        'status': status,
        'output_summary': str(summary),
        'primary_output_ref': raw.get('primary_output_ref') or (refs[0] if refs else None),
        # Reference lists fall back to the discovered refs when absent or empty.
        'deliverable_refs': _string_list_field(raw, 'deliverable_refs') or refs,
        'final_message_ref': raw.get('final_message_ref') or f'{attempt_root}/final_message.txt',
        'artifact_refs': _string_list_field(raw, 'artifact_refs') or refs,
        'files_created': _string_list_field(raw, 'files_created') or refs,
        'files_modified': _string_list_field(raw, 'files_modified'),
        'files_deleted': _string_list_field(raw, 'files_deleted'),
        'stdout_ref': raw.get('stdout_ref') or f'{attempt_root}/stdout.txt',
        'stderr_ref': raw.get('stderr_ref') or f'{attempt_root}/stderr.txt',
        'raw_log_refs': _string_list_field(raw, 'raw_log_refs') or [
            f'{attempt_root}/stdout.txt',
            f'{attempt_root}/stderr.txt',
            f'{attempt_root}/final_message.txt',
        ],
        # Error details are only carried over for failure-like statuses.
        'error_type': raw.get('error_type') if is_failure else None,
        'error_message': raw.get('error_message') if is_failure else None,
        'metadata_json': md,
    }

    try:
        obj = ActualOutputs.model_validate(actual)
        normalized = obj.model_dump(mode='json')
        report.actual_outputs_schema_valid = True
        report.actual_outputs_schema_valid_count = 1
    except Exception as exc:
        # Keep the unvalidated candidate so callers can still inspect it.
        normalized = actual
        report.validation_errors.extend(_validation_errors(exc))
    report.actual_outputs_normalized = True
    report.actual_outputs_normalized_count = 1
    report.actual_outputs_inferred_artifact_count = len(refs)
    report.actual_outputs_discarded_field_count = 0
    report.warnings.extend(warnings)
    return ActualOutputsNormalizationResult(actual_outputs=normalized, report=report, fallback_required=not report.actual_outputs_schema_valid)
187
+
188
+
189
def repair_actual_outputs_file(path: Path, ctx: ActualOutputsNormalizationContext) -> ActualOutputsNormalizationResult:
    """Load an attempt's actual-outputs file and repair it when it is invalid.

    Behaviour by case:

    * File missing: if artifact files exist on disk, normalize from them;
      otherwise return a fallback result with ``parse_error`` set.
    * File present: its bytes are first copied to ``actual_outputs.raw.json``.
      Content that is not a JSON object yields a fallback result. A payload
      that validates against ``ActualOutputs`` is returned as-is. Otherwise the
      bytes are also copied to ``actual_outputs.invalid.json`` and the payload
      goes through ``normalize_actual_outputs``.

    Args:
        path: Location of the actual-outputs JSON file.
        ctx: Attempt identity and file locations.

    Returns:
        The normalization result, including a report of what was done.
    """
    report = ActualOutputsNormalizationReport(task_id=ctx.task_id, attempt_id=ctx.attempt_id, attempt_kind=ctx.attempt_kind, raw_outputs_ref='actual_outputs.raw.json')
    if not path.exists():
        existing = _scan_existing_artifacts(ctx)
        if existing:
            result = normalize_actual_outputs({}, ctx)
            result.report.raw_outputs_ref = None
            return result
        report.actual_outputs_fallback = True
        report.actual_outputs_fallback_count = 1
        report.validation_errors.append('actual_outputs.json missing and no artifact evidence was available')
        return ActualOutputsNormalizationResult(actual_outputs=None, report=report, fallback_required=True, parse_error='missing actual_outputs.json')

    # Work on bytes: the copies stay byte-for-byte faithful and decoding does
    # not depend on the platform's locale encoding.
    data = path.read_bytes()
    path.with_name('actual_outputs.raw.json').write_bytes(data)
    try:
        # json.loads accepts bytes and detects UTF-8/16/32 itself; a decoding
        # failure is a ValueError and therefore takes the fallback path below.
        raw = json.loads(data)
        if not isinstance(raw, dict):
            raise ValueError('actual_outputs JSON was not an object')
    except Exception as exc:
        report.actual_outputs_fallback = True
        report.actual_outputs_fallback_count = 1
        report.validation_errors.append(str(exc))
        return ActualOutputsNormalizationResult(actual_outputs=None, report=report, fallback_required=True, parse_error=str(exc))

    try:
        valid = ActualOutputs.model_validate(raw).model_dump(mode='json')
        result = ActualOutputsNormalizationResult(actual_outputs=valid, report=report)
        result.report.actual_outputs_raw_count = 1
        result.report.actual_outputs_schema_valid = True
        result.report.actual_outputs_schema_valid_count = 1
        result.report.actual_outputs_normalized = False
        result.report.actual_outputs_normalized_count = 0
        result.report.actual_outputs_inferred_artifact_count = len(valid.get('deliverable_refs') or [])
        return result
    except Exception as exc:
        # Keep the rejected payload for inspection, then rebuild a canonical one.
        path.with_name('actual_outputs.invalid.json').write_bytes(data)
        result = normalize_actual_outputs(raw, ctx)
        result.report.raw_outputs_ref = 'actual_outputs.raw.json'
        result.report.invalid_outputs_ref = 'actual_outputs.invalid.json'
        result.report.normalized_outputs_ref = 'actual_outputs.normalized.json'
        result.report.canonical_outputs_ref = 'actual_outputs.json'
        result.report.validation_errors.extend(_validation_errors(exc))
        return result
232
+
233
+
234
def write_actual_outputs_normalization(attempt_dir: Path, result: ActualOutputsNormalizationResult) -> None:
    """Persist a normalization result into *attempt_dir*.

    When the result carries a payload it is written to both
    ``actual_outputs.normalized.json`` and ``actual_outputs.json``, and the
    report's corresponding references are filled in if still unset. The report
    is always written to ``actual_outputs_normalization_report.json``.
    """
    outputs = result.actual_outputs
    report = result.report
    if outputs is not None:
        for filename in ('actual_outputs.normalized.json', 'actual_outputs.json'):
            write_json(attempt_dir / filename, outputs)
        report.normalized_outputs_ref = report.normalized_outputs_ref or 'actual_outputs.normalized.json'
        report.canonical_outputs_ref = report.canonical_outputs_ref or 'actual_outputs.json'
    write_json(attempt_dir / 'actual_outputs_normalization_report.json', report)