agent-apprenticeship 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +217 -0
- package/bin/agent-apprenticeship.js +131 -0
- package/package.json +30 -0
- package/pyproject.toml +23 -0
- package/src/agent_apprenticeship_trace/__init__.py +2 -0
- package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
- package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
- package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
- package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
- package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
- package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
- package/src/agent_apprenticeship_trace/certification.py +580 -0
- package/src/agent_apprenticeship_trace/cli.py +2979 -0
- package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
- package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
- package/src/agent_apprenticeship_trace/config.py +609 -0
- package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
- package/src/agent_apprenticeship_trace/env.py +46 -0
- package/src/agent_apprenticeship_trace/evaluator.py +64 -0
- package/src/agent_apprenticeship_trace/grader.py +194 -0
- package/src/agent_apprenticeship_trace/integration_status.py +193 -0
- package/src/agent_apprenticeship_trace/io.py +20 -0
- package/src/agent_apprenticeship_trace/learning.py +627 -0
- package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
- package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
- package/src/agent_apprenticeship_trace/loop.py +111 -0
- package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
- package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
- package/src/agent_apprenticeship_trace/progress.py +223 -0
- package/src/agent_apprenticeship_trace/public_run.py +1109 -0
- package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
- package/src/agent_apprenticeship_trace/recipes.py +129 -0
- package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
- package/src/agent_apprenticeship_trace/revision.py +21 -0
- package/src/agent_apprenticeship_trace/role_runners.py +7 -0
- package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
- package/src/agent_apprenticeship_trace/schemas.py +273 -0
- package/src/agent_apprenticeship_trace/session_events.py +99 -0
- package/src/agent_apprenticeship_trace/task_intake.py +112 -0
- package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
- package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
- package/src/agent_apprenticeship_trace/training_signals.py +30 -0
- package/src/agent_apprenticeship_trace/validation.py +210 -0
- package/src/agent_apprenticeship_trace/verifier.py +55 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Agent Apprenticeship
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# Agent Apprenticeship
|
|
2
|
+
|
|
3
|
+
The living ecosystem where AI agents learn from real-world work through iterative loops, reusable experience, and training-signal exchange.
|
|
4
|
+
|
|
5
|
+
As agents move into long-horizon, economically valuable work, Agent Apprenticeship creates the open infrastructure where useful work generates reusable learning signals and challenging tasks improve through automated iterative loops.
|
|
6
|
+
|
|
7
|
+
Agent Apprenticeship is designed for an infinite exchange of work experience between agents: useful work creates training signals, signals improve future work, and future work creates new signals for the ecosystem.
|
|
8
|
+
|
|
9
|
+
Agent Apprenticeship is built for loop iterations across domains, from simple tasks to complex specialized workflows. Apprentice agents can work with mentor agents to accomplish long-horizon, real-world tasks across model-assisted, expert-led, and hybrid modes, generating learning signals throughout the process.
|
|
10
|
+
|
|
11
|
+
The first seed dataset includes:
|
|
12
|
+
|
|
13
|
+
* 500+ curated seed tasks sourced from and grounded in real-world work
|
|
14
|
+
* 495 reusable agent lessons
|
|
15
|
+
* 1000+ full agent execution traces
|
|
16
|
+
* 1000+ agent work episodes / task rollouts
|
|
17
|
+
|
|
18
|
+
The seed dataset spans specialized economically valuable tasks across domains and forms the first layer of the Agent Apprenticeship ecosystem.
|
|
19
|
+
|
|
20
|
+
Agent Apprenticeship is now available for anyone to start using with local agents including Codex, Cursor, Claude Code, OpenClaw, OpenCode, Hermes Agent, and custom agents, alongside different model providers. Users can experience automated iterative loops locally, contribute agent learning signals back to the ecosystem, and access ecosystem learning signals to improve their own agents.
|
|
21
|
+
|
|
22
|
+
Agent Apprenticeship is also about the future of work and the economic value of agents. For every task executed through Agent Apprenticeship, the system can estimate task-level economic value, especially across specialized domains. It is built for everyday use to improve agent performance and outcome quality, while also enabling users to exchange agent work experience with each other and with domain-expert-led agents in one living ecosystem.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npx agent-apprenticeship init
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
npm install -g agent-apprenticeship
|
|
32
|
+
apprentice init
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
The installed command is:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
apprentice
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
The long-form command also remains available:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
agent-apprenticeship
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quickstart
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
apprentice init
|
|
51
|
+
apprentice settings
|
|
52
|
+
apprentice run "Create a short market map for AI procurement tools."
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Runs print the artifacts path and Contribution Bundle path.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
apprentice ecosystem contribute <bundle_path>
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Public ecosystem:
|
|
62
|
+
|
|
63
|
+
https://github.com/Forsy-AI/agent-apprenticeship
|
|
64
|
+
|
|
65
|
+
## Apprentice Agents
|
|
66
|
+
|
|
67
|
+
Selected v0 Apprentice Agents:
|
|
68
|
+
|
|
69
|
+
* Codex
|
|
70
|
+
* Cursor
|
|
71
|
+
* Claude Code
|
|
72
|
+
* OpenClaw
|
|
73
|
+
* OpenCode
|
|
74
|
+
* Hermes Agent
|
|
75
|
+
* Custom
|
|
76
|
+
|
|
77
|
+
Agent Apprenticeship auto-detects installed CLIs. If multiple are detected, choose one during setup.
|
|
78
|
+
|
|
79
|
+
Custom lets you provide a command template:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
apprentice configure agent custom --command-template "my-agent run --workspace {workspace} --prompt-file {prompt_file}"
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Mentor Model Providers
|
|
86
|
+
|
|
87
|
+
Store local keys in:
|
|
88
|
+
|
|
89
|
+
```text
|
|
90
|
+
~/.agent-apprenticeship/.env.local
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Example:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
OPENAI_API_KEY=""
|
|
97
|
+
ANTHROPIC_API_KEY=""
|
|
98
|
+
GEMINI_API_KEY=""
|
|
99
|
+
OPENROUTER_API_KEY=""
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Configure:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
apprentice configure model
|
|
106
|
+
apprentice doctor
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Mentor Modes
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
apprentice run "..." --mentor-mode model-assisted
|
|
113
|
+
apprentice run "..." --mentor-mode expert-led
|
|
114
|
+
apprentice run "..." --mentor-mode hybrid
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
* `model-assisted`: Mentor Model Provider handles the mentor loop.
|
|
118
|
+
* `expert-led`: human expert checkpoints guide the mentor loop.
|
|
119
|
+
* `hybrid`: Mentor Model Provider drafts and human expert checkpoints approve or edit.
|
|
120
|
+
|
|
121
|
+
## Seed Dataset
|
|
122
|
+
|
|
123
|
+
The seed dataset is included under:
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
seed_dataset/
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Explore seed tasks:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
apprentice ecosystem list
|
|
133
|
+
apprentice ecosystem search cloud
|
|
134
|
+
apprentice ecosystem inspect aa-seed-task-501
|
|
135
|
+
apprentice ecosystem pull aa-seed-task-501
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Ecosystem Learning
|
|
139
|
+
|
|
140
|
+
Create Experience Packs from ecosystem experience:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
apprentice learn create aa-seed-task-501
|
|
144
|
+
apprentice learn preview <pack_id>
|
|
145
|
+
apprentice learn replay <pack_id>
|
|
146
|
+
apprentice learn keep <pack_id>
|
|
147
|
+
apprentice run "Create a related incident response checklist." --experience-pack <pack_id>
|
|
148
|
+
apprentice learn revert <pack_id>
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Use active packs explicitly:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
apprentice run "..." --use-active-experience-packs
|
|
155
|
+
apprentice run "..." --no-experience-packs
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Contribution Bundles
|
|
159
|
+
|
|
160
|
+
Runs produce Contribution Bundles.
|
|
161
|
+
|
|
162
|
+
Contribute one to the public ecosystem:
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
apprentice ecosystem contribute <bundle_path>
|
|
166
|
+
apprentice bundle contribute <bundle_path>
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Public ecosystem:
|
|
170
|
+
|
|
171
|
+
https://github.com/Forsy-AI/agent-apprenticeship
|
|
172
|
+
|
|
173
|
+
## Ecosystem Auto-Share
|
|
174
|
+
|
|
175
|
+
Default mode is Manual.
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
apprentice ecosystem configure --repo Forsy-AI/agent-apprenticeship
|
|
179
|
+
apprentice ecosystem configure --auto-share manual
|
|
180
|
+
apprentice ecosystem configure --auto-share ask
|
|
181
|
+
apprentice ecosystem configure --auto-share automatic
|
|
182
|
+
apprentice ecosystem status
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Requirements:
|
|
186
|
+
|
|
187
|
+
* GitHub CLI installed
|
|
188
|
+
* `gh` authenticated
|
|
189
|
+
* ecosystem repo configured
|
|
190
|
+
|
|
191
|
+
## Search, Inspect, Pull
|
|
192
|
+
|
|
193
|
+
Discover and export ecosystem experience:
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
apprentice ecosystem search <query>
|
|
197
|
+
apprentice ecosystem inspect <id>
|
|
198
|
+
apprentice ecosystem pull <id>
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Public Repo Structure
|
|
202
|
+
|
|
203
|
+
```text
|
|
204
|
+
seed_dataset/
|
|
205
|
+
ecosystem/
|
|
206
|
+
ecosystem/contributions/
|
|
207
|
+
schemas/
|
|
208
|
+
examples/
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Development Commands
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
.venv/bin/python -m pytest -q tests
|
|
215
|
+
PYTHONPATH=src .venv/bin/python -m compileall -q src tests scripts examples
|
|
216
|
+
bash scripts/export_public_repo.sh
|
|
217
|
+
```
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
|
|
4
|
+
const fs = require("fs");
|
|
5
|
+
const os = require("os");
|
|
6
|
+
const path = require("path");
|
|
7
|
+
const { spawnSync } = require("child_process");
|
|
8
|
+
|
|
9
|
+
const packageRoot = path.resolve(__dirname, "..");
|
|
10
|
+
const packageJson = JSON.parse(fs.readFileSync(path.join(packageRoot, "package.json"), "utf8"));
|
|
11
|
+
|
|
12
|
+
/**
 * Resolve the current user's home directory.
 *
 * The HOME environment variable wins, then USERPROFILE; os.homedir() is
 * only consulted when both are unset or empty.
 *
 * @returns {string} Home directory path.
 */
function userHome() {
  const { HOME, USERPROFILE } = process.env;
  if (HOME) {
    return HOME;
  }
  if (USERPROFILE) {
    return USERPROFILE;
  }
  return os.homedir();
}
|
|
15
|
+
|
|
16
|
+
/**
 * Expand a leading "~" in a path to the user's home directory.
 *
 * Only a bare "~" or a "~/" / "~\" prefix is expanded; falsy values and
 * every other string (including "~user/...") are returned unchanged.
 *
 * @param {string|undefined} value Path that may start with "~".
 * @returns {string|undefined} The expanded path, or `value` as given.
 */
function expandHome(value) {
  if (!value) {
    return value;
  }
  if (value === "~") {
    return userHome();
  }
  const hasHomePrefix = value.startsWith("~/") || value.startsWith("~\\");
  return hasHomePrefix ? path.join(userHome(), value.slice(2)) : value;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Absolute path of the Agent Apprenticeship state directory.
 *
 * Uses AA_HOME when set (with "~" expanded), otherwise
 * "<user home>/.agent-apprenticeship".
 *
 * @returns {string} Resolved absolute directory path.
 */
function appHome() {
  const configured = process.env.AA_HOME;
  const base = configured || path.join(userHome(), ".agent-apprenticeship");
  return path.resolve(expandHome(base));
}
|
|
28
|
+
|
|
29
|
+
/**
 * Run a command synchronously with captured output.
 *
 * stdin is ignored, stdout/stderr are captured as UTF-8 strings, and the
 * current process environment is passed through.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments for the executable.
 * @returns {object} The spawnSync result (status, stdout, stderr, error...).
 */
function runCandidate(command, args) {
  const options = {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
    env: process.env
  };
  return spawnSync(command, args, options);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Check whether `command` is a runnable Python 3.11+ interpreter.
 *
 * The interpreter is asked to exit 0 when sys.version_info >= (3, 11) and
 * 1 otherwise; a spawn error or any non-zero exit counts as unusable.
 *
 * @param {string} command Interpreter name or path to probe.
 * @returns {boolean} True when the probe ran and exited with status 0.
 */
function pythonIsUsable(command) {
  const versionProbe = "import sys; raise SystemExit(0 if sys.version_info >= (3, 11) else 1)";
  const { error, status } = runCandidate(command, ["-c", versionProbe]);
  return !error && status === 0;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Pick the first usable Python interpreter.
 *
 * Candidates are tried in order: AA_PYTHON (when set), then "python3",
 * then "python". Probing stops at the first interpreter that passes
 * pythonIsUsable().
 *
 * @returns {string|null} The chosen command, or null when none qualifies.
 */
function findPython() {
  const override = process.env.AA_PYTHON;
  const candidates = override
    ? [override, "python3", "python"]
    : ["python3", "python"];
  const usable = candidates.find((candidate) => pythonIsUsable(candidate));
  return usable === undefined ? null : usable;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Path of the Python executable inside a virtual environment.
 *
 * @param {string} venvDir Root directory of the virtual environment.
 * @returns {string} "<venvDir>/Scripts/python.exe" on win32, otherwise
 *   "<venvDir>/bin/python".
 */
function venvPython(venvDir) {
  if (process.platform === "win32") {
    return path.join(venvDir, "Scripts", "python.exe");
  }
  return path.join(venvDir, "bin", "python");
}
|
|
60
|
+
|
|
61
|
+
/**
 * Run a setup command silently; on failure report it and exit the process.
 *
 * Output is captured rather than shown. When the command cannot be spawned
 * or exits non-zero, a failure line mentioning `label`, the spawn error (if
 * any) and the captured stdout/stderr are written to stderr, and the
 * process exits with the command's status (or 1 when that is 0/null).
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments for the executable.
 * @param {string} label Human-readable description of the step.
 * @returns {void}
 */
function runQuiet(command, args, label) {
  const result = spawnSync(command, args, {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
    env: process.env
  });
  const failed = Boolean(result.error) || result.status !== 0;
  if (!failed) {
    return;
  }

  console.error(`Agent Apprenticeship runtime setup failed while trying to ${label}.`);
  if (result.error) {
    console.error(String(result.error.message || result.error));
  }
  const captured = [result.stdout, result.stderr].filter(Boolean).join("\n").trim();
  if (captured) {
    console.error(captured);
  }
  process.exit(result.status || 1);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Make sure a private virtual environment with this package installed
 * exists, and return the path of its Python executable.
 *
 * The environment lives at AA_NPM_VENV when set, otherwise at
 * "<appHome>/npm-venv/<package version>". A marker file inside the venv
 * records the installed package version; when the marker matches the
 * current version and the venv's Python exists, the venv is reused.
 * Otherwise the directory is deleted and rebuilt from scratch, so
 * AA_NPM_VENV must not point at a directory holding anything else.
 *
 * A failing setup step terminates the process (see runQuiet).
 *
 * @param {string} python Interpreter used to create the venv.
 * @returns {string} Path to the Python executable inside the venv.
 */
function ensureRuntime(python) {
  // Expand a leading "~" in AA_NPM_VENV the same way appHome() does for
  // AA_HOME; without this an unexpanded "~/venv" would resolve to
  // "<cwd>/~/venv". expandHome() passes undefined/"" through, so the
  // default location is still used when the variable is unset or empty.
  const venvDir = path.resolve(
    expandHome(process.env.AA_NPM_VENV) || path.join(appHome(), "npm-venv", packageJson.version)
  );
  const py = venvPython(venvDir);
  const markerPath = path.join(venvDir, ".agent-apprenticeship-npm.json");
  try {
    const marker = JSON.parse(fs.readFileSync(markerPath, "utf8"));
    if (marker.packageVersion === packageJson.version && fs.existsSync(py)) {
      return py;
    }
  } catch (_) {
    // Missing or unreadable marker: fall through and rebuild the runtime.
  }

  console.error("Installing Agent Apprenticeship runtime...");
  // Start from a clean directory so a half-finished earlier install cannot leak in.
  fs.rmSync(venvDir, { recursive: true, force: true });
  fs.mkdirSync(path.dirname(venvDir), { recursive: true });
  runQuiet(python, ["-m", "venv", venvDir], "create the Python environment");
  runQuiet(py, ["-m", "pip", "install", "--disable-pip-version-check", "--no-input", "--quiet", packageRoot], "install the Python package");
  // The marker is written last, so an interrupted install is retried next time.
  fs.writeFileSync(markerPath, JSON.stringify({
    packageName: packageJson.name,
    packageVersion: packageJson.version,
    installedAt: new Date().toISOString()
  }, null, 2) + "\n");
  console.error("Done.");
  return py;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Run the Python CLI module with inherited stdio and exit with its status.
 *
 * This function never returns: it always ends in process.exit().
 *
 * @param {string} python Interpreter to launch.
 * @param {string[]} args Command-line arguments forwarded to the CLI.
 * @param {boolean} usePackagedSource When true, the package's "src"
 *   directory is put at the front of PYTHONPATH so the bundled sources are
 *   importable without installation.
 */
function runCli(python, args, usePackagedSource) {
  const env = Object.assign({}, process.env);
  if (usePackagedSource) {
    const srcPath = path.join(packageRoot, "src");
    const inherited = env.PYTHONPATH;
    env.PYTHONPATH = inherited ? [srcPath, inherited].join(path.delimiter) : srcPath;
  }

  const cliArgs = ["-m", "agent_apprenticeship_trace.cli"].concat(args);
  const result = spawnSync(python, cliArgs, { stdio: "inherit", env });
  if (result.error) {
    console.error(`Failed to start Agent Apprenticeship: ${result.error.message || result.error}`);
    process.exit(1);
  }
  // A null status (no exit code reported by spawnSync) is mapped to 1.
  const exitCode = result.status === null ? 1 : result.status;
  process.exit(exitCode);
}
|
|
119
|
+
|
|
120
|
+
// Entry point: locate a Python 3.11+ interpreter, then hand over to the CLI.
const python = findPython();
if (!python) {
  console.error("Agent Apprenticeship requires Python 3.11 or newer.");
  console.error("Install python3 or set AA_PYTHON=/path/to/python.");
  process.exit(1);
}

const cliArgs = process.argv.slice(2);
if (process.env.AA_NPM_USE_SYSTEM_PYTHON === "1") {
  // Run straight from the packaged sources with the discovered interpreter.
  runCli(python, cliArgs, true);
} else {
  // Default: run from the managed virtual environment, building it on demand.
  runCli(ensureRuntime(python), cliArgs, false);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "agent-apprenticeship",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "The living ecosystem for AI agents learning from real-world work through iterative loops and training-signal exchange.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "git+https://github.com/Forsy-AI/agent-apprenticeship.git"
|
|
9
|
+
},
|
|
10
|
+
"bin": {
|
|
11
|
+
"agent-apprenticeship": "bin/agent-apprenticeship.js",
|
|
12
|
+
"apprentice": "bin/agent-apprenticeship.js"
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"README.md",
|
|
16
|
+
"LICENSE",
|
|
17
|
+
"pyproject.toml",
|
|
18
|
+
"src/agent_apprenticeship_trace",
|
|
19
|
+
"bin"
|
|
20
|
+
],
|
|
21
|
+
"scripts": {
|
|
22
|
+
"test": ".venv/bin/python -m pytest -q tests",
|
|
23
|
+
"pack": "npm pack --dry-run",
|
|
24
|
+
"prepack": "find src bin -name __pycache__ -type d -prune -exec rm -rf {} + && find src bin -name '*.pyc' -delete",
|
|
25
|
+
"smoke": "PATH=./.venv/bin:$PATH AA_NPM_USE_SYSTEM_PYTHON=1 node ./bin/agent-apprenticeship.js --help"
|
|
26
|
+
},
|
|
27
|
+
"engines": {
|
|
28
|
+
"node": ">=18"
|
|
29
|
+
}
|
|
30
|
+
}
|
package/pyproject.toml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agent-apprenticeship"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Open framework for turning real agent work into transferable agent experience"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
dependencies = ["pydantic>=2", "typer>=0.12", "openai>=1.0"]
|
|
12
|
+
|
|
13
|
+
[project.optional-dependencies]
|
|
14
|
+
llm = ["openai>=1.0"]
|
|
15
|
+
test = ["pytest>=8"]
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
apprentice = "agent_apprenticeship_trace.cli:main"
|
|
19
|
+
agent-apprenticeship = "agent_apprenticeship_trace.cli:main"
|
|
20
|
+
aa-trace = "agent_apprenticeship_trace.cli:main"
|
|
21
|
+
|
|
22
|
+
[tool.setuptools.packages.find]
|
|
23
|
+
where = ["src"]
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
from .schemas import ActualOutputs
|
|
7
|
+
from .io import write_json
|
|
8
|
+
|
|
9
|
+
# Field names declared on the ActualOutputs schema; raw keys outside this set
# are preserved under metadata_json['original_fields'] during normalization.
CANONICAL_ACTUAL_FIELDS = set(ActualOutputs.model_fields)
|
|
10
|
+
|
|
11
|
+
class ActualOutputsNormalizationContext(BaseModel):
    """Describe the attempt whose actual outputs are being normalized."""

    # Identifiers copied into the report and used as defaults when the raw
    # payload does not carry its own.
    task_id: str
    attempt_id: str
    # Name of the attempt directory: artifacts are looked up under
    # ``<package_root>/attempts/<attempt_kind>/artifacts``.
    attempt_kind: str
    package_root: Path
    # Expected artifact paths; only their base names are used for matching.
    required_artifacts: list[str] = Field(default_factory=list)
|
|
17
|
+
|
|
18
|
+
class ActualOutputsNormalizationReport(BaseModel):
    """Record what happened while normalizing one attempt's actual outputs."""

    # Identity of the attempt the report belongs to.
    task_id: str
    attempt_id: str
    attempt_kind: str

    # File names (as written by this module) of the raw copy, the
    # schema-invalid copy, the normalized payload and the canonical payload.
    raw_outputs_ref: str | None = None
    invalid_outputs_ref: str | None = None
    normalized_outputs_ref: str | None = None
    canonical_outputs_ref: str | None = None

    # Outcome flags: the payload validated against ActualOutputs / was rebuilt
    # by normalize_actual_outputs / could not be produced at all.
    actual_outputs_schema_valid: bool = False
    actual_outputs_normalized: bool = False
    actual_outputs_fallback: bool = False

    # Counters mirroring the flags above (set to 0 or 1 by this module), plus
    # the number of artifact references found and of fields dropped.
    actual_outputs_raw_count: int = 0
    actual_outputs_normalized_count: int = 0
    actual_outputs_schema_valid_count: int = 0
    actual_outputs_fallback_count: int = 0
    actual_outputs_inferred_artifact_count: int = 0
    actual_outputs_discarded_field_count: int = 0

    # Human-readable diagnostics collected along the way.
    validation_errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    metadata_json: dict[str, Any] = Field(default_factory=dict)
|
|
38
|
+
|
|
39
|
+
class ActualOutputsNormalizationResult(BaseModel):
    """Bundle a normalized payload with its normalization report."""

    # Normalized payload as a plain dict; None when nothing could be produced.
    actual_outputs: dict[str, Any] | None = None
    report: ActualOutputsNormalizationReport
    # True when the payload is missing or failed schema validation.
    fallback_required: bool = False
    # Message describing why the raw file was missing or unparseable, if so.
    parse_error: str | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _as_list(v: Any) -> list[Any]:
    """Coerce *v* to a list.

    ``None`` becomes an empty list, an existing list is returned unchanged
    (the same object, not a copy), and any other value is wrapped in a
    one-element list.
    """
    if isinstance(v, list):
        return v
    return [] if v is None else [v]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _compact(v: Any, max_len: int = 2000) -> str:
    """Render *v* as a string of at most *max_len* characters.

    ``None`` yields ``''`` and strings are used as-is. Other values are
    serialized as JSON with sorted keys, falling back to ``str(v)`` when
    they are not JSON-serializable. The result is truncated to *max_len*.
    """
    if v is None:
        return ''
    if isinstance(v, str):
        text = v
    else:
        try:
            text = json.dumps(v, sort_keys=True)
        except Exception:
            # Not JSON-serializable: use the plain string form instead.
            text = str(v)
    return text[:max_len]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _validation_errors(exc: Exception) -> list[str]:
    """Turn an exception into a list of short error strings.

    When *exc* exposes an ``errors()`` method, each entry it returns is
    rendered with ``_compact`` (capped at 2000 characters). If the method is
    absent, or anything goes wrong while reading the entries, the result is
    the single-element list ``[str(exc)]``.
    """
    if not hasattr(exc, 'errors'):
        return [str(exc)]
    try:
        return [_compact(detail, 2000) for detail in exc.errors()]
    except Exception:
        # Best effort: fall back to the exception's own message.
        return [str(exc)]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _norm_ref(ref: str, attempt_kind: str) -> str:
    """Normalize an artifact reference relative to an attempt directory.

    * ``attempts/<attempt_kind>/...`` is returned unchanged.
    * ``artifacts/...`` is prefixed with ``attempts/<attempt_kind>/``.
    * A bare name without ``/`` is placed under
      ``attempts/<attempt_kind>/artifacts/``.
    * Any other path containing ``/`` is returned unchanged.

    *ref* is converted with ``str()`` first, so non-string values are accepted.
    """
    text = str(ref)
    attempt_prefix = f'attempts/{attempt_kind}/'
    if text.startswith(attempt_prefix):
        return text
    if text.startswith('artifacts/'):
        return attempt_prefix + text
    if '/' in text:
        return text
    return f'{attempt_prefix}artifacts/{text}'
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _artifact_paths_from_raw(raw: dict[str, Any]) -> dict[str, Any]:
    """Select the entries of *raw* whose key looks like an artifact path.

    A key qualifies when it is a string that starts with ``artifacts/`` or
    contains ``/artifacts/``. Matching entries are returned in their original
    order with their values untouched.
    """
    return {
        key: value
        for key, value in raw.items()
        if isinstance(key, str)
        and (key.startswith('artifacts/') or '/artifacts/' in key)
    }
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _scan_existing_artifacts(ctx: ActualOutputsNormalizationContext) -> list[str]:
    """List references to the files present in the attempt's artifacts directory.

    Looks at the regular files directly inside
    ``<package_root>/attempts/<attempt_kind>/artifacts`` (subdirectories are
    ignored). When ``ctx.required_artifacts`` names files that exist there,
    only those are returned; if none of the required names are present, all
    files are returned instead.

    Returns:
        ``attempts/<attempt_kind>/artifacts/<name>`` references sorted by file
        name, or an empty list when the artifacts path is not a directory.
    """
    artifact_dir = ctx.package_root / 'attempts' / ctx.attempt_kind / 'artifacts'
    # is_dir() rather than exists(): a stray regular file at this path would
    # otherwise make iterdir() raise NotADirectoryError.
    if not artifact_dir.is_dir():
        return []
    # iterdir() yields entries in arbitrary order; sort so the result (and any
    # "first ref" chosen from it by callers) is deterministic.
    files = sorted(
        (p for p in artifact_dir.iterdir() if p.is_file()),
        key=lambda p: p.name,
    )
    required_names = {Path(x).name for x in ctx.required_artifacts if x}
    if required_names:
        # Prefer the required files, but keep everything when none matched.
        files = [p for p in files if p.name in required_names] or files
    return [f'attempts/{ctx.attempt_kind}/artifacts/{p.name}' for p in files]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _extract_refs(raw: dict[str, Any], ctx: ActualOutputsNormalizationContext) -> tuple[list[str], dict[str, Any], list[str]]:
    """Collect artifact references from a raw payload and from disk.

    References are gathered, in this order, from:

    1. top-level keys of *raw* that look like artifact paths;
    2. the list-like fields ``deliverable_refs``, ``artifact_refs``,
       ``files_created``, ``outputs``, ``artifacts`` and ``output_files``.
       For a dict value the keys that look like paths or file names are
       used; otherwise the string items of the value (a single value counts
       as a one-item list) are used;
    3. files found in the attempt's artifacts directory.

    Every reference is normalized with ``_norm_ref``; empty values and
    duplicates are dropped, keeping the first occurrence.

    Returns:
        ``(refs, original_path_fields, warnings)``, where
        ``original_path_fields`` holds the path-like top-level entries of
        *raw* and ``warnings`` is currently always empty.
    """
    warnings: list[str] = []
    kind = ctx.attempt_kind
    original_path_fields = _artifact_paths_from_raw(raw)
    refs = [_norm_ref(key, kind) for key in original_path_fields]

    list_like_fields = (
        'deliverable_refs',
        'artifact_refs',
        'files_created',
        'outputs',
        'artifacts',
        'output_files',
    )
    for field in list_like_fields:
        value = raw.get(field)
        if isinstance(value, dict):
            refs.extend(
                _norm_ref(name, kind)
                for name in value
                if isinstance(name, str)
                and (name.startswith('artifacts/') or '/artifacts/' in name or '.' in Path(name).name)
            )
            continue
        refs.extend(_norm_ref(item, kind) for item in _as_list(value) if isinstance(item, str))

    refs.extend(_scan_existing_artifacts(ctx))
    # Order-preserving de-duplication that also drops empty references.
    unique = list(dict.fromkeys(ref for ref in refs if ref))
    return unique, original_path_fields, warnings
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def normalize_actual_outputs(raw: dict[str, Any] | None, ctx: ActualOutputsNormalizationContext) -> ActualOutputsNormalizationResult:
    """Build a canonical ``ActualOutputs`` payload from a loosely shaped dict.

    Missing canonical fields are filled from *ctx*, from artifact references
    discovered by ``_extract_refs`` and from conventional file names under
    ``attempts/<attempt_kind>/``. Non-canonical keys of *raw* are preserved
    under ``metadata_json['original_fields']`` and the whole raw payload under
    ``metadata_json['raw_actual_outputs']``.

    Status resolution: a ``status`` that is exactly one of ``success``,
    ``partial``, ``failed``, ``timeout`` or ``error`` is kept. Otherwise the
    status is ``success`` when artifact references were found and the raw
    status does not (case-insensitively) name a failure, and ``failed`` in
    every other case.

    Args:
        raw: Raw payload; ``None`` is treated as an empty dict.
        ctx: Attempt context supplying identifiers, paths and expectations.

    Returns:
        A result whose ``actual_outputs`` is the validated payload when it
        satisfies the ``ActualOutputs`` schema, or the unvalidated candidate
        otherwise (with ``fallback_required=True`` and the validation errors
        recorded in the report).
    """
    raw = dict(raw or {})
    report = ActualOutputsNormalizationReport(
        task_id=ctx.task_id,
        attempt_id=ctx.attempt_id,
        attempt_kind=ctx.attempt_kind,
        actual_outputs_raw_count=1,
    )
    refs, original_path_fields, warnings = _extract_refs(raw, ctx)
    original_fields = {k: v for k, v in raw.items() if k not in CANONICAL_ACTUAL_FIELDS}

    def str_list(key: str) -> list[str]:
        """Return raw[key] as a list of strings, or [] when it is not a list."""
        value = raw.get(key)
        return [str(x) for x in value] if isinstance(value, list) else []

    attempt_prefix = f'attempts/{ctx.attempt_kind}'
    failure_statuses = {'failed', 'timeout', 'error'}
    raw_status_value = raw.get('status')
    failure_status = str(raw_status_value or '').lower() in failure_statuses
    # The isinstance guard matters: malformed payloads may carry a list or
    # dict here, and testing an unhashable value for set membership raises
    # TypeError instead of falling back to an inferred status.
    if isinstance(raw_status_value, str) and raw_status_value in {'success', 'partial'} | failure_statuses:
        status = raw_status_value
    else:
        status = 'success' if refs and not failure_status else 'failed'

    summary = (
        raw.get('output_summary')
        or raw.get('summary')
        or raw.get('final_summary')
        or ('Normalized actual outputs from artifact files.' if refs else 'No canonical actual outputs were produced.')
    )

    raw_metadata = raw.get('metadata_json')
    md = dict(raw_metadata) if isinstance(raw_metadata, dict) else {}
    if original_fields:
        md['original_fields'] = original_fields
    if original_path_fields:
        md['original_artifact_path_fields'] = original_path_fields
    md['raw_actual_outputs'] = raw
    md['actual_outputs_discarded_field_count'] = 0
    md['actual_outputs_normalized'] = True
    md['expected_deliverable_items'] = [Path(str(x)).name for x in ctx.required_artifacts if x]
    md['produced_deliverable_items'] = [Path(str(x)).name for x in refs]

    # Error details are only meaningful for non-successful statuses.
    is_failure = status in failure_statuses
    actual = {
        'task_id': str(raw.get('task_id') or ctx.task_id),
        'attempt_id': str(raw.get('attempt_id') or ctx.attempt_id),
        'attempt_kind': str(raw.get('attempt_kind') or ctx.attempt_kind),
        'status': status,
        'output_summary': str(summary),
        'primary_output_ref': raw.get('primary_output_ref') or (refs[0] if refs else None),
        'deliverable_refs': str_list('deliverable_refs') or refs,
        'final_message_ref': raw.get('final_message_ref') or f'{attempt_prefix}/final_message.txt',
        'artifact_refs': str_list('artifact_refs') or refs,
        'files_created': str_list('files_created') or refs,
        'files_modified': str_list('files_modified'),
        'files_deleted': str_list('files_deleted'),
        'stdout_ref': raw.get('stdout_ref') or f'{attempt_prefix}/stdout.txt',
        'stderr_ref': raw.get('stderr_ref') or f'{attempt_prefix}/stderr.txt',
        'raw_log_refs': str_list('raw_log_refs') or [
            f'{attempt_prefix}/stdout.txt',
            f'{attempt_prefix}/stderr.txt',
            f'{attempt_prefix}/final_message.txt',
        ],
        'error_type': raw.get('error_type') if is_failure else None,
        'error_message': raw.get('error_message') if is_failure else None,
        'metadata_json': md,
    }

    try:
        normalized = ActualOutputs.model_validate(actual).model_dump(mode='json')
    except Exception as exc:
        # Keep the unvalidated candidate so callers can still inspect it.
        normalized = actual
        report.validation_errors.extend(_validation_errors(exc))
    else:
        report.actual_outputs_schema_valid = True
        report.actual_outputs_schema_valid_count = 1

    report.actual_outputs_normalized = True
    report.actual_outputs_normalized_count = 1
    report.actual_outputs_inferred_artifact_count = len(refs)
    report.actual_outputs_discarded_field_count = 0
    report.warnings.extend(warnings)
    return ActualOutputsNormalizationResult(
        actual_outputs=normalized,
        report=report,
        fallback_required=not report.actual_outputs_schema_valid,
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def repair_actual_outputs_file(path: Path, ctx: ActualOutputsNormalizationContext) -> ActualOutputsNormalizationResult:
    """Load an ``actual_outputs.json`` file and repair it when necessary.

    Outcomes, in order:

    * File missing, but artifacts exist on disk: a payload is synthesized
      from the artifacts via ``normalize_actual_outputs``.
    * File missing and no artifacts: a fallback result without payload.
    * File present: a byte-exact copy is saved next to it as
      ``actual_outputs.raw.json``. If the content is not a JSON object, a
      fallback result carrying the parse error is returned.
    * Valid against ``ActualOutputs``: the validated payload is returned
      as-is (not marked as normalized).
    * Invalid against the schema: the content is also saved as
      ``actual_outputs.invalid.json`` and a normalized payload is built from
      it, with the schema errors appended to the report.

    Args:
        path: Location of the ``actual_outputs.json`` file to read.
        ctx: Attempt context supplying identifiers, paths and expectations.

    Returns:
        The normalization result, including its report.
    """
    report = ActualOutputsNormalizationReport(
        task_id=ctx.task_id,
        attempt_id=ctx.attempt_id,
        attempt_kind=ctx.attempt_kind,
        raw_outputs_ref='actual_outputs.raw.json',
    )

    if not path.exists():
        if _scan_existing_artifacts(ctx):
            result = normalize_actual_outputs({}, ctx)
            result.report.raw_outputs_ref = None
            return result
        # No raw copy is written on this path either, so the report must not
        # point at one (mirrors the artifact-evidence branch above).
        report.raw_outputs_ref = None
        report.actual_outputs_fallback = True
        report.actual_outputs_fallback_count = 1
        report.validation_errors.append('actual_outputs.json missing and no artifact evidence was available')
        return ActualOutputsNormalizationResult(
            actual_outputs=None,
            report=report,
            fallback_required=True,
            parse_error='missing actual_outputs.json',
        )

    # Work on bytes: the copies stay byte-exact and decoding does not depend
    # on the platform's locale encoding. json.loads() accepts UTF-8/16/32
    # bytes, and undecodable content surfaces as a parse error below instead
    # of an uncaught UnicodeDecodeError.
    data = path.read_bytes()
    path.with_name('actual_outputs.raw.json').write_bytes(data)

    try:
        raw = json.loads(data)
        if not isinstance(raw, dict):
            raise ValueError('actual_outputs JSON was not an object')
    except Exception as exc:
        report.actual_outputs_fallback = True
        report.actual_outputs_fallback_count = 1
        report.validation_errors.append(str(exc))
        return ActualOutputsNormalizationResult(
            actual_outputs=None,
            report=report,
            fallback_required=True,
            parse_error=str(exc),
        )

    try:
        valid = ActualOutputs.model_validate(raw).model_dump(mode='json')
    except Exception as exc:
        # Schema-invalid: keep the offending content and rebuild a payload.
        path.with_name('actual_outputs.invalid.json').write_bytes(data)
        result = normalize_actual_outputs(raw, ctx)
        result.report.raw_outputs_ref = 'actual_outputs.raw.json'
        result.report.invalid_outputs_ref = 'actual_outputs.invalid.json'
        result.report.normalized_outputs_ref = 'actual_outputs.normalized.json'
        result.report.canonical_outputs_ref = 'actual_outputs.json'
        result.report.validation_errors.extend(_validation_errors(exc))
        return result

    result = ActualOutputsNormalizationResult(actual_outputs=valid, report=report)
    result.report.actual_outputs_raw_count = 1
    result.report.actual_outputs_schema_valid = True
    result.report.actual_outputs_schema_valid_count = 1
    result.report.actual_outputs_normalized = False
    result.report.actual_outputs_normalized_count = 0
    result.report.actual_outputs_inferred_artifact_count = len(valid.get('deliverable_refs') or [])
    return result
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def write_actual_outputs_normalization(attempt_dir: Path, result: ActualOutputsNormalizationResult) -> None:
    """Persist a normalization result into *attempt_dir*.

    When a payload is available it is written to both
    ``actual_outputs.normalized.json`` and ``actual_outputs.json``, and the
    report's corresponding refs are filled in if still unset. The report is
    then written to ``actual_outputs_normalization_report.json``.
    """
    outputs = result.actual_outputs
    report = result.report
    if outputs is not None:
        for filename in ('actual_outputs.normalized.json', 'actual_outputs.json'):
            write_json(attempt_dir / filename, outputs)
        if not report.normalized_outputs_ref:
            report.normalized_outputs_ref = 'actual_outputs.normalized.json'
        if not report.canonical_outputs_ref:
            report.canonical_outputs_ref = 'actual_outputs.json'
    # NOTE(review): the report is assumed to be written unconditionally, i.e.
    # also when there is no payload - confirm against the original layout.
    write_json(attempt_dir / 'actual_outputs_normalization_report.json', report)
|