jfl 0.9.9 → 0.9.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +85 -20
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/peter.d.ts.map +1 -1
- package/dist/commands/peter.js +83 -35
- package/dist/commands/peter.js.map +1 -1
- package/dist/commands/repair.d.ts.map +1 -1
- package/dist/commands/repair.js +13 -11
- package/dist/commands/repair.js.map +1 -1
- package/dist/commands/session.d.ts.map +1 -1
- package/dist/commands/session.js +3 -37
- package/dist/commands/session.js.map +1 -1
- package/dist/commands/start.js +3 -3
- package/dist/commands/start.js.map +1 -1
- package/dist/lib/agent-config.d.ts +1 -0
- package/dist/lib/agent-config.d.ts.map +1 -1
- package/dist/lib/agent-config.js.map +1 -1
- package/dist/lib/agent-guards.d.ts +67 -0
- package/dist/lib/agent-guards.d.ts.map +1 -0
- package/dist/lib/agent-guards.js +229 -0
- package/dist/lib/agent-guards.js.map +1 -0
- package/dist/lib/agent-session.d.ts.map +1 -1
- package/dist/lib/agent-session.js +249 -25
- package/dist/lib/agent-session.js.map +1 -1
- package/dist/lib/gtm-generator.js +3 -1
- package/dist/lib/gtm-generator.js.map +1 -1
- package/dist/lib/memory-search.d.ts.map +1 -1
- package/dist/lib/memory-search.js +0 -8
- package/dist/lib/memory-search.js.map +1 -1
- package/dist/utils/jfl-paths.d.ts +9 -0
- package/dist/utils/jfl-paths.d.ts.map +1 -1
- package/dist/utils/jfl-paths.js +13 -0
- package/dist/utils/jfl-paths.js.map +1 -1
- package/package.json +1 -1
- package/packages/pi/dist/index.d.ts.map +1 -1
- package/packages/pi/dist/index.js +19 -1
- package/packages/pi/dist/index.js.map +1 -1
- package/packages/pi/dist/session.d.ts +5 -1
- package/packages/pi/dist/session.d.ts.map +1 -1
- package/packages/pi/dist/session.js +247 -116
- package/packages/pi/dist/session.js.map +1 -1
- package/packages/pi/extensions/index.ts +24 -1
- package/packages/pi/extensions/session.ts +256 -96
- package/packages/pi/skills/end/SKILL.md +8 -0
- package/scripts/session/session-cleanup.sh +19 -6
- package/template/.github/workflows/jfl-eval.yml +8 -1
- package/template/scripts/session/session-cleanup.sh +23 -8
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Guards
|
|
3
|
+
*
|
|
4
|
+
* Pre-flight checks that run before an agent session starts.
|
|
5
|
+
* Implements the TOCTOU guard pattern used elsewhere (hub-health.ts, planning-loop.ts)
|
|
6
|
+
* but scoped to the agent runner lifecycle.
|
|
7
|
+
*
|
|
8
|
+
* Guards are composable: each returns a GuardResult, and runGuards() aggregates them.
|
|
9
|
+
* Non-critical guards warn but don't block; critical guards abort the session.
|
|
10
|
+
*
|
|
11
|
+
* @purpose Pre-flight guard checks for scoped agent runner sessions
|
|
12
|
+
* @invariant HubRequiredForScheduling (SystemSpec.tla)
|
|
13
|
+
*/
|
|
14
|
+
import { existsSync, readFileSync, readdirSync } from "fs";
|
|
15
|
+
import { join } from "path";
|
|
16
|
+
import { spawnSync } from "child_process";
|
|
17
|
+
// ============================================================================
|
|
18
|
+
// Individual Guards
|
|
19
|
+
// ============================================================================
|
|
20
|
+
/**
 * Guard: Hub should be reachable before starting an agent session.
 * Critical: false — agents can run without hub, but results won't be tracked.
 *
 * Re-implements the TOCTOU pattern from hub-health.ts for the agent runner path.
 *
 * @param {string} projectRoot - Project root handed to checkHubHealth().
 * @returns {Promise<{name: string, passed: boolean, critical: boolean, reason?: string}>}
 *   Resolves with a non-critical result; failures inside the check are
 *   reported as a failed guard rather than a rejection.
 */
export async function guardHub(projectRoot) {
    try {
        // Imported inside the try so that a failure to load hub-health.js
        // is reported as a warning instead of an exception.
        const { checkHubHealth } = await import("./hub-health.js");
        const status = await checkHubHealth(projectRoot);
        if (status.available) {
            return {
                name: "hub",
                passed: true,
                critical: false,
            };
        }
        // The status may carry no error detail; don't print "(undefined)".
        const detail = status.error ? ` (${status.error})` : "";
        return {
            name: "hub",
            passed: false,
            critical: false,
            reason: `Hub unavailable${detail}. Agent will run but results won't be tracked.`,
        };
    }
    catch {
        // Either the module could not be loaded or the check itself threw.
        return {
            name: "hub",
            passed: false,
            critical: false,
            reason: "Hub health check unavailable. Agent will run but results won't be tracked.",
        };
    }
}
|
|
53
|
+
/**
 * Guard: No concurrent session for the same agent.
 * Critical: true — running two sessions on the same agent causes git conflicts.
 *
 * Scans <projectRoot>/.jfl/sessions for directories named "<agentName>-*" and
 * blocks when one of them has a state.json with status "active" whose
 * worktreePath still exists on disk. Active sessions whose worktree is gone
 * are treated as stale and only skipped (the state file is not modified).
 *
 * @param {string} projectRoot - Project root containing the .jfl directory.
 * @param {string} agentName - Agent whose sessions are inspected.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 */
export function guardNoConcurrentSession(projectRoot, agentName) {
    const clear = () => ({ name: "concurrent_session", passed: true, critical: true });
    const sessionsDir = join(projectRoot, ".jfl", "sessions");
    if (!existsSync(sessionsDir)) {
        return clear();
    }
    let dirNames;
    try {
        dirNames = readdirSync(sessionsDir);
    }
    catch {
        // Directory could not be listed — do not block
        return clear();
    }
    const prefix = `${agentName}-`;
    // Only sessions belonging to this specific agent are considered
    for (const dirName of dirNames.filter((candidate) => candidate.startsWith(prefix))) {
        const sessionDir = join(sessionsDir, dirName);
        const stateFile = join(sessionDir, "state.json");
        if (!existsSync(stateFile)) {
            continue;
        }
        try {
            const { status, worktreePath } = JSON.parse(readFileSync(stateFile, "utf-8"));
            // A session only counts as live while its worktree is still on disk
            const live = status === "active" && worktreePath && existsSync(worktreePath);
            if (!live) {
                continue;
            }
            return {
                name: "concurrent_session",
                passed: false,
                critical: true,
                reason: `Agent "${agentName}" has an active session: ${dirName}. Worktree: ${worktreePath}. If this is stale, remove it: rm -rf ${worktreePath} && rm -rf ${sessionDir}`,
            };
        }
        catch {
            // Unreadable or malformed state file — ignore this entry
        }
    }
    return clear();
}
|
|
100
|
+
/**
 * Guard: Eval script and data files must exist.
 * Critical: true — agent can't measure improvement without eval.
 *
 * @param {string} projectRoot - Directory that config.eval paths are resolved against.
 * @param {{eval?: {script?: string, data?: string}}} config - Agent config; only
 *   `eval.script` and `eval.data` are read here.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 *   Fails (critical) when either path is not configured or does not exist;
 *   all problems are joined with "; " in `reason`.
 */
export function guardEvalReady(projectRoot, config) {
    // A missing eval section is reported as a blocker instead of letting
    // path.join() throw a TypeError on an undefined segment.
    const evalConfig = config?.eval ?? {};
    const required = [
        { label: "Eval script", relPath: evalConfig.script },
        { label: "Eval data", relPath: evalConfig.data },
    ];
    const errors = [];
    for (const { label, relPath } of required) {
        if (typeof relPath !== "string" || relPath === "") {
            errors.push(`${label} not configured`);
        }
        else if (!existsSync(join(projectRoot, relPath))) {
            errors.push(`${label} not found: ${relPath}`);
        }
    }
    if (errors.length > 0) {
        return {
            name: "eval_ready",
            passed: false,
            critical: true,
            reason: errors.join("; "),
        };
    }
    return { name: "eval_ready", passed: true, critical: true };
}
|
|
124
|
+
/**
 * Guard: No stale worktree for this agent.
 * Critical: true — stale worktrees cause git worktree add to fail.
 *
 * A worktree is considered stale when git still lists it, its path contains
 * "jfl-agent-<agentName>-", and the directory no longer exists. Stale entries
 * are pruned automatically; the guard fails only if some survive the prune.
 * Worktrees whose directory still exists are never reported here.
 *
 * @param {string} projectRoot - Repository in which git is run.
 * @param {string} agentName - Agent whose worktrees are inspected.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 */
export function guardWorktreeClean(projectRoot, agentName) {
    const marker = `jfl-agent-${agentName}-`;
    const clean = () => ({ name: "worktree_clean", passed: true, critical: true });
    // Paths of this agent's registered worktrees whose directory is missing,
    // or null when git could not list worktrees at all.
    const findMissing = () => {
        const listing = spawnSync("git", ["worktree", "list", "--porcelain"], {
            cwd: projectRoot,
            encoding: "utf-8",
            stdio: "pipe",
        });
        if (listing.status !== 0) {
            return null;
        }
        return (listing.stdout || "")
            .split("\n")
            .filter((line) => line.startsWith("worktree "))
            .map((line) => line.slice("worktree ".length).trim())
            .filter((worktreePath) => worktreePath.includes(marker) && !existsSync(worktreePath));
    };
    const stale = findMissing();
    if (stale === null || stale.length === 0) {
        // Either nothing is stale or worktrees can't be checked — don't block
        return clean();
    }
    // Auto-prune stale worktrees — they're just git bookkeeping for deleted dirs
    spawnSync("git", ["worktree", "prune"], {
        cwd: projectRoot,
        stdio: "pipe",
    });
    // Re-check with the same "directory is missing" test, so a live worktree
    // of this agent is not mislabelled as stale after the prune.
    const remaining = findMissing();
    if (remaining === null || remaining.length === 0) {
        return clean();
    }
    return {
        name: "worktree_clean",
        passed: false,
        critical: true,
        reason: `Stale worktrees found for agent "${agentName}": ${remaining.join(", ")}. ` +
            `Clean up with: git worktree remove <path> --force`,
    };
}
|
|
180
|
+
/**
 * Guard: Git working directory should have no uncommitted changes.
 * Critical: false — a dirty tree only produces a warning.
 *
 * Runs `git status --porcelain` in projectRoot; any non-blank output counts
 * as dirty, and the number of output lines is reported in the reason. When
 * git produces no output (including when it cannot run), the guard passes.
 *
 * @param {string} projectRoot - Directory in which git is run.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 */
export function guardGitClean(projectRoot) {
    const { stdout } = spawnSync("git", ["status", "--porcelain"], {
        cwd: projectRoot,
        encoding: "utf-8",
        stdio: "pipe",
    });
    const pending = (stdout || "").trim();
    if (pending === "") {
        return { name: "git_clean", passed: true, critical: false };
    }
    // One porcelain line per changed entry
    const changeCount = pending.split("\n").length;
    return {
        name: "git_clean",
        passed: false,
        critical: false,
        reason: `Uncommitted changes in working directory (${changeCount} files). These may conflict with agent worktree creation.`,
    };
}
|
|
202
|
+
// ============================================================================
|
|
203
|
+
// Guard Runner
|
|
204
|
+
// ============================================================================
|
|
205
|
+
/**
 * Run all guards for an agent session.
 *
 * The hub guard is awaited first, then the synchronous guards run in a fixed
 * order: concurrent session, eval readiness, worktree cleanliness, git status.
 *
 * @param {string} projectRoot - Project root passed to every guard.
 * @param {string} agentName - Agent name passed to the session and worktree guards.
 * @param {object} config - Agent config passed to the eval guard.
 * @returns {Promise<{proceed: boolean, results: object[], blockers: object[], warnings: object[]}>}
 *   `blockers` are failed critical guards, `warnings` are failed non-critical
 *   guards, and `proceed` is true when there are no blockers.
 */
export async function runGuards(projectRoot, agentName, config) {
    const hubOutcome = await guardHub(projectRoot);
    const results = [
        hubOutcome,
        guardNoConcurrentSession(projectRoot, agentName),
        guardEvalReady(projectRoot, config),
        guardWorktreeClean(projectRoot, agentName),
        guardGitClean(projectRoot),
    ];
    const blockers = [];
    const warnings = [];
    for (const outcome of results) {
        if (outcome.passed) {
            continue;
        }
        (outcome.critical ? blockers : warnings).push(outcome);
    }
    return {
        proceed: blockers.length === 0,
        results,
        blockers,
        warnings,
    };
}
|
|
229
|
+
//# sourceMappingURL=agent-guards.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-guards.js","sourceRoot":"","sources":["../../src/lib/agent-guards.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,IAAI,CAAA;AAC1D,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAA;AAC3B,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAA;AA6BzC,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,WAAmB;IAChD,IAAI,CAAC;QACH,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAA;QAC1D,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAA;QAEhD,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACrB,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,IAAI;gBACZ,QAAQ,EAAE,KAAK;aAChB,CAAA;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,KAAK;YACf,MAAM,EAAE,oBAAoB,MAAM,CAAC,KAAK,iDAAiD;SAC1F,CAAA;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,KAAK;YACf,MAAM,EAAE,4EAA4E;SACrF,CAAA;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CACtC,WAAmB,EACnB,SAAiB;IAEjB,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE,UAAU,CAAC,CAAA;IACzD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACrE,CAAC;IAED,IAAI,OAAiB,CAAA;IACrB,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,CAAA;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACrE,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,8CAA8C;QAC9C,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,SAAS,GAAG,CAAC;YAAE,SAAQ;QAEhD,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,YAAY,CAAC,CAAA;QACxD,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,SAAQ;QAEpC,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAA;YAC1D,IAAI,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;gBAC9B,8DAA8D;gBAC9D,IAAI,KAAK,CAAC,YAAY,IAAI,UAAU,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC;oBACzD,OAAO;wBACL,IAAI,EAAE,oBAAoB;wBAC1B,MAAM,EAAE,KAAK;wBACb,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,UAAU,SAAS,4BAA4B,KAAK,IAAI;4BACxD,aAAa,KAAK,CAAC,YAAY,IAAI;4BACnC,uCAAuC,K
AAK,CAAC,YAAY,cAAc,IAAI,CAAC,WAAW,EAAE,KAAK,CAAC,EAAE;qBAC1G,CAAA;gBACH,CAAC;gBACD,kEAAkE;gBAClE,+BAA+B;YACjC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,8BAA8B;QAChC,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;AACrE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,WAAmB,EACnB,MAAmB;IAEnB,MAAM,MAAM,GAAa,EAAE,CAAA;IAE3B,MAAM,cAAc,GAAG,IAAI,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAC5D,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,0BAA0B,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAA;IAC7D,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACxD,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QAC9B,MAAM,CAAC,IAAI,CAAC,wBAAwB,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAA;IACzD,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;SAC1B,CAAA;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;AAC7D,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,WAAmB,EACnB,SAAiB;IAEjB,qDAAqD;IACrD,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE;QACnE,GAAG,EAAE,WAAW;QAChB,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,MAAM;KACd,CAAC,CAAA;IAEF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,sCAAsC;QACtC,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACjE,CAAC;IAED,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC/C,MAAM,cAAc,GAAa,EAAE,CAAA;IAEnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YACjC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YACzD,gCAAgC;YAChC,IAAI,YAAY,CAAC,QAAQ,CAAC,aAAa,SAAS,GAAG,CAAC,EAAE,CAAC;gBACrD,yCAAyC;gBACzC,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;oBAC9B,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;gBACnC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,kFAAkF;QAClF,SAAS,CAA
C,KAAK,EAAE,CAAC,UAAU,EAAE,OAAO,CAAC,EAAE;YACtC,GAAG,EAAE,WAAW;YAChB,KAAK,EAAE,MAAM;SACd,CAAC,CAAA;QAEF,yBAAyB;QACzB,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE;YACpE,GAAG,EAAE,WAAW;YAChB,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,MAAM;SACd,CAAC,CAAA;QAEF,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CACnC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,aAAa,SAAS,GAAG,CAAC,CAC1E,CAAA;QAED,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;YACrE,OAAO;gBACL,IAAI,EAAE,gBAAgB;gBACtB,MAAM,EAAE,KAAK;gBACb,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,oCAAoC,SAAS,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;oBACvE,mDAAmD;aAC5D,CAAA;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;AACjE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,WAAmB;IAC/C,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,aAAa,CAAC,EAAE;QACzD,GAAG,EAAE,WAAW;QAChB,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,MAAM;KACd,CAAC,CAAA;IAEF,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IAC3C,IAAI,MAAM,EAAE,CAAC;QACX,OAAO;YACL,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,KAAK;YACf,MAAM,EAAE,6CAA6C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,WAAW;gBACjF,kDAAkD;SAC3D,CAAA;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAA;AAC7D,CAAC;AAED,+EAA+E;AAC/E,eAAe;AACf,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,WAAmB,EACnB,SAAiB,EACjB,MAAmB;IAEnB,MAAM,OAAO,GAAkB,EAAE,CAAA;IAEjC,iDAAiD;IACjD,MAAM,CAAC,SAAS,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACpC,QAAQ,CAAC,WAAW,CAAC;KACtB,CAAC,CAAA;IACF,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAEvB,OAAO,CAAC,IAAI,CAAC,wBAAwB,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,CAAA;IAC9D,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAA;IACjD,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,WAAW,EA
AE,SAAS,CAAC,CAAC,CAAA;IACxD,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC,CAAA;IAExC,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC/D,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAEhE,OAAO;QACL,OAAO,EAAE,QAAQ,CAAC,MAAM,KAAK,CAAC;QAC9B,OAAO;QACP,QAAQ;QACR,QAAQ;KACT,CAAA;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-session.d.ts","sourceRoot":"","sources":["../../src/lib/agent-session.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AACpD,OAAO,EAA2D,KAAK,YAAY,EAAmB,MAAM,oBAAoB,CAAA;AAChI,OAAO,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAY,MAAM,sBAAsB,CAAA;AAOvE,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,SAAS,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,MAAM,CAwIR;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,IAAI,CASN;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAUnE;AAMD,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAA;IACV,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,WAAW,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,YAAY,EAAE,MAAM,CAAA;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,YAAY,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAA;CAC1C;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAA;IACb,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,OAAO,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,MAAM,EAAE,QAAQ,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAA
E,UAAU,EAAE,CAAA;IACzB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAmGD,wBAAgB,YAAY,CAC1B,MAAM,EAAE,WAAW,EACnB,WAAW,EAAE,MAAM,EACnB,UAAU,GAAE,MAAe,GAC1B,YAAY,
|
|
1
|
+
{"version":3,"file":"agent-session.d.ts","sourceRoot":"","sources":["../../src/lib/agent-session.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AACpD,OAAO,EAA2D,KAAK,YAAY,EAAmB,MAAM,oBAAoB,CAAA;AAChI,OAAO,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAY,MAAM,sBAAsB,CAAA;AAOvE,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,SAAS,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,MAAM,CAwIR;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,IAAI,CASN;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAUnE;AAMD,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAA;IACV,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,WAAW,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,YAAY,EAAE,MAAM,CAAA;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,YAAY,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAA;CAC1C;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAA;IACb,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,OAAO,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,MAAM,EAAE,QAAQ,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,EAA
E,UAAU,EAAE,CAAA;IACzB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAmGD,wBAAgB,YAAY,CAC1B,MAAM,EAAE,WAAW,EACnB,WAAW,EAAE,MAAM,EACnB,UAAU,GAAE,MAAe,GAC1B,YAAY,CAuFd;AAED,wBAAsB,WAAW,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAkBxE;AAED,wBAAsB,QAAQ,CAC5B,OAAO,EAAE,YAAY,EACrB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EACZ,UAAU,EAAE,MAAM,EAClB,mBAAmB,GAAE,UAAU,EAAO,GACrC,OAAO,CAAC;IAAE,MAAM,EAAE,WAAW,CAAC;IAAC,UAAU,EAAE,UAAU,CAAA;CAAE,CAAC,CA+M1D;AAmND,wBAAsB,UAAU,CAC9B,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,GACxB,OAAO,CAAC,cAAc,CAAC,CAkIzB;AAkED,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAY5F;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,YAAY,GAAG,IAAI,CAQ5D;AAED,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,EAAE,CAoBhE"}
|
|
@@ -285,6 +285,19 @@ export function startSession(config, projectRoot, baseBranch = "main") {
|
|
|
285
285
|
const repoRoot = config.target_repo
|
|
286
286
|
? join(projectRoot, config.target_repo)
|
|
287
287
|
: projectRoot;
|
|
288
|
+
// Clean up stale worktree at this path (from crashed/killed sessions)
|
|
289
|
+
// Without this, git worktree add fails and falls back to main — breaking auto-PR
|
|
290
|
+
if (existsSync(worktreePath)) {
|
|
291
|
+
console.log(` Cleaning stale worktree: ${worktreePath}`);
|
|
292
|
+
gitExec(["worktree", "remove", worktreePath, "--force"], repoRoot);
|
|
293
|
+
}
|
|
294
|
+
gitExec(["worktree", "prune"], repoRoot);
|
|
295
|
+
// Delete stale session branch if it exists (from a previous crashed session with same hash)
|
|
296
|
+
const branchCheck = gitExec(["rev-parse", "--verify", branch], repoRoot);
|
|
297
|
+
if (branchCheck.ok) {
|
|
298
|
+
console.log(` Deleting stale branch: ${branch}`);
|
|
299
|
+
gitExec(["branch", "-D", branch], repoRoot);
|
|
300
|
+
}
|
|
288
301
|
// Fetch latest base branch
|
|
289
302
|
gitExec(["fetch", "origin", baseBranch], repoRoot);
|
|
290
303
|
// Create worktree with new branch from origin/baseBranch (in the target repo, not GTM)
|
|
@@ -403,6 +416,59 @@ export async function runRound(session, round, task, hypothesis, previousTransit
|
|
|
403
416
|
.trim()
|
|
404
417
|
.split("\n")
|
|
405
418
|
.filter(Boolean);
|
|
419
|
+
// ── Guard check: must pass before eval (T1.2) ──
|
|
420
|
+
// e.g. "cd ${AGENT_WORKTREE} && npx tsc --noEmit" catches type errors
|
|
421
|
+
const guardCmd = session.config.eval.guard;
|
|
422
|
+
if (guardCmd) {
|
|
423
|
+
const expandedGuard = guardCmd.replace(/\$\{AGENT_WORKTREE\}/g, session.worktreePath);
|
|
424
|
+
console.log(` Running guard: ${expandedGuard.slice(0, 80)}...`);
|
|
425
|
+
const guardResult = spawnSync("bash", ["-c", expandedGuard], {
|
|
426
|
+
cwd: session.worktreePath,
|
|
427
|
+
encoding: "utf-8",
|
|
428
|
+
stdio: "pipe",
|
|
429
|
+
timeout: 60000, // 60s max for guard
|
|
430
|
+
});
|
|
431
|
+
if (guardResult.status !== 0) {
|
|
432
|
+
console.log(` ✗ Guard FAILED — reverting changes`);
|
|
433
|
+
const guardError = (guardResult.stderr || guardResult.stdout || "").trim().split("\n").slice(-5).join("\n");
|
|
434
|
+
if (guardError)
|
|
435
|
+
console.log(` ${guardError.split("\n").join("\n ")}`);
|
|
436
|
+
// Revert: discard all changes in worktree
|
|
437
|
+
gitExec(["checkout", "--", "."], session.worktreePath);
|
|
438
|
+
gitExec(["clean", "-fd"], session.worktreePath);
|
|
439
|
+
const guardFailResult = {
|
|
440
|
+
round,
|
|
441
|
+
task,
|
|
442
|
+
hypothesis,
|
|
443
|
+
metricBefore: session.baselineMetric,
|
|
444
|
+
metricAfter: session.baselineMetric,
|
|
445
|
+
delta: 0,
|
|
446
|
+
kept: false,
|
|
447
|
+
duration_ms: Date.now() - startTime,
|
|
448
|
+
error: `Guard failed: ${guardError.slice(0, 200)}`,
|
|
449
|
+
};
|
|
450
|
+
const guardFailTransition = {
|
|
451
|
+
agent: session.agentName,
|
|
452
|
+
session_id: session.id,
|
|
453
|
+
state_hash: stateHash,
|
|
454
|
+
state: stateBefore,
|
|
455
|
+
action_diff: diff,
|
|
456
|
+
action: {
|
|
457
|
+
type: "experiment",
|
|
458
|
+
description: task,
|
|
459
|
+
files_affected: changedFiles,
|
|
460
|
+
scope: changedFiles.length <= 1 ? "small" : changedFiles.length <= 3 ? "medium" : "large",
|
|
461
|
+
branch: session.branch,
|
|
462
|
+
},
|
|
463
|
+
hypothesis,
|
|
464
|
+
reward: -0.001, // Small negative signal — guard failures are worse than no-ops
|
|
465
|
+
timestamp: new Date().toISOString(),
|
|
466
|
+
};
|
|
467
|
+
logResult(session, guardFailResult);
|
|
468
|
+
return { result: guardFailResult, transition: guardFailTransition };
|
|
469
|
+
}
|
|
470
|
+
console.log(` ✓ Guard passed`);
|
|
471
|
+
}
|
|
406
472
|
// Commit changes in worktree with provenance signing
|
|
407
473
|
signedAgentCommit(session.worktreePath, `agent(${session.agentName}): round ${round} - ${task.slice(0, 50)}`, {
|
|
408
474
|
agentId: session.agentName,
|
|
@@ -462,34 +528,144 @@ export async function runRound(session, round, task, hypothesis, previousTransit
|
|
|
462
528
|
}
|
|
463
529
|
/**
 * Run one agent edit pass inside the session worktree.
 *
 * Writes AGENT.md and CLAUDE.md into the worktree, then starts the agent
 * runtime with a deliberately short prompt that points at those files and at
 * EXPERIMENTS.md (which is not written by this function).
 *
 * @param {object} session - Session; reads config.time_budget_seconds,
 *   config.metric and worktreePath.
 * @param {string} task - Task text written into AGENT.md.
 * @returns {Promise<*>} Resolves with whatever spawnAgentRuntime passes to its callback.
 */
async function runClaudeCode(session, task) {
    // Time budget is configured in seconds; the runtime takes milliseconds
    const timeoutMs = session.config.time_budget_seconds * 1000;
    // All context lives in files so the prompt itself stays tiny
    writeAgentMd(session, task);
    writeAgentClaudeMd(session);
    const prompt = `Read EXPERIMENTS.md then AGENT.md. Make ONE edit to improve ${session.config.metric}. Stop after editing.`;
    return new Promise((done) => {
        spawnAgentRuntime(prompt, session.worktreePath, timeoutMs, done);
    });
}
|
|
544
|
+
/**
 * Write AGENT.md to the worktree — focused instructions for RL agents.
 * Contains: the task, constraints (scope files, max file changes, time
 * budget), and short do/don't lists.
 *
 * @param {object} session - Session; reads agentName, worktreePath,
 *   config.time_budget_seconds and the optional config.constraints
 *   (scope_files or files_in_scope, max_file_changes).
 * @param {string} task - Task text placed under the "## Task" heading.
 * @returns {void}
 */
function writeAgentMd(session, task) {
    // constraints is optional: fall back to an empty object so neither the
    // files_in_scope fallback nor max_file_changes dereferences undefined.
    const constraints = session.config.constraints ?? {};
    const rawScope = constraints.scope_files || constraints.files_in_scope || [];
    // A single path may be given as a bare value instead of an array
    const scopeFiles = Array.isArray(rawScope) ? rawScope : [String(rawScope)];
    const maxFileChanges = constraints.max_file_changes || 2;
    const agentMd = `# Agent: ${session.agentName}

## Task
${task}

## Constraints
- **Modify ONLY these files:** ${scopeFiles.join(", ")}
- **Do NOT modify:** AGENT.md, EXPERIMENTS.md, eval scripts, node_modules, dist
- **Max file changes:** ${maxFileChanges}
- **Time budget:** ${session.config.time_budget_seconds}s

## How to Succeed
1. Read EXPERIMENTS.md — it shows what failed and the actual source code
2. Make ONE small, targeted change to a source file
3. Stop after making the change — the eval runs automatically

## What NOT to Do
- Do not add new files
- Do not modify test files or eval scripts
- Do not just add comments or documentation
- Do not repeat experiments listed as REJECTED in EXPERIMENTS.md
`;
    writeFileSync(join(session.worktreePath, "AGENT.md"), agentMd);
}
|
|
575
|
+
/**
 * Write a minimal CLAUDE.md to the worktree for RL agents.
 * Karpathy pattern: agent gets focused instructions, no ceremony.
 * Claude CLI reads CLAUDE.md automatically — this replaces the full project CLAUDE.md.
 *
 * @param {object} session - Session; reads agentName, worktreePath and the
 *   optional config.constraints (scope_files or files_in_scope, max_file_changes).
 * @returns {void}
 */
function writeAgentClaudeMd(session) {
    // constraints is optional: fall back to an empty object so neither the
    // files_in_scope fallback nor max_file_changes dereferences undefined.
    const constraints = session.config.constraints ?? {};
    const rawScope = constraints.scope_files || constraints.files_in_scope || [];
    // A single path may be given as a bare value instead of an array
    const scopeFiles = Array.isArray(rawScope) ? rawScope : [String(rawScope)];
    const maxFileChanges = constraints.max_file_changes || 2;
    const claudeMd = `# RL Agent — ${session.agentName}

You are a lightweight code improvement agent. You have ONE job: improve a metric by editing source code.

## Instructions
1. Read EXPERIMENTS.md — it has eval diagnostics, code context, and experiment history
2. Read AGENT.md — it has your specific task and constraints
3. Make ONE small, targeted edit to a source file
4. Stop immediately after making the change

## Rules
- Only modify: ${scopeFiles.join(", ")}
- Do NOT modify: AGENT.md, EXPERIMENTS.md, CLAUDE.md, eval scripts, node_modules, dist
- Do NOT add new files
- Do NOT write documentation or comments as your primary change
- Max file changes: ${maxFileChanges}
- The eval runs automatically after you exit — do not run it yourself

## Available Tools
You have: read, edit, write, bash. That's all you need.
Do NOT try to use any other tools (no memory search, no hub, no subway, no CRM).
`;
    writeFileSync(join(session.worktreePath, "CLAUDE.md"), claudeMd);
}
|
|
606
|
+
/**
 * Spawn agent runtime — uses claude CLI for lightweight execution.
 * Karpathy pattern: agent is just an LLM with file tools, all context in files.
 * ~50MB per process vs ~500MB for full Pi with JFL extension.
 * Use JFL_AGENT_USE_PI=1 to force Pi runtime (for debugging).
 *
 * When Pi is selected, the child gets SIGTERM after `timeout` ms and SIGKILL
 * 5 seconds later if it still has not exited. If Pi emits an `error` event,
 * the run is handed over to `spawnClaudeCli` with the same arguments.
 *
 * @param {string} task - Prompt passed to the agent process.
 * @param {string} cwd - Working directory for the agent process.
 * @param {number} timeout - Milliseconds before the Pi process is terminated.
 * @param {Function} resolve - Called with no arguments when the Pi process exits,
 *   or forwarded to `spawnClaudeCli` when that runtime is used.
 * @returns {void}
 */
function spawnAgentRuntime(task, cwd, timeout, resolve) {
    const usePi = !!process.env.JFL_AGENT_USE_PI; // Opt-in to Pi (heavy), default is claude CLI (light)
    if (!usePi) {
        spawnClaudeCli(task, cwd, timeout, resolve);
        return;
    }
    console.log(" Spawning Pi agent (heavy — use for debugging only)...");
    const child = spawn("pi", [
        "--print", task,
        "--no-session",
        "--no-skills",
        "--no-prompt-templates",
        "--no-themes",
    ], {
        cwd,
        stdio: "inherit",
        env: {
            ...process.env,
            JFL_AGENT_MODE: "1",
            JFL_PP_SPAWNED: "1",
            CLAUDECODE: undefined,
            CLAUDE_CODE: undefined,
        },
    });
    // Node may emit both "error" and "exit" for one child; only the first may complete the run.
    let finished = false;
    let killTimer = null;
    const timeoutId = setTimeout(() => {
        child.kill("SIGTERM");
        killTimer = setTimeout(() => {
            // `child.killed` only records that a signal was sent, so it is already true here.
            // Check whether the process has actually terminated before escalating.
            if (child.exitCode === null && child.signalCode === null) {
                child.kill("SIGKILL");
            }
        }, 5000);
    }, timeout);
    child.on("error", (err) => {
        console.error(` Pi agent error: ${err.message}`);
        clearTimeout(timeoutId);
        if (finished) {
            return;
        }
        finished = true;
        console.log(" Falling back to claude CLI...");
        spawnClaudeCli(task, cwd, timeout, resolve);
    });
    child.on("exit", (code, signal) => {
        if (code !== null) {
            console.log(` Pi agent exit: code=${code}`);
        }
        else if (signal) {
            console.log(` Pi agent killed: signal=${signal}`);
        }
        clearTimeout(timeoutId);
        clearTimeout(killTimer);
        if (finished) {
            // The fallback runtime now owns `resolve`; do not fire it early.
            return;
        }
        finished = true;
        resolve();
    });
}
|
|
661
|
+
/**
|
|
662
|
+
* Spawn claude CLI — lightweight agent runtime (~50MB).
|
|
663
|
+
* Karpathy pattern: LLM reads files, edits code, exits.
|
|
664
|
+
* CLAUDE.md in worktree provides focused instructions.
|
|
665
|
+
* EXPERIMENTS.md has all context (eval diagnostics, code, history).
|
|
666
|
+
*/
|
|
491
667
|
function spawnClaudeCli(task, cwd, timeout, resolve) {
|
|
492
|
-
console.log(" Spawning claude CLI...");
|
|
668
|
+
console.log(" Spawning claude CLI (lightweight, Karpathy-style)...");
|
|
493
669
|
const child = spawn("claude", [
|
|
494
670
|
"--dangerously-skip-permissions",
|
|
495
671
|
"-p", task,
|
|
@@ -596,14 +772,62 @@ export async function endSession(session, transitions) {
|
|
|
596
772
|
}
|
|
597
773
|
}
|
|
598
774
|
catch { }
|
|
599
|
-
// If we improved, push the branch
|
|
600
|
-
//
|
|
601
|
-
// humans (or their agents) review and merge in the morning
|
|
775
|
+
// If we improved, push the branch and create a PR for review
|
|
776
|
+
// T1.4: Auto-create PR from kept improvement
|
|
602
777
|
if (improvedRounds > 0) {
|
|
603
778
|
// Push the branch so it's available for review
|
|
604
779
|
const push = gitExec(["push", "-u", "origin", session.branch], session.worktreePath);
|
|
605
780
|
if (push.ok) {
|
|
606
781
|
summary.branchUrl = `https://github.com/402goose/${session.worktreePath.split('/').pop()}/tree/${session.branch}`;
|
|
782
|
+
// Auto-create PR with improvement summary
|
|
783
|
+
const prTitle = `agent(${session.agentName}): ${session.config.metric} ${totalDelta > 0 ? "+" : ""}${totalDelta.toFixed(4)} (${improvedRounds}/${transitions.length} rounds kept)`;
|
|
784
|
+
const prBody = [
|
|
785
|
+
`## Agent Improvement: ${session.agentName}`,
|
|
786
|
+
"",
|
|
787
|
+
`**Metric:** ${session.config.metric} (${session.config.direction})`,
|
|
788
|
+
`**Baseline:** ${(session.baselineMetric - totalDelta).toFixed(4)}`,
|
|
789
|
+
`**Final:** ${session.baselineMetric.toFixed(4)}`,
|
|
790
|
+
`**Delta:** ${totalDelta > 0 ? "+" : ""}${totalDelta.toFixed(4)}`,
|
|
791
|
+
`**Rounds:** ${improvedRounds} kept / ${transitions.length} total`,
|
|
792
|
+
"",
|
|
793
|
+
"### Kept Changes",
|
|
794
|
+
...transitions.filter(t => t.reward > 0).map(t => `- **+${t.reward.toFixed(4)}**: ${t.action.description.slice(0, 100)} (${t.action.files_affected.join(", ")})`),
|
|
795
|
+
"",
|
|
796
|
+
"### Rejected Changes",
|
|
797
|
+
`${transitions.filter(t => t.reward <= 0).length} experiments reverted (no improvement).`,
|
|
798
|
+
"",
|
|
799
|
+
`---`,
|
|
800
|
+
`Session: \`${session.id}\``,
|
|
801
|
+
`Branch: \`${session.branch}\``,
|
|
802
|
+
`Guard: ${session.config.eval.guard ? "✅ " + session.config.eval.guard.slice(0, 60) : "none"}`,
|
|
803
|
+
].join("\n");
|
|
804
|
+
const prResult = spawnSync("gh", [
|
|
805
|
+
"pr", "create",
|
|
806
|
+
"--title", prTitle,
|
|
807
|
+
"--body", prBody,
|
|
808
|
+
"--base", session.baseBranch,
|
|
809
|
+
"--head", session.branch,
|
|
810
|
+
], {
|
|
811
|
+
cwd: session.worktreePath,
|
|
812
|
+
encoding: "utf-8",
|
|
813
|
+
stdio: "pipe",
|
|
814
|
+
timeout: 30000,
|
|
815
|
+
});
|
|
816
|
+
if (prResult.status === 0) {
|
|
817
|
+
const prUrl = (prResult.stdout || "").trim();
|
|
818
|
+
summary.prUrl = prUrl;
|
|
819
|
+
console.log(` PR created: ${prUrl}`);
|
|
820
|
+
}
|
|
821
|
+
else {
|
|
822
|
+
// PR creation failed — branch is still pushed for manual review
|
|
823
|
+
const err = (prResult.stderr || "").trim();
|
|
824
|
+
if (err.includes("already exists")) {
|
|
825
|
+
console.log(` PR already exists for ${session.branch}`);
|
|
826
|
+
}
|
|
827
|
+
else {
|
|
828
|
+
console.log(` PR creation failed: ${err.slice(0, 100)}`);
|
|
829
|
+
}
|
|
830
|
+
}
|
|
607
831
|
}
|
|
608
832
|
// Emit scope:impact events for each produces pattern
|
|
609
833
|
// This triggers downstream agents that consume these patterns
|