jfl 0.9.9 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/commands/init.d.ts.map +1 -1
  2. package/dist/commands/init.js +85 -20
  3. package/dist/commands/init.js.map +1 -1
  4. package/dist/commands/peter.d.ts.map +1 -1
  5. package/dist/commands/peter.js +83 -35
  6. package/dist/commands/peter.js.map +1 -1
  7. package/dist/commands/repair.d.ts.map +1 -1
  8. package/dist/commands/repair.js +13 -11
  9. package/dist/commands/repair.js.map +1 -1
  10. package/dist/commands/session.d.ts.map +1 -1
  11. package/dist/commands/session.js +3 -37
  12. package/dist/commands/session.js.map +1 -1
  13. package/dist/commands/start.js +3 -3
  14. package/dist/commands/start.js.map +1 -1
  15. package/dist/lib/agent-config.d.ts +1 -0
  16. package/dist/lib/agent-config.d.ts.map +1 -1
  17. package/dist/lib/agent-config.js.map +1 -1
  18. package/dist/lib/agent-guards.d.ts +67 -0
  19. package/dist/lib/agent-guards.d.ts.map +1 -0
  20. package/dist/lib/agent-guards.js +229 -0
  21. package/dist/lib/agent-guards.js.map +1 -0
  22. package/dist/lib/agent-session.d.ts.map +1 -1
  23. package/dist/lib/agent-session.js +249 -25
  24. package/dist/lib/agent-session.js.map +1 -1
  25. package/dist/lib/gtm-generator.js +3 -1
  26. package/dist/lib/gtm-generator.js.map +1 -1
  27. package/dist/lib/memory-search.d.ts.map +1 -1
  28. package/dist/lib/memory-search.js +0 -8
  29. package/dist/lib/memory-search.js.map +1 -1
  30. package/dist/utils/jfl-paths.d.ts +9 -0
  31. package/dist/utils/jfl-paths.d.ts.map +1 -1
  32. package/dist/utils/jfl-paths.js +13 -0
  33. package/dist/utils/jfl-paths.js.map +1 -1
  34. package/package.json +1 -1
  35. package/packages/pi/dist/index.d.ts.map +1 -1
  36. package/packages/pi/dist/index.js +19 -1
  37. package/packages/pi/dist/index.js.map +1 -1
  38. package/packages/pi/dist/session.d.ts +5 -1
  39. package/packages/pi/dist/session.d.ts.map +1 -1
  40. package/packages/pi/dist/session.js +247 -116
  41. package/packages/pi/dist/session.js.map +1 -1
  42. package/packages/pi/extensions/index.ts +24 -1
  43. package/packages/pi/extensions/session.ts +256 -96
  44. package/packages/pi/skills/end/SKILL.md +8 -0
  45. package/scripts/session/session-cleanup.sh +19 -6
  46. package/template/.github/workflows/jfl-eval.yml +8 -1
  47. package/template/scripts/session/session-cleanup.sh +23 -8
@@ -0,0 +1,229 @@
1
+ /**
2
+ * Agent Guards
3
+ *
4
+ * Pre-flight checks that run before an agent session starts.
5
+ * Implements the TOCTOU guard pattern used elsewhere (hub-health.ts, planning-loop.ts)
6
+ * but scoped to the agent runner lifecycle.
7
+ *
8
+ * Guards are composable: each returns a GuardResult, and runGuards() aggregates them.
9
+ * Non-critical guards warn but don't block; critical guards abort the session.
10
+ *
11
+ * @purpose Pre-flight guard checks for scoped agent runner sessions
12
+ * @invariant HubRequiredForScheduling (SystemSpec.tla)
13
+ */
14
+ import { existsSync, readFileSync, readdirSync } from "fs";
15
+ import { join } from "path";
16
+ import { spawnSync } from "child_process";
17
+ // ============================================================================
18
+ // Individual Guards
19
+ // ============================================================================
/**
 * Guard: Hub should be reachable before starting an agent session.
 * Critical: false — agents can run without hub, but results won't be tracked.
 *
 * Lazily imports ./hub-health.js and awaits checkHubHealth(projectRoot).
 * Never throws: a failed import or a rejected health check is reported as a
 * non-critical failure.
 *
 * @param {string} projectRoot - Project root handed to checkHubHealth.
 * @returns {Promise<{name: string, passed: boolean, critical: boolean, reason?: string}>}
 *   Result named "hub"; `reason` is present only when the check did not pass.
 */
export async function guardHub(projectRoot) {
    try {
        const { checkHubHealth } = await import("./hub-health.js");
        const status = await checkHubHealth(projectRoot);
        if (status?.available) {
            return {
                name: "hub",
                passed: true,
                critical: false,
            };
        }
        // The health status may carry no error text; avoid rendering
        // "Hub unavailable (undefined)" in the warning shown to the user.
        const detail = status?.error ? String(status.error) : "no error details reported";
        return {
            name: "hub",
            passed: false,
            critical: false,
            reason: `Hub unavailable (${detail}). Agent will run but results won't be tracked.`,
        };
    }
    catch {
        // Import or health-check failure: still non-critical, the agent may run untracked.
        return {
            name: "hub",
            passed: false,
            critical: false,
            reason: "Hub health check unavailable. Agent will run but results won't be tracked.",
        };
    }
}
/**
 * Guard: No concurrent session for the same agent.
 * Critical: true — running two sessions on the same agent causes git conflicts.
 *
 * Scans <projectRoot>/.jfl/sessions for entries named "<agentName>-*" whose
 * state.json has status "active" and whose recorded worktreePath still exists
 * on disk. Active sessions whose worktree is gone are treated as stale and do
 * not block. Unreadable directories and corrupted state files are skipped.
 *
 * NOTE(review): the prefix match also accepts entries of an agent whose name
 * extends this one (e.g. "foo" vs "foo-bar-…") — confirm the session id format.
 *
 * @param {string} projectRoot - Project root containing the .jfl directory.
 * @param {string} agentName - Agent whose sessions are inspected.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 */
export function guardNoConcurrentSession(projectRoot, agentName) {
    const noConflict = () => ({ name: "concurrent_session", passed: true, critical: true });
    // Single-quote a path for safe copy/paste into a POSIX shell.
    const shellQuote = (p) => `'${String(p).replace(/'/g, `'\\''`)}'`;
    const sessionsDir = join(projectRoot, ".jfl", "sessions");
    let entries;
    try {
        // A missing or unreadable sessions directory means nothing can conflict.
        entries = readdirSync(sessionsDir);
    }
    catch {
        return noConflict();
    }
    for (const entry of entries) {
        // Only check sessions for this specific agent
        if (!entry.startsWith(`${agentName}-`))
            continue;
        const sessionDir = join(sessionsDir, entry);
        const statePath = join(sessionDir, "state.json");
        if (!existsSync(statePath))
            continue;
        let state;
        try {
            state = JSON.parse(readFileSync(statePath, "utf-8"));
        }
        catch {
            // Corrupted state file — skip
            continue;
        }
        if (!state || state.status !== "active")
            continue;
        const worktreePath = state.worktreePath;
        // Only a session whose worktree is still on disk can really conflict;
        // otherwise the session is stale and must not block.
        if (typeof worktreePath !== "string" || worktreePath === "" || !existsSync(worktreePath))
            continue;
        return {
            name: "concurrent_session",
            passed: false,
            critical: true,
            // Paths in the suggested command are shell-quoted: an unquoted path with
            // spaces would make the copy-pasted `rm -rf` delete the wrong targets.
            reason: `Agent "${agentName}" has an active session: ${entry}. ` +
                `Worktree: ${worktreePath}. ` +
                `If this is stale, remove it: rm -rf ${shellQuote(worktreePath)} && rm -rf ${shellQuote(sessionDir)}`,
        };
    }
    return noConflict();
}
/**
 * Guard: Eval script and data files must exist.
 * Critical: true — agent can't measure improvement without eval.
 *
 * Resolves config.eval.script and config.eval.data against projectRoot and
 * checks that both exist. A missing or non-string entry is reported as a
 * blocker instead of letting path.join() throw a TypeError.
 *
 * @param {string} projectRoot - Directory the eval paths are resolved against.
 * @param {{eval?: {script?: string, data?: string}}} config - Agent config.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 *   On failure `reason` joins every problem found with "; ".
 */
export function guardEvalReady(projectRoot, config) {
    const evalConfig = config?.eval ?? {};
    const required = [
        { label: "Eval script", relPath: evalConfig.script },
        { label: "Eval data", relPath: evalConfig.data },
    ];
    const errors = [];
    for (const { label, relPath } of required) {
        if (typeof relPath !== "string" || relPath === "") {
            // join() would throw on a non-string; an empty string would resolve to
            // projectRoot itself and pass the existence check by accident.
            errors.push(`${label} not configured`);
        }
        else if (!existsSync(join(projectRoot, relPath))) {
            errors.push(`${label} not found: ${relPath}`);
        }
    }
    if (errors.length > 0) {
        return {
            name: "eval_ready",
            passed: false,
            critical: true,
            reason: errors.join("; "),
        };
    }
    return { name: "eval_ready", passed: true, critical: true };
}
/**
 * Guard: No stale worktree for this agent.
 * Critical: true — stale worktrees cause git worktree add to fail.
 *
 * Lists registered worktrees via `git worktree list --porcelain`, keeps those
 * whose path contains "jfl-agent-<agentName>-" and whose directory no longer
 * exists, runs `git worktree prune`, and fails only if such entries are still
 * registered afterwards. If git cannot list worktrees the guard passes.
 *
 * @param {string} projectRoot - Working directory for the git commands.
 * @param {string} agentName - Agent whose worktrees are inspected.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 */
export function guardWorktreeClean(projectRoot, agentName) {
    const clean = () => ({ name: "worktree_clean", passed: true, critical: true });
    const marker = `jfl-agent-${agentName}-`;
    const prefix = "worktree ";
    // Returns registered worktree paths for this agent whose directory is
    // missing, or null when git could not list worktrees.
    const listMissingAgentWorktrees = () => {
        const listing = spawnSync("git", ["worktree", "list", "--porcelain"], {
            cwd: projectRoot,
            encoding: "utf-8",
            stdio: "pipe",
        });
        if (listing.status !== 0)
            return null;
        return (listing.stdout || "")
            .split("\n")
            .filter((line) => line.startsWith(prefix))
            .map((line) => line.slice(prefix.length).trim())
            .filter((worktreePath) => worktreePath.includes(marker) && !existsSync(worktreePath));
    };
    const stale = listMissingAgentWorktrees();
    if (stale === null || stale.length === 0) {
        // Can't check worktrees, or nothing stale — don't block
        return clean();
    }
    // Auto-prune stale worktrees — they're just git bookkeeping for deleted dirs
    spawnSync("git", ["worktree", "prune"], {
        cwd: projectRoot,
        stdio: "pipe",
    });
    // Re-check after pruning. Only entries whose directory is still missing are
    // stale; a live worktree for the same agent must not be reported here.
    const remaining = listMissingAgentWorktrees();
    if (remaining !== null && remaining.length > 0) {
        return {
            name: "worktree_clean",
            passed: false,
            critical: true,
            reason: `Stale worktrees found for agent "${agentName}": ${remaining.join(", ")}. ` +
                `Clean up with: git worktree remove <path> --force`,
        };
    }
    return clean();
}
/**
 * Guard: Git repo must be in a clean state for the base branch.
 * Critical: false — a dirty tree only produces a warning.
 *
 * Runs `git status --porcelain` in projectRoot. Any output line counts as one
 * uncommitted entry. When git itself fails (not a repository, git missing)
 * the result is a non-critical warning rather than a false "clean" report.
 *
 * @param {string} projectRoot - Working directory for the git command.
 * @returns {{name: string, passed: boolean, critical: boolean, reason?: string}}
 */
export function guardGitClean(projectRoot) {
    const status = spawnSync("git", ["status", "--porcelain"], {
        cwd: projectRoot,
        encoding: "utf-8",
        stdio: "pipe",
    });
    if (status.error || status.status !== 0) {
        // Empty stdout from a failed command says nothing about the tree;
        // surface the failure instead of reporting the repo as clean.
        const detail = String(status.error?.message || status.stderr || "").trim().split("\n")[0];
        return {
            name: "git_clean",
            passed: false,
            critical: false,
            reason: `Could not determine git status${detail ? ` (${detail})` : ""}. ` +
                `Uncommitted changes, if any, were not checked.`,
        };
    }
    const output = (status.stdout || "").trim();
    if (output) {
        return {
            name: "git_clean",
            passed: false,
            critical: false,
            reason: `Uncommitted changes in working directory (${output.split("\n").length} files). ` +
                `These may conflict with agent worktree creation.`,
        };
    }
    return { name: "git_clean", passed: true, critical: false };
}
202
+ // ============================================================================
203
+ // Guard Runner
204
+ // ============================================================================
/**
 * Run every pre-flight guard for an agent session and summarise the outcome.
 *
 * The hub guard is awaited first; the synchronous guards then run in a fixed
 * order (concurrent session, eval readiness, worktree cleanliness, git
 * cleanliness). Failed critical guards become blockers, failed non-critical
 * guards become warnings.
 *
 * @param {string} projectRoot - Project root passed to each guard.
 * @param {string} agentName - Agent the session is started for.
 * @param {object} config - Agent config, forwarded to guardEvalReady.
 * @returns {Promise<{proceed: boolean, results: object[], blockers: object[], warnings: object[]}>}
 *   `proceed` is true only when there are no blockers.
 */
export async function runGuards(projectRoot, agentName, config) {
    const hubOutcome = await guardHub(projectRoot);
    const results = [
        hubOutcome,
        guardNoConcurrentSession(projectRoot, agentName),
        guardEvalReady(projectRoot, config),
        guardWorktreeClean(projectRoot, agentName),
        guardGitClean(projectRoot),
    ];
    const blockers = [];
    const warnings = [];
    for (const outcome of results) {
        if (outcome.passed)
            continue;
        // Failed guards are split by severity; passed guards appear only in `results`.
        if (outcome.critical) {
            blockers.push(outcome);
        }
        else {
            warnings.push(outcome);
        }
    }
    const proceed = blockers.length === 0;
    return { proceed, results, blockers, warnings };
}
229
+ //# sourceMappingURL=agent-guards.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-guards.js","sourceRoot":"","sources":["../../src/lib/agent-guards.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,IAAI,CAAA;AAC1D,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAA;AAC3B,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAA;AA6BzC,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,WAAmB;IAChD,IAAI,CAAC;QACH,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAA;QAC1D,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAA;QAEhD,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACrB,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,IAAI;gBACZ,QAAQ,EAAE,KAAK;aAChB,CAAA;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,KAAK;YACf,MAAM,EAAE,oBAAoB,MAAM,CAAC,KAAK,iDAAiD;SAC1F,CAAA;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,KAAK;YACf,MAAM,EAAE,4EAA4E;SACrF,CAAA;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CACtC,WAAmB,EACnB,SAAiB;IAEjB,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE,UAAU,CAAC,CAAA;IACzD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACrE,CAAC;IAED,IAAI,OAAiB,CAAA;IACrB,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,CAAA;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACrE,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,8CAA8C;QAC9C,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,SAAS,GAAG,CAAC;YAAE,SAAQ;QAEhD,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,YAAY,CAAC,CAAA;QACxD,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,SAAQ;QAEpC,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAA;YAC1D,IAAI,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;gBAC9B,8DAA8D;gBAC9D,IAAI,KAAK,CAAC,YAAY,IAAI,UAAU,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC;oBACzD,OAAO;wBACL,IAAI,EAAE,oBAAoB;wBAC1B,MAAM,EAAE,KAAK;wBACb,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,UAAU,SAAS,4BAA4B,KAAK,IAAI;4BACxD,aAAa,KAAK,CAAC,YAAY,IAAI;4BACnC,uCAAuC
,KAAK,CAAC,YAAY,cAAc,IAAI,CAAC,WAAW,EAAE,KAAK,CAAC,EAAE;qBAC1G,CAAA;gBACH,CAAC;gBACD,kEAAkE;gBAClE,+BAA+B;YACjC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,8BAA8B;QAChC,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;AACrE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,WAAmB,EACnB,MAAmB;IAEnB,MAAM,MAAM,GAAa,EAAE,CAAA;IAE3B,MAAM,cAAc,GAAG,IAAI,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAC5D,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,0BAA0B,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAA;IAC7D,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACxD,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QAC9B,MAAM,CAAC,IAAI,CAAC,wBAAwB,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAA;IACzD,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;SAC1B,CAAA;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;AAC7D,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,WAAmB,EACnB,SAAiB;IAEjB,qDAAqD;IACrD,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE;QACnE,GAAG,EAAE,WAAW;QAChB,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,MAAM;KACd,CAAC,CAAA;IAEF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,sCAAsC;QACtC,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACjE,CAAC;IAED,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC/C,MAAM,cAAc,GAAa,EAAE,CAAA;IAEnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YACjC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YACzD,gCAAgC;YAChC,IAAI,YAAY,CAAC,QAAQ,CAAC,aAAa,SAAS,GAAG,CAAC,EAAE,CAAC;gBACrD,yCAAyC;gBACzC,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;oBAC9B,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;gBACnC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,kFAAkF;QAClF,SAAS,C
AAC,KAAK,EAAE,CAAC,UAAU,EAAE,OAAO,CAAC,EAAE;YACtC,GAAG,EAAE,WAAW;YAChB,KAAK,EAAE,MAAM;SACd,CAAC,CAAA;QAEF,yBAAyB;QACzB,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE;YACpE,GAAG,EAAE,WAAW;YAChB,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,MAAM;SACd,CAAC,CAAA;QAEF,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CACnC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,aAAa,SAAS,GAAG,CAAC,CAC1E,CAAA;QAED,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;YACrE,OAAO;gBACL,IAAI,EAAE,gBAAgB;gBACtB,MAAM,EAAE,KAAK;gBACb,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,oCAAoC,SAAS,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;oBACvE,mDAAmD;aAC5D,CAAA;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;AACjE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,WAAmB;IAC/C,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,aAAa,CAAC,EAAE;QACzD,GAAG,EAAE,WAAW;QAChB,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,MAAM;KACd,CAAC,CAAA;IAEF,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IAC3C,IAAI,MAAM,EAAE,CAAC;QACX,OAAO;YACL,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,KAAK;YACf,MAAM,EAAE,6CAA6C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,WAAW;gBACjF,kDAAkD;SAC3D,CAAA;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAA;AAC7D,CAAC;AAED,+EAA+E;AAC/E,eAAe;AACf,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,WAAmB,EACnB,SAAiB,EACjB,MAAmB;IAEnB,MAAM,OAAO,GAAkB,EAAE,CAAA;IAEjC,iDAAiD;IACjD,MAAM,CAAC,SAAS,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACpC,QAAQ,CAAC,WAAW,CAAC;KACtB,CAAC,CAAA;IACF,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAEvB,OAAO,CAAC,IAAI,CAAC,wBAAwB,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,CAAA;IAC9D,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAA;IACjD,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,WAAW,
EAAE,SAAS,CAAC,CAAC,CAAA;IACxD,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC,CAAA;IAExC,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC/D,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAEhE,OAAO;QACL,OAAO,EAAE,QAAQ,CAAC,MAAM,KAAK,CAAC;QAC9B,OAAO;QACP,QAAQ;QACR,QAAQ;KACT,CAAA;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"agent-session.d.ts","sourceRoot":"","sources":["../../src/lib/agent-session.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AACpD,OAAO,EAA2D,KAAK,YAAY,EAAmB,MAAM,oBAAoB,CAAA;AAChI,OAAO,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAY,MAAM,sBAAsB,CAAA;AAOvE,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,SAAS,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,MAAM,CAwIR;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,IAAI,CASN;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAUnE;AAMD,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAA;IACV,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,WAAW,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,YAAY,EAAE,MAAM,CAAA;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,YAAY,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAA;CAC1C;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAA;IACb,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,OAAO,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,MAAM,EAAE,QAAQ,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,E
AAE,UAAU,EAAE,CAAA;IACzB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAmGD,wBAAgB,YAAY,CAC1B,MAAM,EAAE,WAAW,EACnB,WAAW,EAAE,MAAM,EACnB,UAAU,GAAE,MAAe,GAC1B,YAAY,CAwEd;AAED,wBAAsB,WAAW,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAkBxE;AAED,wBAAsB,QAAQ,CAC5B,OAAO,EAAE,YAAY,EACrB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EACZ,UAAU,EAAE,MAAM,EAClB,mBAAmB,GAAE,UAAU,EAAO,GACrC,OAAO,CAAC;IAAE,MAAM,EAAE,WAAW,CAAC;IAAC,UAAU,EAAE,UAAU,CAAA;CAAE,CAAC,CAqJ1D;AA6FD,wBAAsB,UAAU,CAC9B,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,GACxB,OAAO,CAAC,cAAc,CAAC,CA+EzB;AAkED,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAY5F;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,YAAY,GAAG,IAAI,CAQ5D;AAED,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,EAAE,CAoBhE"}
1
+ {"version":3,"file":"agent-session.d.ts","sourceRoot":"","sources":["../../src/lib/agent-session.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AACpD,OAAO,EAA2D,KAAK,YAAY,EAAmB,MAAM,oBAAoB,CAAA;AAChI,OAAO,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAY,MAAM,sBAAsB,CAAA;AAOvE,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,SAAS,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,MAAM,CAwIR;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,GACpB,IAAI,CASN;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAUnE;AAMD,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAA;IACV,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,WAAW,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,YAAY,EAAE,MAAM,CAAA;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,YAAY,CAAA;IAC1B,cAAc,EAAE,MAAM,CAAA;IACtB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAA;CAC1C;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,UAAU,EAAE,MAAM,CAAA;IAClB,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,OAAO,CAAA;IACb,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAA;IACb,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,OAAO,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,MAAM,EAAE,QAAQ,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,WAAW,E
AAE,UAAU,EAAE,CAAA;IACzB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAmGD,wBAAgB,YAAY,CAC1B,MAAM,EAAE,WAAW,EACnB,WAAW,EAAE,MAAM,EACnB,UAAU,GAAE,MAAe,GAC1B,YAAY,CAuFd;AAED,wBAAsB,WAAW,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAkBxE;AAED,wBAAsB,QAAQ,CAC5B,OAAO,EAAE,YAAY,EACrB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EACZ,UAAU,EAAE,MAAM,EAClB,mBAAmB,GAAE,UAAU,EAAO,GACrC,OAAO,CAAC;IAAE,MAAM,EAAE,WAAW,CAAC;IAAC,UAAU,EAAE,UAAU,CAAA;CAAE,CAAC,CA+M1D;AAmND,wBAAsB,UAAU,CAC9B,OAAO,EAAE,YAAY,EACrB,WAAW,EAAE,UAAU,EAAE,GACxB,OAAO,CAAC,cAAc,CAAC,CAkIzB;AAkED,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAY5F;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,YAAY,GAAG,IAAI,CAQ5D;AAED,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,EAAE,CAoBhE"}
@@ -285,6 +285,19 @@ export function startSession(config, projectRoot, baseBranch = "main") {
285
285
  const repoRoot = config.target_repo
286
286
  ? join(projectRoot, config.target_repo)
287
287
  : projectRoot;
288
+ // Clean up stale worktree at this path (from crashed/killed sessions)
289
+ // Without this, git worktree add fails and falls back to main — breaking auto-PR
290
+ if (existsSync(worktreePath)) {
291
+ console.log(` Cleaning stale worktree: ${worktreePath}`);
292
+ gitExec(["worktree", "remove", worktreePath, "--force"], repoRoot);
293
+ }
294
+ gitExec(["worktree", "prune"], repoRoot);
295
+ // Delete stale session branch if it exists (from a previous crashed session with same hash)
296
+ const branchCheck = gitExec(["rev-parse", "--verify", branch], repoRoot);
297
+ if (branchCheck.ok) {
298
+ console.log(` Deleting stale branch: ${branch}`);
299
+ gitExec(["branch", "-D", branch], repoRoot);
300
+ }
288
301
  // Fetch latest base branch
289
302
  gitExec(["fetch", "origin", baseBranch], repoRoot);
290
303
  // Create worktree with new branch from origin/baseBranch (in the target repo, not GTM)
@@ -403,6 +416,59 @@ export async function runRound(session, round, task, hypothesis, previousTransit
403
416
  .trim()
404
417
  .split("\n")
405
418
  .filter(Boolean);
419
+ // ── Guard check: must pass before eval (T1.2) ──
420
+ // e.g. "cd ${AGENT_WORKTREE} && npx tsc --noEmit" catches type errors
421
+ const guardCmd = session.config.eval.guard;
422
+ if (guardCmd) {
423
+ const expandedGuard = guardCmd.replace(/\$\{AGENT_WORKTREE\}/g, session.worktreePath);
424
+ console.log(` Running guard: ${expandedGuard.slice(0, 80)}...`);
425
+ const guardResult = spawnSync("bash", ["-c", expandedGuard], {
426
+ cwd: session.worktreePath,
427
+ encoding: "utf-8",
428
+ stdio: "pipe",
429
+ timeout: 60000, // 60s max for guard
430
+ });
431
+ if (guardResult.status !== 0) {
432
+ console.log(` ✗ Guard FAILED — reverting changes`);
433
+ const guardError = (guardResult.stderr || guardResult.stdout || "").trim().split("\n").slice(-5).join("\n");
434
+ if (guardError)
435
+ console.log(` ${guardError.split("\n").join("\n ")}`);
436
+ // Revert: discard all changes in worktree
437
+ gitExec(["checkout", "--", "."], session.worktreePath);
438
+ gitExec(["clean", "-fd"], session.worktreePath);
439
+ const guardFailResult = {
440
+ round,
441
+ task,
442
+ hypothesis,
443
+ metricBefore: session.baselineMetric,
444
+ metricAfter: session.baselineMetric,
445
+ delta: 0,
446
+ kept: false,
447
+ duration_ms: Date.now() - startTime,
448
+ error: `Guard failed: ${guardError.slice(0, 200)}`,
449
+ };
450
+ const guardFailTransition = {
451
+ agent: session.agentName,
452
+ session_id: session.id,
453
+ state_hash: stateHash,
454
+ state: stateBefore,
455
+ action_diff: diff,
456
+ action: {
457
+ type: "experiment",
458
+ description: task,
459
+ files_affected: changedFiles,
460
+ scope: changedFiles.length <= 1 ? "small" : changedFiles.length <= 3 ? "medium" : "large",
461
+ branch: session.branch,
462
+ },
463
+ hypothesis,
464
+ reward: -0.001, // Small negative signal — guard failures are worse than no-ops
465
+ timestamp: new Date().toISOString(),
466
+ };
467
+ logResult(session, guardFailResult);
468
+ return { result: guardFailResult, transition: guardFailTransition };
469
+ }
470
+ console.log(` ✓ Guard passed`);
471
+ }
406
472
  // Commit changes in worktree with provenance signing
407
473
  signedAgentCommit(session.worktreePath, `agent(${session.agentName}): round ${round} - ${task.slice(0, 50)}`, {
408
474
  agentId: session.agentName,
@@ -462,34 +528,144 @@ export async function runRound(session, round, task, hypothesis, previousTransit
462
528
  }
463
529
  async function runClaudeCode(session, task) {
464
530
  const timeout = session.config.time_budget_seconds * 1000;
465
- // Build enhanced task with experiment history reference
466
- // This is the Karpathy pattern: agent sees program.md (EXPERIMENTS.md) with history
531
+ // Write all context to files Karpathy pattern
532
+ // EXPERIMENTS.md has: eval diagnostics, code context, experiment history
533
+ // AGENT.md has: task, constraints, scope files
534
+ // CLAUDE.md has: minimal "read files, edit code, stop" instructions
535
+ writeAgentMd(session, task);
536
+ writeAgentClaudeMd(session);
537
+ // Task prompt is deliberately tiny — all context is in files.
538
+ // Long prompts waste tokens and slow down time-to-first-edit.
539
+ const tinyTask = `Read EXPERIMENTS.md then AGENT.md. Make ONE edit to improve ${session.config.metric}. Stop after editing.`;
540
+ return new Promise((resolve) => {
541
+ spawnAgentRuntime(tinyTask, session.worktreePath, timeout, resolve);
542
+ });
543
+ }
544
+ /**
545
+ * Write AGENT.md to the worktree — focused instructions for RL agents.
546
+ * Contains: task, scope, eval diagnostics, code context, constraints.
547
+ * This is what the agent reads instead of CLAUDE.md.
548
+ */
549
+ function writeAgentMd(session, task) {
467
550
  const scopeFiles = (() => { const raw = session.config.constraints?.scope_files || session.config.constraints.files_in_scope || []; return Array.isArray(raw) ? raw : [String(raw)]; })();
468
- const scopeFilesStr = scopeFiles.slice(0, 5).join(", ");
469
- const enhancedTask = `You are an autonomous code improvement agent. Your job is to EDIT SOURCE CODE FILES to improve a metric.
551
+ const agentMd = `# Agent: ${session.agentName}
470
552
 
471
- TASK: ${task}
553
+ ## Task
554
+ ${task}
472
555
 
473
- INSTRUCTIONS:
474
- 1. Read EXPERIMENTS.md it has eval diagnostics showing WHAT IS FAILING and the actual source code
475
- 2. Based on the failing queries/tests, identify ONE targeted fix in the source code
476
- 3. EDIT the source file directly (e.g. ${scopeFiles[0] || "the scope files"}) — use the read and edit tools
477
- 4. Do NOT just write documentation or update EXPERIMENTS.md — you must modify actual .ts source files
478
- 5. Make ONE small, focused change to improve the metric
479
- 6. Do NOT add new files. Do NOT modify files outside the scope: ${scopeFilesStr}
480
- 7. After making the change, stop. The eval will run automatically.
556
+ ## Constraints
557
+ - **Modify ONLY these files:** ${scopeFiles.join(", ")}
558
+ - **Do NOT modify:** AGENT.md, EXPERIMENTS.md, eval scripts, node_modules, dist
559
+ - **Max file changes:** ${session.config.constraints.max_file_changes || 2}
560
+ - **Time budget:** ${session.config.time_budget_seconds}s
481
561
 
482
- CRITICAL: Your change must be to SOURCE CODE (.ts files), not markdown files. If you only modify EXPERIMENTS.md, the eval score will not change and your work will be reverted.`;
483
- // RL agents use Claude CLI directly not Pi
484
- // Pi loads CLAUDE.md + full session init which overrides the agent's focused task
485
- // Claude CLI with --dangerously-skip-permissions just executes the task
486
- // Pi is for Layer 2 (interactive sessions). RL agents are Layer 3 (autonomous).
487
- return new Promise((resolve) => {
488
- spawnClaudeCli(enhancedTask, session.worktreePath, timeout, resolve);
489
- });
562
+ ## How to Succeed
563
+ 1. Read EXPERIMENTS.md it shows what failed and the actual source code
564
+ 2. Make ONE small, targeted change to a source file
565
+ 3. Stop after making the change the eval runs automatically
566
+
567
+ ## What NOT to Do
568
+ - Do not add new files
569
+ - Do not modify test files or eval scripts
570
+ - Do not just add comments or documentation
571
+ - Do not repeat experiments listed as REJECTED in EXPERIMENTS.md
572
+ `;
573
+ writeFileSync(join(session.worktreePath, "AGENT.md"), agentMd);
490
574
  }
/**
 * Write a minimal CLAUDE.md to the worktree for RL agents.
 * Karpathy pattern: agent gets focused instructions, no ceremony.
 *
 * The file lists the files in scope (constraints.scope_files, falling back to
 * constraints.files_in_scope) and the max file changes (default 2). A config
 * without a `constraints` object is tolerated: scope is then empty.
 *
 * @param {object} session - Agent session; reads agentName, worktreePath and
 *   config.constraints.
 * @returns {void} Side effect only: writes <worktreePath>/CLAUDE.md.
 */
function writeAgentClaudeMd(session) {
    // Read constraints once through a safe default. The original mixed
    // `constraints?.` with unguarded `constraints.` accesses, which threw a
    // TypeError whenever the config had no constraints object.
    const constraints = session.config.constraints ?? {};
    const rawScope = constraints.scope_files || constraints.files_in_scope || [];
    const scopeFiles = Array.isArray(rawScope) ? rawScope : [String(rawScope)];
    const claudeMd = `# RL Agent — ${session.agentName}

You are a lightweight code improvement agent. You have ONE job: improve a metric by editing source code.

## Instructions
1. Read EXPERIMENTS.md — it has eval diagnostics, code context, and experiment history
2. Read AGENT.md — it has your specific task and constraints
3. Make ONE small, targeted edit to a source file
4. Stop immediately after making the change

## Rules
- Only modify: ${scopeFiles.join(", ")}
- Do NOT modify: AGENT.md, EXPERIMENTS.md, CLAUDE.md, eval scripts, node_modules, dist
- Do NOT add new files
- Do NOT write documentation or comments as your primary change
- Max file changes: ${constraints.max_file_changes || 2}
- The eval runs automatically after you exit — do not run it yourself

## Available Tools
You have: read, edit, write, bash. That's all you need.
Do NOT try to use any other tools (no memory search, no hub, no subway, no CRM).
`;
    writeFileSync(join(session.worktreePath, "CLAUDE.md"), claudeMd);
}
/**
 * Spawn agent runtime — claude CLI by default, Pi when JFL_AGENT_USE_PI is set.
 *
 * In Pi mode the child is sent SIGTERM after `timeout` ms and SIGKILL five
 * seconds later if it still has not exited. If Pi cannot be spawned at all,
 * the task falls back to spawnClaudeCli. `resolve` is invoked at most once by
 * this function's own handlers.
 *
 * @param {string} task - Prompt passed to the agent.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} timeout - Milliseconds before the child is terminated.
 * @param {Function} resolve - Called with no arguments when the run is over.
 * @returns {void}
 */
function spawnAgentRuntime(task, cwd, timeout, resolve) {
    const usePi = !!process.env.JFL_AGENT_USE_PI; // Opt-in to Pi (heavy), default is claude CLI (light)
    if (!usePi) {
        spawnClaudeCli(task, cwd, timeout, resolve);
        return;
    }
    console.log(" Spawning Pi agent (heavy — use for debugging only)...");
    const child = spawn("pi", [
        "--print", task,
        "--no-session",
        "--no-skills",
        "--no-prompt-templates",
        "--no-themes",
    ], {
        cwd,
        stdio: "inherit",
        env: {
            ...process.env,
            JFL_AGENT_MODE: "1",
            JFL_PP_SPAWNED: "1",
            CLAUDECODE: undefined,
            CLAUDE_CODE: undefined,
        },
    });
    let settled = false;
    let killTimer;
    const termTimer = setTimeout(() => {
        child.kill("SIGTERM");
        killTimer = setTimeout(() => {
            // `child.killed` only records that a signal was delivered, so it is
            // already true after SIGTERM. Check the exit state instead so the
            // SIGKILL escalation actually happens for a child that ignores SIGTERM.
            if (child.exitCode === null && child.signalCode === null)
                child.kill("SIGKILL");
        }, 5000);
    }, timeout);
    // Marks the run as finished exactly once and stops both timers.
    const settle = () => {
        if (settled)
            return false;
        settled = true;
        clearTimeout(termTimer);
        clearTimeout(killTimer);
        return true;
    };
    child.on("error", (err) => {
        console.error(` Pi agent error: ${err.message}`);
        // "error" also fires when a signal cannot be delivered to a running
        // child. Only fall back when Pi never started (no pid); otherwise keep
        // waiting for "exit" so two agents never run in the same worktree.
        if (child.pid !== undefined || !settle())
            return;
        console.log(" Falling back to claude CLI...");
        spawnClaudeCli(task, cwd, timeout, resolve);
    });
    child.on("exit", (code, signal) => {
        // "exit" may follow "error"; ignore it once the fallback owns `resolve`.
        if (!settle())
            return;
        if (code !== null) {
            console.log(` Pi agent exit: code=${code}`);
        }
        else if (signal) {
            console.log(` Pi agent killed: signal=${signal}`);
        }
        resolve();
    });
}
661
+ /**
662
+ * Spawn claude CLI — lightweight agent runtime (~50MB).
663
+ * Karpathy pattern: LLM reads files, edits code, exits.
664
+ * CLAUDE.md in worktree provides focused instructions.
665
+ * EXPERIMENTS.md has all context (eval diagnostics, code, history).
666
+ */
491
667
  function spawnClaudeCli(task, cwd, timeout, resolve) {
492
- console.log(" Spawning claude CLI...");
668
+ console.log(" Spawning claude CLI (lightweight, Karpathy-style)...");
493
669
  const child = spawn("claude", [
494
670
  "--dangerously-skip-permissions",
495
671
  "-p", task,
@@ -596,14 +772,62 @@ export async function endSession(session, transitions) {
596
772
  }
597
773
  }
598
774
  catch { }
599
- // If we improved, push the branch (but do NOT create a PR or auto-merge)
600
- // Following Karpathy's autoresearch pattern: branches grow overnight,
601
- // humans (or their agents) review and merge in the morning
775
+ // If we improved, push the branch and create a PR for review
776
+ // T1.4: Auto-create PR from kept improvement
602
777
  if (improvedRounds > 0) {
603
778
  // Push the branch so it's available for review
604
779
  const push = gitExec(["push", "-u", "origin", session.branch], session.worktreePath);
605
780
  if (push.ok) {
606
781
  summary.branchUrl = `https://github.com/402goose/${session.worktreePath.split('/').pop()}/tree/${session.branch}`;
782
+ // Auto-create PR with improvement summary
783
+ const prTitle = `agent(${session.agentName}): ${session.config.metric} ${totalDelta > 0 ? "+" : ""}${totalDelta.toFixed(4)} (${improvedRounds}/${transitions.length} rounds kept)`;
784
+ const prBody = [
785
+ `## Agent Improvement: ${session.agentName}`,
786
+ "",
787
+ `**Metric:** ${session.config.metric} (${session.config.direction})`,
788
+ `**Baseline:** ${(session.baselineMetric - totalDelta).toFixed(4)}`,
789
+ `**Final:** ${session.baselineMetric.toFixed(4)}`,
790
+ `**Delta:** ${totalDelta > 0 ? "+" : ""}${totalDelta.toFixed(4)}`,
791
+ `**Rounds:** ${improvedRounds} kept / ${transitions.length} total`,
792
+ "",
793
+ "### Kept Changes",
794
+ ...transitions.filter(t => t.reward > 0).map(t => `- **+${t.reward.toFixed(4)}**: ${t.action.description.slice(0, 100)} (${t.action.files_affected.join(", ")})`),
795
+ "",
796
+ "### Rejected Changes",
797
+ `${transitions.filter(t => t.reward <= 0).length} experiments reverted (no improvement).`,
798
+ "",
799
+ `---`,
800
+ `Session: \`${session.id}\``,
801
+ `Branch: \`${session.branch}\``,
802
+ `Guard: ${session.config.eval.guard ? "✅ " + session.config.eval.guard.slice(0, 60) : "none"}`,
803
+ ].join("\n");
804
+ const prResult = spawnSync("gh", [
805
+ "pr", "create",
806
+ "--title", prTitle,
807
+ "--body", prBody,
808
+ "--base", session.baseBranch,
809
+ "--head", session.branch,
810
+ ], {
811
+ cwd: session.worktreePath,
812
+ encoding: "utf-8",
813
+ stdio: "pipe",
814
+ timeout: 30000,
815
+ });
816
+ if (prResult.status === 0) {
817
+ const prUrl = (prResult.stdout || "").trim();
818
+ summary.prUrl = prUrl;
819
+ console.log(` PR created: ${prUrl}`);
820
+ }
821
+ else {
822
+ // PR creation failed — branch is still pushed for manual review
823
+ const err = (prResult.stderr || "").trim();
824
+ if (err.includes("already exists")) {
825
+ console.log(` PR already exists for ${session.branch}`);
826
+ }
827
+ else {
828
+ console.log(` PR creation failed: ${err.slice(0, 100)}`);
829
+ }
830
+ }
607
831
  }
608
832
  // Emit scope:impact events for each produces pattern
609
833
  // This triggers downstream agents that consume these patterns