@lnilluv/pi-ralph-loop 0.3.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/release.yml +8 -39
- package/README.md +53 -160
- package/package.json +2 -2
- package/scripts/version-helper.ts +210 -0
- package/src/index.ts +1388 -187
- package/src/ralph-draft-context.ts +618 -0
- package/src/ralph-draft-llm.ts +297 -0
- package/src/ralph-draft.ts +33 -0
- package/src/ralph.ts +924 -102
- package/src/runner-rpc.ts +466 -0
- package/src/runner-state.ts +839 -0
- package/src/runner.ts +1042 -0
- package/src/secret-paths.ts +66 -0
- package/src/shims.d.ts +0 -3
- package/tests/fixtures/parity/migrate/OPEN_QUESTIONS.md +3 -0
- package/tests/fixtures/parity/migrate/RALPH.md +27 -0
- package/tests/fixtures/parity/migrate/golden/MIGRATED.md +15 -0
- package/tests/fixtures/parity/migrate/legacy/source.md +6 -0
- package/tests/fixtures/parity/migrate/legacy/source.yaml +3 -0
- package/tests/fixtures/parity/migrate/scripts/show-legacy.sh +10 -0
- package/tests/fixtures/parity/migrate/scripts/verify.sh +15 -0
- package/tests/fixtures/parity/research/OPEN_QUESTIONS.md +3 -0
- package/tests/fixtures/parity/research/RALPH.md +45 -0
- package/tests/fixtures/parity/research/claim-evidence-checklist.md +15 -0
- package/tests/fixtures/parity/research/expected-outputs.md +22 -0
- package/tests/fixtures/parity/research/scripts/show-snapshots.sh +13 -0
- package/tests/fixtures/parity/research/scripts/verify.sh +55 -0
- package/tests/fixtures/parity/research/snapshots/app-factory-ai-cli.md +11 -0
- package/tests/fixtures/parity/research/snapshots/docs-factory-ai-cli-features-missions.md +11 -0
- package/tests/fixtures/parity/research/snapshots/factory-ai-news-missions.md +11 -0
- package/tests/fixtures/parity/research/source-manifest.md +20 -0
- package/tests/index.test.ts +3801 -0
- package/tests/parity/README.md +9 -0
- package/tests/parity/harness.py +526 -0
- package/tests/parity-harness.test.ts +42 -0
- package/tests/parity-research-fixture.test.ts +34 -0
- package/tests/ralph-draft-context.test.ts +672 -0
- package/tests/ralph-draft-llm.test.ts +434 -0
- package/tests/ralph-draft.test.ts +168 -0
- package/tests/ralph.test.ts +1413 -19
- package/tests/runner-event-contract.test.ts +235 -0
- package/tests/runner-rpc.test.ts +446 -0
- package/tests/runner-state.test.ts +581 -0
- package/tests/runner.test.ts +1552 -0
- package/tests/secret-paths.test.ts +55 -0
- package/tests/version-helper.test.ts +75 -0
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import test from "node:test";
|
|
6
|
+
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
|
7
|
+
import {
|
|
8
|
+
buildDraftRequest,
|
|
9
|
+
buildRepoContext,
|
|
10
|
+
extractDraftMetadata,
|
|
11
|
+
inspectRepo,
|
|
12
|
+
parseRalphMarkdown,
|
|
13
|
+
type DraftRequest,
|
|
14
|
+
} from "../src/ralph.ts";
|
|
15
|
+
import { SECRET_PATH_POLICY_TOKEN } from "../src/secret-paths.ts";
|
|
16
|
+
import {
|
|
17
|
+
buildStrengtheningPrompt,
|
|
18
|
+
strengthenDraftWithLlm,
|
|
19
|
+
type StrengthenDraftRuntime,
|
|
20
|
+
} from "../src/ralph-draft-llm.ts";
|
|
21
|
+
|
|
22
|
+
/**
 * Creates a uniquely named scratch directory inside the OS temp directory.
 *
 * @returns The path of the newly created directory (prefixed `pi-ralph-llm-`).
 */
function createTempRepo(): string {
  const prefix = join(tmpdir(), "pi-ralph-llm-");
  const repoDir = mkdtempSync(prefix);
  return repoDir;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Writes a UTF-8 text file at `relativePath` beneath `root`, creating any
 * missing parent directories first.
 *
 * @param root - Directory the relative path is resolved against.
 * @param relativePath - File location relative to `root`.
 * @param content - Text to store in the file.
 */
function writeTextFile(root: string, relativePath: string, content: string): void {
  const destination = join(root, relativePath);
  // Joining with ".." yields the directory that contains the destination file.
  const parentDir = join(destination, "..");
  mkdirSync(parentDir, { recursive: true });
  writeFileSync(destination, content, "utf8");
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds the canonical draft request used by the tests in this file: an npm
 * repository with test and lint commands, plus two hand-picked auth-related
 * files attached as repo context.
 *
 * @returns The request produced by `buildDraftRequest` for the task
 *          "Fix flaky auth tests".
 */
function makeRequest(): DraftRequest {
  const signals = {
    packageManager: "npm" as const,
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json", "README.md"],
  };

  const target = {
    slug: "fix-flaky-auth-tests",
    dirPath: "/repo/fix-flaky-auth-tests",
    ralphPath: "/repo/fix-flaky-auth-tests/RALPH.md",
  };

  const summaryLines = [
    "package manager: npm",
    "test command: npm test",
    "lint command: npm run lint",
    "git repository: present",
    "top-level dirs: src, tests",
    "top-level files: package.json, README.md",
  ];

  const selectedFiles = [
    {
      path: "src/auth.ts",
      reason: "auth logic looks relevant",
      content: "export const authEnabled = true;\nexport function login() { return true; }\n",
    },
    {
      path: "tests/auth.test.ts",
      reason: "captures the flaky auth path",
      content: "import assert from 'node:assert/strict';\nassert.equal(true, true);\n",
    },
  ];

  // Start from the derived repo context, then pin the summary and file list
  // so prompt assertions do not depend on how the context is computed.
  const repoContext = {
    ...buildRepoContext(signals),
    summaryLines,
    selectedFiles,
  };

  return buildDraftRequest("Fix flaky auth tests", target, signals, repoContext);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Creates a strengthening runtime with a stub model and a registry whose auth
 * lookup always succeeds.
 *
 * @param overrides - Optional replacements. A `model` key that is present is
 *                    honoured even when its value is `undefined`, so callers
 *                    can simulate "no model selected"; `modelRegistry` is only
 *                    replaced when it is not null/undefined.
 * @returns The runtime assembled from the defaults and the overrides.
 */
function makeRuntime(overrides: Partial<StrengthenDraftRuntime> = {}): StrengthenDraftRuntime {
  const defaultModel: NonNullable<StrengthenDraftRuntime["model"]> = {
    provider: "anthropic",
    id: "claude-sonnet",
    name: "Claude Sonnet",
    api: "anthropic-messages",
    baseUrl: "https://example.invalid",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 200_000,
    maxTokens: 8_192,
  };

  const defaultRegistry: StrengthenDraftRuntime["modelRegistry"] = {
    async getApiKeyAndHeaders() {
      return { ok: true, apiKey: "test-api-key", headers: { "x-test": "1" } };
    },
  };

  // An own-property check (rather than `??`) lets `{ model: undefined }` through.
  const modelWasProvided = Object.prototype.hasOwnProperty.call(overrides, "model");
  const model = modelWasProvided ? overrides.model : defaultModel;
  const modelRegistry = overrides.modelRegistry ?? defaultRegistry;

  return { model, modelRegistry };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Wraps the given content blocks in a complete assistant message with zeroed
 * usage and cost, for use as a stubbed completion result.
 *
 * The `api`, `provider` and `model` fields describe the same Anthropic model
 * that `makeRuntime` configures ("anthropic-messages" / "anthropic" /
 * "claude-sonnet"), so the fixture is internally consistent.
 *
 * @param content - Content blocks to place in the message.
 * @returns An assistant message with `stopReason: "stop"` and `timestamp: 0`.
 */
function makeAssistantMessage(content: AssistantMessage["content"]): AssistantMessage {
  return {
    role: "assistant",
    api: "anthropic-messages",
    provider: "anthropic",
    model: "claude-sonnet",
    content,
    usage: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    },
    stopReason: "stop",
    timestamp: 0,
  };
}
|
|
117
|
+
|
|
118
|
+
/**
 * Flattens a strengthening prompt into one newline-joined string so tests can
 * run substring and regex assertions over everything sent to the model.
 *
 * The system prompt (or an empty string when absent) comes first, followed by
 * one entry per message. String content is used as-is. Array content
 * contributes the `text` of every `{ type: "text" }` part, so text carried in
 * structured content is still visible to negative "must not leak" assertions.
 * Any other content shape contributes an empty string.
 *
 * @param prompt - The prompt returned by `buildStrengtheningPrompt`.
 * @returns The concatenated prompt text.
 */
function promptText(prompt: ReturnType<typeof buildStrengtheningPrompt>): string {
  // Extracts the text of a single structured content part, or "" for non-text parts.
  const partText = (part: unknown): string => {
    if (typeof part !== "object" || part === null || !("type" in part) || !("text" in part)) {
      return "";
    }
    const { type, text } = part as { type: unknown; text: unknown };
    return type === "text" && typeof text === "string" ? text : "";
  };

  const sections: string[] = [prompt.systemPrompt ?? ""];
  for (const message of prompt.messages) {
    const content: unknown = message.content;
    if (typeof content === "string") {
      sections.push(content);
    } else if (Array.isArray(content)) {
      sections.push(content.map(partText).join("\n"));
    } else {
      sections.push("");
    }
  }
  return sections.join("\n");
}
|
|
121
|
+
|
|
122
|
+
// The body-only prompt must carry the task, the inferred mode, the repo
// signals, every selected file (path, reason, content) and the output contract.
test("buildStrengtheningPrompt includes the full prompt contract and repo signals", () => {
  const text = promptText(buildStrengtheningPrompt(makeRequest(), "body-only"));

  const expectedPatterns: RegExp[] = [
    /Fix flaky auth tests/,
    /inferred mode: fix/i,
    /package manager: npm/,
    /test command: npm test/,
    /lint command: npm run lint/,
    /src\/auth\.ts/,
    /auth logic looks relevant/,
    /export const authEnabled = true;/,
    /tests\/auth\.test\.ts/,
    /captures the flaky auth path/,
    /export function login\(\) \{ return true; \}/,
    /deterministic baseline draft/i,
    /return only a complete RALPH\.md/i,
  ];

  for (const pattern of expectedPatterns) {
    assert.match(text, pattern);
  }
});
|
|
141
|
+
|
|
142
|
+
// In body-and-commands scope the prompt must spell out each compatibility
// rule that the strengthened frontmatter has to satisfy.
test("buildStrengtheningPrompt states the body-and-commands compatibility contract", () => {
  const text = promptText(buildStrengtheningPrompt(makeRequest(), "body-and-commands"));

  const contractClauses: RegExp[] = [
    /strengthening scope: body-and-commands/i,
    /body-and-commands scope/i,
    /command names and run strings must match the deterministic baseline exactly/i,
    /max_iterations may stay the same or decrease from the deterministic baseline, never increase/i,
    /top-level timeout may stay the same or decrease from the deterministic baseline, never increase/i,
    /per-command timeout may stay the same or decrease from that command's baseline timeout, and must still be <= timeout/i,
    /completion_promise must remain unchanged, including remaining absent when absent from the baseline/i,
    /every \{\{\s*commands\.<name>\s*\}\} must refer to an accepted command/i,
    /baseline guardrails remain fixed in this phase/i,
    /unsupported frontmatter changes are rejected and fall back automatically/i,
  ];

  for (const clause of contractClauses) {
    assert.match(text, clause);
  }
});
|
|
158
|
+
|
|
159
|
+
// Secret-looking top-level entries in a real temp repo must be absent from
// both the inspected repo signals and the rendered prompt text.
test("buildStrengtheningPrompt omits secret-bearing top-level repo names", (t) => {
  const cwd = createTempRepo();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  // Populate the repo with secret-bearing entries plus ordinary ones.
  writeTextFile(cwd, ".env", "TOKEN=one\n");
  writeTextFile(cwd, ".npmrc", "registry=https://example.invalid\n");
  for (const dirName of [".ssh", "secrets", "credentials", "src"]) {
    mkdirSync(join(cwd, dirName), { recursive: true });
  }
  const manifest = { name: "demo", scripts: { test: "vitest", lint: "eslint ." } };
  writeTextFile(cwd, "package.json", JSON.stringify(manifest, null, 2));

  const repoSignals = inspectRepo(cwd);
  const target = {
    slug: "reverse-engineer-this-app",
    dirPath: "/repo/reverse-engineer-this-app",
    ralphPath: "/repo/reverse-engineer-this-app/RALPH.md",
  };
  const request = buildDraftRequest("Reverse engineer this app", target, repoSignals, buildRepoContext(repoSignals));
  const text = promptText(buildStrengtheningPrompt(request, "body-only"));

  // Ordinary entries survive inspection...
  assert.ok(repoSignals.topLevelFiles.includes("package.json"));
  assert.ok(repoSignals.topLevelDirs.includes("src"));
  // ...while secret-bearing files and directories are filtered out.
  for (const secretFile of [".env", ".npmrc"]) {
    assert.ok(!repoSignals.topLevelFiles.includes(secretFile));
  }
  for (const secretDir of [".ssh", "secrets", "credentials"]) {
    assert.ok(!repoSignals.topLevelDirs.includes(secretDir));
  }

  for (const token of [".env", ".npmrc", ".ssh", "secrets", "credentials"]) {
    assert.ok(!text.includes(token), `unexpected leaked token in prompt: ${token}`);
  }
  assert.match(text, /package manager: npm/);
  assert.match(text, /package\.json/);
});
|
|
195
|
+
|
|
196
|
+
// `.env`-prefixed file names (`.envrc`, `.env.production`) must be dropped
// from the repo signals, the context summary lines and the prompt text.
test("buildStrengtheningPrompt omits .env* top-level repo names from repo signals and prompt text", (t) => {
  const cwd = createTempRepo();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  writeTextFile(cwd, ".envrc", "TOKEN=one\n");
  writeTextFile(cwd, ".env.production", "TOKEN=two\n");
  writeTextFile(cwd, ".npmrc", "registry=https://example.invalid\n");
  for (const dirName of [".ssh", "src"]) {
    mkdirSync(join(cwd, dirName), { recursive: true });
  }
  const manifest = { name: "demo", scripts: { test: "vitest", lint: "eslint ." } };
  writeTextFile(cwd, "package.json", JSON.stringify(manifest, null, 2));

  const repoSignals = inspectRepo(cwd);
  const target = {
    slug: "reverse-engineer-this-app",
    dirPath: "/repo/reverse-engineer-this-app",
    ralphPath: "/repo/reverse-engineer-this-app/RALPH.md",
  };
  const request = buildDraftRequest("Reverse engineer this app", target, repoSignals, buildRepoContext(repoSignals));
  const text = promptText(buildStrengtheningPrompt(request, "body-only"));
  const envVariants = [".envrc", ".env.production"];

  assert.ok(repoSignals.topLevelFiles.includes("package.json"));
  for (const name of envVariants) {
    assert.ok(!repoSignals.topLevelFiles.includes(name));
  }
  for (const name of envVariants) {
    assert.ok(!request.repoContext.summaryLines.some((line) => line.includes(name)));
  }
  for (const name of envVariants) {
    assert.ok(!text.includes(name));
  }
  assert.match(text, /package manager: npm/);
  assert.match(text, /package\.json/);
});
|
|
227
|
+
|
|
228
|
+
// When the target RALPH.md sits under a secret-looking directory, the prompt
// must show only the bare file name, never the full path or its directory.
test("buildStrengtheningPrompt redacts secret-bearing target paths", () => {
  const request = makeRequest();

  const cases = [
    { ralphPath: "/repo/config/secrets/task/RALPH.md", leakedPrefix: "/repo/config/secrets/task" },
    { ralphPath: "/repo/credentials/task/RALPH.md", leakedPrefix: "/repo/credentials/task" },
  ] as const;

  for (const { ralphPath, leakedPrefix } of cases) {
    const redirected = { ...request, target: { ...request.target, ralphPath } };
    const text = promptText(buildStrengtheningPrompt(redirected, "body-only"));

    assert.ok(!text.includes(ralphPath), `unexpected leaked target path in prompt: ${ralphPath}`);
    assert.ok(!text.includes(leakedPrefix), `unexpected leaked target path prefix in prompt: ${ralphPath}`);
    assert.match(text, /Target file: RALPH\.md/);
  }
});
|
|
249
|
+
|
|
250
|
+
// A runtime whose `model` is explicitly undefined must yield the fallback result.
test("strengthenDraftWithLlm falls back when the selected model is missing", async () => {
  const request = makeRequest();
  const runtimeWithoutModel = makeRuntime({ model: undefined });
  // Completion stub that throws if it is ever invoked.
  const failIfCalled = async (): Promise<never> => {
    throw new Error("should not be called");
  };

  const outcome = await strengthenDraftWithLlm(request, runtimeWithoutModel, { completeImpl: failIfCalled });

  assert.deepEqual(outcome, { kind: "fallback" });
});
|
|
260
|
+
|
|
261
|
+
// A registry that reports `ok: false` from the auth lookup must yield the fallback result.
test("strengthenDraftWithLlm falls back when auth lookup fails", async () => {
  const request = makeRequest();
  const failingRegistry: StrengthenDraftRuntime["modelRegistry"] = {
    async getApiKeyAndHeaders() {
      return { ok: false, error: "no auth" };
    },
  };
  const runtime = makeRuntime({ modelRegistry: failingRegistry });
  // Completion stub that throws if it is ever invoked.
  const failIfCalled = async (): Promise<never> => {
    throw new Error("should not be called");
  };

  const outcome = await strengthenDraftWithLlm(request, runtime, { completeImpl: failIfCalled });

  assert.deepEqual(outcome, { kind: "fallback" });
});
|
|
279
|
+
|
|
280
|
+
// An auth lookup that succeeds but returns no `apiKey` must yield the fallback result.
test("strengthenDraftWithLlm falls back when auth succeeds but apiKey is missing", async () => {
  const request = makeRequest();
  const keylessRegistry: StrengthenDraftRuntime["modelRegistry"] = {
    async getApiKeyAndHeaders() {
      return { ok: true, headers: { "x-test": "1" } };
    },
  };
  const runtime = makeRuntime({ modelRegistry: keylessRegistry });
  // Completion stub that throws if it is ever invoked.
  const failIfCalled = async (): Promise<never> => {
    throw new Error("should not be called");
  };

  const outcome = await strengthenDraftWithLlm(request, runtime, { completeImpl: failIfCalled });

  assert.deepEqual(outcome, { kind: "fallback" });
});
|
|
298
|
+
|
|
299
|
+
// With a 1 ms timeout and a completion that never settles, the result must be the fallback.
test("strengthenDraftWithLlm falls back on timeout", async () => {
  const request = makeRequest();
  const runtime = makeRuntime();
  // The executor never resolves or rejects, so this promise stays pending forever.
  const neverSettles = (): Promise<never> => new Promise<never>(() => {});

  const outcome = await strengthenDraftWithLlm(request, runtime, {
    timeoutMs: 1,
    completeImpl: neverSettles,
  });

  assert.deepEqual(outcome, { kind: "fallback" });
});
|
|
309
|
+
|
|
310
|
+
test("strengthenDraftWithLlm normalizes a stronger full draft while preserving deterministic frontmatter", async () => {
|
|
311
|
+
const request = makeRequest();
|
|
312
|
+
const runtime = makeRuntime();
|
|
313
|
+
const rawDraft = `---\ncommands:\n - name: rogue\n run: rm -rf /\n timeout: 1\nmax_iterations: 1\ntimeout: 1\nguardrails:\n block_commands:\n - allow-all\n protected_files:\n - tmp/**\n---\nTask: Fix flaky auth tests\n\nAdd concrete verification steps, summarize the auth regression, and end with a concrete checklist.`;
|
|
314
|
+
|
|
315
|
+
const result = await strengthenDraftWithLlm(request, runtime, {
|
|
316
|
+
scope: "body-only",
|
|
317
|
+
completeImpl: async () =>
|
|
318
|
+
makeAssistantMessage([
|
|
319
|
+
{ type: "thinking", thinking: "drafting" },
|
|
320
|
+
{ type: "text", text: rawDraft },
|
|
321
|
+
]),
|
|
322
|
+
});
|
|
323
|
+
|
|
324
|
+
assert.equal(result.kind, "llm-strengthened");
|
|
325
|
+
const parsed = parseRalphMarkdown(result.draft.content);
|
|
326
|
+
const baseline = parseRalphMarkdown(request.baselineDraft);
|
|
327
|
+
|
|
328
|
+
assert.deepEqual(result.draft.target, request.target);
|
|
329
|
+
assert.deepEqual(parsed.frontmatter, baseline.frontmatter);
|
|
330
|
+
assert.match(parsed.body, /Add concrete verification steps/);
|
|
331
|
+
assert.deepEqual(extractDraftMetadata(result.draft.content), {
|
|
332
|
+
generator: "pi-ralph-loop",
|
|
333
|
+
version: 2,
|
|
334
|
+
source: "llm-strengthened",
|
|
335
|
+
task: "Fix flaky auth tests",
|
|
336
|
+
mode: "fix",
|
|
337
|
+
});
|
|
338
|
+
});
|
|
339
|
+
|
|
340
|
+
test("strengthenDraftWithLlm accepts only compatible body-and-commands changes", async () => {
|
|
341
|
+
const request = makeRequest();
|
|
342
|
+
const runtime = makeRuntime();
|
|
343
|
+
const baseline = parseRalphMarkdown(request.baselineDraft);
|
|
344
|
+
const [testsCommand, , gitLogCommand] = baseline.frontmatter.commands;
|
|
345
|
+
const rawDraft = `---
|
|
346
|
+
commands:
|
|
347
|
+
- name: ${gitLogCommand.name}
|
|
348
|
+
run: ${gitLogCommand.run}
|
|
349
|
+
timeout: ${Math.max(1, gitLogCommand.timeout - 5)}
|
|
350
|
+
- name: ${testsCommand.name}
|
|
351
|
+
run: ${testsCommand.run}
|
|
352
|
+
timeout: ${Math.max(1, testsCommand.timeout - 15)}
|
|
353
|
+
max_iterations: ${Math.max(1, baseline.frontmatter.maxIterations - 5)}
|
|
354
|
+
timeout: ${Math.max(1, Math.min(baseline.frontmatter.timeout, 120))}
|
|
355
|
+
guardrails:
|
|
356
|
+
block_commands:
|
|
357
|
+
- 'git\\s+push'
|
|
358
|
+
protected_files:
|
|
359
|
+
- '${SECRET_PATH_POLICY_TOKEN}'
|
|
360
|
+
---
|
|
361
|
+
${baseline.body.replace(/\{\{ commands\.lint \}\}/g, "")}
|
|
362
|
+
|
|
363
|
+
Use {{ commands.${testsCommand.name} }} and {{ commands.${gitLogCommand.name} }}.
|
|
364
|
+
`;
|
|
365
|
+
|
|
366
|
+
const result = await strengthenDraftWithLlm(request, runtime, {
|
|
367
|
+
scope: "body-and-commands",
|
|
368
|
+
completeImpl: async () => makeAssistantMessage([{ type: "text", text: rawDraft }]),
|
|
369
|
+
});
|
|
370
|
+
|
|
371
|
+
assert.equal(result.kind, "llm-strengthened");
|
|
372
|
+
const parsed = parseRalphMarkdown(result.draft.content);
|
|
373
|
+
|
|
374
|
+
assert.deepEqual(result.draft.target, request.target);
|
|
375
|
+
assert.deepEqual(parsed.frontmatter.commands, [
|
|
376
|
+
{ name: gitLogCommand.name, run: gitLogCommand.run, timeout: Math.max(1, gitLogCommand.timeout - 5) },
|
|
377
|
+
{ name: testsCommand.name, run: testsCommand.run, timeout: Math.max(1, testsCommand.timeout - 15) },
|
|
378
|
+
]);
|
|
379
|
+
assert.equal(parsed.frontmatter.maxIterations, Math.max(1, baseline.frontmatter.maxIterations - 5));
|
|
380
|
+
assert.equal(parsed.frontmatter.timeout, Math.max(1, Math.min(baseline.frontmatter.timeout, 120)));
|
|
381
|
+
assert.deepEqual(parsed.frontmatter.guardrails, baseline.frontmatter.guardrails);
|
|
382
|
+
assert.equal(parsed.body.trim(), `${baseline.body.replace(/\{\{ commands\.lint \}\}/g, "").trim()}\n\nUse {{ commands.${testsCommand.name} }} and {{ commands.${gitLogCommand.name} }}.`.trim());
|
|
383
|
+
assert.deepEqual(extractDraftMetadata(result.draft.content), {
|
|
384
|
+
generator: "pi-ralph-loop",
|
|
385
|
+
version: 2,
|
|
386
|
+
source: "llm-strengthened",
|
|
387
|
+
task: "Fix flaky auth tests",
|
|
388
|
+
mode: "fix",
|
|
389
|
+
});
|
|
390
|
+
});
|
|
391
|
+
|
|
392
|
+
test("strengthenDraftWithLlm falls back when unsupported frontmatter changes are requested in body-and-commands scope", async () => {
|
|
393
|
+
const request = makeRequest();
|
|
394
|
+
const runtime = makeRuntime();
|
|
395
|
+
const baseline = parseRalphMarkdown(request.baselineDraft);
|
|
396
|
+
const [testsCommand, , gitLogCommand] = baseline.frontmatter.commands;
|
|
397
|
+
const rawDraft = `---
|
|
398
|
+
commands:
|
|
399
|
+
- name: ${gitLogCommand.name}
|
|
400
|
+
run: ${gitLogCommand.run}
|
|
401
|
+
timeout: ${gitLogCommand.timeout}
|
|
402
|
+
- name: ${testsCommand.name}
|
|
403
|
+
run: ${testsCommand.run}
|
|
404
|
+
timeout: ${testsCommand.timeout}
|
|
405
|
+
max_iterations: ${baseline.frontmatter.maxIterations}
|
|
406
|
+
timeout: ${baseline.frontmatter.timeout}
|
|
407
|
+
guardrails:
|
|
408
|
+
block_commands:
|
|
409
|
+
- 'git\\s+push'
|
|
410
|
+
- 'rm\\s+-rf'
|
|
411
|
+
protected_files: []
|
|
412
|
+
---
|
|
413
|
+
${baseline.body}
|
|
414
|
+
|
|
415
|
+
Use {{ commands.${testsCommand.name} }} and {{ commands.${gitLogCommand.name} }}.
|
|
416
|
+
`;
|
|
417
|
+
|
|
418
|
+
const result = await strengthenDraftWithLlm(request, runtime, {
|
|
419
|
+
scope: "body-and-commands",
|
|
420
|
+
completeImpl: async () => makeAssistantMessage([{ type: "text", text: rawDraft }]),
|
|
421
|
+
});
|
|
422
|
+
|
|
423
|
+
assert.deepEqual(result, { kind: "fallback" });
|
|
424
|
+
});
|
|
425
|
+
|
|
426
|
+
// Model output that is plain prose rather than a RALPH draft must yield the fallback result.
test("strengthenDraftWithLlm falls back on invalid model output", async () => {
  const request = makeRequest();
  const runtime = makeRuntime();
  const proseOnlyReply = makeAssistantMessage([{ type: "text", text: "This is not a complete RALPH draft." }]);

  const outcome = await strengthenDraftWithLlm(request, runtime, {
    completeImpl: async () => proseOnlyReply,
  });

  assert.deepEqual(outcome, { kind: "fallback" });
});
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import test from "node:test";
|
|
6
|
+
import { createDraftPlan } from "../src/ralph-draft.ts";
|
|
7
|
+
import { generateDraft, slugifyTask, type DraftPlan, type DraftTarget } from "../src/ralph.ts";
|
|
8
|
+
import type { StrengthenDraftRuntime } from "../src/ralph-draft-llm.ts";
|
|
9
|
+
|
|
10
|
+
/**
 * Creates a uniquely named scratch directory inside the OS temp directory.
 *
 * @returns The path of the newly created directory (prefixed `pi-ralph-loop-draft-`).
 */
function createTempDir(): string {
  const prefix = join(tmpdir(), "pi-ralph-loop-draft-");
  const scratchDir = mkdtempSync(prefix);
  return scratchDir;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Derives the draft target for a task: a slug from `slugifyTask`, a directory
 * named after that slug under `cwd`, and a `RALPH.md` path inside it.
 *
 * @param cwd - Directory the target paths are placed under.
 * @param task - Task description the slug is derived from.
 * @returns The slug together with its directory and RALPH.md paths.
 */
function createTarget(cwd: string, task: string): DraftTarget {
  const slug = slugifyTask(task);
  const dirPath = join(cwd, slug);
  const ralphPath = join(cwd, slug, "RALPH.md");
  return { slug, dirPath, ralphPath };
}
|
|
22
|
+
|
|
23
|
+
/**
 * Produces a draft plan by generating a draft for a fixed npm repository
 * description and then overriding its `source` and `target`.
 *
 * @param task - Task description passed to `generateDraft`.
 * @param target - Target stored on the returned plan.
 * @param source - Value recorded as the plan's `source`.
 * @param cwd - Not used by this helper.
 * @returns The generated draft with `source`, `target` and `content` set.
 */
function makeDraftPlan(task: string, target: DraftTarget, source: DraftPlan["source"], cwd: string): DraftPlan {
  const generated = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });

  const plan: DraftPlan = {
    ...generated,
    source,
    target,
    content: generated.content,
  };
  return plan;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Builds the "active model" runtime injected into `createDraftPlan`: a stub
 * Anthropic model plus a registry whose auth lookup asserts it was asked about
 * that same model before returning a key and headers.
 *
 * @returns A runtime with model id `claude-sonnet-4-5`.
 */
function makeRuntime(): StrengthenDraftRuntime {
  const model: NonNullable<StrengthenDraftRuntime["model"]> = {
    provider: "anthropic",
    id: "claude-sonnet-4-5",
    name: "Claude Sonnet 4.5",
    api: "anthropic-messages",
    baseUrl: "https://example.invalid",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 200_000,
    maxTokens: 8_192,
  };

  const modelRegistry: StrengthenDraftRuntime["modelRegistry"] = {
    async getApiKeyAndHeaders(requestedModel) {
      // Fail the test if credentials are requested for any other model.
      assert.equal(requestedModel.id, "claude-sonnet-4-5");
      return { ok: true, apiKey: "active-api-key", headers: { "x-runtime": "1" } };
    },
  };

  return { model, modelRegistry };
}
|
|
63
|
+
|
|
64
|
+
/**
 * Environment variable names that `clearEnv` removes from `process.env`
 * (and `restoreEnv` later puts back) around each test.
 */
const CLEAN_ENV_KEYS = [
  // Anthropic
  "ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY",
  // Google Cloud
  "GOOGLE_CLOUD_API_KEY", "GOOGLE_APPLICATION_CREDENTIALS", "GOOGLE_CLOUD_PROJECT",
  "GCLOUD_PROJECT", "GOOGLE_CLOUD_LOCATION",
  // GitHub / Copilot
  "COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN",
  // Other API keys and tokens
  "OPENAI_API_KEY", "AZURE_OPENAI_API_KEY", "GEMINI_API_KEY", "GROQ_API_KEY",
  "CEREBRAS_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY", "AI_GATEWAY_API_KEY",
  "ZAI_API_KEY", "MISTRAL_API_KEY", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
  "HF_TOKEN", "OPENCODE_API_KEY", "KIMI_API_KEY",
  // AWS
  "AWS_PROFILE", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_BEARER_TOKEN_BEDROCK",
  "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", "AWS_CONTAINER_CREDENTIALS_FULL_URI",
  "AWS_WEB_IDENTITY_TOKEN_FILE",
] as const;
|
|
98
|
+
|
|
99
|
+
/**
 * Removes every variable named in `CLEAN_ENV_KEYS` from `process.env`.
 *
 * @returns A map from each key to the value it held beforehand (`undefined`
 *          when it was not set), suitable for passing to `restoreEnv`.
 */
function clearEnv() {
  // Capture all previous values first, then delete; the keys are independent.
  const snapshot = new Map<string, string | undefined>(
    CLEAN_ENV_KEYS.map((key): [string, string | undefined] => [key, process.env[key]]),
  );
  for (const key of snapshot.keys()) {
    delete process.env[key];
  }
  return snapshot;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Puts `process.env` back to the state recorded in `snapshot`.
 *
 * @param snapshot - Map of variable name to previous value. Entries whose
 *                   value is `undefined` are deleted from the environment;
 *                   all others are assigned back.
 */
function restoreEnv(snapshot: Map<string, string | undefined>) {
  snapshot.forEach((previous, name) => {
    if (typeof previous !== "undefined") {
      process.env[name] = previous;
      return;
    }
    delete process.env[name];
  });
}
|
|
117
|
+
|
|
118
|
+
// With a runtime supplied, createDraftPlan must call the injected strengthener
// exactly once, hand it that runtime and the body-and-commands scope, and
// return the strengthened draft.
test("createDraftPlan strengthens with an injected active model runtime", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  // Remove the listed credential variables for the duration of the test.
  const snapshot = clearEnv();
  t.after(() => restoreEnv(snapshot));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  const runtime = makeRuntime();
  const activeModel = runtime.model;
  assert.ok(activeModel);
  let invocations = 0;

  const draft = await createDraftPlan(task, target, cwd, runtime, {
    strengthenDraftWithLlmImpl: async (request, runtimeArg, options) => {
      invocations += 1;
      assert.equal(runtimeArg.model?.id, activeModel.id);
      assert.equal(runtimeArg.modelRegistry, runtime.modelRegistry);
      assert.equal(options?.scope, "body-and-commands");
      assert.match(request.baselineDraft, /reverse engineer this app/);
      const strengthened = makeDraftPlan(task, target, "llm-strengthened", cwd);
      return { kind: "llm-strengthened", draft: strengthened };
    },
  });

  assert.equal(invocations, 1);
  assert.equal(draft.source, "llm-strengthened");
  assert.deepEqual(draft.target, target);
});
|
|
147
|
+
|
|
148
|
+
// With no runtime supplied, createDraftPlan must not call the strengthener
// and must return a draft whose source is "fallback".
test("createDraftPlan falls back when no active model is selected", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  // Remove the listed credential variables for the duration of the test.
  const snapshot = clearEnv();
  t.after(() => restoreEnv(snapshot));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  let invocations = 0;

  const draft = await createDraftPlan(task, target, cwd, undefined, {
    strengthenDraftWithLlmImpl: async () => {
      invocations += 1;
      throw new Error("should not be called when no active model is available");
    },
  });

  assert.equal(invocations, 0);
  assert.equal(draft.source, "fallback");
});
|