@lnilluv/pi-ralph-loop 0.1.4-dev.0 → 0.1.4-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +63 -12
  2. package/package.json +1 -1
  3. package/src/index.ts +1034 -168
  4. package/src/ralph-draft-llm.ts +35 -7
  5. package/src/ralph-draft.ts +1 -1
  6. package/src/ralph.ts +708 -51
  7. package/src/runner-rpc.ts +434 -0
  8. package/src/runner-state.ts +822 -0
  9. package/src/runner.ts +957 -0
  10. package/tests/fixtures/parity/migrate/OPEN_QUESTIONS.md +3 -0
  11. package/tests/fixtures/parity/migrate/RALPH.md +27 -0
  12. package/tests/fixtures/parity/migrate/golden/MIGRATED.md +15 -0
  13. package/tests/fixtures/parity/migrate/legacy/source.md +6 -0
  14. package/tests/fixtures/parity/migrate/legacy/source.yaml +3 -0
  15. package/tests/fixtures/parity/migrate/scripts/show-legacy.sh +10 -0
  16. package/tests/fixtures/parity/migrate/scripts/verify.sh +15 -0
  17. package/tests/fixtures/parity/research/OPEN_QUESTIONS.md +3 -0
  18. package/tests/fixtures/parity/research/RALPH.md +45 -0
  19. package/tests/fixtures/parity/research/claim-evidence-checklist.md +15 -0
  20. package/tests/fixtures/parity/research/expected-outputs.md +22 -0
  21. package/tests/fixtures/parity/research/scripts/show-snapshots.sh +13 -0
  22. package/tests/fixtures/parity/research/scripts/verify.sh +55 -0
  23. package/tests/fixtures/parity/research/snapshots/app-factory-ai-cli.md +11 -0
  24. package/tests/fixtures/parity/research/snapshots/docs-factory-ai-cli-features-missions.md +11 -0
  25. package/tests/fixtures/parity/research/snapshots/factory-ai-news-missions.md +11 -0
  26. package/tests/fixtures/parity/research/source-manifest.md +20 -0
  27. package/tests/index.test.ts +3169 -104
  28. package/tests/parity/README.md +9 -0
  29. package/tests/parity/harness.py +526 -0
  30. package/tests/parity-harness.test.ts +42 -0
  31. package/tests/parity-research-fixture.test.ts +34 -0
  32. package/tests/ralph-draft-llm.test.ts +82 -9
  33. package/tests/ralph-draft.test.ts +1 -1
  34. package/tests/ralph.test.ts +1265 -36
  35. package/tests/runner-event-contract.test.ts +235 -0
  36. package/tests/runner-rpc.test.ts +358 -0
  37. package/tests/runner-state.test.ts +553 -0
  38. package/tests/runner.test.ts +1347 -0
@@ -0,0 +1,434 @@
1
+ import { spawn } from "node:child_process";
2
+ import { randomUUID } from "node:crypto";
3
+ import { fileURLToPath } from "node:url";
4
+
5
+ // --- Types ---
6
+
7
+ export type RpcEvent = {
8
+ type: string;
9
+ [key: string]: unknown;
10
+ };
11
+
12
+ export type RpcSubprocessConfig = {
13
+ prompt: string;
14
+ cwd: string;
15
+ timeoutMs: number;
16
+ /** Override the spawn command for testing. Defaults to "pi" */
17
+ spawnCommand?: string;
18
+ /** Override spawn args for testing. Defaults to ["--mode", "rpc", "--no-session"] */
19
+ spawnArgs?: string[];
20
+ /** Additional environment variables for the subprocess */
21
+ env?: Record<string, string>;
22
+ /** Model selection for RPC subprocess. Format: "provider/modelId" or "provider/modelId:thinkingLevel"
23
+ * Examples: "anthropic/claude-sonnet-4-20250514" or "openai-codex/gpt-5.4-mini:high"
24
+ * Parsed into set_model + set_thinking_level commands.
25
+ */
26
+ modelPattern?: string;
27
+ /** Explicit provider for set_model (overrides modelPattern provider) */
28
+ provider?: string;
29
+ /** Explicit modelId for set_model (overrides modelPattern modelId) */
30
+ modelId?: string;
31
+ /** Thinking level for set_thinking_level: "off", "minimal", "low", "medium", "high", "xhigh".
32
+ * Also parsed from modelPattern suffix (e.g. ":high").
33
+ */
34
+ thinkingLevel?: string;
35
+ /** Callback for observing events as they stream */
36
+ onEvent?: (event: RpcEvent) => void;
37
+ };
38
+
39
+ export type RpcTelemetry = {
40
+ spawnedAt: string;
41
+ promptSentAt?: string;
42
+ firstStdoutEventAt?: string;
43
+ lastEventAt?: string;
44
+ lastEventType?: string;
45
+ exitedAt?: string;
46
+ timedOutAt?: string;
47
+ exitCode?: number | null;
48
+ exitSignal?: NodeJS.Signals | null;
49
+ stderrText?: string;
50
+ error?: string;
51
+ };
52
+
53
+ export type RpcSubprocessResult = {
54
+ success: boolean;
55
+ lastAssistantText: string;
56
+ agentEndMessages: unknown[];
57
+ timedOut: boolean;
58
+ error?: string;
59
+ telemetry: RpcTelemetry;
60
+ };
61
+
62
+ export type RpcPromptResult = {
63
+ success: boolean;
64
+ error?: string;
65
+ };
66
+
67
+ // --- RPC JSONL Parsing ---
68
+
69
+ export function parseRpcEvent(line: string): RpcEvent {
70
+ const trimmed = line.trim();
71
+ if (!trimmed) return { type: "empty" };
72
+ try {
73
+ const parsed: unknown = JSON.parse(trimmed);
74
+ if (typeof parsed === "object" && parsed !== null && "type" in parsed) {
75
+ return parsed as RpcEvent;
76
+ }
77
+ return { type: "unknown" };
78
+ } catch {
79
+ return { type: "unknown" };
80
+ }
81
+ }
82
+
83
+ function extractAssistantText(messages: unknown[]): string {
84
+ if (!Array.isArray(messages)) return "";
85
+ const texts: string[] = [];
86
+ for (const msg of messages) {
87
+ if (
88
+ typeof msg === "object" &&
89
+ msg !== null &&
90
+ "role" in msg &&
91
+ (msg as Record<string, unknown>).role === "assistant" &&
92
+ "content" in msg
93
+ ) {
94
+ const content = (msg as Record<string, unknown>).content;
95
+ if (Array.isArray(content)) {
96
+ for (const block of content) {
97
+ if (
98
+ typeof block === "object" &&
99
+ block !== null &&
100
+ "type" in block &&
101
+ (block as Record<string, unknown>).type === "text" &&
102
+ "text" in block
103
+ ) {
104
+ texts.push(String((block as Record<string, unknown>).text));
105
+ }
106
+ }
107
+ } else if (typeof content === "string") {
108
+ texts.push(content);
109
+ }
110
+ }
111
+ }
112
+ return texts.join("");
113
+ }
114
+
115
+ // --- RPC Subprocess Execution ---
116
+
117
+ export async function runRpcIteration(config: RpcSubprocessConfig): Promise<RpcSubprocessResult> {
118
+ const {
119
+ prompt,
120
+ cwd,
121
+ timeoutMs,
122
+ spawnCommand = "pi",
123
+ spawnArgs,
124
+ env,
125
+ modelPattern,
126
+ provider: explicitProvider,
127
+ modelId: explicitModelId,
128
+ onEvent,
129
+ } = config;
130
+
131
+ // Parse modelPattern ("provider/modelId" or "provider/modelId:thinking") into provider and modelId
132
+ let modelProvider = explicitProvider;
133
+ let modelModelId = explicitModelId;
134
+ let thinkingLevel = config.thinkingLevel;
135
+ if (modelPattern && !explicitModelId) {
136
+ // Extract thinking level suffix (e.g. ":high")
137
+ const lastColonIdx = modelPattern.lastIndexOf(":");
138
+ const validThinkingLevels = new Set(["off", "minimal", "low", "medium", "high", "xhigh"]);
139
+ let patternWithoutThinking = modelPattern;
140
+ if (lastColonIdx > 0 && validThinkingLevels.has(modelPattern.slice(lastColonIdx + 1))) {
141
+ thinkingLevel = modelPattern.slice(lastColonIdx + 1);
142
+ patternWithoutThinking = modelPattern.slice(0, lastColonIdx);
143
+ }
144
+
145
+ const slashIdx = patternWithoutThinking.indexOf("/");
146
+ if (slashIdx > 0) {
147
+ modelProvider = patternWithoutThinking.slice(0, slashIdx);
148
+ modelModelId = patternWithoutThinking.slice(slashIdx + 1);
149
+ }
150
+ }
151
+
152
+ const extensionPath = fileURLToPath(new URL("./index.ts", import.meta.url));
153
+ const args = spawnArgs ?? ["--mode", "rpc", "--no-session", "-e", extensionPath];
154
+ const subprocessEnv = { ...process.env, ...env };
155
+ const telemetry: RpcTelemetry = {
156
+ spawnedAt: new Date().toISOString(),
157
+ };
158
+
159
+ let childProcess: ReturnType<typeof spawn>;
160
+ let stderrText = "";
161
+ const buildResult = (result: Omit<RpcSubprocessResult, "telemetry">): RpcSubprocessResult => ({
162
+ ...result,
163
+ telemetry: {
164
+ ...telemetry,
165
+ ...(stderrText ? { stderrText } : {}),
166
+ },
167
+ });
168
+
169
+ try {
170
+ childProcess = spawn(spawnCommand, args, {
171
+ cwd,
172
+ env: subprocessEnv,
173
+ stdio: ["pipe", "pipe", "pipe"],
174
+ });
175
+ } catch (err) {
176
+ telemetry.error = err instanceof Error ? err.message : String(err);
177
+ return buildResult({
178
+ success: false,
179
+ lastAssistantText: "",
180
+ agentEndMessages: [],
181
+ timedOut: false,
182
+ error: telemetry.error,
183
+ });
184
+ }
185
+
186
+ return new Promise<RpcSubprocessResult>((resolve) => {
187
+ let settled = false;
188
+ let lastAssistantText = "";
189
+ let agentEndMessages: unknown[] = [];
190
+ let promptSent = false;
191
+ let promptAcknowledged = false;
192
+ let sawAgentEnd = false;
193
+ let modelSetAcknowledged = !(modelProvider && modelModelId); // true if no set_model needed
194
+ let thinkingLevelAcknowledged = !thinkingLevel; // true if no set_thinking_level needed
195
+
196
+ const nowIso = () => new Date().toISOString();
197
+ const markStdoutEvent = (eventType: string) => {
198
+ const observedAt = nowIso();
199
+ if (!telemetry.firstStdoutEventAt) telemetry.firstStdoutEventAt = observedAt;
200
+ telemetry.lastEventAt = observedAt;
201
+ telemetry.lastEventType = eventType;
202
+ };
203
+
204
+ const timeout = setTimeout(() => {
205
+ if (settled) return;
206
+ settled = true;
207
+ telemetry.timedOutAt = nowIso();
208
+ try {
209
+ childProcess.kill("SIGKILL");
210
+ } catch {
211
+ // process may already be dead
212
+ }
213
+ resolve(buildResult({
214
+ success: false,
215
+ lastAssistantText,
216
+ agentEndMessages,
217
+ timedOut: true,
218
+ }));
219
+ }, timeoutMs);
220
+
221
+ const endStdin = () => {
222
+ // Close stdin so the subprocess knows no more commands are coming
223
+ try {
224
+ childProcess.stdin?.end();
225
+ } catch {
226
+ // already closed
227
+ }
228
+ };
229
+
230
+ const cleanup = () => {
231
+ clearTimeout(timeout);
232
+ endStdin();
233
+ };
234
+
235
+ const settle = (result: RpcSubprocessResult) => {
236
+ if (settled) return;
237
+ settled = true;
238
+ cleanup();
239
+ // Kill subprocess if still running
240
+ try {
241
+ childProcess.kill();
242
+ } catch {
243
+ // already dead
244
+ }
245
+ resolve(result);
246
+ };
247
+
248
+ // Set up stderr collection
249
+ childProcess.stderr?.on("data", (data: Buffer) => {
250
+ stderrText += data.toString("utf8");
251
+ });
252
+
253
+ // Set up stdout line reader
254
+ let stdoutBuffer = "";
255
+ childProcess.stdout?.on("data", (data: Buffer) => {
256
+ stdoutBuffer += data.toString("utf8");
257
+
258
+ // Parse complete lines
259
+ let newlineIndex: number;
260
+ while ((newlineIndex = stdoutBuffer.indexOf("\n")) !== -1) {
261
+ const line = stdoutBuffer.slice(0, newlineIndex);
262
+ stdoutBuffer = stdoutBuffer.slice(newlineIndex + 1);
263
+
264
+ // Handle \r\n
265
+ const trimmedLine = line.endsWith("\r") ? line.slice(0, -1) : line;
266
+ if (!trimmedLine) continue;
267
+
268
+ const event = parseRpcEvent(trimmedLine);
269
+ markStdoutEvent(event.type);
270
+ onEvent?.(event);
271
+
272
+ if (event.type === "response") {
273
+ const resp = event as { command?: string; success?: boolean };
274
+ if (resp.command === "set_model" && resp.success === true) {
275
+ modelSetAcknowledged = true;
276
+ }
277
+ if (resp.command === "set_thinking_level" && resp.success === true) {
278
+ thinkingLevelAcknowledged = true;
279
+ }
280
+ if (resp.command === "prompt" && resp.success === true) {
281
+ promptAcknowledged = true;
282
+ }
283
+ continue;
284
+ }
285
+
286
+ if (event.type === "agent_end") {
287
+ const endEvent = event as { messages?: unknown[] };
288
+ sawAgentEnd = true;
289
+ agentEndMessages = Array.isArray(endEvent.messages) ? endEvent.messages : [];
290
+ lastAssistantText = extractAssistantText(agentEndMessages);
291
+ endStdin();
292
+ continue;
293
+ }
294
+ }
295
+ });
296
+
297
+ childProcess.on("error", (err: Error) => {
298
+ telemetry.error = err.message;
299
+ settle(buildResult({
300
+ success: false,
301
+ lastAssistantText,
302
+ agentEndMessages,
303
+ timedOut: false,
304
+ error: err.message,
305
+ }));
306
+ });
307
+ childProcess.stdin?.on("error", (err: Error & { code?: string }) => {
308
+ if (settled) return;
309
+ const error = err.code === "EPIPE" ? "Subprocess closed stdin before prompt could be sent" : err.message;
310
+ telemetry.error = error;
311
+ settle(buildResult({
312
+ success: false,
313
+ lastAssistantText,
314
+ agentEndMessages,
315
+ timedOut: false,
316
+ error,
317
+ }));
318
+ });
319
+
320
+ childProcess.on("close", (code: number | null, signal: NodeJS.Signals | null) => {
321
+ if (settled) return;
322
+ telemetry.exitedAt = nowIso();
323
+ telemetry.exitCode = code;
324
+ telemetry.exitSignal = signal;
325
+
326
+ const closeError =
327
+ code !== 0 && code !== null
328
+ ? `Subprocess exited with code ${code}${stderrText ? `: ${stderrText.slice(0, 200)}` : ""}`
329
+ : signal
330
+ ? `Subprocess exited due to signal ${signal}${stderrText ? `: ${stderrText.slice(0, 200)}` : ""}`
331
+ : sawAgentEnd
332
+ ? undefined
333
+ : "Subprocess exited without sending agent_end";
334
+ if (closeError) telemetry.error = closeError;
335
+
336
+ settle(buildResult({
337
+ success: sawAgentEnd && code === 0 && signal === null,
338
+ lastAssistantText,
339
+ agentEndMessages,
340
+ timedOut: false,
341
+ error: closeError,
342
+ }));
343
+ });
344
+
345
+ // Send set_model command if provider/model are specified
346
+ if (modelProvider && modelModelId) {
347
+ const setModelCommand = JSON.stringify({
348
+ type: "set_model",
349
+ provider: modelProvider,
350
+ modelId: modelModelId,
351
+ });
352
+ try {
353
+ childProcess.stdin?.write(setModelCommand + "\n");
354
+ } catch (err) {
355
+ const error = `Failed to send set_model command: ${err instanceof Error ? err.message : String(err)}`;
356
+ telemetry.error = error;
357
+ settle(buildResult({
358
+ success: false,
359
+ lastAssistantText,
360
+ agentEndMessages,
361
+ timedOut: false,
362
+ error,
363
+ }));
364
+ return;
365
+ }
366
+ }
367
+
368
+ // Send set_thinking_level if specified
369
+ if (thinkingLevel) {
370
+ const setThinkingCommand = JSON.stringify({
371
+ type: "set_thinking_level",
372
+ level: thinkingLevel,
373
+ });
374
+ try {
375
+ childProcess.stdin?.write(setThinkingCommand + "\n");
376
+ } catch (err) {
377
+ const error = `Failed to send set_thinking_level command: ${err instanceof Error ? err.message : String(err)}`;
378
+ telemetry.error = error;
379
+ settle(buildResult({
380
+ success: false,
381
+ lastAssistantText,
382
+ agentEndMessages,
383
+ timedOut: false,
384
+ error,
385
+ }));
386
+ return;
387
+ }
388
+ }
389
+
390
+ // Wait for set_model acknowledgment before sending prompt
391
+ const sendPrompt = () => {
392
+ // Send the prompt command
393
+ const promptCommand = JSON.stringify({
394
+ type: "prompt",
395
+ id: `ralph-${randomUUID()}`,
396
+ message: prompt,
397
+ });
398
+
399
+ try {
400
+ telemetry.promptSentAt = telemetry.promptSentAt ?? nowIso();
401
+ childProcess.stdin?.write(promptCommand + "\n");
402
+ promptSent = true;
403
+ } catch (err) {
404
+ const error = err instanceof Error ? err.message : String(err);
405
+ telemetry.error = error;
406
+ settle(buildResult({
407
+ success: false,
408
+ lastAssistantText,
409
+ agentEndMessages,
410
+ timedOut: false,
411
+ error,
412
+ }));
413
+ }
414
+ };
415
+
416
+ if (modelSetAcknowledged && thinkingLevelAcknowledged) {
417
+ sendPrompt();
418
+ } else {
419
+ const waitForAcknowledgements = async () => {
420
+ const deadline = Date.now() + 5000;
421
+ while (!settled && !promptSent && Date.now() < deadline) {
422
+ if (modelSetAcknowledged && thinkingLevelAcknowledged) break;
423
+ await new Promise<void>((resolveWait) => setTimeout(resolveWait, 50));
424
+ }
425
+ };
426
+
427
+ void waitForAcknowledgements().then(() => {
428
+ if (!settled && !promptSent) {
429
+ sendPrompt();
430
+ }
431
+ });
432
+ }
433
+ });
434
+ }