@entelligentsia/pi-ralph 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,147 @@
1
+ # pi-ralph
2
+
3
+ **A [pi](https://github.com/earendil-works/pi) extension** — iterative goal-achievement loop with Generator → Critique → Judge agents.
4
+
5
+ [![npm](https://img.shields.io/npm/v/@entelligentsia/pi-ralph?style=flat-square)](https://www.npmjs.com/package/@entelligentsia/pi-ralph)
6
+
7
+ ## Install
8
+
9
+ ```
10
+ pi install npm:@entelligentsia/pi-ralph
11
+ ```
12
+
13
+ Or pin a version:
14
+
15
+ ```
16
+ pi install npm:@entelligentsia/pi-ralph@1.0.0
17
+ ```
18
+
19
+ From GitHub:
20
+
21
+ ```
22
+ pi install git:github.com:Entelligentsia/pi-ralph
23
+ ```
24
+
25
+ ## What It Does
26
+
27
+ Give it a goal. It loops — generate, critique, judge — until the goal is met or iterations run out.
28
+
29
+ Each agent's system prompt is dynamically generated from the goal, so they're domain-aligned from the start. Output streams to chat as it happens — like `tail -f`.
30
+
31
+ ## Usage
32
+
33
+ ```
34
+ /ralph-loop-anything --goal "Your goal here" --loop 5
35
+ ```
36
+
37
+ - `--goal` (required): The goal to achieve
38
+ - `--loop` (optional, default 3): Maximum iterations
39
+
40
+ ## How It Works
41
+
42
+ ```
43
+ 1. Feasibility check → can an LLM do this?
44
+ 2. Prompt generation → domain + agent prompts tailored to the goal
45
+ 3. Loop (up to N):
46
+ ├─ Generate → produces a result
47
+ ├─ Critique → identifies problems, suggests improvements
48
+ └─ Judge → { done: true/false, reason }
49
+ └── if not done: feeds result + criticism back to Generator
50
+ 4. Final message with verdict
51
+ ```
52
+
53
+ Every step posts a message to chat as it completes. No hidden progress bars — you see each agent's output the moment it arrives.
54
+
55
+ ### LLM Calls Per Run
56
+
57
+ | Step | Calls |
58
+ |------|-------|
59
+ | Feasibility | 1 |
60
+ | Prompt generation | 1 |
61
+ | Per loop iteration | 3 (generate + critique + judge) |
62
+
63
+ Maximum for `--loop 3`: 1 + 1 + (3 × 3) = **11** LLM calls (fewer when the Judge accepts a result before the last iteration)
64
+
65
+ ## Examples
66
+
67
+ ### Creative Writing
68
+
69
+ ```
70
+ /ralph-loop-anything --goal "Write a complete short story in one sentence and fewer than 10 words that is better than Hemingway could write"
71
+ ```
72
+
73
+ ```
74
+ /ralph-loop-anything --goal "Write a villanelle poem about debugging at 3am"
75
+ ```
76
+
77
+ ```
78
+ /ralph-loop-anything --goal "Invent a new myth that explains why rivers bend, told as if by a 9th-century monk"
79
+ ```
80
+
81
+ ### Code
82
+
83
+ ```
84
+ /ralph-loop-anything --goal "Write an ergonomic CLI argument parser in Rust that handles flags, options, subcommands, and generates help text" --loop 5
85
+ ```
86
+
87
+ ```
88
+ /ralph-loop-anything --goal "Implement a lock-free concurrent hash map in Zig"
89
+ ```
90
+
91
+ ```
92
+ /ralph-loop-anything --goal "Write a single-file SQLite clone in C that supports CREATE TABLE, INSERT, and SELECT with WHERE clauses"
93
+ ```
94
+
95
+ ### Design & Strategy
96
+
97
+ ```
98
+ /ralph-loop-anything --goal "Design a go-to-market strategy for a developer tools startup that has a free CLI tool but wants to monetize a team tier"
99
+ ```
100
+
101
+ ```
102
+ /ralph-loop-anything --goal "Create a 12-week fitness program for a 40-year-old desk worker who has 30 minutes a day and bad knees"
103
+ ```
104
+
105
+ ```
106
+ /ralph-loop-anything --goal "Write a production-ready incident response playbook for a SaaS company experiencing a data breach"
107
+ ```
108
+
109
+ ### Explaining & Teaching
110
+
111
+ ```
112
+ /ralph-loop-anything --goal "Explain monads to a JavaScript developer who has never used Haskell, using only analogies from web development"
113
+ ```
114
+
115
+ ```
116
+ /ralph-loop-anything --goal "Create a 5-minute presentation script that explains neural networks to a room of skeptical middle managers"
117
+ ```
118
+
119
+ ### Constraints & Style
120
+
121
+ ```
122
+ /ralph-loop-anything --goal "Rewrite the Gettysburg Address as if it were a Slack announcement from a tech CEO" --loop 2
123
+ ```
124
+
125
+ ```
126
+ /ralph-loop-anything --goal "Write a recipe for coq au vin where every step is a haiku"
127
+ ```
128
+
129
+ ```
130
+ /ralph-loop-anything --goal "Explain quantum entanglement using only words with 4 letters or fewer"
131
+ ```
132
+
133
+ ## Architecture
134
+
135
+ | File | Purpose |
136
+ |------|---------|
137
+ | `index.ts` | Thin entry point — parses args, calls orchestrator |
138
+ | `agents/orchestrator.ts` | Coordinates full flow: feasibility, prompts, loop, verdict |
139
+ | `agents/generator.ts` | Generator agent — produces or improves a result |
140
+ | `agents/critique.ts` | Critique agent — evaluates result against the goal |
141
+ | `agents/judge.ts` | Judge agent — decides if goal is achieved |
142
+ | `types.ts` | Shared interfaces (`LoopIteration`, `LoopStep`, etc.) |
143
+ | `prompts.ts` | Static system prompts (feasibility, prompt generator, fallbacks) |
144
+ | `helpers.ts` | Text utilities (`truncate`, `firstNLines`) |
145
+ | `llm.ts` | LLM client (`oneshotLLM`, `parseJsonResponse`) |
146
+ | `argParser.ts` | Command argument parsing |
147
+ | `renderer.ts` | Message renderer — passes markdown content through |
@@ -0,0 +1,77 @@
1
+ // ============================================================================
2
+ // Critique Agent
3
+ //
4
+ // Evaluates a result against the goal.
5
+ // Identifies problems, gaps, and suggests concrete improvements.
6
+ // ============================================================================
7
+
8
+ import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
9
+ import { oneshotLLM } from "../llm";
10
+ import { FALLBACK_CRITIQUE_PROMPT } from "../prompts";
11
+
12
+ // ---------------------------------------------------------------------------
13
+ // Types
14
+ // ---------------------------------------------------------------------------
15
+
16
/** Everything the critique agent needs to review one candidate result. */
export interface CritiqueInput {
  /** The goal the result is evaluated against. */
  goal: string;
  /** System prompt for the critique call; an empty string makes `execute` use the fallback prompt. */
  systemPrompt: string;
  /** The candidate result to critique. */
  result: string;
}
21
+
22
/** Outcome of one critique call. */
export interface CritiqueResult {
  /** The critique text; `execute` sets it to an empty string when the call failed. */
  text: string;
  /** Error message; only set when the LLM call failed. */
  error?: string;
}
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Execution
29
+ // ---------------------------------------------------------------------------
30
+
31
/**
 * Runs the critique agent once against a candidate result.
 *
 * @param ctx - Command context forwarded to the LLM client.
 * @param input - Goal, system prompt, and the result to critique.
 * @param signal - Optional abort signal forwarded to the LLM call.
 * @returns The critique text, or an empty text plus `error` when the call failed.
 */
export async function execute(
  ctx: ExtensionCommandContext,
  input: CritiqueInput,
  signal?: AbortSignal,
): Promise<CritiqueResult> {
  const response = await oneshotLLM(
    ctx,
    // An empty generated prompt falls back to the static critique prompt.
    input.systemPrompt || FALLBACK_CRITIQUE_PROMPT,
    buildUserMessage(input),
    signal,
  );

  return response.error
    ? { text: "", error: response.error }
    : { text: response.text };
}
47
+
48
+ // ---------------------------------------------------------------------------
49
+ // Message Construction
50
+ // ---------------------------------------------------------------------------
51
+
52
/**
 * Builds the user message for the critique call: the goal, the result under
 * review, and the instruction to evaluate it.
 */
function buildUserMessage(input: CritiqueInput): string {
  const { goal, result } = input;
  return (
    `Goal: ${goal}\n\n` +
    `Result to evaluate:\n${result}\n\n` +
    `Critically evaluate this result against the goal. Identify problems and suggest improvements.`
  );
}
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Chat Message Formatting
65
+ // ---------------------------------------------------------------------------
66
+
67
/** Heading posted to chat just before the critique call of iteration `loop` starts. */
export function formatBefore(loop: number, maxLoops: number): string {
  const progress = `${loop}/${maxLoops}`;
  return "## Loop " + progress + " — Critiquing...";
}
70
+
71
/** Chat message showing the critique produced in iteration `loop`. */
export function formatAfter(loop: number, maxLoops: number, text: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Critique`;
  return [heading, "", text].join("\n");
}
74
+
75
/** Chat message reporting that the critique call of iteration `loop` failed. */
export function formatError(loop: number, maxLoops: number, error: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Critique failed`;
  return [heading, "", error].join("\n");
}
@@ -0,0 +1,88 @@
1
+ // ============================================================================
2
+ // Generator Agent
3
+ //
4
+ // Produces or improves a result for the goal.
5
+ // On first run: works from the goal alone.
6
+ // On revision: receives previous result + criticism and improves.
7
+ // ============================================================================
8
+
9
+ import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
10
+ import { oneshotLLM } from "../llm";
11
+ import { FALLBACK_GENERATOR_PROMPT } from "../prompts";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Types
15
+ // ---------------------------------------------------------------------------
16
+
17
/** Everything the generator agent needs to produce or revise a result. */
export interface GeneratorInput {
  /** The goal to achieve. */
  goal: string;
  /** System prompt for the generator call; an empty string makes `execute` use the fallback prompt. */
  systemPrompt: string;
  /** Result of the previous iteration to improve on; left undefined on the first run. */
  previousResult?: string;
  /** Criticism of the previous result; left undefined on the first run. */
  previousCriticism?: string;
}
25
+
26
/** Outcome of one generator call. */
export interface GeneratorResult {
  /** The generated text; `execute` sets it to an empty string when the call failed. */
  text: string;
  /** Error message; only set when the LLM call failed. */
  error?: string;
}
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Execution
33
+ // ---------------------------------------------------------------------------
34
+
35
/**
 * Runs the generator agent once, either from the goal alone or as a revision
 * of a previous result.
 *
 * @param ctx - Command context forwarded to the LLM client.
 * @param input - Goal, system prompt, and (on revisions) the previous result and criticism.
 * @param signal - Optional abort signal forwarded to the LLM call.
 * @returns The generated text, or an empty text plus `error` when the call failed.
 */
export async function execute(
  ctx: ExtensionCommandContext,
  input: GeneratorInput,
  signal?: AbortSignal,
): Promise<GeneratorResult> {
  const response = await oneshotLLM(
    ctx,
    // An empty generated prompt falls back to the static generator prompt.
    input.systemPrompt || FALLBACK_GENERATOR_PROMPT,
    buildUserMessage(input),
    signal,
  );

  return response.error
    ? { text: "", error: response.error }
    : { text: response.text };
}
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Message Construction
54
+ // ---------------------------------------------------------------------------
55
+
56
/**
 * Builds the user message for the generator call.
 *
 * Without a previous result (undefined or empty) the message asks for a first
 * attempt; otherwise it includes the previous result and its criticism and
 * asks for an improved version.
 */
function buildUserMessage(input: GeneratorInput): string {
  const { goal, previousResult, previousCriticism } = input;

  if (previousResult) {
    return (
      `Goal: ${goal}\n\n` +
      `Previous result:\n${previousResult}\n\n` +
      `Criticism of the previous result:\n${previousCriticism || "(none)"}\n\n` +
      `Improve upon the previous result, carefully addressing all the criticisms. Produce a better version.`
    );
  }

  return `Goal: ${goal}\n\nProduce your best result to achieve this goal.`;
}
73
+
74
+ // ---------------------------------------------------------------------------
75
+ // Chat Message Formatting
76
+ // ---------------------------------------------------------------------------
77
+
78
/** Heading posted to chat just before the generator call of iteration `loop` starts. */
export function formatBefore(loop: number, maxLoops: number): string {
  const progress = `${loop}/${maxLoops}`;
  return "## Loop " + progress + " — Generating...";
}
81
+
82
/** Chat message showing the result generated in iteration `loop`. */
export function formatAfter(loop: number, maxLoops: number, text: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Generated`;
  return [heading, "", text].join("\n");
}
85
+
86
/** Chat message reporting that the generator call of iteration `loop` failed. */
export function formatError(loop: number, maxLoops: number, error: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Generator failed`;
  return [heading, "", error].join("\n");
}
@@ -0,0 +1,95 @@
1
+ // ============================================================================
2
+ // Judge Agent
3
+ //
4
+ // Determines if a result adequately achieves the goal, given the criticism.
5
+ // Returns structured verdict: { done: boolean, reason: string }.
6
+ // ============================================================================
7
+
8
+ import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
9
+ import { oneshotLLM, parseJsonResponse } from "../llm";
10
+ import { FALLBACK_JUDGE_PROMPT } from "../prompts";
11
+
12
+ // ---------------------------------------------------------------------------
13
+ // Types
14
+ // ---------------------------------------------------------------------------
15
+
16
/** Everything the judge agent needs to rule on one result. */
export interface JudgeInput {
  /** The goal the result is judged against. */
  goal: string;
  /** System prompt for the judge call; an empty string makes `execute` use the fallback prompt. */
  systemPrompt: string;
  /** The candidate result. */
  result: string;
  /** The critique of that result. */
  criticism: string;
}
22
+
23
/** Structured verdict returned by the judge agent. */
export interface JudgeResult {
  /** True only when the judge's parsed reply has `done` set to exactly `true`. */
  done: boolean;
  /** The judge's stated reason, or a note that the reply could not be parsed. */
  reason: string;
  /** The unparsed text of the judge's reply; empty when the call failed. */
  raw: string;
  /** Error message; only set when the LLM call failed. */
  error?: string;
}
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Execution
32
+ // ---------------------------------------------------------------------------
33
+
34
/**
 * Runs the judge agent once and converts its reply into a structured verdict.
 *
 * @param ctx - Command context forwarded to the LLM client.
 * @param input - Goal, system prompt, result, and criticism to rule on.
 * @param signal - Optional abort signal forwarded to the LLM call.
 * @returns The verdict. `done` is false when the call failed, when the reply
 *   could not be parsed as JSON, or when the parsed `done` is not exactly `true`.
 */
export async function execute(
  ctx: ExtensionCommandContext,
  input: JudgeInput,
  signal?: AbortSignal,
): Promise<JudgeResult> {
  const response = await oneshotLLM(
    ctx,
    // An empty generated prompt falls back to the static judge prompt.
    input.systemPrompt || FALLBACK_JUDGE_PROMPT,
    buildUserMessage(input),
    signal,
  );

  if (response.error) {
    return { done: false, reason: "", raw: "", error: response.error };
  }

  const raw = response.text;
  const verdict = parseJsonResponse<{ done?: boolean; reason?: string }>(raw);

  if (!verdict) {
    // Keep a short excerpt of the reply so the user can see what went wrong.
    return { done: false, reason: `(could not parse judge response) ${raw.slice(0, 200)}`, raw };
  }

  return { done: verdict.done === true, reason: verdict.reason || "", raw };
}
61
+
62
+ // ---------------------------------------------------------------------------
63
+ // Message Construction
64
+ // ---------------------------------------------------------------------------
65
+
66
/**
 * Builds the user message for the judge call: the goal, the result, its
 * criticism, and the instruction to answer with JSON.
 */
function buildUserMessage(input: JudgeInput): string {
  const { goal, result, criticism } = input;
  return (
    `Goal: ${goal}\n\n` +
    `Result:\n${result}\n\n` +
    `Criticism:\n${criticism}\n\n` +
    `Determine if the goal has been adequately achieved. Respond with JSON.`
  );
}
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // Chat Message Formatting
82
+ // ---------------------------------------------------------------------------
83
+
84
/** Heading posted to chat just before the judge call of iteration `loop` starts. */
export function formatBefore(loop: number, maxLoops: number): string {
  const progress = `${loop}/${maxLoops}`;
  return "## Loop " + progress + " — Judging...";
}
87
+
88
/** Chat message showing the judge's verdict and reason for iteration `loop`. */
export function formatAfter(loop: number, maxLoops: number, done: boolean, reason: string): string {
  let verdict = "CONTINUE ✗";
  if (done) {
    verdict = "DONE ✓";
  }
  const heading = `## Loop ${loop}/${maxLoops} — Judge: ${verdict}`;
  return [heading, "", reason].join("\n");
}
92
+
93
/** Chat message reporting that the judge call of iteration `loop` failed. */
export function formatError(loop: number, maxLoops: number, error: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Judge failed`;
  return [heading, "", error].join("\n");
}
@@ -0,0 +1,293 @@
1
+ // ============================================================================
2
+ // Orchestrator Agent
3
+ //
4
+ // Coordinates the full ralph-loop flow:
5
+ // 1. Feasibility check
6
+ // 2. Domain identification + agent prompt generation
7
+ // 3. Loop: Generator → Critique → Judge (repeat until done or max loops)
8
+ // 4. Final verdict
9
+ //
10
+ // Every step posts a message to chat as it completes — tail -f style.
11
+ // ============================================================================
12
+
13
+ import type { ExtensionAPI, ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
14
+ import { oneshotLLM, parseJsonResponse } from "../llm";
15
+ import { FEASIBILITY_PROMPT, PROMPT_GENERATOR_PROMPT, FALLBACK_GENERATOR_PROMPT, FALLBACK_CRITIQUE_PROMPT, FALLBACK_JUDGE_PROMPT } from "../prompts";
16
+ import { truncate } from "../helpers";
17
+ import * as Generator from "./generator";
18
+ import * as Critique from "./critique";
19
+ import * as Judge from "./judge";
20
+ import type { AgentPrompts, RalphLoopResult, LoopIteration } from "../types";
21
+
22
+ // ---------------------------------------------------------------------------
23
+ // Types
24
+ // ---------------------------------------------------------------------------
25
+
26
/** Parameters for one full ralph-loop run. */
export interface OrchestratorInput {
  /** The goal to achieve. */
  goal: string;
  /** Upper bound on Generate → Critique → Judge iterations. */
  maxLoops: number;
}
30
+
31
/** What a completed ralph-loop run returns to its caller. */
export interface OrchestratorResult {
  /** Full record of the run, also attached to the final chat message. */
  details: RalphLoopResult;
  /** Whether the judge marked the last iteration as done. */
  achieved: boolean;
}
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Execution
38
+ // ---------------------------------------------------------------------------
39
+
40
/**
 * Runs the complete ralph-loop flow for one goal, posting a chat message after
 * every step.
 *
 * Flow: feasibility check → agent prompt generation → up to `maxLoops` rounds
 * of Generate → Critique → Judge → final verdict message.
 *
 * @param pi - Extension API used to post chat messages.
 * @param ctx - Command context; supplies the abort signal and is forwarded to every LLM call.
 * @param input - Goal text and the maximum number of loop iterations.
 * @returns The run details and whether the judge accepted the last result, or
 *   `null` when the goal is judged not achievable or any feasibility /
 *   generator / critique / judge call fails (the failure is posted to chat).
 */
export async function execute(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  input: OrchestratorInput,
): Promise<OrchestratorResult | null> {
  const { goal, maxLoops } = input;
  const signal = ctx.signal;

  // ── Step 1: Feasibility ──────────────────────────────────────────
  send(pi, `## Checking feasibility\n\n> ${goal}\n\nEvaluating whether this goal is achievable with an LLM...`);

  const feasibilityPrompt = `Evaluate whether this goal is achievable using an LLM: "${goal}"`;
  const feasibility = await oneshotLLM(ctx, FEASIBILITY_PROMPT, feasibilityPrompt, signal);

  if (feasibility.error) {
    send(pi, `## Feasibility check failed\n\n${feasibility.error}`);
    return null;
  }

  const feasibilityJson = parseJsonResponse<{ achievable?: boolean; reason?: string }>(feasibility.text);
  let feasible = true;
  let feasibilityReason = "";

  if (feasibilityJson) {
    // Only an explicit `achievable: false` stops the run.
    feasible = feasibilityJson.achievable !== false;
    feasibilityReason = feasibilityJson.reason || "";
  } else {
    feasibilityReason = "Could not parse feasibility response; proceeding anyway";
  }

  if (!feasible) {
    send(pi, `## Goal not achievable\n\n${feasibilityReason || feasibility.text.slice(0, 500)}`);
    return null;
  }

  send(pi, `## Feasibility: Yes\n\n${feasibilityReason}\n\nGenerating agent prompts...`);

  // ── Step 2: Generate agent prompts ───────────────────────────────
  const promptGenResult = await generateAgentPrompts(ctx, goal, signal);

  let agentPrompts: AgentPrompts;
  let domain: string;

  if (promptGenResult) {
    agentPrompts = promptGenResult.prompts;
    domain = promptGenResult.domain;
    send(pi, formatPromptsMessage(domain, agentPrompts));
  } else {
    agentPrompts = {
      generator_prompt: FALLBACK_GENERATOR_PROMPT,
      critique_prompt: FALLBACK_CRITIQUE_PROMPT,
      judge_prompt: FALLBACK_JUDGE_PROMPT,
    };
    domain = "General";
    // Tell the user the run continues with generic prompts instead of
    // switching silently; every other step already reports its outcome.
    send(pi, `## Agent Prompts — Domain: ${domain}\n\nCould not generate goal-specific prompts; using the built-in fallback prompts.`);
  }

  // ── Step 3: Ralph Loop ───────────────────────────────────────────
  const iterations: LoopIteration[] = [];
  let currentResult = "";
  let currentCriticism = "";

  for (let i = 1; i <= maxLoops; i++) {
    const iteration = buildIteration(i);

    // ── Generate ──
    send(pi, Generator.formatBefore(i, maxLoops));
    const genResult = await Generator.execute(ctx, {
      goal,
      systemPrompt: agentPrompts.generator_prompt,
      // The first iteration works from the goal alone.
      previousResult: i > 1 ? currentResult : undefined,
      previousCriticism: i > 1 ? currentCriticism : undefined,
    }, signal);

    if (genResult.error) {
      send(pi, Generator.formatError(i, maxLoops, genResult.error));
      return null;
    }

    currentResult = genResult.text;
    iteration.steps.push({ type: "generate", preview: truncate(currentResult, 100), full: currentResult });
    send(pi, Generator.formatAfter(i, maxLoops, currentResult));

    // ── Critique ──
    send(pi, Critique.formatBefore(i, maxLoops));
    const critResult = await Critique.execute(ctx, {
      goal,
      systemPrompt: agentPrompts.critique_prompt,
      result: currentResult,
    }, signal);

    if (critResult.error) {
      send(pi, Critique.formatError(i, maxLoops, critResult.error));
      return null;
    }

    currentCriticism = critResult.text;
    iteration.steps.push({ type: "critique", preview: truncate(currentCriticism, 100), full: currentCriticism });
    send(pi, Critique.formatAfter(i, maxLoops, currentCriticism));

    // ── Judge ──
    send(pi, Judge.formatBefore(i, maxLoops));
    const judgeResult = await Judge.execute(ctx, {
      goal,
      systemPrompt: agentPrompts.judge_prompt,
      result: currentResult,
      criticism: currentCriticism,
    }, signal);

    if (judgeResult.error) {
      send(pi, Judge.formatError(i, maxLoops, judgeResult.error));
      return null;
    }

    iteration.steps.push({ type: "judge", verdict: judgeResult.done, reason: judgeResult.reason, raw: judgeResult.raw });
    iteration.achieved = judgeResult.done;
    iteration.finalResult = currentResult;
    iteration.finalCriticism = currentCriticism;
    iteration.finalJudgeReason = judgeResult.reason;

    iterations.push(iteration);

    send(pi, Judge.formatAfter(i, maxLoops, judgeResult.done, judgeResult.reason));

    if (judgeResult.done) break;
  }

  // ── Step 4: Final verdict ─────────────────────────────────────────
  // `lastIteration` is undefined when the loop body never ran (maxLoops < 1).
  const lastIteration = iterations[iterations.length - 1];
  const achieved = lastIteration?.achieved ?? false;

  const details: RalphLoopResult = {
    goal,
    domain,
    maxLoops,
    loopCount: iterations.length,
    achieved,
    feasibilityReason,
    feasibilityRaw: feasibility.text,
    agentPrompts,
    result: lastIteration?.finalResult || "",
    criticism: lastIteration?.finalCriticism || "",
    judgeReason: lastIteration?.finalJudgeReason || "",
    iterations,
  };

  send(pi, formatFinalMessage(lastIteration, iterations.length, maxLoops, domain), details);

  return { details, achieved };
}

/** Builds the closing chat message: verdict, final output, optional final criticism, run summary. */
function formatFinalMessage(
  lastIteration: LoopIteration | undefined,
  iterationCount: number,
  maxLoops: number,
  domain: string,
): string {
  const achieved = lastIteration?.achieved ?? false;
  const verdictReason = lastIteration?.finalJudgeReason || "no judgment rendered";
  const finalLabel = achieved ? "GOAL ACHIEVED ✓" : "GOAL NOT FULLY ACHIEVED ✗";

  const lines: string[] = [
    `## Result: ${finalLabel}`,
    ``,
    `> ${verdictReason}`,
    ``,
    `---`,
    ``,
    `**Final output:**`,
    ``,
    lastIteration?.finalResult || "(no result)",
  ];

  // The criticism section is omitted when it is empty or whitespace-only.
  if (lastIteration?.finalCriticism && lastIteration.finalCriticism.trim()) {
    lines.push(``, `**Final criticism:**`, ``, lastIteration.finalCriticism);
  }

  lines.push(``, `---`, ``, `*${iterationCount}/${maxLoops} iterations, domain: ${domain}*`);

  return lines.join("\n");
}
214
+
215
+ // ---------------------------------------------------------------------------
216
+ // Agent Prompt Generation
217
+ // ---------------------------------------------------------------------------
218
+
219
/**
 * Asks the LLM to design goal-specific system prompts for the three agents.
 *
 * @param ctx - Command context forwarded to the LLM client.
 * @param goal - The goal the prompts should be tailored to.
 * @param signal - Abort signal forwarded to the LLM call.
 * @returns The three prompts plus a domain label ("General" when the reply has
 *   none), or `null` when the call fails, the reply is not parseable JSON, or
 *   any of the three prompt fields is not a string.
 */
async function generateAgentPrompts(
  ctx: ExtensionCommandContext,
  goal: string,
  signal: AbortSignal | undefined,
): Promise<{ prompts: AgentPrompts; domain: string } | null> {
  const userMsg = [
    `Design system prompts for three agents that will work together to achieve this goal:`,
    ``,
    `"${goal}"`,
    ``,
    `Each prompt must be specifically tailored to this goal's domain. Output the JSON with keys: domain, generator_prompt, critique_prompt, judge_prompt. The "domain" field should identify the subject area (e.g., "Literary Fiction", "Systems Programming", "Business Strategy").`,
  ].join("\n");

  const response = await oneshotLLM(ctx, PROMPT_GENERATOR_PROMPT, userMsg, signal);
  if (response.error) {
    return null;
  }

  const parsed = parseJsonResponse<{
    domain?: string;
    generator_prompt?: string;
    critique_prompt?: string;
    judge_prompt?: string;
  }>(response.text);
  if (!parsed) {
    return null;
  }

  const { domain, generator_prompt, critique_prompt, judge_prompt } = parsed;

  // All three prompts must be present as strings; otherwise the caller falls back.
  if (typeof generator_prompt !== "string") return null;
  if (typeof critique_prompt !== "string") return null;
  if (typeof judge_prompt !== "string") return null;

  return {
    prompts: { generator_prompt, critique_prompt, judge_prompt },
    domain: domain || "General",
  };
}
252
+
253
+ // ---------------------------------------------------------------------------
254
+ // Helpers
255
+ // ---------------------------------------------------------------------------
256
+
257
/** Creates the empty record for iteration number `loop`; the loop fills it in as steps complete. */
function buildIteration(loop: number): LoopIteration {
  const blank: LoopIteration = {
    loop,
    steps: [],
    achieved: false,
    finalResult: "",
    finalCriticism: "",
    finalJudgeReason: "",
  };
  return blank;
}
267
+
268
/**
 * Posts one displayed "ralph-loop" message to chat.
 *
 * @param pi - Extension API whose `sendMessage` is called.
 * @param content - Markdown body of the message.
 * @param details - Optional run record; the `details` key is only added when this is provided.
 */
function send(pi: ExtensionAPI, content: string, details?: RalphLoopResult): void {
  const extra = details ? { details } : {};
  pi.sendMessage({
    customType: "ralph-loop",
    content,
    display: true,
    ...extra,
  });
}
276
+
277
/** Chat message listing the domain and the three generated agent prompts, one section each. */
function formatPromptsMessage(domain: string, prompts: AgentPrompts): string {
  const section = (title: string, body: string): string => `### ${title}\n\n${body}`;

  return (
    `## Agent Prompts — Domain: ${domain}\n\n` +
    section("Generator", prompts.generator_prompt) + "\n\n" +
    section("Critique", prompts.critique_prompt) + "\n\n" +
    section("Judge", prompts.judge_prompt)
  );
}
package/argParser.ts ADDED
@@ -0,0 +1,51 @@
1
+ // ============================================================================
2
+ // Argument Parsing
3
+ // ============================================================================
4
+
5
+ import type { ParsedArgs } from "./types";
6
+
7
/**
 * Parses the command argument string: `--goal "..." [--loop N]`.
 *
 * Accepted forms:
 * - `--loop N` or `--loop=N` anywhere in the string (first occurrence wins; default 3).
 * - `--goal` followed by a double-quoted, single-quoted, or unquoted goal;
 *   `--goal="..."` is accepted too. A quoted goal ends at the next matching
 *   quote; an unterminated quote runs to the end of the input.
 * - Text without a leading `--goal` is treated as the goal itself.
 *
 * @param raw - The raw argument string typed after the command name.
 * @returns The trimmed goal and the iteration limit.
 * @throws Error when no goal text is present, or when `--loop` is less than 1
 *   (a zero-iteration run would spend LLM calls and produce no result).
 */
export function parseArgs(raw: string): ParsedArgs {
  const loopFlag = /--loop(?:\s+|=)(\d+)/;
  let loop = 3;

  const loopMatch = raw.match(loopFlag);
  if (loopMatch) {
    loop = parseInt(loopMatch[1], 10);
    if (loop < 1) {
      throw new Error('--loop must be at least 1. Usage: /ralph-loop-anything --goal "Your goal" [--loop N]');
    }
  }

  let remaining = raw.replace(loopFlag, "").trim();

  if (remaining.startsWith("--goal")) {
    // Drop the flag itself and an "=" glued directly to it (`--goal="..."`).
    remaining = remaining.slice("--goal".length).replace(/^=/, "").trim();
  }

  let goal = remaining;
  const opener = remaining.charAt(0);
  if (opener === '"' || opener === "'") {
    const closer = remaining.indexOf(opener, 1);
    goal = closer === -1 ? remaining.slice(1) : remaining.slice(1, closer);
  }

  goal = goal.trim();
  if (!goal) {
    throw new Error('--goal is required. Usage: /ralph-loop-anything --goal "Your goal" [--loop N]');
  }

  return { goal, loop };
}
package/helpers.ts ADDED
@@ -0,0 +1,19 @@
1
+ // ============================================================================
2
+ // Text Helpers
3
+ // ============================================================================
4
+
5
/**
 * Shortens `text` to at most `maxLen` UTF-16 code units and appends "…" when
 * anything was cut off. Text that already fits is returned unchanged.
 *
 * If the cut would fall between the two halves of a surrogate pair (for
 * example inside an emoji), the whole character is dropped so the result
 * never ends in a lone surrogate.
 *
 * @param text - The text to shorten.
 * @param maxLen - Maximum number of code units kept before the ellipsis.
 * @returns The original text, or the shortened text followed by "…".
 */
export function truncate(text: string, maxLen: number): string {
  if (text.length <= maxLen) return text;

  let end = maxLen;
  const lastKept = text.charCodeAt(end - 1);
  // 0xD800–0xDBFF is the high-surrogate range; its partner would be cut off.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }

  return text.slice(0, end) + "…";
}
12
+
13
/**
 * Keeps only the first `n` newline-separated lines of `text`.
 */
export function firstNLines(text: string, n: number): string {
  return text.split("\n").slice(0, n).join("\n");
}
package/index.ts ADDED
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Ralph Loop Extension
3
+ *
4
+ * Iterative goal-achievement loop with Generator → Critique → Judge agents.
5
+ * Each step sends a message to chat as it completes — tail -f style.
6
+ *
7
+ * Architecture:
8
+ * index.ts — thin entry point, registers command & renderer
9
+ * agents/orchestrator — coordinates the full flow
10
+ * agents/generator — produces or improves a result
11
+ * agents/critique — evaluates a result against the goal
12
+ * agents/judge — decides if the goal is achieved
13
+ */
14
+
15
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
16
+
17
+ import { parseArgs } from "./argParser";
18
+ import { renderRalphLoopMessage } from "./renderer";
19
+ import * as Orchestrator from "./agents/orchestrator";
20
+
21
/**
 * Extension entry point: registers the `ralph-loop-anything` command and the
 * renderer for "ralph-loop" chat messages.
 *
 * @param pi - Extension API provided by the host.
 */
export default function (pi: ExtensionAPI) {
  pi.registerCommand("ralph-loop-anything", {
    description: "Run a dynamic Ralph loop (Generator->Critique->Judge) to achieve a goal",

    // Offers the two flags, matching the typed prefix against either the inserted value or its label.
    getArgumentCompletions: (prefix: string) => {
      const items = [
        { value: '--goal "', label: '--goal "specify your goal"' },
        { value: "--loop ", label: "--loop N (default 3)" },
      ];
      const filtered = items.filter(
        (item) => item.value.startsWith(prefix) || item.label.startsWith(prefix),
      );
      return filtered.length > 0 ? filtered : null;
    },

    handler: async (args, ctx) => {
      let parsed: ReturnType<typeof parseArgs>;
      try {
        parsed = parseArgs(args);
      } catch (err: unknown) {
        // Narrow before reading `.message`; a thrown value is not guaranteed to be an Error.
        const reason = err instanceof Error ? err.message : String(err);
        ctx.ui.notify(`Failed: ${reason}`, "error");
        return;
      }

      if (!ctx.model) {
        ctx.ui.notify("No model selected. Use /model to select one.", "error");
        return;
      }

      ctx.ui.notify(`Ralph Loop: "${parsed.goal}" (max ${parsed.loop} iterations)`, "info");

      await Orchestrator.execute(pi, ctx, {
        goal: parsed.goal,
        maxLoops: parsed.loop,
      });
    },
  });

  pi.registerMessageRenderer("ralph-loop", (message, theme) => {
    return renderRalphLoopMessage(message, theme);
  });
}
package/llm.ts ADDED
@@ -0,0 +1,85 @@
1
+ // ============================================================================
2
+ // LLM Client
3
+ // ============================================================================
4
+
5
+ import { complete, type UserMessage } from "@earendil-works/pi-ai";
6
+ import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
7
+ import type { OneshotResult } from "./types";
8
+
9
/**
 * Makes a single LLM call with a system prompt and one user message.
 *
 * Failures are reported through the returned `error` field rather than thrown:
 * no model selected, missing credentials, an aborted call, or any exception
 * raised while resolving credentials or completing the request.
 *
 * @param ctx - Command context supplying the selected model and the model registry.
 * @param systemPrompt - System prompt for the call.
 * @param userMessage - Text of the single user message.
 * @param signal - Optional abort signal passed to the completion call.
 * @returns The text parts of the response joined with newlines, or an empty
 *   text plus `error` on failure.
 */
export async function oneshotLLM(
  ctx: ExtensionCommandContext,
  systemPrompt: string,
  userMessage: string,
  signal?: AbortSignal,
): Promise<OneshotResult> {
  const model = ctx.model;
  if (!model) {
    return { text: "", error: "No model selected" };
  }

  try {
    // Credential lookup sits inside the try so a rejection becomes an error
    // result like every other failure, instead of escaping to the caller.
    const auth = await ctx.modelRegistry.getApiKeyAndHeaders(model);
    if (!auth.ok || !auth.apiKey) {
      return { text: "", error: auth.ok ? `No API key for ${model.provider}` : auth.error };
    }

    const messages: UserMessage[] = [
      {
        role: "user",
        content: [{ type: "text", text: userMessage }],
        timestamp: Date.now(),
      },
    ];

    const response = await complete(
      model,
      { systemPrompt, messages },
      { apiKey: auth.apiKey, headers: auth.headers, signal },
    );

    if (response.stopReason === "aborted") {
      return { text: "", error: "Aborted" };
    }

    // Keep only the text parts of the response.
    const text = response.content
      .filter((c): c is { type: "text"; text: string } => c.type === "text")
      .map((c) => c.text)
      .join("\n");

    return { text };
  } catch (err: unknown) {
    let message = "";
    if (typeof err === "object" && err !== null && "message" in err) {
      const candidate = (err as { message: unknown }).message;
      if (typeof candidate === "string") {
        message = candidate;
      }
    }
    return { text: "", error: message || String(err) };
  }
}
56
+
57
+ // ============================================================================
58
+ // JSON Response Parsing
59
+ // ============================================================================
60
+
61
/**
 * Extracts a JSON value from an LLM response.
 *
 * Candidates are tried in order and the first one that parses to a non-null
 * object (arrays included) wins:
 * 1. the whole trimmed text;
 * 2. the span from the first `{` to the last `}`;
 * 3. the contents of the first fenced code block;
 * 4. the first balanced `{...}` span that parses, which recovers the object
 *    when later prose contains stray braces.
 *
 * JSON primitives and `null` are rejected, so callers can safely read
 * properties off a non-null result.
 *
 * @param text - Raw response text.
 * @returns The parsed value cast to `T` (its shape is not validated), or `null` when nothing usable is found.
 */
export function parseJsonResponse<T = Record<string, unknown>>(text: string): T | null {
  const candidates: string[] = [text.trim()];

  const braceMatch = text.match(/\{[\s\S]*\}/);
  if (braceMatch) {
    candidates.push(braceMatch[0]);
  }

  const fenceMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fenceMatch) {
    candidates.push(fenceMatch[1]);
  }

  for (const candidate of candidates) {
    const parsed = tryParseObject(candidate);
    if (parsed !== null) return parsed as T;
  }

  const balanced = findBalancedObject(text);
  return balanced === null ? null : (balanced as T);
}

/** Parses `candidate` as JSON and returns it only when the result is a non-null object. */
function tryParseObject(candidate: string): object | null {
  try {
    const value: unknown = JSON.parse(candidate);
    return typeof value === "object" && value !== null ? value : null;
  } catch {
    return null;
  }
}

/** Scans for the first brace-balanced span (ignoring braces inside JSON strings) that parses as an object. */
function findBalancedObject(text: string): object | null {
  for (let start = text.indexOf("{"); start !== -1; start = text.indexOf("{", start + 1)) {
    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];

      if (inString) {
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }

      if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        depth--;
        if (depth === 0) {
          const parsed = tryParseObject(text.slice(start, i + 1));
          if (parsed !== null) return parsed;
          break; // This opening brace did not start valid JSON; try the next one.
        }
      }
    }
  }

  return null;
}
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@entelligentsia/pi-ralph",
3
+ "version": "1.0.0",
4
+ "description": "Iterative goal-achievement loop for pi — Generator→Critique→Judge, streaming output as it happens",
5
+ "keywords": [
6
+ "pi",
7
+ "pi-coding-agent",
8
+ "llm",
9
+ "agent",
10
+ "iterative",
11
+ "refinement"
12
+ ],
13
+ "license": "MIT",
14
+ "author": "Entelligentsia",
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "git+https://github.com/Entelligentsia/pi-ralph.git"
18
+ },
19
+ "homepage": "https://github.com/Entelligentsia/pi-ralph#readme",
20
+ "bugs": {
21
+ "url": "https://github.com/Entelligentsia/pi-ralph/issues"
22
+ },
23
+ "main": "index.ts",
24
+ "files": [
25
+ "index.ts",
26
+ "types.ts",
27
+ "renderer.ts",
28
+ "prompts.ts",
29
+ "helpers.ts",
30
+ "llm.ts",
31
+ "argParser.ts",
32
+ "agents/"
33
+ ],
34
+ "pi": {
35
+ "extensions": [
36
+ "./index.ts"
37
+ ]
38
+ },
39
+ "peerDependencies": {
40
+ "@earendil-works/pi-coding-agent": ">=0.1.0"
41
+ },
42
+ "publishConfig": {
43
+ "access": "public"
44
+ }
45
+ }
package/prompts.ts ADDED
@@ -0,0 +1,82 @@
1
+ // ============================================================================
2
+ // Static System Prompts
3
+ // ============================================================================
4
+
5
+ export const FEASIBILITY_PROMPT = `You are a goal feasibility evaluator. Your job is to determine whether a given goal can be meaningfully pursued and achieved using an LLM.
6
+
7
+ Consider:
8
+ - Is the goal well-defined enough for an LLM to work on?
9
+ - Can an LLM make meaningful progress on this goal (e.g., through text, code, analysis, planning)?
10
+ - Is the goal something that requires physical action that an LLM cannot do (e.g., "cook an omelette")?
11
+ - Could the goal be reinterpreted in a way that an LLM can contribute meaningfully?
12
+
13
+ For example:
14
+ - "Write a poem" → achievable (text generation)
15
+ - "Sort a list" → achievable (code generation)
16
+ - "Make me an omelette" → NOT achievable (requires physical action)
17
+ - "Design a car" → achievable if interpreted as "create a detailed car design document"
18
+
19
+ You MUST respond with ONLY a JSON object (no markdown fences, no extra text):
20
+ {"achievable": true, "reason": "brief explanation"}
21
+ or
22
+ {"achievable": false, "reason": "brief explanation"}`; // FEASIBILITY_PROMPT: system prompt for the feasibility check; it asks the model for a bare JSON reply of the form {"achievable": boolean, "reason": string}.
23
+
24
+ export const PROMPT_GENERATOR_PROMPT = `You are a prompt engineer. Given a goal, you design the system prompts for three specialized agents that will work together in an iterative loop to achieve that goal. Each prompt must be tailored to the goal's domain so the agents are aligned from the start.
25
+
26
+ The three agents are:
27
+ 1. **Generator** — Produces or improves a result for the goal. On iteration >1, it receives the previous result and criticism, so its prompt should instruct it to address feedback.
28
+ 2. **Critique** — Evaluates the Generator's output against the goal. Identifies problems, gaps, and suggests concrete improvements.
29
+ 3. **Judge** — Determines if the result adequately achieves the goal, given the criticism. Outputs JSON: {"done": true/false, "reason": "..."}
30
+
31
+ Each system prompt should:
32
+ - Establish the agent's role and expertise relevant to the goal's domain
33
+ - Include domain-specific evaluation criteria (e.g., for code: correctness, efficiency, edge cases; for writing: style, coherence, completeness)
34
+ - Be concise but specific — generic instructions are worse than goal-tailored ones
35
+ - For the Judge, require JSON output format: {"done": true/false, "reason": "brief explanation"}
36
+ - For the Judge, use "done": true only if the result adequately achieves the goal or has reached diminishing returns. Use "done": false if significant improvements are still needed.
37
+
38
+ You MUST respond with ONLY a JSON object (no markdown fences, no extra text) with exactly these keys:
39
+ {
40
+ "domain": "the identified domain (e.g., 'Literary Fiction', 'Systems Programming', 'Business Strategy')",
41
+ "generator_prompt": "...",
42
+ "critique_prompt": "...",
43
+ "judge_prompt": "..."
44
+ }`; // PROMPT_GENERATOR_PROMPT: system prompt for the prompt-generation step; it asks for a bare JSON reply with the keys "domain", "generator_prompt", "critique_prompt" and "judge_prompt".
45
+
46
+ // ============================================================================
47
+ // Fallback Prompts (used if dynamic generation fails)
48
+ // ============================================================================
49
+
50
+ export const FALLBACK_GENERATOR_PROMPT = `You are a Generator agent. Your job is to produce the best possible result for the given goal.
51
+
52
+ Instructions:
53
+ - Be thorough, creative, and accurate
54
+ - If this is a follow-up iteration, carefully address all criticisms from the previous round
55
+ - Produce a complete, polished result
56
+ - Focus on quality and completeness`; // FALLBACK_GENERATOR_PROMPT: goal-agnostic Generator system prompt, for use when dynamic prompt generation fails; it places no constraint on the output format.
57
+
58
+ export const FALLBACK_CRITIQUE_PROMPT = `You are a Critique agent. Your job is to critically evaluate a result against the original goal.
59
+
60
+ Instructions:
61
+ - Identify what's missing, incorrect, or incomplete
62
+ - Point out any errors or inaccuracies
63
+ - Suggest specific, actionable improvements
64
+ - Be thorough but constructive
65
+ - Rate how well the result achieves the goal on a scale of 1-10
66
+ - Focus on the most important issues first`; // FALLBACK_CRITIQUE_PROMPT: goal-agnostic Critique system prompt, for use when dynamic prompt generation fails; it asks for free-form criticism including a 1-10 rating, not JSON.
67
+
68
+ export const FALLBACK_JUDGE_PROMPT = `You are a Judge agent. Your job is to determine whether a result, given its criticism, adequately achieves the original goal.
69
+
70
+ Instructions:
71
+ - Consider: Does the result address the goal?
72
+ - Consider: Are the criticisms minor or fundamental?
73
+ - Consider: Is the result good enough that further iterations are unlikely to produce significantly better output?
74
+ - Be strict but fair
75
+
76
+ You MUST respond with ONLY a JSON object (no markdown fences, no extra text):
77
+ {"done": true, "reason": "brief explanation"}
78
+ or
79
+ {"done": false, "reason": "brief explanation"}
80
+
81
+ Use "done": true ONLY if the result adequately achieves the goal or has reached the point of diminishing returns.
82
+ Use "done": false if significant improvements are still needed.`; // FALLBACK_JUDGE_PROMPT: goal-agnostic Judge system prompt, for use when dynamic prompt generation fails; it asks for a bare JSON reply of the form {"done": boolean, "reason": string}.
package/renderer.ts ADDED
@@ -0,0 +1,19 @@
1
+ // ============================================================================
2
+ // Message Renderer
3
+ //
4
+ // Each message is self-contained markdown — the renderer just passes it
5
+ // through as a Markdown component. No layout, no boxes, no truncation.
6
+ // ============================================================================
7
+
8
+ import { Markdown } from "@earendil-works/pi-tui";
9
+ import { getMarkdownTheme } from "@earendil-works/pi-coding-agent";
10
+ import type { Theme } from "@earendil-works/pi-tui";
11
+ import type { Message } from "@earendil-works/pi-coding-agent";
12
+
13
/**
 * Build the Markdown component for a ralph-loop chat message.
 *
 * The message body is rendered verbatim when it is a string; any other
 * content shape is rendered as an empty document.
 *
 * @param message Chat message whose `content` is rendered.
 * @param _theme Unused; styling comes from `getMarkdownTheme()` instead.
 * @returns A `Markdown` component wrapping the message text.
 */
export function renderRalphLoopMessage(
  message: Message,
  _theme: Theme,
): Markdown {
  let markdownSource = "";
  if (typeof message.content === "string") {
    markdownSource = message.content;
  }

  // NOTE(review): the two zeros are presumably the component's padding
  // arguments — confirm against the pi-tui `Markdown` constructor.
  const markdownTheme = getMarkdownTheme();
  return new Markdown(markdownSource, 0, 0, markdownTheme);
}
package/types.ts ADDED
@@ -0,0 +1,53 @@
1
+ // ============================================================================
2
+ // Types
3
+ // ============================================================================
4
+
5
+ export interface ParsedArgs { // Parsed command arguments; fields correspond to the documented `--goal` and `--loop` options.
6
+ goal: string; // the goal to achieve (`--goal`)
7
+ loop: number; // maximum number of iterations (`--loop`)
8
+ }
9
+
10
+ export interface OneshotResult { // Outcome of a single LLM completion call; failures are reported through `error` rather than thrown.
11
+ text: string; // text parts of the response joined with "\n"; "" whenever `error` is set
12
+ error?: string; // failure description (e.g. "No model selected", "Aborted"); absent on success
13
+ }
14
+
15
+ export interface AgentPrompts { // System prompts for the three loop agents; key names match the JSON keys requested by PROMPT_GENERATOR_PROMPT.
16
+ generator_prompt: string; // system prompt for the Generator agent
17
+ critique_prompt: string; // system prompt for the Critique agent
18
+ judge_prompt: string; // system prompt for the Judge agent
19
+ }
20
+
21
+ /**
22
+ * A single step within a loop iteration.
23
+ * Each iteration has 4 steps: Generate → Critique → Judge → (implied: Revise)
24
+ */
25
+ export type LoopStep = // discriminated union; switch on `type` to narrow to one variant
26
+ | { type: "generate"; preview: string; full: string } // NOTE(review): `preview` is presumably a shortened form of `full` — confirm against the producer
27
+ | { type: "critique"; preview: string; full: string } // same shape as "generate", carrying the Critique output
28
+ | { type: "judge"; verdict: boolean; reason: string; raw: string } // NOTE(review): `verdict`/`reason` presumably mirror the Judge's JSON `done`/`reason`, `raw` its unparsed reply — confirm
29
+ | { type: "revision"; reason: string }; // Why we're revising (based on judge verdict)
30
+
31
+ export interface LoopIteration { // Record of one Generate → Critique → Judge pass.
32
+ loop: number; // iteration number — TODO confirm whether it is 1-based
33
+ steps: LoopStep[]; // steps recorded during this iteration
34
+ achieved: boolean; // did judge say done: true
35
+ finalResult: string; // NOTE(review): presumably the Generator output of this iteration — confirm
36
+ finalCriticism: string; // NOTE(review): presumably the Critique output of this iteration — confirm
37
+ finalJudgeReason: string; // NOTE(review): presumably the Judge's `reason` for this iteration — confirm
38
+ }
39
+
40
+ export interface RalphLoopResult { // Aggregate outcome of a whole ralph-loop run.
41
+ goal: string; // the goal that was pursued
42
+ domain: string; // identified domain for the goal
43
+ maxLoops: number; // NOTE(review): presumably the iteration limit from `--loop` — confirm
44
+ loopCount: number; // NOTE(review): presumably the number of iterations actually run — confirm
45
+ achieved: boolean; // NOTE(review): presumably true when the Judge returned done: true — confirm
46
+ feasibilityReason: string; // NOTE(review): presumably the `reason` from the feasibility check reply — confirm
47
+ feasibilityRaw: string; // NOTE(review): presumably the unparsed feasibility check reply — confirm
48
+ agentPrompts: AgentPrompts; // system prompts for the Generator, Critique and Judge agents
49
+ result: string; // NOTE(review): presumably the final Generator output — confirm
50
+ criticism: string; // NOTE(review): presumably the final Critique output — confirm
51
+ judgeReason: string; // NOTE(review): presumably the final Judge `reason` — confirm
52
+ iterations: LoopIteration[]; // per-iteration records
53
+ }