@entelligentsia/pi-ralph 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -0
- package/agents/critique.ts +77 -0
- package/agents/generator.ts +88 -0
- package/agents/judge.ts +95 -0
- package/agents/orchestrator.ts +293 -0
- package/argParser.ts +51 -0
- package/helpers.ts +19 -0
- package/index.ts +61 -0
- package/llm.ts +85 -0
- package/package.json +45 -0
- package/prompts.ts +82 -0
- package/renderer.ts +19 -0
- package/types.ts +53 -0
package/README.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# pi-ralph
|
|
2
|
+
|
|
3
|
+
**A [pi](https://github.com/earendil-works/pi) extension** — iterative goal-achievement loop with Generator → Critique → Judge agents.
|
|
4
|
+
|
|
5
|
+
[@entelligentsia/pi-ralph on npm](https://www.npmjs.com/package/@entelligentsia/pi-ralph)
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
pi install npm:@entelligentsia/pi-ralph
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or pin a version:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
pi install npm:@entelligentsia/pi-ralph@1.0.0
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
From GitHub:
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
pi install git:github.com:Entelligentsia/pi-ralph
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## What It Does
|
|
26
|
+
|
|
27
|
+
Give it a goal. It loops — generate, critique, judge — until the goal is met or iterations run out.
|
|
28
|
+
|
|
29
|
+
Each agent's system prompt is dynamically generated from the goal, so they're domain-aligned from the start. Output streams to chat as it happens — like `tail -f`.
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
/ralph-loop-anything --goal "Your goal here" --loop 5
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
- `--goal` (required): The goal to achieve
|
|
38
|
+
- `--loop` (optional, default 3): Maximum iterations
|
|
39
|
+
|
|
40
|
+
## How It Works
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
1. Feasibility check → can an LLM do this?
|
|
44
|
+
2. Prompt generation → domain + agent prompts tailored to the goal
|
|
45
|
+
3. Loop (up to N):
|
|
46
|
+
├─ Generate → produces a result
|
|
47
|
+
├─ Critique → identifies problems, suggests improvements
|
|
48
|
+
└─ Judge → { done: true/false, reason }
|
|
49
|
+
└── if not done: feeds result + criticism back to Generator
|
|
50
|
+
4. Final message with verdict
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Every step posts a message to chat as it completes. No hidden progress bars — you see each agent's output the moment it arrives.
|
|
54
|
+
|
|
55
|
+
### LLM Calls Per Run
|
|
56
|
+
|
|
57
|
+
| Step | Calls |
|
|
58
|
+
|------|-------|
|
|
59
|
+
| Feasibility | 1 |
|
|
60
|
+
| Prompt generation | 1 |
|
|
61
|
+
| Per loop iteration | 3 (generate + critique + judge) |
|
|
62
|
+
|
|
63
|
+
Maximum total for `--loop 3`: 1 + 1 + (3 × 3) = **11** LLM calls (fewer if the Judge accepts a result before the last iteration)
|
|
64
|
+
|
|
65
|
+
## Examples
|
|
66
|
+
|
|
67
|
+
### Creative Writing
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
/ralph-loop-anything --goal "Write a complete short story in one sentence and fewer than 10 words that is better than Hemingway could write"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
/ralph-loop-anything --goal "Write a villanelle poem about debugging at 3am"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
/ralph-loop-anything --goal "Invent a new myth that explains why rivers bend, told as if by a 9th-century monk"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Code
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
/ralph-loop-anything --goal "Write an ergonomic CLI argument parser in Rust that handles flags, options, subcommands, and generates help text" --loop 5
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
/ralph-loop-anything --goal "Implement a lock-free concurrent hash map in Zig"
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
/ralph-loop-anything --goal "Write a single-file SQLite clone in C that supports CREATE TABLE, INSERT, and SELECT with WHERE clauses"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Design & Strategy
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
/ralph-loop-anything --goal "Design a go-to-market strategy for a developer tools startup that has a free CLI tool but wants to monetize a team tier"
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
/ralph-loop-anything --goal "Create a 12-week fitness program for a 40-year-old desk worker who has 30 minutes a day and bad knees"
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
/ralph-loop-anything --goal "Write a production-ready incident response playbook for a SaaS company experiencing a data breach"
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Explaining & Teaching
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
/ralph-loop-anything --goal "Explain monads to a JavaScript developer who has never used Haskell, using only analogies from web development"
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
/ralph-loop-anything --goal "Create a 5-minute presentation script that explains neural networks to a room of skeptical middle managers"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Constraints & Style
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
/ralph-loop-anything --goal "Rewrite the Gettysburg Address as if it were a Slack announcement from a tech CEO" --loop 2
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
/ralph-loop-anything --goal "Write a recipe for coq au vin where every step is a haiku"
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
/ralph-loop-anything --goal "Explain quantum entanglement using only words with 4 letters or fewer"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Architecture
|
|
134
|
+
|
|
135
|
+
| File | Purpose |
|
|
136
|
+
|------|---------|
|
|
137
|
+
| `index.ts` | Thin entry point — registers the command and renderer, parses args, calls orchestrator |
|
|
138
|
+
| `agents/orchestrator.ts` | Coordinates full flow: feasibility, prompts, loop, verdict |
|
|
139
|
+
| `agents/generator.ts` | Generator agent — produces or improves a result |
|
|
140
|
+
| `agents/critique.ts` | Critique agent — evaluates result against the goal |
|
|
141
|
+
| `agents/judge.ts` | Judge agent — decides if goal is achieved |
|
|
142
|
+
| `types.ts` | Shared interfaces (`LoopIteration`, `LoopStep`, etc.) |
|
|
143
|
+
| `prompts.ts` | Static system prompts (feasibility, prompt generator, fallbacks) |
|
|
144
|
+
| `helpers.ts` | Text utilities (`truncate`, `firstNLines`) |
|
|
145
|
+
| `llm.ts` | LLM client (`oneshotLLM`, `parseJsonResponse`) |
|
|
146
|
+
| `argParser.ts` | Command argument parsing |
|
|
147
|
+
| `renderer.ts` | Message renderer — passes markdown content through |
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Critique Agent
|
|
3
|
+
//
|
|
4
|
+
// Evaluates a result against the goal.
|
|
5
|
+
// Identifies problems, gaps, and suggests concrete improvements.
|
|
6
|
+
// ============================================================================
|
|
7
|
+
|
|
8
|
+
import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
9
|
+
import { oneshotLLM } from "../llm";
|
|
10
|
+
import { FALLBACK_CRITIQUE_PROMPT } from "../prompts";
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Types
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
/** Everything the critique agent needs to review one generated result. */
export interface CritiqueInput {
  /** The goal the result is measured against. */
  goal: string;
  /** System prompt for the critique call; an empty string selects the fallback prompt. */
  systemPrompt: string;
  /** The generated result to evaluate. */
  result: string;
}
|
|
21
|
+
|
|
22
|
+
/** Outcome of one critique call. */
export interface CritiqueResult {
  /** The critique text; empty when the call failed. */
  text: string;
  /** Error reported by the LLM call, if any. */
  error?: string;
}
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Execution
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
/**
 * Run the critique agent once: ask the LLM to evaluate `input.result`
 * against `input.goal`.
 *
 * @param ctx - Command context forwarded to `oneshotLLM`.
 * @param input - Goal, system prompt and the result under review.
 * @param signal - Optional abort signal forwarded to the LLM call.
 * @returns The critique text, or an empty text plus `error` when the LLM call reports one.
 */
export async function execute(
  ctx: ExtensionCommandContext,
  input: CritiqueInput,
  signal?: AbortSignal,
): Promise<CritiqueResult> {
  const message = buildUserMessage(input);
  // An empty system prompt falls back to the generic critique prompt.
  const prompt = input.systemPrompt ? input.systemPrompt : FALLBACK_CRITIQUE_PROMPT;

  const reply = await oneshotLLM(ctx, prompt, message, signal);

  return reply.error ? { text: "", error: reply.error } : { text: reply.text };
}
|
|
47
|
+
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// Message Construction
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
/**
 * Assemble the user message for the critique call: the goal, the result to
 * evaluate, and the closing instruction, separated by blank lines.
 */
function buildUserMessage(input: CritiqueInput): string {
  const { goal, result } = input;
  return (
    `Goal: ${goal}\n` +
    `\n` +
    `Result to evaluate:\n` +
    `${result}\n` +
    `\n` +
    `Critically evaluate this result against the goal. Identify problems and suggest improvements.`
  );
}
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Chat Message Formatting
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
/** Chat heading posted just before the critique call of iteration `loop`. */
export function formatBefore(loop: number, maxLoops: number): string {
  const heading = `## Loop ${loop}/${maxLoops}`;
  return `${heading} — Critiquing...`;
}
|
|
70
|
+
|
|
71
|
+
/** Chat message carrying the finished critique text of iteration `loop`. */
export function formatAfter(loop: number, maxLoops: number, text: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Critique`;
  return [heading, "", text].join("\n");
}
|
|
74
|
+
|
|
75
|
+
/** Chat message reporting that the critique call of iteration `loop` failed. */
export function formatError(loop: number, maxLoops: number, error: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Critique failed`;
  return [heading, "", error].join("\n");
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Generator Agent
|
|
3
|
+
//
|
|
4
|
+
// Produces or improves a result for the goal.
|
|
5
|
+
// On first run: works from the goal alone.
|
|
6
|
+
// On revision: receives previous result + criticism and improves.
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
10
|
+
import { oneshotLLM } from "../llm";
|
|
11
|
+
import { FALLBACK_GENERATOR_PROMPT } from "../prompts";
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Types
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
/** Everything the generator agent needs to produce or revise a result. */
export interface GeneratorInput {
  /** The goal to achieve. */
  goal: string;
  /** System prompt for the generator call; an empty string selects the fallback prompt. */
  systemPrompt: string;
  /** Result from the previous iteration to improve on; undefined on the first run. */
  previousResult?: string;
  /** Criticism of the previous result; undefined on the first run. */
  previousCriticism?: string;
}
|
|
25
|
+
|
|
26
|
+
/** Outcome of one generator call. */
export interface GeneratorResult {
  /** The generated text; empty when the call failed. */
  text: string;
  /** Error reported by the LLM call, if any. */
  error?: string;
}
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Execution
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
/**
 * Run the generator agent once: produce a first result for the goal, or an
 * improved one when a previous result is supplied.
 *
 * @param ctx - Command context forwarded to `oneshotLLM`.
 * @param input - Goal, system prompt and optional previous result/criticism.
 * @param signal - Optional abort signal forwarded to the LLM call.
 * @returns The generated text, or an empty text plus `error` when the LLM call reports one.
 */
export async function execute(
  ctx: ExtensionCommandContext,
  input: GeneratorInput,
  signal?: AbortSignal,
): Promise<GeneratorResult> {
  const message = buildUserMessage(input);
  // An empty system prompt falls back to the generic generator prompt.
  const prompt = input.systemPrompt ? input.systemPrompt : FALLBACK_GENERATOR_PROMPT;

  const reply = await oneshotLLM(ctx, prompt, message, signal);

  return reply.error ? { text: "", error: reply.error } : { text: reply.text };
}
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Message Construction
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
/**
 * Assemble the user message for the generator call.
 *
 * Without a previous result the message only states the goal. With one, it
 * also includes the previous result and its criticism ("(none)" when the
 * criticism is missing or empty) and asks for an improved version.
 */
function buildUserMessage(input: GeneratorInput): string {
  const { goal, previousResult, previousCriticism } = input;
  const goalLine = `Goal: ${goal}`;

  if (previousResult) {
    const sections = [
      goalLine,
      "",
      "Previous result:",
      previousResult,
      "",
      "Criticism of the previous result:",
      previousCriticism || "(none)",
      "",
      "Improve upon the previous result, carefully addressing all the criticisms. Produce a better version.",
    ];
    return sections.join("\n");
  }

  return `${goalLine}\n\nProduce your best result to achieve this goal.`;
}
|
|
73
|
+
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// Chat Message Formatting
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
/** Chat heading posted just before the generator call of iteration `loop`. */
export function formatBefore(loop: number, maxLoops: number): string {
  const heading = `## Loop ${loop}/${maxLoops}`;
  return `${heading} — Generating...`;
}
|
|
81
|
+
|
|
82
|
+
/** Chat message carrying the text generated in iteration `loop`. */
export function formatAfter(loop: number, maxLoops: number, text: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Generated`;
  return [heading, "", text].join("\n");
}
|
|
85
|
+
|
|
86
|
+
/** Chat message reporting that the generator call of iteration `loop` failed. */
export function formatError(loop: number, maxLoops: number, error: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Generator failed`;
  return [heading, "", error].join("\n");
}
|
package/agents/judge.ts
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Judge Agent
|
|
3
|
+
//
|
|
4
|
+
// Determines if a result adequately achieves the goal, given the criticism.
|
|
5
|
+
// Returns structured verdict: { done: boolean, reason: string }.
|
|
6
|
+
// ============================================================================
|
|
7
|
+
|
|
8
|
+
import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
9
|
+
import { oneshotLLM, parseJsonResponse } from "../llm";
|
|
10
|
+
import { FALLBACK_JUDGE_PROMPT } from "../prompts";
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Types
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
/** Everything the judge agent needs to rule on one result. */
export interface JudgeInput {
  /** The goal the result is measured against. */
  goal: string;
  /** System prompt for the judge call; an empty string selects the fallback prompt. */
  systemPrompt: string;
  /** The generated result being judged. */
  result: string;
  /** The critique of that result. */
  criticism: string;
}
|
|
22
|
+
|
|
23
|
+
/** Structured verdict of one judge call. */
export interface JudgeResult {
  /** True only when the judge explicitly reported the goal as achieved. */
  done: boolean;
  /** The judge's stated reason, or a parse-failure note. */
  reason: string;
  /** The unprocessed LLM reply; empty when the call failed. */
  raw: string;
  /** Error reported by the LLM call, if any. */
  error?: string;
}
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Execution
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
/**
 * Run the judge agent once and turn its reply into a structured verdict.
 *
 * @param ctx - Command context forwarded to `oneshotLLM`.
 * @param input - Goal, system prompt, result and criticism to rule on.
 * @param signal - Optional abort signal forwarded to the LLM call.
 * @returns The verdict. `done` is true only when the parsed reply has
 *   `done === true`; an unparseable reply yields `done: false` with a reason
 *   quoting the first 200 characters of the reply. When the LLM call reports
 *   an error, `error` is set and the other fields are empty/false.
 */
export async function execute(
  ctx: ExtensionCommandContext,
  input: JudgeInput,
  signal?: AbortSignal,
): Promise<JudgeResult> {
  const message = buildUserMessage(input);
  // An empty system prompt falls back to the generic judge prompt.
  const prompt = input.systemPrompt ? input.systemPrompt : FALLBACK_JUDGE_PROMPT;

  const reply = await oneshotLLM(ctx, prompt, message, signal);
  if (reply.error) {
    return { done: false, reason: "", raw: "", error: reply.error };
  }

  const raw = reply.text;
  const verdict = parseJsonResponse<{ done?: boolean; reason?: string }>(raw);
  if (!verdict) {
    return { done: false, reason: `(could not parse judge response) ${raw.slice(0, 200)}`, raw };
  }

  // Anything other than a literal `true` counts as "not done".
  return { done: verdict.done === true, reason: verdict.reason || "", raw };
}
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// Message Construction
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
/**
 * Assemble the user message for the judge call: the goal, the result, its
 * criticism, and the instruction to answer with JSON.
 */
function buildUserMessage(input: JudgeInput): string {
  const { goal, result, criticism } = input;
  return (
    `Goal: ${goal}\n` +
    `\n` +
    `Result:\n` +
    `${result}\n` +
    `\n` +
    `Criticism:\n` +
    `${criticism}\n` +
    `\n` +
    `Determine if the goal has been adequately achieved. Respond with JSON.`
  );
}
|
|
79
|
+
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// Chat Message Formatting
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
/** Chat heading posted just before the judge call of iteration `loop`. */
export function formatBefore(loop: number, maxLoops: number): string {
  const heading = `## Loop ${loop}/${maxLoops}`;
  return `${heading} — Judging...`;
}
|
|
87
|
+
|
|
88
|
+
/** Chat message announcing the judge's verdict and reason for iteration `loop`. */
export function formatAfter(loop: number, maxLoops: number, done: boolean, reason: string): string {
  let verdict: string;
  if (done) {
    verdict = "DONE ✓";
  } else {
    verdict = "CONTINUE ✗";
  }
  const heading = `## Loop ${loop}/${maxLoops} — Judge: ${verdict}`;
  return [heading, "", reason].join("\n");
}
|
|
92
|
+
|
|
93
|
+
/** Chat message reporting that the judge call of iteration `loop` failed. */
export function formatError(loop: number, maxLoops: number, error: string): string {
  const heading = `## Loop ${loop}/${maxLoops} — Judge failed`;
  return [heading, "", error].join("\n");
}
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Orchestrator Agent
|
|
3
|
+
//
|
|
4
|
+
// Coordinates the full ralph-loop flow:
|
|
5
|
+
// 1. Feasibility check
|
|
6
|
+
// 2. Domain identification + agent prompt generation
|
|
7
|
+
// 3. Loop: Generator → Critique → Judge (repeat until done or max loops)
|
|
8
|
+
// 4. Final verdict
|
|
9
|
+
//
|
|
10
|
+
// Every step posts a message to chat as it completes — tail -f style.
|
|
11
|
+
// ============================================================================
|
|
12
|
+
|
|
13
|
+
import type { ExtensionAPI, ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
14
|
+
import { oneshotLLM, parseJsonResponse } from "../llm";
|
|
15
|
+
import { FEASIBILITY_PROMPT, PROMPT_GENERATOR_PROMPT, FALLBACK_GENERATOR_PROMPT, FALLBACK_CRITIQUE_PROMPT, FALLBACK_JUDGE_PROMPT } from "../prompts";
|
|
16
|
+
import { truncate } from "../helpers";
|
|
17
|
+
import * as Generator from "./generator";
|
|
18
|
+
import * as Critique from "./critique";
|
|
19
|
+
import * as Judge from "./judge";
|
|
20
|
+
import type { AgentPrompts, RalphLoopResult, LoopIteration } from "../types";
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Types
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
/** Parameters of one orchestrated ralph-loop run. */
export interface OrchestratorInput {
  /** The goal to achieve. */
  goal: string;
  /** Upper bound on generate/critique/judge iterations. */
  maxLoops: number;
}
|
|
30
|
+
|
|
31
|
+
/** What a completed ralph-loop run hands back to its caller. */
export interface OrchestratorResult {
  /** Full record of the run, including every iteration. */
  details: RalphLoopResult;
  /** Whether the last iteration's judge reported the goal as achieved. */
  achieved: boolean;
}
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Execution
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/**
 * Run one complete ralph loop for `input.goal`, posting a chat message after
 * every step.
 *
 * Flow: feasibility check → agent prompt generation → up to `input.maxLoops`
 * rounds of generate / critique / judge → final verdict message.
 *
 * @param pi - Extension API used to post chat messages.
 * @param ctx - Command context; its `signal` is forwarded to every LLM call.
 * @param input - The goal and the maximum number of rounds.
 * @returns The collected details and whether the goal was achieved, or `null`
 *   when the feasibility call fails or rejects the goal, or when any agent
 *   call reports an error (a failure message is posted before returning).
 */
export async function execute(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  input: OrchestratorInput,
): Promise<OrchestratorResult | null> {
  const goal = input.goal;
  const maxLoops = input.maxLoops;
  const signal = ctx.signal;

  // ── Step 1: Feasibility ──────────────────────────────────────────
  send(pi, `## Checking feasibility\n\n> ${goal}\n\nEvaluating whether this goal is achievable with an LLM...`);

  const feasibility = await oneshotLLM(
    ctx,
    FEASIBILITY_PROMPT,
    `Evaluate whether this goal is achievable using an LLM: "${goal}"`,
    signal,
  );
  if (feasibility.error) {
    send(pi, `## Feasibility check failed\n\n${feasibility.error}`);
    return null;
  }

  const feasibilityJson = parseJsonResponse<{ achievable?: boolean; reason?: string }>(feasibility.text);
  // Only an explicit `achievable: false` stops the run; an unparseable reply proceeds.
  const rejected = feasibilityJson ? feasibilityJson.achievable === false : false;
  const feasibilityReason = feasibilityJson
    ? feasibilityJson.reason || ""
    : "Could not parse feasibility response; proceeding anyway";

  if (rejected) {
    send(pi, `## Goal not achievable\n\n${feasibilityReason || feasibility.text.slice(0, 500)}`);
    return null;
  }

  send(pi, `## Feasibility: Yes\n\n${feasibilityReason}\n\nGenerating agent prompts...`);

  // ── Step 2: Generate agent prompts ───────────────────────────────
  const tailored = await generateAgentPrompts(ctx, goal, signal);

  // Fall back to the static prompts when tailored ones could not be produced.
  const agentPrompts: AgentPrompts = tailored
    ? tailored.prompts
    : {
        generator_prompt: FALLBACK_GENERATOR_PROMPT,
        critique_prompt: FALLBACK_CRITIQUE_PROMPT,
        judge_prompt: FALLBACK_JUDGE_PROMPT,
      };
  const domain: string = tailored ? tailored.domain : "General";

  if (tailored) {
    send(pi, formatPromptsMessage(domain, agentPrompts));
  }

  // ── Step 3: Ralph Loop ───────────────────────────────────────────
  const iterations: LoopIteration[] = [];
  let currentResult = "";
  let currentCriticism = "";

  for (let round = 1; round <= maxLoops; round += 1) {
    const iteration = buildIteration(round);
    // From the second round on, the previous result and criticism are fed back.
    const isRevision = round > 1;

    // ── Generate ──
    send(pi, Generator.formatBefore(round, maxLoops));
    const generated = await Generator.execute(
      ctx,
      {
        goal,
        systemPrompt: agentPrompts.generator_prompt,
        previousResult: isRevision ? currentResult : undefined,
        previousCriticism: isRevision ? currentCriticism : undefined,
      },
      signal,
    );
    if (generated.error) {
      send(pi, Generator.formatError(round, maxLoops, generated.error));
      return null;
    }

    currentResult = generated.text;
    iteration.steps.push({ type: "generate", preview: truncate(currentResult, 100), full: currentResult });
    send(pi, Generator.formatAfter(round, maxLoops, currentResult));

    // ── Critique ──
    send(pi, Critique.formatBefore(round, maxLoops));
    const critiqued = await Critique.execute(
      ctx,
      {
        goal,
        systemPrompt: agentPrompts.critique_prompt,
        result: currentResult,
      },
      signal,
    );
    if (critiqued.error) {
      send(pi, Critique.formatError(round, maxLoops, critiqued.error));
      return null;
    }

    currentCriticism = critiqued.text;
    iteration.steps.push({ type: "critique", preview: truncate(currentCriticism, 100), full: currentCriticism });
    send(pi, Critique.formatAfter(round, maxLoops, currentCriticism));

    // ── Judge ──
    send(pi, Judge.formatBefore(round, maxLoops));
    const judged = await Judge.execute(
      ctx,
      {
        goal,
        systemPrompt: agentPrompts.judge_prompt,
        result: currentResult,
        criticism: currentCriticism,
      },
      signal,
    );
    if (judged.error) {
      send(pi, Judge.formatError(round, maxLoops, judged.error));
      return null;
    }

    iteration.steps.push({ type: "judge", verdict: judged.done, reason: judged.reason, raw: judged.raw });
    iteration.achieved = judged.done;
    iteration.finalResult = currentResult;
    iteration.finalCriticism = currentCriticism;
    iteration.finalJudgeReason = judged.reason;

    // The iteration is recorded only once all three steps have succeeded.
    iterations.push(iteration);

    send(pi, Judge.formatAfter(round, maxLoops, judged.done, judged.reason));

    if (judged.done) break;
  }

  // ── Step 4: Final verdict ─────────────────────────────────────────
  const lastIteration = iterations[iterations.length - 1];
  const achieved = lastIteration?.achieved ?? false;
  const verdictReason = lastIteration?.finalJudgeReason || "no judgment rendered";
  const finalLabel = achieved ? "GOAL ACHIEVED ✓" : "GOAL NOT FULLY ACHIEVED ✗";

  // The criticism section is included only when there is non-blank criticism.
  const criticismSection: string[] =
    lastIteration?.finalCriticism && lastIteration.finalCriticism.trim()
      ? [``, `**Final criticism:**`, ``, lastIteration.finalCriticism]
      : [];

  const report = [
    `## Result: ${finalLabel}`,
    ``,
    `> ${verdictReason}`,
    ``,
    `---`,
    ``,
    `**Final output:**`,
    ``,
    lastIteration?.finalResult || "(no result)",
    ...criticismSection,
    ``,
    `---`,
    ``,
    `*${iterations.length}/${maxLoops} iterations, domain: ${domain}*`,
  ].join("\n");

  const details: RalphLoopResult = {
    goal,
    domain,
    maxLoops,
    loopCount: iterations.length,
    achieved,
    feasibilityReason,
    feasibilityRaw: feasibility.text,
    agentPrompts,
    result: lastIteration?.finalResult || "",
    criticism: lastIteration?.finalCriticism || "",
    judgeReason: lastIteration?.finalJudgeReason || "",
    iterations,
  };

  send(pi, report, details);

  return { details, achieved };
}
|
|
214
|
+
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
// Agent Prompt Generation
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
|
|
219
|
+
/**
 * Ask the LLM for goal-specific system prompts for the three agents.
 *
 * @param ctx - Command context forwarded to `oneshotLLM`.
 * @param goal - The goal the prompts are tailored to.
 * @param signal - Abort signal forwarded to the LLM call, if any.
 * @returns The three prompts and the detected domain ("General" when the
 *   reply names none), or `null` when the call reports an error, the reply
 *   cannot be parsed, or any of the three prompts is not a string.
 */
async function generateAgentPrompts(
  ctx: ExtensionCommandContext,
  goal: string,
  signal: AbortSignal | undefined,
): Promise<{ prompts: AgentPrompts; domain: string } | null> {
  const userMsg = [
    `Design system prompts for three agents that will work together to achieve this goal:`,
    ``,
    `"${goal}"`,
    ``,
    `Each prompt must be specifically tailored to this goal's domain. Output the JSON with keys: domain, generator_prompt, critique_prompt, judge_prompt. The "domain" field should identify the subject area (e.g., "Literary Fiction", "Systems Programming", "Business Strategy").`,
  ].join("\n");

  const reply = await oneshotLLM(ctx, PROMPT_GENERATOR_PROMPT, userMsg, signal);
  if (reply.error) {
    return null;
  }

  const json = parseJsonResponse<{ domain?: string; generator_prompt?: string; critique_prompt?: string; judge_prompt?: string }>(reply.text);
  if (!json) {
    return null;
  }

  // All three prompts must be present as strings; otherwise the caller falls back.
  const { generator_prompt, critique_prompt, judge_prompt } = json;
  if (typeof generator_prompt !== "string") return null;
  if (typeof critique_prompt !== "string") return null;
  if (typeof judge_prompt !== "string") return null;

  const prompts: AgentPrompts = { generator_prompt, critique_prompt, judge_prompt };
  return { prompts, domain: json.domain || "General" };
}
|
|
252
|
+
|
|
253
|
+
// ---------------------------------------------------------------------------
|
|
254
|
+
// Helpers
|
|
255
|
+
// ---------------------------------------------------------------------------
|
|
256
|
+
|
|
257
|
+
/** Create the empty record for iteration number `loop`: no steps, not achieved, blank finals. */
function buildIteration(loop: number): LoopIteration {
  const blank: LoopIteration = {
    loop,
    steps: [],
    achieved: false,
    finalResult: "",
    finalCriticism: "",
    finalJudgeReason: "",
  };
  return blank;
}
|
|
267
|
+
|
|
268
|
+
/**
 * Post one displayed "ralph-loop" chat message.
 *
 * @param pi - Extension API whose `sendMessage` is called.
 * @param content - Message body.
 * @param details - Optional run details; the `details` key is attached only when provided.
 */
function send(pi: ExtensionAPI, content: string, details?: RalphLoopResult): void {
  if (details) {
    pi.sendMessage({ customType: "ralph-loop", content, display: true, details });
    return;
  }
  pi.sendMessage({ customType: "ralph-loop", content, display: true });
}
|
|
276
|
+
|
|
277
|
+
/** Render the detected domain and the three agent prompts as one markdown chat message. */
function formatPromptsMessage(domain: string, prompts: AgentPrompts): string {
  const section = (title: string, body: string): string => `### ${title}\n\n${body}`;

  return (
    `## Agent Prompts — Domain: ${domain}\n\n` +
    section("Generator", prompts.generator_prompt) +
    "\n\n" +
    section("Critique", prompts.critique_prompt) +
    "\n\n" +
    section("Judge", prompts.judge_prompt)
  );
}
|
package/argParser.ts
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Argument Parsing
|
|
3
|
+
// ============================================================================
|
|
4
|
+
|
|
5
|
+
import type { ParsedArgs } from "./types";
|
|
6
|
+
|
|
7
|
+
/**
 * Parse command arguments: `--goal "..." [--loop N]`.
 *
 * `--loop N` may appear before or after the goal. The goal may be wrapped in
 * double or single quotes, or given bare; the `--goal` flag itself is optional.
 *
 * When the goal is a properly closed quoted string, its contents are taken
 * verbatim and `--loop` is looked for only outside the quotes, so a goal that
 * mentions `--loop 5` is neither rewritten nor mistaken for the option.
 *
 * @param raw - The argument text following the command name.
 * @returns The trimmed goal and the iteration limit (default 3, never below 1).
 * @throws Error when no non-empty goal can be extracted.
 */
export function parseArgs(raw: string): ParsedArgs {
  const DEFAULT_LOOP = 3;
  const LOOP_FLAG = /--loop\s+(\d+)/;

  let goal = "";
  let loopText: string | undefined;

  // Preferred path: optional leading `--loop N`, optional `--goal`, then an opening quote.
  const head = /^\s*(?:--loop\s+(\d+)\s*)?(?:--goal\s*)?(["'])/.exec(raw);
  const quote = head?.[2];
  const goalStart = head ? head[0].length : -1;
  const goalEnd = head && quote ? raw.indexOf(quote, goalStart) : -1;

  if (head && goalEnd !== -1) {
    goal = raw.slice(goalStart, goalEnd);
    // A leading `--loop` wins; otherwise search only the text after the closing quote.
    loopText = head[1] ?? LOOP_FLAG.exec(raw.slice(goalEnd + 1))?.[1];
  } else {
    // Bare or unterminated goal: the first `--loop N` anywhere is the option.
    loopText = LOOP_FLAG.exec(raw)?.[1];

    let remaining = raw.replace(LOOP_FLAG, "").trim();
    if (remaining.startsWith("--goal")) {
      remaining = remaining.slice("--goal".length).trim();
    }

    const opener = remaining.charAt(0);
    if (opener === '"' || opener === "'") {
      // An unterminated quote runs to the end of the input.
      const close = remaining.indexOf(opener, 1);
      goal = close === -1 ? remaining.slice(1) : remaining.slice(1, close);
    } else {
      goal = remaining;
    }
  }

  if (!goal.trim()) {
    throw new Error('--goal is required. Usage: /ralph-loop-anything --goal "Your goal" [--loop N]');
  }

  // `--loop 0` would skip every iteration, so run at least one.
  const requested = loopText === undefined ? DEFAULT_LOOP : parseInt(loopText, 10);
  const loop = Math.max(1, requested);

  return { goal: goal.trim(), loop };
}
|
package/helpers.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Text Helpers
|
|
3
|
+
// ============================================================================
|
|
4
|
+
|
|
5
|
+
/**
 * Shorten `text` to at most `maxLen` UTF-16 code units of original content,
 * appending "…" when anything was cut off (so a truncated result is up to
 * `maxLen + 1` characters long).
 *
 * @param text - The text to shorten.
 * @param maxLen - Maximum number of code units to keep; negative or NaN values
 *   are treated as 0.
 * @returns `text` unchanged when it already fits, otherwise the kept prefix plus "…".
 */
export function truncate(text: string, maxLen: number): string {
  // A negative limit would make slice() count from the end of the string and
  // keep almost everything; clamp it (and NaN) to zero instead.
  const limit = maxLen > 0 ? Math.floor(maxLen) : 0;
  if (text.length <= limit) return text;

  let end = limit;
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    // Never leave a lone high surrogate at the cut point.
    if (splitsPair) end -= 1;
  }

  return text.slice(0, end) + "…";
}
|
|
12
|
+
|
|
13
|
+
/**
 * Return only the first `n` lines of `text`, where lines are separated by "\n".
 *
 * @param text - The text to cut.
 * @param n - Number of leading lines to keep; zero, negative or NaN yields "".
 * @returns The kept lines re-joined with "\n".
 */
export function firstNLines(text: string, n: number): string {
  // A negative count would make slice() drop lines from the end instead of
  // returning nothing.
  if (!(n > 0)) return "";
  return text.split("\n").slice(0, n).join("\n");
}
|
package/index.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ralph Loop Extension
|
|
3
|
+
*
|
|
4
|
+
* Iterative goal-achievement loop with Generator → Critique → Judge agents.
|
|
5
|
+
* Each step sends a message to chat as it completes — tail -f style.
|
|
6
|
+
*
|
|
7
|
+
* Architecture:
|
|
8
|
+
* index.ts — thin entry point, registers command & renderer
|
|
9
|
+
* agents/orchestrator — coordinates the full flow
|
|
10
|
+
* agents/generator — produces or improves a result
|
|
11
|
+
* agents/critique — evaluates a result against the goal
|
|
12
|
+
* agents/judge — decides if the goal is achieved
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
16
|
+
|
|
17
|
+
import { parseArgs } from "./argParser";
|
|
18
|
+
import { renderRalphLoopMessage } from "./renderer";
|
|
19
|
+
import * as Orchestrator from "./agents/orchestrator";
|
|
20
|
+
|
|
21
|
+
/**
 * Extension entry point: registers the `/ralph-loop-anything` command and the
 * renderer for "ralph-loop" messages.
 *
 * @param pi - The extension API handed to the extension on load.
 */
export default function (pi: ExtensionAPI) {
  pi.registerCommand("ralph-loop-anything", {
    description: "Run a dynamic Ralph loop (Generator->Critique->Judge) to achieve a goal",

    // Offer the two supported flags, filtered by what has been typed so far.
    getArgumentCompletions: (prefix: string) => {
      const flags = [
        { value: '--goal "', label: '--goal "specify your goal"' },
        { value: "--loop ", label: "--loop N (default 3)" },
      ];
      const matching = flags.filter(
        (flag) => flag.value.startsWith(prefix) || flag.label.startsWith(prefix),
      );
      return matching.length > 0 ? matching : null;
    },

    handler: async (args, ctx) => {
      let parsed: ReturnType<typeof parseArgs>;
      try {
        parsed = parseArgs(args);
      } catch (err: unknown) {
        // Thrown values are not guaranteed to be Error instances; never read
        // `.message` off something that might be null or a primitive.
        const reason = err instanceof Error ? err.message : String(err);
        ctx.ui.notify(`Failed: ${reason}`, "error");
        return;
      }

      // Without a selected model there is nothing to run the loop against.
      if (!ctx.model) {
        ctx.ui.notify("No model selected. Use /model to select one.", "error");
        return;
      }

      ctx.ui.notify(`Ralph Loop: "${parsed.goal}" (max ${parsed.loop} iterations)`, "info");

      await Orchestrator.execute(pi, ctx, {
        goal: parsed.goal,
        maxLoops: parsed.loop,
      });
    },
  });

  pi.registerMessageRenderer("ralph-loop", (message, theme) => {
    return renderRalphLoopMessage(message, theme);
  });
}
|
package/llm.ts
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// LLM Client
|
|
3
|
+
// ============================================================================
|
|
4
|
+
|
|
5
|
+
import { complete, type UserMessage } from "@earendil-works/pi-ai";
|
|
6
|
+
import type { ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
7
|
+
import type { OneshotResult } from "./types";
|
|
8
|
+
|
|
9
|
+
/**
 * Make a oneshot LLM call with a system prompt and a single user message.
 *
 * Failures are reported through the returned `error` field rather than by
 * throwing: no model selected, no usable API key, an aborted request, or any
 * exception raised by the completion call.
 *
 * @param ctx - Command context providing the selected model and the model registry.
 * @param systemPrompt - System prompt for the call.
 * @param userMessage - Text sent as the only user message.
 * @param signal - Optional abort signal forwarded to the completion call.
 * @returns The concatenated text parts of the response, or `text: ""` plus `error`.
 */
export async function oneshotLLM(
  ctx: ExtensionCommandContext,
  systemPrompt: string,
  userMessage: string,
  signal?: AbortSignal,
): Promise<OneshotResult> {
  if (!ctx.model) {
    return { text: "", error: "No model selected" };
  }

  const auth = await ctx.modelRegistry.getApiKeyAndHeaders(ctx.model);
  if (!auth.ok || !auth.apiKey) {
    return { text: "", error: auth.ok ? `No API key for ${ctx.model.provider}` : auth.error };
  }

  const messages: UserMessage[] = [
    {
      role: "user",
      content: [{ type: "text", text: userMessage }],
      timestamp: Date.now(),
    },
  ];

  try {
    const response = await complete(
      ctx.model,
      { systemPrompt, messages },
      { apiKey: auth.apiKey, headers: auth.headers, signal },
    );

    // NOTE(review): only "aborted" is surfaced as a failure here; confirm
    // whether other stop reasons should also populate `error`.
    if (response.stopReason === "aborted") {
      return { text: "", error: "Aborted" };
    }

    // Keep only the text parts of the response and join them line-wise.
    const text = response.content
      .filter((c): c is { type: "text"; text: string } => c.type === "text")
      .map((c) => c.text)
      .join("\n");

    return { text };
  } catch (err: unknown) {
    // Anything can be thrown (including null/undefined), so probe for a
    // usable message instead of dereferencing the value blindly.
    const message =
      typeof err === "object" && err !== null && "message" in err
        ? (err as { message?: unknown }).message
        : undefined;
    return {
      text: "",
      error: typeof message === "string" && message ? message : String(err),
    };
  }
}
|
|
56
|
+
|
|
57
|
+
// ============================================================================
|
|
58
|
+
// JSON Response Parsing
|
|
59
|
+
// ============================================================================
|
|
60
|
+
|
|
61
|
+
/**
 * Try to parse `candidate` as JSON.
 * Returns `undefined` on failure; JSON.parse itself can never yield `undefined`.
 */
function tryParseJson(candidate: string): unknown {
  try {
    return JSON.parse(candidate);
  } catch {
    return undefined;
  }
}

/**
 * Scan `text` for the first brace-balanced `{...}` span that parses as JSON.
 * Braces inside double-quoted strings are ignored while balancing.
 */
function findBalancedJsonObject(text: string): unknown {
  for (let start = text.indexOf("{"); start !== -1; start = text.indexOf("{", start + 1)) {
    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];

      if (inString) {
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }

      if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        depth--;
        if (depth === 0) {
          const parsed = tryParseJson(text.slice(start, i + 1));
          if (parsed !== undefined) return parsed;
          break; // balanced but not JSON: move on to the next "{"
        }
      }
    }
  }
  return undefined;
}

/**
 * Parse JSON from an LLM response.
 *
 * Strategies, in order; the first that parses wins:
 *  1. the whole (trimmed) response,
 *  2. the span from the first "{" to the last "}",
 *  3. the body of each ``` / ```json fenced block,
 *  4. the first brace-balanced `{...}` span that is valid JSON (covers prose
 *     that contains unrelated braces around the real object).
 *
 * The parsed value is not validated against `T`; callers must check the shape.
 *
 * @param text - Raw model output.
 * @returns The parsed value, or `null` when no strategy produced valid JSON.
 */
export function parseJsonResponse<T = Record<string, unknown>>(text: string): T | null {
  const direct = tryParseJson(text.trim());
  if (direct !== undefined) return direct as T;

  const greedy = text.match(/\{[\s\S]*\}/);
  if (greedy) {
    const parsed = tryParseJson(greedy[0]);
    if (parsed !== undefined) return parsed as T;
  }

  const fence = /```(?:json)?\s*\n?([\s\S]*?)\n?```/g;
  for (let m = fence.exec(text); m !== null; m = fence.exec(text)) {
    const parsed = tryParseJson(m[1]);
    if (parsed !== undefined) return parsed as T;
  }

  const scanned = findBalancedJsonObject(text);
  return scanned !== undefined ? (scanned as T) : null;
}
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
  "name": "@entelligentsia/pi-ralph",
  "version": "1.0.0",
  "description": "Iterative goal-achievement loop for pi — Generator→Critique→Judge, streaming output as it happens",
  "keywords": [
    "pi",
    "pi-coding-agent",
    "llm",
    "agent",
    "iterative",
    "refinement"
  ],
  "license": "MIT",
  "author": "Entelligentsia",
  "repository": {
    "type": "git",
    "url": "git+https://github.com/Entelligentsia/pi-ralph.git"
  },
  "homepage": "https://github.com/Entelligentsia/pi-ralph#readme",
  "bugs": {
    "url": "https://github.com/Entelligentsia/pi-ralph/issues"
  },
  "main": "index.ts",
  "files": [
    "index.ts",
    "types.ts",
    "renderer.ts",
    "prompts.ts",
    "helpers.ts",
    "llm.ts",
    "argParser.ts",
    "agents/"
  ],
  "pi": {
    "extensions": [
      "./index.ts"
    ]
  },
  "peerDependencies": {
    "@earendil-works/pi-ai": "*",
    "@earendil-works/pi-coding-agent": ">=0.1.0",
    "@earendil-works/pi-tui": "*"
  },
  "publishConfig": {
    "access": "public"
  }
}
|
package/prompts.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Static System Prompts
|
|
3
|
+
// ============================================================================
|
|
4
|
+
|
|
5
|
+
/**
 * System prompt for the feasibility check: asks the model whether a goal can
 * be meaningfully pursued by an LLM and to answer with a bare JSON object of
 * the form {"achievable": boolean, "reason": string}.
 */
export const FEASIBILITY_PROMPT = `You are a goal feasibility evaluator. Your job is to determine whether a given goal can be meaningfully pursued and achieved using an LLM.

Consider:
- Is the goal well-defined enough for an LLM to work on?
- Can an LLM make meaningful progress on this goal (e.g., through text, code, analysis, planning)?
- Is the goal something that requires physical action that an LLM cannot do (e.g., "cook an omelette")?
- Could the goal be reinterpreted in a way that an LLM can contribute meaningfully?

For example:
- "Write a poem" → achievable (text generation)
- "Sort a list" → achievable (code generation)
- "Make me an omelette" → NOT achievable (requires physical action)
- "Design a car" → achievable if interpreted as "create a detailed car design document"

You MUST respond with ONLY a JSON object (no markdown fences, no extra text):
{"achievable": true, "reason": "brief explanation"}
or
{"achievable": false, "reason": "brief explanation"}`;
|
|
23
|
+
|
|
24
|
+
/**
 * System prompt for the prompt-engineering step: given a goal, the model must
 * return a bare JSON object with the keys `domain`, `generator_prompt`,
 * `critique_prompt` and `judge_prompt`, i.e. goal-tailored system prompts for
 * the three loop agents.
 */
export const PROMPT_GENERATOR_PROMPT = `You are a prompt engineer. Given a goal, you design the system prompts for three specialized agents that will work together in an iterative loop to achieve that goal. Each prompt must be tailored to the goal's domain so the agents are aligned from the start.

The three agents are:
1. **Generator** — Produces or improves a result for the goal. On iteration >1, it receives the previous result and criticism, so its prompt should instruct it to address feedback.
2. **Critique** — Evaluates the Generator's output against the goal. Identifies problems, gaps, and suggests concrete improvements.
3. **Judge** — Determines if the result adequately achieves the goal, given the criticism. Outputs JSON: {"done": true/false, "reason": "..."}

Each system prompt should:
- Establish the agent's role and expertise relevant to the goal's domain
- Include domain-specific evaluation criteria (e.g., for code: correctness, efficiency, edge cases; for writing: style, coherence, completeness)
- Be concise but specific — generic instructions are worse than goal-tailored ones
- For the Judge, require JSON output format: {"done": true/false, "reason": "brief explanation"}
- For the Judge, use "done": true only if the result adequately achieves the goal or has reached diminishing returns. Use "done": false if significant improvements are still needed.

You MUST respond with ONLY a JSON object (no markdown fences, no extra text) with exactly these keys:
{
"domain": "the identified domain (e.g., 'Literary Fiction', 'Systems Programming', 'Business Strategy')",
"generator_prompt": "...",
"critique_prompt": "...",
"judge_prompt": "..."
}`;
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// Fallback Prompts (used if dynamic generation fails)
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
/**
 * Generic Generator system prompt, one of the fallback prompts used when
 * dynamic prompt generation fails.
 */
export const FALLBACK_GENERATOR_PROMPT = `You are a Generator agent. Your job is to produce the best possible result for the given goal.

Instructions:
- Be thorough, creative, and accurate
- If this is a follow-up iteration, carefully address all criticisms from the previous round
- Produce a complete, polished result
- Focus on quality and completeness`;
|
|
57
|
+
|
|
58
|
+
/**
 * Generic Critique system prompt, one of the fallback prompts used when
 * dynamic prompt generation fails.
 */
export const FALLBACK_CRITIQUE_PROMPT = `You are a Critique agent. Your job is to critically evaluate a result against the original goal.

Instructions:
- Identify what's missing, incorrect, or incomplete
- Point out any errors or inaccuracies
- Suggest specific, actionable improvements
- Be thorough but constructive
- Rate how well the result achieves the goal on a scale of 1-10
- Focus on the most important issues first`;
|
|
67
|
+
|
|
68
|
+
/**
 * Generic Judge system prompt, one of the fallback prompts used when dynamic
 * prompt generation fails. Requires a bare JSON verdict of the form
 * {"done": boolean, "reason": string}.
 */
export const FALLBACK_JUDGE_PROMPT = `You are a Judge agent. Your job is to determine whether a result, given its criticism, adequately achieves the original goal.

Instructions:
- Consider: Does the result address the goal?
- Consider: Are the criticisms minor or fundamental?
- Consider: Is the result good enough that further iterations are unlikely to produce significantly better output?
- Be strict but fair

You MUST respond with ONLY a JSON object (no markdown fences, no extra text):
{"done": true, "reason": "brief explanation"}
or
{"done": false, "reason": "brief explanation"}

Use "done": true ONLY if the result adequately achieves the goal or has reached the point of diminishing returns.
Use "done": false if significant improvements are still needed.`;
|
package/renderer.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Message Renderer
|
|
3
|
+
//
|
|
4
|
+
// Each message is self-contained markdown — the renderer just passes it
|
|
5
|
+
// through as a Markdown component. No layout, no boxes, no truncation.
|
|
6
|
+
// ============================================================================
|
|
7
|
+
|
|
8
|
+
import { Markdown } from "@earendil-works/pi-tui";
|
|
9
|
+
import { getMarkdownTheme } from "@earendil-works/pi-coding-agent";
|
|
10
|
+
import type { Theme } from "@earendil-works/pi-tui";
|
|
11
|
+
import type { Message } from "@earendil-works/pi-coding-agent";
|
|
12
|
+
|
|
13
|
+
/**
 * Render a "ralph-loop" chat message as a Markdown component.
 *
 * Only string content is rendered; any other content shape is shown as empty
 * markdown. The theme argument is unused because the markdown theme comes from
 * `getMarkdownTheme()`.
 *
 * @param message - The message whose `content` holds the markdown source.
 * @param _theme - Unused.
 * @returns A Markdown component for the message text.
 */
export function renderRalphLoopMessage(message: Message, _theme: Theme): Markdown {
  let markdownSource = "";
  if (typeof message.content === "string") {
    markdownSource = message.content;
  }

  // NOTE(review): the two zeros are presumably horizontal/vertical padding — confirm against pi-tui.
  return new Markdown(markdownSource, 0, 0, getMarkdownTheme());
}
|
package/types.ts
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// Types
|
|
3
|
+
// ============================================================================
|
|
4
|
+
|
|
5
|
+
/** Result of parsing the `/ralph-loop-anything` command arguments. */
export interface ParsedArgs {
  /** The goal text supplied via `--goal`. */
  goal: string;
  /** Maximum number of loop iterations supplied via `--loop`. */
  loop: number;
}
|
|
9
|
+
|
|
10
|
+
/** Outcome of a single LLM call. */
export interface OneshotResult {
  /** Text returned by the model. */
  text: string;
  /** Failure description; absent when the call succeeded. */
  error?: string;
}
|
|
14
|
+
|
|
15
|
+
/** System prompts for the three loop agents. */
export interface AgentPrompts {
  /** System prompt for the Generator agent. */
  generator_prompt: string;
  /** System prompt for the Critique agent. */
  critique_prompt: string;
  /** System prompt for the Judge agent. */
  judge_prompt: string;
}
|
|
20
|
+
|
|
21
|
+
/**
 * One step inside a loop iteration, discriminated by `type`.
 * An iteration runs Generate → Critique → Judge, with an implied Revise step
 * afterwards.
 */
export type LoopStep =
  // Output of the Generator
  | { type: "generate"; preview: string; full: string }
  // Output of the Critique
  | { type: "critique"; preview: string; full: string }
  // Verdict of the Judge, together with its raw response
  | { type: "judge"; verdict: boolean; reason: string; raw: string }
  // Why we're revising (based on judge verdict)
  | { type: "revision"; reason: string };
|
|
30
|
+
|
|
31
|
+
/** Record of a single pass through the loop. */
export interface LoopIteration {
  /** Iteration number. */
  loop: number;
  /** The steps recorded for this iteration. */
  steps: LoopStep[];
  /** Whether the judge said `done: true`. */
  achieved: boolean;
  /** Result text for this iteration. */
  finalResult: string;
  /** Criticism text for this iteration. */
  finalCriticism: string;
  /** The judge's reason for this iteration. */
  finalJudgeReason: string;
}
|
|
39
|
+
|
|
40
|
+
/** Overall outcome of a Ralph loop run. */
export interface RalphLoopResult {
  /** The goal being pursued. */
  goal: string;
  /** Identified domain for the goal. */
  domain: string;
  /** Iteration limit for the run. */
  maxLoops: number;
  /** Iteration count for the run. */
  loopCount: number;
  /** Whether the goal was achieved. */
  achieved: boolean;
  /** Reason given by the feasibility check. */
  feasibilityReason: string;
  /** Raw feasibility-check response. */
  feasibilityRaw: string;
  /** System prompts used by the three agents. */
  agentPrompts: AgentPrompts;
  /** Result text. */
  result: string;
  /** Criticism text. */
  criticism: string;
  /** The judge's reason. */
  judgeReason: string;
  /** Per-iteration records. */
  iterations: LoopIteration[];
}
|