groundswell 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.claude/system_prompts/task-breakdown.md +100 -0
- package/PRPs/001-hierarchical-workflow-engine.md +2438 -0
- package/PRPs/PRDs/001-hierarchical-workflow-engine.md +543 -0
- package/PRPs/PRDs/002-agent-prompt.md +390 -0
- package/PRPs/PRDs/003-agent-prompt.md +943 -0
- package/PRPs/PRDs/004-agent-prompt.md +1136 -0
- package/PRPs/PRDs/tasks-001.json +492 -0
- package/PRPs/README.md +83 -0
- package/PRPs/templates/prp_base.md +222 -0
- package/README.md +218 -0
- package/docs/agent.md +422 -0
- package/docs/prompt.md +419 -0
- package/docs/workflow.md +600 -0
- package/examples/README.md +244 -0
- package/examples/examples/01-basic-workflow.ts +100 -0
- package/examples/examples/02-decorator-options.ts +217 -0
- package/examples/examples/03-parent-child.ts +241 -0
- package/examples/examples/04-observers-debugger.ts +340 -0
- package/examples/examples/05-error-handling.ts +387 -0
- package/examples/examples/06-concurrent-tasks.ts +352 -0
- package/examples/examples/07-agent-loops.ts +432 -0
- package/examples/examples/08-sdk-features.ts +667 -0
- package/examples/examples/09-reflection.ts +573 -0
- package/examples/examples/10-introspection.ts +550 -0
- package/examples/index.ts +143 -0
- package/examples/utils/helpers.ts +57 -0
- package/llms_full.txt +5890 -0
- package/package.json +63 -0
- package/plan/P1P2/PRP.md +527 -0
- package/plan/P1P2/research/LRU_CACHE_BEST_PRACTICES.md +1929 -0
- package/plan/P1P2/research/LRU_CACHE_CODE_PATTERNS.md +857 -0
- package/plan/P1P2/research/LRU_CACHE_INTEGRATION_GUIDE.md +738 -0
- package/plan/P1P2/research/LRU_CACHE_RESEARCH_INDEX.md +424 -0
- package/plan/P1P2/research/REFLECTION_INDEX.md +291 -0
- package/plan/P1P2/research/REFLECTION_RESEARCH_REPORT.md +1342 -0
- package/plan/P1P2/research/RESEARCH_SUMMARY.md +342 -0
- package/plan/P1P2/research/anthropic-sdk.md +174 -0
- package/plan/P1P2/research/async-local-storage.md +200 -0
- package/plan/P1P2/research/reflection-code-patterns.md +1205 -0
- package/plan/P1P2/research/reflection-decision-matrix.md +421 -0
- package/plan/P1P2/research/reflection-implementation-guide.md +1341 -0
- package/plan/P1P2/research/reflection-integration-guide.md +834 -0
- package/plan/P1P2/research/reflection-patterns.md +1468 -0
- package/plan/P1P2/research/reflection-quick-reference.md +558 -0
- package/plan/P1P2/research/zod-schema.md +152 -0
- package/plan/P3P4/PRP.md +1388 -0
- package/plan/P3P4/research/caching-lru.md +116 -0
- package/plan/P3P4/research/introspection-tools.md +177 -0
- package/plan/P3P4/research/reflection-patterns.md +117 -0
- package/plan/P4P5/PRP.md +1136 -0
- package/plan/P4P5/research/RESEARCH_SUMMARY.md +151 -0
- package/plan/architecture/external_deps.md +358 -0
- package/plan/architecture/system_context.md +242 -0
- package/plan/backlog.json +867 -0
- package/plan/research/INTROSPECTION_RESEARCH_SUMMARY.md +378 -0
- package/plan/research/README-INTROSPECTION.md +352 -0
- package/plan/research/agent-introspection-patterns.md +1085 -0
- package/plan/research/introspection-security-guide.md +928 -0
- package/plan/research/introspection-tool-examples.md +875 -0
- package/scripts/generate-llms-full.ts +206 -0
- package/src/__tests__/integration/agent-workflow.test.ts +256 -0
- package/src/__tests__/integration/tree-mirroring.test.ts +114 -0
- package/src/__tests__/unit/agent.test.ts +169 -0
- package/src/__tests__/unit/cache-key.test.ts +182 -0
- package/src/__tests__/unit/cache.test.ts +172 -0
- package/src/__tests__/unit/context.test.ts +138 -0
- package/src/__tests__/unit/decorators.test.ts +100 -0
- package/src/__tests__/unit/introspection-tools.test.ts +277 -0
- package/src/__tests__/unit/prompt.test.ts +135 -0
- package/src/__tests__/unit/reflection.test.ts +210 -0
- package/src/__tests__/unit/tree-debugger.test.ts +85 -0
- package/src/__tests__/unit/workflow.test.ts +81 -0
- package/src/cache/cache-key.ts +244 -0
- package/src/cache/cache.ts +236 -0
- package/src/cache/index.ts +8 -0
- package/src/core/agent.ts +573 -0
- package/src/core/context.ts +119 -0
- package/src/core/event-tree.ts +260 -0
- package/src/core/factory.ts +123 -0
- package/src/core/index.ts +17 -0
- package/src/core/logger.ts +87 -0
- package/src/core/mcp-handler.ts +184 -0
- package/src/core/prompt.ts +150 -0
- package/src/core/workflow-context.ts +349 -0
- package/src/core/workflow.ts +302 -0
- package/src/debugger/index.ts +1 -0
- package/src/debugger/tree-debugger.ts +210 -0
- package/src/decorators/index.ts +3 -0
- package/src/decorators/observed-state.ts +95 -0
- package/src/decorators/step.ts +139 -0
- package/src/decorators/task.ts +96 -0
- package/src/examples/index.ts +2 -0
- package/src/examples/tdd-orchestrator.ts +65 -0
- package/src/examples/test-cycle-workflow.ts +64 -0
- package/src/index.ts +140 -0
- package/src/reflection/index.ts +5 -0
- package/src/reflection/reflection.ts +407 -0
- package/src/tools/index.ts +36 -0
- package/src/tools/introspection.ts +464 -0
- package/src/types/agent.ts +90 -0
- package/src/types/decorators.ts +25 -0
- package/src/types/error-strategy.ts +13 -0
- package/src/types/error.ts +20 -0
- package/src/types/events.ts +74 -0
- package/src/types/index.ts +55 -0
- package/src/types/logging.ts +24 -0
- package/src/types/observer.ts +18 -0
- package/src/types/prompt.ts +40 -0
- package/src/types/reflection.ts +117 -0
- package/src/types/sdk-primitives.ts +128 -0
- package/src/types/snapshot.ts +14 -0
- package/src/types/workflow-context.ts +163 -0
- package/src/types/workflow.ts +37 -0
- package/src/utils/id.ts +11 -0
- package/src/utils/index.ts +3 -0
- package/src/utils/observable.ts +77 -0
- package/tasks.json +0 -0
- package/tsconfig.json +22 -0
- package/vitest.config.ts +16 -0
|
@@ -0,0 +1,1205 @@
|
|
|
1
|
+
# AI Agent Reflection - Practical Code Patterns
|
|
2
|
+
|
|
3
|
+
This document provides copy-paste-ready patterns for implementing reflection in TypeScript/JavaScript agents.
|
|
4
|
+
|
|
5
|
+
## Pattern 1: Simple Reflection Loop (Minimal)
|
|
6
|
+
|
|
7
|
+
**Use Case**: Basic validation + single reflection pass
|
|
8
|
+
**Lines of Code**: ~30
|
|
9
|
+
**Tokens per attempt**: ~200-400
|
|
10
|
+
|
|
11
|
+
```typescript
|
|
12
|
+
async function simpleReflection(task: string): Promise<string> {
|
|
13
|
+
// Generate
|
|
14
|
+
const response = await client.messages.create({
|
|
15
|
+
model: "claude-opus-4.5",
|
|
16
|
+
max_tokens: 1024,
|
|
17
|
+
messages: [{ role: "user", content: task }],
|
|
18
|
+
});
|
|
19
|
+
const output = response.content[0].text || "";
|
|
20
|
+
|
|
21
|
+
// Quick validation
|
|
22
|
+
if (output.length < 50) {
|
|
23
|
+
// Reflect and retry
|
|
24
|
+
const improved = await client.messages.create({
|
|
25
|
+
model: "claude-opus-4.5",
|
|
26
|
+
max_tokens: 1024,
|
|
27
|
+
messages: [
|
|
28
|
+
{
|
|
29
|
+
role: "user",
|
|
30
|
+
content: `${task}\n\nYour previous response was too short. Provide a more detailed, comprehensive answer.`,
|
|
31
|
+
},
|
|
32
|
+
],
|
|
33
|
+
});
|
|
34
|
+
return improved.content[0].text || output;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
return output;
|
|
38
|
+
}
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Pattern 2: Multi-Attempt Loop with History
|
|
44
|
+
|
|
45
|
+
**Use Case**: Complex tasks needing intelligent retry
|
|
46
|
+
**Lines of Code**: ~50
|
|
47
|
+
**Tokens per attempt**: ~300-600
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
interface AttemptRecord {
|
|
51
|
+
number: number;
|
|
52
|
+
output: string;
|
|
53
|
+
error: string | null;
|
|
54
|
+
timestamp: Date;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
 * Runs `task` through the model up to `maxAttempts` times, validating each
 * output and carrying what was learned from failures into the next prompt.
 *
 * After every failed attempt except the last, the model is asked what it
 * should try differently. That reflection is logged and appended to the next
 * attempt's prompt, alongside the list of previous validation errors.
 *
 * @param task - The task description sent to the model.
 * @param maxAttempts - Maximum number of generation attempts (default 3).
 * @returns The first output that `validateOutput` reports as valid.
 * @throws Error when no attempt produced a valid output.
 */
async function multiAttemptReflection(
  task: string,
  maxAttempts: number = 3
): Promise<string> {
  const attempts: AttemptRecord[] = [];
  // Reflection produced after the most recent failure; empty until one exists.
  let latestReflection = "";

  for (let i = 1; i <= maxAttempts; i++) {
    const historyContext =
      attempts.length > 0
        ? `\n\nPrevious attempts:\n${attempts
            .map((a) => `Attempt ${a.number}: ${a.error || "completed"}`)
            .join("\n")}\n`
        : "";
    // Without this the reflection call would cost tokens but never influence
    // the next generation.
    const reflectionContext = latestReflection
      ? `\nReflection on the last attempt:\n${latestReflection}\n`
      : "";

    const response = await client.messages.create({
      model: "claude-opus-4.5",
      max_tokens: 1024,
      messages: [
        {
          role: "user",
          content: `${task}${historyContext}${reflectionContext}`,
        },
      ],
    });

    const output = response.content[0].text || "";
    const validation = validateOutput(output);

    if (validation.isValid) {
      return output;
    }

    attempts.push({
      number: i,
      output,
      error: validation.errors[0],
      timestamp: new Date(),
    });

    // Request reflection if not on last attempt
    if (i < maxAttempts) {
      const reflection = await client.messages.create({
        model: "claude-opus-4.5",
        max_tokens: 300,
        messages: [
          {
            role: "user",
            content: `Task: ${task}\n\nYour attempt failed: ${validation.errors[0]}\n\nWhat should you try differently?`,
          },
        ],
      });
      latestReflection = reflection.content[0].text || "";
      console.log("Reflection:", latestReflection);
    }
  }

  // `attempts` is empty when maxAttempts <= 0; avoid dereferencing undefined.
  const lastError =
    attempts.length > 0
      ? attempts[attempts.length - 1].error
      : "no attempts were made";

  throw new Error(
    `Failed after ${maxAttempts} attempts. Last error: ${lastError}`
  );
}
|
|
118
|
+
|
|
119
|
+
/**
 * Applies two cheap heuristics to a model output.
 *
 * @param output - The text to check.
 * @returns `errors` holds one message per failed heuristic, in check order;
 *   `isValid` is true only when `errors` is empty.
 */
function validateOutput(output: string): {
  isValid: boolean;
  errors: string[];
} {
  // Each entry pairs "did this check fail?" with the message to report.
  const checks: Array<[boolean, string]> = [
    [output.length < 20, "Output too short"],
    [!output.includes(" "), "No proper sentences"],
  ];

  const errors = checks
    .filter(([failed]) => failed)
    .map(([, message]) => message);

  return { isValid: errors.length === 0, errors };
}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Pattern 3: Error-Triggered Reflection
|
|
133
|
+
|
|
134
|
+
**Use Case**: Reflection only on detected failures
|
|
135
|
+
**Lines of Code**: ~60
|
|
136
|
+
**Tokens per attempt**: ~400-800
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
interface ReflectionContext {
|
|
140
|
+
originalInput: string;
|
|
141
|
+
failedOutput: string;
|
|
142
|
+
errorType: "validation" | "execution" | "logic";
|
|
143
|
+
errorMessage: string;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
 * Generates a solution for `task`, executes it, and reflects only when the
 * execution reports a failure. Makes at most three attempts.
 *
 * The caller's `task` is never modified. Each retry prompt is rebuilt from the
 * original task plus the latest error and reflection, so prompts do not nest
 * or grow across attempts.
 *
 * @param task - The task description sent to the model.
 * @param executeTask - Runs a generated solution and reports success or failure.
 * @returns The first generated solution whose execution succeeded.
 * @throws Error when all attempts fail; the message quotes the first 100
 *   characters of the original task.
 */
async function errorTriggeredReflection(
  task: string,
  executeTask: (input: string) => Promise<{ success: boolean; output: string; error?: string }>
): Promise<string> {
  const maxAttempts = 3;
  // Prompt for the next generation call. Starts as the bare task and is
  // replaced (not appended to) after each reflection.
  let prompt = task;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // Generate solution
    const response = await client.messages.create({
      model: "claude-opus-4.5",
      max_tokens: 1024,
      messages: [{ role: "user", content: prompt }],
    });
    const solution = response.content[0].text || "";

    // Try to execute
    const execution = await executeTask(solution);

    if (execution.success) {
      return solution;
    }

    // Build reflection context from the ORIGINAL task, not the augmented prompt.
    const context: ReflectionContext = {
      originalInput: task,
      failedOutput: solution,
      errorType: detectErrorType(execution.error),
      errorMessage: execution.error || "Unknown error",
    };

    // Reflect on error (skipped after the final attempt: nothing would use it)
    if (attempt < maxAttempts) {
      const reflectionPrompt = buildReflectionPrompt(context);
      const reflectionResponse = await client.messages.create({
        model: "claude-opus-4.5",
        max_tokens: 400,
        messages: [{ role: "user", content: reflectionPrompt }],
      });
      const reflection = reflectionResponse.content[0].text || "";
      console.log(`Attempt ${attempt} - Reflection:\n${reflection}\n`);

      // Next prompt = original task + why it failed + reflection
      prompt = `${context.originalInput}\n\nPrevious approach didn't work because: ${context.errorMessage}\n\nReflection: ${reflection}\n\nTry again with a different strategy.`;
    }
  }

  throw new Error(
    `Failed after ${maxAttempts} attempts on task: ${task.substring(0, 100)}`
  );
}
|
|
199
|
+
|
|
200
|
+
/**
 * Classifies an error message by the keywords it contains.
 *
 * @param error - Error text, or undefined when none was reported.
 * @returns "execution" when the text mentions TypeError or SyntaxError,
 *   "validation" when it mentions assert or expect, otherwise "logic"
 *   (also used for a missing or empty message).
 */
function detectErrorType(
  error: string | undefined
): "validation" | "execution" | "logic" {
  if (!error) {
    return "logic";
  }

  const message: string = error;
  const mentionsAny = (keywords: string[]): boolean =>
    keywords.some((keyword) => message.includes(keyword));

  // Order matters: execution keywords win over validation keywords.
  if (mentionsAny(["TypeError", "SyntaxError"])) {
    return "execution";
  }
  if (mentionsAny(["assert", "expect"])) {
    return "validation";
  }
  return "logic";
}
|
|
209
|
+
|
|
210
|
+
function buildReflectionPrompt(context: ReflectionContext): string {
|
|
211
|
+
return `Your solution failed with this error:
|
|
212
|
+
|
|
213
|
+
ERROR TYPE: ${context.errorType}
|
|
214
|
+
ERROR MESSAGE: ${context.errorMessage}
|
|
215
|
+
|
|
216
|
+
Your attempted solution:
|
|
217
|
+
${context.failedOutput}
|
|
218
|
+
|
|
219
|
+
Analyze:
|
|
220
|
+
1. Why did this approach fail?
|
|
221
|
+
2. What's a fundamentally different approach?
|
|
222
|
+
3. What assumption was wrong?
|
|
223
|
+
|
|
224
|
+
Provide a new strategy to solve: ${context.originalInput}`;
|
|
225
|
+
}
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Pattern 4: Instruction-Following Validation
|
|
231
|
+
|
|
232
|
+
**Use Case**: Ensuring outputs meet explicit criteria
|
|
233
|
+
**Lines of Code**: ~80
|
|
234
|
+
**Tokens per attempt**: ~400-700
|
|
235
|
+
|
|
236
|
+
```typescript
|
|
237
|
+
interface ValidationRule {
|
|
238
|
+
name: string;
|
|
239
|
+
description: string;
|
|
240
|
+
validate: (output: string) => boolean;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
interface ValidatedOutput {
|
|
244
|
+
output: string;
|
|
245
|
+
passedRules: string[];
|
|
246
|
+
failedRules: string[];
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/**
 * Generates a response for `task` and retries until it satisfies every
 * validation rule or the attempt budget is exhausted.
 *
 * Each attempt builds its prompt with `buildGenerationPrompt` (adding a
 * reminder from the second attempt on) and scores the reply with
 * `validateOutput(output, rules)`. Unless it was the final attempt, the model
 * is then asked to reflect on the failed rules; the reflection is logged.
 *
 * @param task - The task description sent to the model.
 * @param rules - Rules the response must satisfy.
 * @param maxAttempts - Maximum number of generation attempts (default 3).
 * @returns The first output for which no rule failed.
 * @throws Error when no attempt satisfies all rules; the message lists the
 *   rules failed by the last attempt.
 */
async function validateWithReflection(
  task: string,
  rules: ValidationRule[],
  maxAttempts: number = 3
): Promise<string> {
  // Declared outside the loop so the final error can report them. The
  // per-attempt `validation` result is not in scope after the loop.
  let lastFailedRules: string[] = [];

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // Generate
    const response = await client.messages.create({
      model: "claude-opus-4.5",
      max_tokens: 1024,
      messages: [
        {
          role: "user",
          content: buildGenerationPrompt(task, rules, attempt > 1),
        },
      ],
    });
    const output = response.content[0].text || "";

    // Validate
    const validation = validateOutput(output, rules);
    console.log(`Attempt ${attempt}: ${validation.passedRules.length}/${rules.length} rules passed`);

    if (validation.failedRules.length === 0) {
      return output;
    }
    lastFailedRules = validation.failedRules;

    // Reflect if not last attempt
    if (attempt < maxAttempts) {
      const reflection = await client.messages.create({
        model: "claude-opus-4.5",
        max_tokens: 400,
        messages: [
          {
            role: "user",
            content: buildReflectionPromptForValidation(
              task,
              output,
              validation.failedRules,
              rules
            ),
          },
        ],
      });
      console.log("Reflection:", reflection.content[0].text);
    }
  }

  throw new Error(
    `Validation failed after ${maxAttempts} attempts. ` +
      `Failed rules: ${lastFailedRules.join(", ")}`
  );
}
|
|
308
|
+
|
|
309
|
+
function buildGenerationPrompt(
|
|
310
|
+
task: string,
|
|
311
|
+
rules: ValidationRule[],
|
|
312
|
+
includeReminder: boolean = false
|
|
313
|
+
): string {
|
|
314
|
+
const rulesText = rules
|
|
315
|
+
.map((r) => `- ${r.name}: ${r.description}`)
|
|
316
|
+
.join("\n");
|
|
317
|
+
|
|
318
|
+
const reminderText = includeReminder
|
|
319
|
+
? "\n\nREMINDER: Your previous response violated some of these rules. This time, ensure ALL rules are satisfied."
|
|
320
|
+
: "";
|
|
321
|
+
|
|
322
|
+
return `Task: ${task}
|
|
323
|
+
|
|
324
|
+
Your response MUST satisfy these rules:
|
|
325
|
+
${rulesText}${reminderText}`;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
/**
 * Runs every rule against `output` and sorts the rule names into passed and
 * failed lists, preserving the order of `rules`.
 *
 * @param output - The text to validate.
 * @param rules - Rules to evaluate; each rule's `validate` is called once.
 * @returns The original output together with the names of passed and failed rules.
 */
function validateOutput(
  output: string,
  rules: ValidationRule[]
): ValidatedOutput {
  const passedRules: string[] = [];
  const failedRules: string[] = [];

  rules.forEach((rule) => {
    const bucket = rule.validate(output) ? passedRules : failedRules;
    bucket.push(rule.name);
  });

  return { output, passedRules, failedRules };
}
|
|
345
|
+
|
|
346
|
+
function buildReflectionPromptForValidation(
|
|
347
|
+
task: string,
|
|
348
|
+
output: string,
|
|
349
|
+
failedRules: string[],
|
|
350
|
+
allRules: ValidationRule[]
|
|
351
|
+
): string {
|
|
352
|
+
const failedRulesDetails = allRules
|
|
353
|
+
.filter((r) => failedRules.includes(r.name))
|
|
354
|
+
.map((r) => `- ${r.name}: ${r.description}`)
|
|
355
|
+
.join("\n");
|
|
356
|
+
|
|
357
|
+
return `Task: ${task}
|
|
358
|
+
|
|
359
|
+
Your response violated these rules:
|
|
360
|
+
${failedRulesDetails}
|
|
361
|
+
|
|
362
|
+
Your response was:
|
|
363
|
+
${output}
|
|
364
|
+
|
|
365
|
+
What specific changes would make your response satisfy all rules?`;
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
// Example usage
|
|
369
|
+
const rules: ValidationRule[] = [
|
|
370
|
+
{
|
|
371
|
+
name: "proper_grammar",
|
|
372
|
+
description: "Response uses proper grammar and punctuation",
|
|
373
|
+
validate: (output) => output.includes(".") || output.includes("?"),
|
|
374
|
+
},
|
|
375
|
+
{
|
|
376
|
+
name: "min_length",
|
|
377
|
+
description: "Response is at least 100 characters",
|
|
378
|
+
validate: (output) => output.length >= 100,
|
|
379
|
+
},
|
|
380
|
+
{
|
|
381
|
+
name: "structured_format",
|
|
382
|
+
description: "Response uses clear formatting with sections",
|
|
383
|
+
validate: (output) => output.includes("\n") && output.split("\n").length >= 3,
|
|
384
|
+
},
|
|
385
|
+
];
|
|
386
|
+
|
|
387
|
+
// const result = await validateWithReflection(
|
|
388
|
+
// "Write about the benefits of reflection in AI",
|
|
389
|
+
// rules,
|
|
390
|
+
// 3
|
|
391
|
+
// );
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
---
|
|
395
|
+
|
|
396
|
+
## Pattern 5: Confidence-Based Reflection
|
|
397
|
+
|
|
398
|
+
**Use Case**: Reflect only when model is uncertain
|
|
399
|
+
**Lines of Code**: ~70
|
|
400
|
+
**Tokens per attempt**: ~500-1000
|
|
401
|
+
|
|
402
|
+
```typescript
|
|
403
|
+
interface ConfidenceAssessment {
|
|
404
|
+
output: string;
|
|
405
|
+
confidence: number; // 0-1
|
|
406
|
+
uncertaintyAreas: string[];
|
|
407
|
+
suggestedAlternatives: string[];
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
async function confidenceBasedReflection(
|
|
411
|
+
task: string,
|
|
412
|
+
confidenceThreshold: number = 0.75
|
|
413
|
+
): Promise<string> {
|
|
414
|
+
// First pass: generate and assess confidence
|
|
415
|
+
const assessment = await generateWithConfidence(task);
|
|
416
|
+
|
|
417
|
+
console.log(`Initial confidence: ${(assessment.confidence * 100).toFixed(0)}%`);
|
|
418
|
+
console.log(`Uncertainty areas: ${assessment.uncertaintyAreas.join(", ")}`);
|
|
419
|
+
|
|
420
|
+
// If confident enough, return immediately
|
|
421
|
+
if (assessment.confidence >= confidenceThreshold) {
|
|
422
|
+
console.log("Output meets confidence threshold, returning without reflection");
|
|
423
|
+
return assessment.output;
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
// If not confident, reflect and improve
|
|
427
|
+
console.log("Confidence below threshold, triggering reflection...");
|
|
428
|
+
const improved = await reflectOnLowConfidence(
|
|
429
|
+
task,
|
|
430
|
+
assessment
|
|
431
|
+
);
|
|
432
|
+
|
|
433
|
+
return improved;
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
/**
 * Asks the model to answer `task` and then append a JSON self-assessment, and
 * splits the reply into the answer text and the parsed assessment.
 *
 * Parsing is tolerant: trailing whitespace and a closing code fence are
 * ignored, and the JSON block is located by trying each `{` from the end of
 * the reply backwards until one parses as an object. An answer that itself
 * contains braces therefore does not break extraction. If nothing parses, the
 * whole reply is returned as the output with a neutral confidence of 0.5.
 *
 * @param task - The task description sent to the model.
 * @returns The answer text plus confidence (clamped to 0-1; 0.5 when missing
 *   or not a finite number), uncertainty areas and suggested alternatives.
 */
async function generateWithConfidence(
  task: string
): Promise<ConfidenceAssessment> {
  const response = await client.messages.create({
    model: "claude-opus-4.5",
    max_tokens: 1200,
    messages: [
      {
        role: "user",
        content: `${task}

After providing your response, assess your own confidence by providing a JSON block:
{
  "confidence": <0.0 to 1.0>,
  "uncertaintyAreas": ["area1", "area2"],
  "suggestedAlternatives": ["alternative1", "alternative2"]
}

Only provide JSON for the assessment, nothing else after it.`,
      },
    ],
  });

  const text = response.content[0].text || "";
  const fallback: ConfidenceAssessment = {
    output: text,
    confidence: 0.5,
    uncertaintyAreas: [],
    suggestedAlternatives: [],
  };

  // Models often end with a newline or wrap the JSON in a fenced block.
  const body = text.trimEnd().replace(/```$/, "").trimEnd();
  if (!body.endsWith("}")) {
    return fallback;
  }

  const toStrings = (value: unknown): string[] =>
    Array.isArray(value)
      ? value.filter((item): item is string => typeof item === "string")
      : [];

  // Try candidate opening braces from the last one backwards. The first slice
  // that parses as an object is the assessment, even when it is nested.
  let start = body.lastIndexOf("{");
  while (start !== -1) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(body.slice(start));
    } catch {
      parsed = undefined;
    }

    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      const fields = parsed as Record<string, unknown>;
      const rawConfidence = fields.confidence;
      const confidence =
        typeof rawConfidence === "number" && Number.isFinite(rawConfidence)
          ? Math.min(1, Math.max(0, rawConfidence))
          : 0.5;

      return {
        // Everything before the JSON block, minus an opening fence if present.
        output: body
          .slice(0, start)
          .replace(/```(?:json)?\s*$/, "")
          .trimEnd(),
        confidence,
        uncertaintyAreas: toStrings(fields.uncertaintyAreas),
        suggestedAlternatives: toStrings(fields.suggestedAlternatives),
      };
    }

    // lastIndexOf clamps a negative fromIndex to 0, so stop explicitly at 0.
    start = start > 0 ? body.lastIndexOf("{", start - 1) : -1;
  }

  return fallback;
}
|
|
479
|
+
|
|
480
|
+
async function reflectOnLowConfidence(
|
|
481
|
+
task: string,
|
|
482
|
+
assessment: ConfidenceAssessment
|
|
483
|
+
): Promise<string> {
|
|
484
|
+
const response = await client.messages.create({
|
|
485
|
+
model: "claude-opus-4.5",
|
|
486
|
+
max_tokens: 1024,
|
|
487
|
+
messages: [
|
|
488
|
+
{
|
|
489
|
+
role: "user",
|
|
490
|
+
content: `Original task: ${task}
|
|
491
|
+
|
|
492
|
+
Your previous response (confidence: ${(assessment.confidence * 100).toFixed(0)}%):
|
|
493
|
+
${assessment.output}
|
|
494
|
+
|
|
495
|
+
You identified these uncertainty areas:
|
|
496
|
+
${assessment.uncertaintyAreas.map((a) => `- ${a}`).join("\n")}
|
|
497
|
+
|
|
498
|
+
You considered these alternatives:
|
|
499
|
+
${assessment.suggestedAlternatives.map((a) => `- ${a}`).join("\n")}
|
|
500
|
+
|
|
501
|
+
Given your identified uncertainties, provide an improved response that:
|
|
502
|
+
1. Directly addresses the uncertainty areas
|
|
503
|
+
2. Is more thorough and robust
|
|
504
|
+
3. Includes more detail or evidence
|
|
505
|
+
4. Considers the alternatives you mentioned
|
|
506
|
+
|
|
507
|
+
Provide your improved response, then a brief confidence assessment (0-1).`,
|
|
508
|
+
},
|
|
509
|
+
],
|
|
510
|
+
});
|
|
511
|
+
|
|
512
|
+
return response.content[0].text || "";
|
|
513
|
+
}
|
|
514
|
+
```
|
|
515
|
+
|
|
516
|
+
---
|
|
517
|
+
|
|
518
|
+
## Pattern 6: Tool-Feedback Reflection (Code)
|
|
519
|
+
|
|
520
|
+
**Use Case**: Reflection based on test/lint results
|
|
521
|
+
**Lines of Code**: ~90
|
|
522
|
+
**Tokens per attempt**: ~600-1200
|
|
523
|
+
|
|
524
|
+
```typescript
|
|
525
|
+
interface CodeFeedback {
|
|
526
|
+
type: "syntax" | "lint" | "test" | "runtime";
|
|
527
|
+
message: string;
|
|
528
|
+
severity: "error" | "warning";
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
async function codeGenerationWithReflection(
|
|
532
|
+
requirement: string,
|
|
533
|
+
maxAttempts: number = 3
|
|
534
|
+
): Promise<string> {
|
|
535
|
+
let code = "";
|
|
536
|
+
const feedback: CodeFeedback[] = [];
|
|
537
|
+
|
|
538
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
539
|
+
// Generate code
|
|
540
|
+
const response = await client.messages.create({
|
|
541
|
+
model: "claude-opus-4.5",
|
|
542
|
+
max_tokens: 2048,
|
|
543
|
+
messages: [
|
|
544
|
+
{
|
|
545
|
+
role: "user",
|
|
546
|
+
content: buildCodeGenerationPrompt(
|
|
547
|
+
requirement,
|
|
548
|
+
feedback,
|
|
549
|
+
attempt
|
|
550
|
+
),
|
|
551
|
+
},
|
|
552
|
+
],
|
|
553
|
+
});
|
|
554
|
+
|
|
555
|
+
code = extractCode(response.content[0].text || "");
|
|
556
|
+
|
|
557
|
+
// Get feedback
|
|
558
|
+
const newFeedback = await getCodeFeedback(code);
|
|
559
|
+
|
|
560
|
+
if (newFeedback.length === 0) {
|
|
561
|
+
console.log(`Code passed on attempt ${attempt}`);
|
|
562
|
+
return code;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
feedback.push(...newFeedback);
|
|
566
|
+
console.log(
|
|
567
|
+
`Attempt ${attempt}: ${newFeedback.length} issues found`
|
|
568
|
+
);
|
|
569
|
+
|
|
570
|
+
// Request reflection if not last attempt
|
|
571
|
+
if (attempt < maxAttempts) {
|
|
572
|
+
const reflection = await client.messages.create({
|
|
573
|
+
model: "claude-opus-4.5",
|
|
574
|
+
max_tokens: 400,
|
|
575
|
+
messages: [
|
|
576
|
+
{
|
|
577
|
+
role: "user",
|
|
578
|
+
content: buildCodeReflectionPrompt(
|
|
579
|
+
requirement,
|
|
580
|
+
code,
|
|
581
|
+
newFeedback
|
|
582
|
+
),
|
|
583
|
+
},
|
|
584
|
+
],
|
|
585
|
+
});
|
|
586
|
+
console.log("Reflection:", reflection.content[0].text);
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
throw new Error(
|
|
591
|
+
`Code generation failed after ${maxAttempts} attempts with ${feedback.length} issues`
|
|
592
|
+
);
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
function buildCodeGenerationPrompt(
|
|
596
|
+
requirement: string,
|
|
597
|
+
previousFeedback: CodeFeedback[],
|
|
598
|
+
attempt: number
|
|
599
|
+
): string {
|
|
600
|
+
const feedbackText =
|
|
601
|
+
previousFeedback.length > 0
|
|
602
|
+
? `\n\nPrevious issues found:\n${previousFeedback
|
|
603
|
+
.map(
|
|
604
|
+
(f) =>
|
|
605
|
+
`[${f.severity.toUpperCase()}] ${f.type}: ${f.message}`
|
|
606
|
+
)
|
|
607
|
+
.join("\n")}\n\nFix these issues in your new response.`
|
|
608
|
+
: "";
|
|
609
|
+
|
|
610
|
+
return `Generate TypeScript/JavaScript code for:
|
|
611
|
+
${requirement}${feedbackText}
|
|
612
|
+
|
|
613
|
+
Requirements:
|
|
614
|
+
- Use modern TypeScript (5.2+)
|
|
615
|
+
- Include proper type annotations
|
|
616
|
+
- Include error handling
|
|
617
|
+
- Include comments for complex logic
|
|
618
|
+
- Be production-ready`;
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
/**
 * Produces lightweight feedback for a piece of generated code: a syntax check
 * plus two heuristic lint checks.
 *
 * SECURITY: `code` comes from a model and must be treated as untrusted. The
 * syntax check therefore only compiles the source with `vm.Script` and never
 * runs it. The previous `eval(code)` executed the generated code with full
 * process privileges. As a result, errors that only occur at run time are no
 * longer reported here.
 *
 * NOTE: the code is parsed as a plain JavaScript script, so TypeScript-only
 * syntax and `import`/`export` statements are reported as syntax errors. A
 * real implementation would use the TypeScript compiler and ESLint.
 *
 * @param code - Source text to inspect.
 * @returns Zero or more feedback items; an empty array means no issues found.
 */
async function getCodeFeedback(code: string): Promise<CodeFeedback[]> {
  const feedback: CodeFeedback[] = [];

  // Syntax check: constructing a Script compiles the source without running it.
  const { Script } = await import("node:vm");
  try {
    new Script(code);
  } catch (e) {
    feedback.push({
      type: "syntax",
      message: String(e),
      severity: "error",
    });
  }

  // Lint checks (would use eslint in real implementation)
  if (!code.includes("://") && code.includes("http")) {
    feedback.push({
      type: "lint",
      message: "URL should be quoted string",
      severity: "warning",
    });
  }

  // Complexity check
  const lines = code.split("\n").length;
  if (lines > 100) {
    feedback.push({
      type: "lint",
      message: "Function is too long, consider breaking it up",
      severity: "warning",
    });
  }

  return feedback;
}
|
|
657
|
+
|
|
658
|
+
function buildCodeReflectionPrompt(
|
|
659
|
+
requirement: string,
|
|
660
|
+
code: string,
|
|
661
|
+
feedback: CodeFeedback[]
|
|
662
|
+
): string {
|
|
663
|
+
return `Requirement: ${requirement}
|
|
664
|
+
|
|
665
|
+
Your code had these issues:
|
|
666
|
+
${feedback.map((f) => `[${f.severity.toUpperCase()}] ${f.type}: ${f.message}`).join("\n")}
|
|
667
|
+
|
|
668
|
+
Current code:
|
|
669
|
+
${code}
|
|
670
|
+
|
|
671
|
+
What specific changes would address these issues? Focus on:
|
|
672
|
+
1. The root cause of each issue
|
|
673
|
+
2. How to fix it without breaking functionality`;
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
/**
 * Pulls the source out of the first fenced Markdown code block in `text`.
 *
 * Accepts an untagged fence or one tagged `typescript`, `javascript`, `ts`,
 * `tsx`, `js` or `jsx`, tolerates spaces after the tag, and handles both LF
 * and CRLF line endings.
 *
 * @param text - Model reply that may wrap its code in a fenced block.
 * @returns The code inside the first matching fence, or `text` unchanged when
 *   no such fence is found.
 */
function extractCode(text: string): string {
  // Extract code from markdown code blocks
  const codeBlockMatch = text.match(
    /```(?:typescript|javascript|tsx?|jsx?)?[ \t]*\r?\n([\s\S]*?)\r?\n```/
  );
  if (codeBlockMatch) {
    return codeBlockMatch[1];
  }
  return text;
}
|
|
684
|
+
```
|
|
685
|
+
|
|
686
|
+
---
|
|
687
|
+
|
|
688
|
+
## Pattern 7: Multi-Agent Reflection (Generator + Critic)
|
|
689
|
+
|
|
690
|
+
**Use Case**: Higher quality through dialogue between agents
|
|
691
|
+
**Lines of Code**: ~100
|
|
692
|
+
**Tokens per attempt**: ~800-1600
|
|
693
|
+
|
|
694
|
+
```typescript
|
|
695
|
+
interface CritiqueResult {
|
|
696
|
+
isSatisfactory: boolean;
|
|
697
|
+
strengths: string[];
|
|
698
|
+
weaknesses: string[];
|
|
699
|
+
suggestions: string[];
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
class GeneratorCriticReflection {
|
|
703
|
+
private client: LLMClient;
|
|
704
|
+
private maxRounds: number;
|
|
705
|
+
|
|
706
|
+
constructor(client: LLMClient, maxRounds: number = 3) {
|
|
707
|
+
this.client = client;
|
|
708
|
+
this.maxRounds = maxRounds;
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
async generate(task: string): Promise<string> {
|
|
712
|
+
let output = await this.generatorPass(task);
|
|
713
|
+
|
|
714
|
+
for (let round = 1; round < this.maxRounds; round++) {
|
|
715
|
+
const critique = await this.criticPass(task, output);
|
|
716
|
+
|
|
717
|
+
if (critique.isSatisfactory) {
|
|
718
|
+
console.log(`✓ Critique satisfied on round ${round}`);
|
|
719
|
+
return output;
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
console.log(`Round ${round} critique:`, critique.weaknesses);
|
|
723
|
+
output = await this.generatorImprove(
|
|
724
|
+
task,
|
|
725
|
+
output,
|
|
726
|
+
critique
|
|
727
|
+
);
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
return output;
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
private async generatorPass(task: string): Promise<string> {
|
|
734
|
+
const response = await this.client.messages.create({
|
|
735
|
+
model: "claude-opus-4.5",
|
|
736
|
+
max_tokens: 1024,
|
|
737
|
+
messages: [{ role: "user", content: task }],
|
|
738
|
+
});
|
|
739
|
+
return response.content[0].text || "";
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
private async generatorImprove(
|
|
743
|
+
task: string,
|
|
744
|
+
previousOutput: string,
|
|
745
|
+
critique: CritiqueResult
|
|
746
|
+
): Promise<string> {
|
|
747
|
+
const response = await this.client.messages.create({
|
|
748
|
+
model: "claude-opus-4.5",
|
|
749
|
+
max_tokens: 1024,
|
|
750
|
+
messages: [
|
|
751
|
+
{
|
|
752
|
+
role: "user",
|
|
753
|
+
content: `Task: ${task}
|
|
754
|
+
|
|
755
|
+
Previous output:
|
|
756
|
+
${previousOutput}
|
|
757
|
+
|
|
758
|
+
Critique feedback:
|
|
759
|
+
Weaknesses: ${critique.weaknesses.join("; ")}
|
|
760
|
+
Suggestions: ${critique.suggestions.join("; ")}
|
|
761
|
+
|
|
762
|
+
Provide an improved version that addresses all the feedback.`,
|
|
763
|
+
},
|
|
764
|
+
],
|
|
765
|
+
});
|
|
766
|
+
return response.content[0].text || "";
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
/**
 * Asks the model to critique `output` against `task` and parses its verdict.
 *
 * The reply is expected to contain one JSON object. Only the span from the
 * first "{" to the last "}" is parsed, so replies wrapped in a code fence or
 * prefixed with prose are still accepted. Every field is normalised:
 * `isSatisfactory` is true only for a literal `true`, and each list falls
 * back to an empty array, so callers can safely `.join()` them.
 *
 * If the reply cannot be parsed as a JSON object, a warning is logged and
 * the output is treated as satisfactory, which ends the refinement loop.
 *
 * @param task - The original task description.
 * @param output - The draft to critique.
 * @returns A fully populated critique.
 */
private async criticPass(
  task: string,
  output: string
): Promise<CritiqueResult> {
  const response = await this.client.messages.create({
    model: "claude-opus-4.5",
    max_tokens: 600,
    messages: [
      {
        role: "user",
        content: `You are a critical reviewer.

Task: ${task}

Response to critique:
${output}

Provide a JSON critique:
{
"isSatisfactory": boolean,
"strengths": ["strength1", "strength2"],
"weaknesses": ["weakness1", "weakness2"],
"suggestions": ["suggestion1", "suggestion2"]
}

Respond with ONLY the JSON object.`,
      },
    ],
  });

  // An empty content array falls through to "{}" instead of throwing.
  const text = response.content[0]?.text || "{}";
  const toStringList = (value: unknown): string[] =>
    Array.isArray(value) ? value.map(String) : [];

  try {
    const start = text.indexOf("{");
    const end = text.lastIndexOf("}");
    const parsed = JSON.parse(
      start !== -1 && end > start ? text.slice(start, end + 1) : text
    );
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new Error("Critique is not a JSON object");
    }

    return {
      isSatisfactory: parsed.isSatisfactory === true,
      strengths: toStringList(parsed.strengths),
      weaknesses: toStringList(parsed.weaknesses),
      suggestions: toStringList(parsed.suggestions),
    };
  } catch (error) {
    // Best-effort fallback: stop refining rather than loop on garbage.
    console.warn(
      "Could not parse critique JSON; treating output as satisfactory:",
      error
    );
    return {
      isSatisfactory: true,
      strengths: [],
      weaknesses: [],
      suggestions: [],
    };
  }
}
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
// Usage
|
|
814
|
+
// const gc = new GeneratorCriticReflection(client, 3);
|
|
815
|
+
// const result = await gc.generate("Write an essay on AI reflection");
|
|
816
|
+
```
|
|
817
|
+
|
|
818
|
+
---
|
|
819
|
+
|
|
820
|
+
## Pattern 8: State-Aware Reflection with History
|
|
821
|
+
|
|
822
|
+
**Use Case**: Learning from previous attempts within a workflow
|
|
823
|
+
**Lines of Code**: ~110
|
|
824
|
+
**Tokens per attempt**: ~700-1400
|
|
825
|
+
|
|
826
|
+
```typescript
|
|
827
|
+
/** Mutable record of what has been tried so far for a single task. */
interface ExecutionState {
  /** Identifier of the task this state belongs to. */
  taskId: string;
  /** Number of attempts started so far. */
  attempt: number;
  /** One entry per finished attempt, oldest first. */
  previousAttempts: {
    /** The solution text that was tried. */
    output: string;
    /** Outcome of executing that solution. */
    result: "success" | "failed";
    /** Explanation of the outcome (the error text for failures). */
    reason: string;
    /** When the outcome was recorded. */
    timestamp: Date;
  }[];
  /** Error from the most recent failed attempt, or null if none was recorded. */
  currentError: null | string;
  /** Names of approaches whose attempt succeeded. */
  succeededApproaches: string[];
  /** Names of approaches whose attempt failed. */
  failedApproaches: string[];
}
|
|
840
|
+
|
|
841
|
+
/**
 * Retries a task, recording every attempt in `state` so later attempts can
 * be prompted with the history of earlier ones.
 *
 * `state` is mutated in place: `attempt` is incremented at the start of each
 * iteration, and the outcome is appended to `previousAttempts` and to the
 * matching approaches list. After a failure that is not the last allowed
 * attempt, a reflection is requested and logged.
 *
 * @param task - The task description.
 * @param state - Attempt history; updated as attempts run.
 * @param maxAttempts - Upper bound for `state.attempt` (default 3).
 * @returns The first solution that executes successfully.
 * @throws Error when `state.attempt` reaches `maxAttempts` without a success.
 */
async function stateAwareReflection(
  task: string,
  state: ExecutionState,
  maxAttempts: number = 3
): Promise<string> {
  while (state.attempt < maxAttempts) {
    state.attempt += 1;

    // Generate a candidate informed by the history, then try to execute it.
    const candidate = await generateInformedSolution(task, state);
    const { success, error, approachUsed } = await executeAndValidate(candidate);

    if (success) {
      state.previousAttempts.push({
        output: candidate,
        result: "success",
        reason: "Execution passed all checks",
        timestamp: new Date(),
      });
      state.succeededApproaches.push(approachUsed);
      return candidate;
    }

    state.previousAttempts.push({
      output: candidate,
      result: "failed",
      reason: error,
      timestamp: new Date(),
    });
    state.failedApproaches.push(approachUsed);
    state.currentError = error;

    // No reflection after the final attempt; fall through to the throw.
    if (state.attempt >= maxAttempts) {
      break;
    }

    const reflection = await reflectWithState(task, candidate, error, state);
    console.log(`Attempt ${state.attempt} reflection:`, reflection);
  }

  throw new Error(
    `Task failed after ${maxAttempts} attempts. Last error: ${state.currentError}`
  );
}
|
|
897
|
+
|
|
898
|
+
/**
 * Requests a solution for `task`, appending the attempt history built from
 * `state` directly after the task text.
 *
 * @param task - The task description.
 * @param state - Attempt history used to build the extra prompt context.
 * @returns The text of the first content block, or "" when that text is empty.
 */
async function generateInformedSolution(
  task: string,
  state: ExecutionState
): Promise<string> {
  const prompt = `${task}${buildHistoryContext(state)}`;

  const { content } = await client.messages.create({
    max_tokens: 1024,
    model: "claude-opus-4.5",
    messages: [{ content: prompt, role: "user" }],
  });

  const [firstBlock] = content;
  return firstBlock.text || "";
}
|
|
917
|
+
|
|
918
|
+
/**
 * Renders the attempt history as text to append to a prompt.
 *
 * The result lists the approaches that worked, the approaches that failed,
 * and the two most recent attempts, followed by an instruction to try
 * something different. Attempts are numbered by their 1-based position in
 * `state.previousAttempts`.
 *
 * @param state - Attempt history to summarise.
 * @returns The context text, or "" when no attempts have been recorded.
 */
function buildHistoryContext(state: ExecutionState): string {
  const { previousAttempts, succeededApproaches, failedApproaches } = state;

  if (previousAttempts.length === 0) {
    return "";
  }

  const succeeded =
    succeededApproaches.length > 0
      ? `\n\nApproaches that worked: ${succeededApproaches.join(", ")}`
      : "";

  const failed =
    failedApproaches.length > 0
      ? `\nApproaches that failed: ${failedApproaches.join(", ")}`
      : "";

  // Attempt records do not store their own number, so derive it from the
  // record's position in the full history.
  const recentAttempts = previousAttempts.slice(-2);
  const firstRecentNumber = previousAttempts.length - recentAttempts.length + 1;
  const recent = recentAttempts
    .map(
      (attempt, index) =>
        `Attempt ${firstRecentNumber + index}: ${attempt.result} (${attempt.reason})`
    )
    .join("\n");

  return `${succeeded}${failed}\n\nRecent attempts:\n${recent}\n\nFor this attempt, try a different approach from what failed before.`;
}
|
|
943
|
+
|
|
944
|
+
/**
 * Asks the model to reflect on a failed attempt and propose a different
 * approach.
 *
 * @param task - The task description.
 * @param solution - The solution text that failed.
 * @param error - The error reported for that solution.
 * @param state - Attempt history; only `state.attempt` is read, to label the
 *   failed attempt in the prompt.
 * @returns The text of the first content block, or "" when that text is empty.
 */
async function reflectWithState(
  task: string,
  solution: string,
  error: string,
  state: ExecutionState
): Promise<string> {
  const prompt = `Task: ${task}

Attempt ${state.attempt} failed with: ${error}

Your attempted solution:
${solution}

What different approach should we try next?
- What assumptions was this approach based on?
- How could those assumptions be wrong?
- What fundamentally different strategy exists?`;

  const { content } = await client.messages.create({
    max_tokens: 400,
    model: "claude-opus-4.5",
    messages: [{ content: prompt, role: "user" }],
  });

  const [firstBlock] = content;
  return firstBlock.text || "";
}
|
|
973
|
+
|
|
974
|
+
/**
 * Placeholder for executing and validating a generated solution.
 *
 * This mock ignores `solution` and always reports success; a real
 * implementation would parse, validate, and run it inside the `try`.
 *
 * @param solution - The solution text to execute (unused by the mock).
 * @returns The outcome, the error text ("" on success), and the name of the
 *   approach that was used.
 */
async function executeAndValidate(solution: string): Promise<{
  success: boolean;
  error: string;
  approachUsed: string;
}> {
  const approachUsed = "web_search_approach";

  try {
    // Execute solution (parse, validate, run, etc.)
    return { success: true, error: "", approachUsed };
  } catch (failure) {
    return { success: false, error: String(failure), approachUsed };
  }
}
|
|
995
|
+
```
|
|
996
|
+
|
|
997
|
+
---
|
|
998
|
+
|
|
999
|
+
## Pattern 9: Low-Token Reflection (Budget-Conscious)
|
|
1000
|
+
|
|
1001
|
+
**Use Case**: Reflection with minimal token overhead
|
|
1002
|
+
**Lines of Code**: ~40
|
|
1003
|
+
**Tokens per attempt**: ~150-300
|
|
1004
|
+
|
|
1005
|
+
```typescript
|
|
1006
|
+
/**
 * Generates a response and, if the token budget allows, runs one brief
 * review pass and at most one fix pass.
 *
 * Token use is tracked from the `usage` reported by each call. A follow-up
 * call is made only while at least 200 tokens of budget remain, and each
 * call's `max_tokens` is capped by what is left.
 *
 * @param task - The task description.
 * @param tokenBudget - Total tokens (input + output) to aim for (default 1000).
 * @returns The fixed response if a fix pass ran and returned text, otherwise
 *   the initial response.
 */
async function budgetConsciousReflection(
  task: string,
  tokenBudget: number = 1000
): Promise<string> {
  let tokensUsed = 0;

  // Initial generation. Clamp to 1 so a budget of 200 or less does not
  // produce a zero or negative max_tokens.
  const gen1 = await client.messages.create({
    model: "claude-haiku-4.5", // Cheaper model
    max_tokens: Math.max(1, Math.min(500, tokenBudget - 200)),
    messages: [{ role: "user", content: task }],
  });

  let output = gen1.content[0].text || "";
  tokensUsed += gen1.usage?.input_tokens || 0;
  tokensUsed += gen1.usage?.output_tokens || 0;

  if (tokensUsed + 200 > tokenBudget) {
    return output;
  }

  // Lightweight reflection on an excerpt; mark it as cut only when it is.
  const excerpt =
    output.length > 500 ? `${output.substring(0, 500)}...` : output;
  const reflection = await client.messages.create({
    model: "claude-haiku-4.5",
    max_tokens: Math.min(300, tokenBudget - tokensUsed - 100),
    messages: [
      {
        role: "user",
        content: `Review this response for errors (brief 1-2 sentence assessment):\n\n${excerpt}`,
      },
    ],
  });

  tokensUsed += reflection.usage?.input_tokens || 0;
  tokensUsed += reflection.usage?.output_tokens || 0;

  // If still within budget and issues found, do one quick pass.
  // NOTE(review): the substring match also fires on replies such as
  // "no errors found"; consider asking the reviewer for a structured verdict.
  const reflectionText = reflection.content[0].text || "";
  const tokensRemaining = tokenBudget - tokensUsed;
  if (
    tokensRemaining >= 200 &&
    reflectionText.toLowerCase().includes("error")
  ) {
    const fix = await client.messages.create({
      model: "claude-haiku-4.5",
      // Never request more output than the budget still allows.
      max_tokens: Math.min(300, tokensRemaining),
      messages: [
        {
          role: "user",
          content: `Issue: ${reflectionText}\n\nOriginal response: ${output}\n\nQuick fix:`,
        },
      ],
    });

    output = fix.content[0].text || output;
  }

  return output;
}
|
|
1070
|
+
```
|
|
1071
|
+
|
|
1072
|
+
---
|
|
1073
|
+
|
|
1074
|
+
## Pattern 10: Timeout-Safe Reflection
|
|
1075
|
+
|
|
1076
|
+
**Use Case**: Reflection with guaranteed completion
|
|
1077
|
+
**Lines of Code**: ~60
|
|
1078
|
+
**Tokens per attempt**: Variable
|
|
1079
|
+
|
|
1080
|
+
```typescript
|
|
1081
|
+
/** Time limits, all in milliseconds, for a timeout-safe reflection run. */
interface TimeoutAwareReflectionOptions {
  /** Limit for the first generation attempt. */
  initialTimeout: number;
  /** Limit for the entire run, measured from its start. */
  maxTotalTime: number;
  /** Budget for each reflection call. */
  reflectionTime: number;
}
|
|
1086
|
+
|
|
1087
|
+
/**
 * Generates a response under a time limit and, if enough of the overall
 * time budget remains, improves it with one reflection pass.
 *
 * Reflection is skipped when less than 500 ms remain before the deadline.
 * If reflection fails or times out, the unreflected response is returned.
 *
 * @param task - The task description.
 * @param options - Time limits for generation, reflection, and the whole run.
 * @returns The reflected response when reflection completes in time,
 *   otherwise the initial response.
 * @throws Error when generation fails or times out; there is no output to
 *   reflect on in that case. The message is "Timeout exceeded during
 *   generation" when the overall deadline has passed, otherwise the
 *   original error is rethrown.
 */
async function timeoutSafeReflection(
  task: string,
  options: TimeoutAwareReflectionOptions
): Promise<string> {
  const deadline = Date.now() + options.maxTotalTime;

  let output: string;
  try {
    // First attempt with time limit. Do not return here: the result still
    // has to go through the reflection step below.
    output = await withTimeout(generateResponse(task), options.initialTimeout);
  } catch (error) {
    // If generation fails, surface the error instead of reflecting on nothing.
    if (Date.now() > deadline) {
      throw new Error("Timeout exceeded during generation");
    }
    throw error;
  }

  // Reflection (only if time permits)
  const timeRemaining = deadline - Date.now();
  if (timeRemaining < 500) {
    // Less than 500ms left, don't reflect
    return output;
  }

  try {
    // Keep a 100 ms margin so the reflection timeout fires before the deadline.
    return await withTimeout(
      reflectOnOutput(task, output),
      Math.min(options.reflectionTime, timeRemaining - 100)
    );
  } catch {
    // Reflection failed or timed out, return what we have
    console.warn("Reflection timed out or failed, returning original output");
    return output;
  }
}
|
|
1128
|
+
|
|
1129
|
+
/**
 * Races `promise` against a timer and rejects if the timer fires first.
 *
 * The timer is cleared as soon as the race settles, so a finished call does
 * not leave a pending timeout behind. The underlying operation is not
 * cancelled when the timeout wins; only its result is ignored.
 *
 * @param promise - The operation to wait for.
 * @param timeoutMs - Time limit in milliseconds.
 * @returns The value `promise` resolves with.
 * @throws Error "Operation timed out after <timeoutMs>ms" when the timer
 *   fires first; otherwise whatever `promise` rejects with.
 */
async function withTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Operation timed out after ${timeoutMs}ms`)),
      timeoutMs
    );
  });

  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer);
  }
}
|
|
1143
|
+
|
|
1144
|
+
/**
 * Sends `task` as a single user message and returns the reply text.
 *
 * @param task - The task description.
 * @returns The text of the first content block, or "" when that text is empty.
 */
async function generateResponse(task: string): Promise<string> {
  const { content } = await client.messages.create({
    max_tokens: 1024,
    model: "claude-opus-4.5",
    messages: [{ content: task, role: "user" }],
  });
  const [firstBlock] = content;
  return firstBlock.text || "";
}
|
|
1152
|
+
|
|
1153
|
+
/**
 * Asks the model to quickly review and improve `output` for `task`.
 *
 * @param task - The task description.
 * @param output - The response to review.
 * @returns The text of the first content block, or `output` unchanged when
 *   that text is empty.
 */
async function reflectOnOutput(
  task: string,
  output: string
): Promise<string> {
  const prompt = [
    "Quickly review and improve:",
    `Task: ${task}`,
    `Response: ${output}`,
  ].join("\n\n");

  const { content } = await client.messages.create({
    max_tokens: 400,
    model: "claude-opus-4.5",
    messages: [{ content: prompt, role: "user" }],
  });

  const [firstBlock] = content;
  return firstBlock.text || output;
}
|
|
1169
|
+
```
|
|
1170
|
+
|
|
1171
|
+
---
|
|
1172
|
+
|
|
1173
|
+
## Quick Reference: Pattern Selection
|
|
1174
|
+
|
|
1175
|
+
| Pattern | Best For | Tokens/Attempt | Latency |
|
|
1176
|
+
|---------|----------|------------------|---------|
|
|
1177
|
+
| Simple (1) | Basic validation | 200-400 | +200ms |
|
|
1178
|
+
| Multi-Attempt (2) | Complex tasks | 300-600 | +400ms |
|
|
1179
|
+
| Error-Triggered (3) | Execution errors | 400-800 | +500ms |
|
|
1180
|
+
| IFE Validation (4) | Explicit rules | 400-700 | +400ms |
|
|
1181
|
+
| Confidence (5) | Uncertain output | 500-1000 | +600ms |
|
|
1182
|
+
| Tool-Feedback (6) | Code/tests | 600-1200 | +800ms |
|
|
1183
|
+
| Multi-Agent (7) | High quality | 800-1600 | +1000ms |
|
|
1184
|
+
| State-Aware (8) | Learning workflows | 700-1400 | +700ms |
|
|
1185
|
+
| Budget-Conscious (9) | Limited tokens | 150-300 | +200ms |
|
|
1186
|
+
| Timeout-Safe (10) | Time constraints | Variable | Guaranteed |
|
|
1187
|
+
|
|
1188
|
+
---
|
|
1189
|
+
|
|
1190
|
+
## Implementation Checklist
|
|
1191
|
+
|
|
1192
|
+
When implementing reflection patterns:
|
|
1193
|
+
|
|
1194
|
+
- [ ] Define success criteria (not vague goals)
|
|
1195
|
+
- [ ] Set max retry limit (typically 3-5)
|
|
1196
|
+
- [ ] Capture error context before reflection
|
|
1197
|
+
- [ ] Implement exponential backoff for transient errors
|
|
1198
|
+
- [ ] Use state deduplication to detect loops
|
|
1199
|
+
- [ ] Include cost/token tracking
|
|
1200
|
+
- [ ] Log all attempts and reflections
|
|
1201
|
+
- [ ] Test with both success and failure paths
|
|
1202
|
+
- [ ] Monitor reflection effectiveness (did it help?)
|
|
1203
|
+
- [ ] Consider latency impact on user experience
|
|
1204
|
+
- [ ] Plan graceful degradation if reflection fails
|
|
1205
|
+
|