groundswell 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.claude/system_prompts/task-breakdown.md +100 -0
- package/PRPs/001-hierarchical-workflow-engine.md +2438 -0
- package/PRPs/PRDs/001-hierarchical-workflow-engine.md +543 -0
- package/PRPs/PRDs/002-agent-prompt.md +390 -0
- package/PRPs/PRDs/003-agent-prompt.md +943 -0
- package/PRPs/PRDs/004-agent-prompt.md +1136 -0
- package/PRPs/PRDs/tasks-001.json +492 -0
- package/PRPs/README.md +83 -0
- package/PRPs/templates/prp_base.md +222 -0
- package/README.md +218 -0
- package/docs/agent.md +422 -0
- package/docs/prompt.md +419 -0
- package/docs/workflow.md +600 -0
- package/examples/README.md +244 -0
- package/examples/examples/01-basic-workflow.ts +100 -0
- package/examples/examples/02-decorator-options.ts +217 -0
- package/examples/examples/03-parent-child.ts +241 -0
- package/examples/examples/04-observers-debugger.ts +340 -0
- package/examples/examples/05-error-handling.ts +387 -0
- package/examples/examples/06-concurrent-tasks.ts +352 -0
- package/examples/examples/07-agent-loops.ts +432 -0
- package/examples/examples/08-sdk-features.ts +667 -0
- package/examples/examples/09-reflection.ts +573 -0
- package/examples/examples/10-introspection.ts +550 -0
- package/examples/index.ts +143 -0
- package/examples/utils/helpers.ts +57 -0
- package/llms_full.txt +5890 -0
- package/package.json +63 -0
- package/plan/P1P2/PRP.md +527 -0
- package/plan/P1P2/research/LRU_CACHE_BEST_PRACTICES.md +1929 -0
- package/plan/P1P2/research/LRU_CACHE_CODE_PATTERNS.md +857 -0
- package/plan/P1P2/research/LRU_CACHE_INTEGRATION_GUIDE.md +738 -0
- package/plan/P1P2/research/LRU_CACHE_RESEARCH_INDEX.md +424 -0
- package/plan/P1P2/research/REFLECTION_INDEX.md +291 -0
- package/plan/P1P2/research/REFLECTION_RESEARCH_REPORT.md +1342 -0
- package/plan/P1P2/research/RESEARCH_SUMMARY.md +342 -0
- package/plan/P1P2/research/anthropic-sdk.md +174 -0
- package/plan/P1P2/research/async-local-storage.md +200 -0
- package/plan/P1P2/research/reflection-code-patterns.md +1205 -0
- package/plan/P1P2/research/reflection-decision-matrix.md +421 -0
- package/plan/P1P2/research/reflection-implementation-guide.md +1341 -0
- package/plan/P1P2/research/reflection-integration-guide.md +834 -0
- package/plan/P1P2/research/reflection-patterns.md +1468 -0
- package/plan/P1P2/research/reflection-quick-reference.md +558 -0
- package/plan/P1P2/research/zod-schema.md +152 -0
- package/plan/P3P4/PRP.md +1388 -0
- package/plan/P3P4/research/caching-lru.md +116 -0
- package/plan/P3P4/research/introspection-tools.md +177 -0
- package/plan/P3P4/research/reflection-patterns.md +117 -0
- package/plan/P4P5/PRP.md +1136 -0
- package/plan/P4P5/research/RESEARCH_SUMMARY.md +151 -0
- package/plan/architecture/external_deps.md +358 -0
- package/plan/architecture/system_context.md +242 -0
- package/plan/backlog.json +867 -0
- package/plan/research/INTROSPECTION_RESEARCH_SUMMARY.md +378 -0
- package/plan/research/README-INTROSPECTION.md +352 -0
- package/plan/research/agent-introspection-patterns.md +1085 -0
- package/plan/research/introspection-security-guide.md +928 -0
- package/plan/research/introspection-tool-examples.md +875 -0
- package/scripts/generate-llms-full.ts +206 -0
- package/src/__tests__/integration/agent-workflow.test.ts +256 -0
- package/src/__tests__/integration/tree-mirroring.test.ts +114 -0
- package/src/__tests__/unit/agent.test.ts +169 -0
- package/src/__tests__/unit/cache-key.test.ts +182 -0
- package/src/__tests__/unit/cache.test.ts +172 -0
- package/src/__tests__/unit/context.test.ts +138 -0
- package/src/__tests__/unit/decorators.test.ts +100 -0
- package/src/__tests__/unit/introspection-tools.test.ts +277 -0
- package/src/__tests__/unit/prompt.test.ts +135 -0
- package/src/__tests__/unit/reflection.test.ts +210 -0
- package/src/__tests__/unit/tree-debugger.test.ts +85 -0
- package/src/__tests__/unit/workflow.test.ts +81 -0
- package/src/cache/cache-key.ts +244 -0
- package/src/cache/cache.ts +236 -0
- package/src/cache/index.ts +8 -0
- package/src/core/agent.ts +573 -0
- package/src/core/context.ts +119 -0
- package/src/core/event-tree.ts +260 -0
- package/src/core/factory.ts +123 -0
- package/src/core/index.ts +17 -0
- package/src/core/logger.ts +87 -0
- package/src/core/mcp-handler.ts +184 -0
- package/src/core/prompt.ts +150 -0
- package/src/core/workflow-context.ts +349 -0
- package/src/core/workflow.ts +302 -0
- package/src/debugger/index.ts +1 -0
- package/src/debugger/tree-debugger.ts +210 -0
- package/src/decorators/index.ts +3 -0
- package/src/decorators/observed-state.ts +95 -0
- package/src/decorators/step.ts +139 -0
- package/src/decorators/task.ts +96 -0
- package/src/examples/index.ts +2 -0
- package/src/examples/tdd-orchestrator.ts +65 -0
- package/src/examples/test-cycle-workflow.ts +64 -0
- package/src/index.ts +140 -0
- package/src/reflection/index.ts +5 -0
- package/src/reflection/reflection.ts +407 -0
- package/src/tools/index.ts +36 -0
- package/src/tools/introspection.ts +464 -0
- package/src/types/agent.ts +90 -0
- package/src/types/decorators.ts +25 -0
- package/src/types/error-strategy.ts +13 -0
- package/src/types/error.ts +20 -0
- package/src/types/events.ts +74 -0
- package/src/types/index.ts +55 -0
- package/src/types/logging.ts +24 -0
- package/src/types/observer.ts +18 -0
- package/src/types/prompt.ts +40 -0
- package/src/types/reflection.ts +117 -0
- package/src/types/sdk-primitives.ts +128 -0
- package/src/types/snapshot.ts +14 -0
- package/src/types/workflow-context.ts +163 -0
- package/src/types/workflow.ts +37 -0
- package/src/utils/id.ts +11 -0
- package/src/utils/index.ts +3 -0
- package/src/utils/observable.ts +77 -0
- package/tasks.json +0 -0
- package/tsconfig.json +22 -0
- package/vitest.config.ts +16 -0
|
@@ -0,0 +1,834 @@
|
|
|
1
|
+
# Reflection Integration Guide for Groundswell Agent Framework
|
|
2
|
+
|
|
3
|
+
This guide explains how to integrate reflection patterns into the Groundswell workflow orchestration engine.
|
|
4
|
+
|
|
5
|
+
## Architecture Overview
|
|
6
|
+
|
|
7
|
+
Groundswell's hierarchical workflow engine provides the perfect foundation for reflection:
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Workflow
|
|
11
|
+
├── Step 1 (with reflection capability)
|
|
12
|
+
├── Step 2 (with reflection capability)
|
|
13
|
+
└── Step 3 (with reflection capability)
|
|
14
|
+
└── Child Workflow
|
|
15
|
+
├── Sub-Step 1 (with reflection capability)
|
|
16
|
+
└── Sub-Step 2 (with reflection capability)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Each step/task can have reflection configured independently, allowing fine-grained control over when and how reflection occurs.
|
|
20
|
+
|
|
21
|
+
## Core Integration Points
|
|
22
|
+
|
|
23
|
+
### 1. **Step-Level Reflection**
|
|
24
|
+
|
|
25
|
+
Add reflection configuration to the `@Step` decorator:
|
|
26
|
+
|
|
27
|
+
```typescript
|
|
28
|
+
interface StepReflectionConfig {
|
|
29
|
+
enabled: boolean;
|
|
30
|
+
trigger: "always" | "on-error" | "on-validation-failure" | "low-confidence";
|
|
31
|
+
maxAttempts: number;
|
|
32
|
+
validationRules?: ValidationRule[];
|
|
33
|
+
confidenceThreshold?: number;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
@Step({
|
|
37
|
+
name: "GenerateCode",
|
|
38
|
+
reflection: {
|
|
39
|
+
enabled: true,
|
|
40
|
+
trigger: "on-error",
|
|
41
|
+
maxAttempts: 3,
|
|
42
|
+
validationRules: [
|
|
43
|
+
{ name: "syntax", validate: (output) => checkSyntax(output) },
|
|
44
|
+
{ name: "hasTests", validate: (output) => output.includes("test") }
|
|
45
|
+
]
|
|
46
|
+
}
|
|
47
|
+
})
|
|
48
|
+
async generateCode(requirement: string): Promise<string> {
|
|
49
|
+
// Implementation
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 2. **Task-Level Reflection**
|
|
54
|
+
|
|
55
|
+
Configure reflection at the task/workflow level:
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
@Task({
|
|
59
|
+
name: "CodeReviewWorkflow",
|
|
60
|
+
reflection: {
|
|
61
|
+
enabled: true,
|
|
62
|
+
trigger: "on-error",
|
|
63
|
+
maxAttempts: 3,
|
|
64
|
+
stateCapture: true, // Capture state before/after
|
|
65
|
+
errorCategories: {
|
|
66
|
+
transient: { retries: 3, backoff: "exponential" },
|
|
67
|
+
logical: { retries: 2, backoff: "linear" },
|
|
68
|
+
invalid: { retries: 0, escalate: true }
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
})
|
|
72
|
+
async reviewCode() {
|
|
73
|
+
// Implementation
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 3. **Workflow-Level Reflection**
|
|
78
|
+
|
|
79
|
+
Configure reflection at the orchestration level:
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
class TestCycleWorkflow {
|
|
83
|
+
reflection: WorkflowReflectionConfig = {
|
|
84
|
+
enabled: true,
|
|
85
|
+
trigger: "on-workflow-error",
|
|
86
|
+
maxRounds: 3,
|
|
87
|
+
evaluationCriteria: {
|
|
88
|
+
allTestsPassed: true,
|
|
89
|
+
coverageAbove: 85,
|
|
90
|
+
noBlockingIssues: true
|
|
91
|
+
},
|
|
92
|
+
multiAgentCritique: {
|
|
93
|
+
enabled: true,
|
|
94
|
+
models: ["claude-opus-4.5", "claude-opus-4.5"] // Generator + Critic
|
|
95
|
+
}
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
async execute() {
|
|
99
|
+
// Multi-level workflow with reflection
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Implementation Patterns for Groundswell
|
|
105
|
+
|
|
106
|
+
### Pattern 1: Automatic Retry with Error Reflection
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
import { Step, Task, WorkflowContext } from "@groundswell/core";
|
|
110
|
+
|
|
111
|
+
@Step({
|
|
112
|
+
name: "ExecuteWithReflection",
|
|
113
|
+
reflection: {
|
|
114
|
+
enabled: true,
|
|
115
|
+
trigger: "on-error",
|
|
116
|
+
maxAttempts: 3
|
|
117
|
+
}
|
|
118
|
+
})
|
|
119
|
+
async executeWithReflection(
|
|
120
|
+
ctx: WorkflowContext,
|
|
121
|
+
input: string
|
|
122
|
+
): Promise<string> {
|
|
123
|
+
const state = {
|
|
124
|
+
attempt: 0,
|
|
125
|
+
lastError: null,
|
|
126
|
+
attempts: [] as Array<{ output: string; error: string | null }>
|
|
127
|
+
};
|
|
128
|
+
|
|
129
|
+
while (state.attempt < 3) {
|
|
130
|
+
state.attempt++;
|
|
131
|
+
|
|
132
|
+
try {
|
|
133
|
+
const output = await this.attemptExecution(input, state.attempts);
|
|
134
|
+
|
|
135
|
+
// Emit success event
|
|
136
|
+
ctx.emit({
|
|
137
|
+
type: "step:success",
|
|
138
|
+
step: "ExecuteWithReflection",
|
|
139
|
+
attempt: state.attempt,
|
|
140
|
+
output
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
return output;
|
|
144
|
+
} catch (error) {
|
|
145
|
+
state.lastError = error;
|
|
146
|
+
state.attempts.push({ output: "", error: String(error) });
|
|
147
|
+
|
|
148
|
+
// Emit error event for observability
|
|
149
|
+
ctx.emit({
|
|
150
|
+
type: "step:error",
|
|
151
|
+
step: "ExecuteWithReflection",
|
|
152
|
+
attempt: state.attempt,
|
|
153
|
+
error: String(error)
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
// Trigger reflection if not last attempt
|
|
157
|
+
if (state.attempt < 3) {
|
|
158
|
+
const reflection = await this.reflect(
|
|
159
|
+
input,
|
|
160
|
+
String(error),
|
|
161
|
+
state.attempts
|
|
162
|
+
);
|
|
163
|
+
|
|
164
|
+
ctx.emit({
|
|
165
|
+
type: "step:reflection",
|
|
166
|
+
step: "ExecuteWithReflection",
|
|
167
|
+
attempt: state.attempt,
|
|
168
|
+
reflection
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
throw new Error(
|
|
175
|
+
`Step failed after 3 attempts. Last error: ${state.lastError}`
|
|
176
|
+
);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
private async attemptExecution(
|
|
180
|
+
input: string,
|
|
181
|
+
history: Array<{ output: string; error: string | null }>
|
|
182
|
+
): Promise<string> {
|
|
183
|
+
const historyContext = history.length > 0
|
|
184
|
+
? `Previous attempts failed with: ${history
|
|
185
|
+
.map((h) => h.error)
|
|
186
|
+
.join("; ")}`
|
|
187
|
+
: "";
|
|
188
|
+
|
|
189
|
+
const response = await client.messages.create({
|
|
190
|
+
model: "claude-opus-4.5",
|
|
191
|
+
max_tokens: 1024,
|
|
192
|
+
messages: [
|
|
193
|
+
{
|
|
194
|
+
role: "user",
|
|
195
|
+
content: `${input}\n${historyContext}`,
|
|
196
|
+
},
|
|
197
|
+
],
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
return response.content[0].text || "";
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
private async reflect(
|
|
204
|
+
input: string,
|
|
205
|
+
error: string,
|
|
206
|
+
attempts: Array<{ output: string; error: string | null }>
|
|
207
|
+
): Promise<string> {
|
|
208
|
+
const response = await client.messages.create({
|
|
209
|
+
model: "claude-opus-4.5",
|
|
210
|
+
max_tokens: 400,
|
|
211
|
+
messages: [
|
|
212
|
+
{
|
|
213
|
+
role: "user",
|
|
214
|
+
content: `Task: ${input}
|
|
215
|
+
|
|
216
|
+
Failed with: ${error}
|
|
217
|
+
|
|
218
|
+
What should we try differently?`,
|
|
219
|
+
},
|
|
220
|
+
],
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
return response.content[0].text || "";
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### Pattern 2: Validation-Driven Reflection
|
|
228
|
+
|
|
229
|
+
```typescript
|
|
230
|
+
@Step({
|
|
231
|
+
name: "GenerateAndValidate",
|
|
232
|
+
reflection: {
|
|
233
|
+
enabled: true,
|
|
234
|
+
trigger: "on-validation-failure",
|
|
235
|
+
maxAttempts: 3,
|
|
236
|
+
validationRules: [
|
|
237
|
+
{ name: "required_fields", validate: (output) => hasRequiredFields(output) },
|
|
238
|
+
{ name: "type_safety", validate: (output) => isTypeValid(output) },
|
|
239
|
+
{ name: "business_rules", validate: (output) => meetsBusinessRules(output) }
|
|
240
|
+
]
|
|
241
|
+
}
|
|
242
|
+
})
|
|
243
|
+
async generateAndValidate(
|
|
244
|
+
ctx: WorkflowContext,
|
|
245
|
+
requirement: string
|
|
246
|
+
): Promise<string> {
|
|
247
|
+
const validationRules = [
|
|
248
|
+
{ name: "required_fields", validate: (output: string) => hasRequiredFields(output) },
|
|
249
|
+
{ name: "type_safety", validate: (output: string) => isTypeValid(output) },
|
|
250
|
+
{ name: "business_rules", validate: (output: string) => meetsBusinessRules(output) }
|
|
251
|
+
];
|
|
252
|
+
|
|
253
|
+
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
254
|
+
const output = await this.generate(requirement);
|
|
255
|
+
|
|
256
|
+
const violations = this.validate(output, validationRules);
|
|
257
|
+
|
|
258
|
+
if (violations.length === 0) {
|
|
259
|
+
ctx.emit({
|
|
260
|
+
type: "step:validation-passed",
|
|
261
|
+
step: "GenerateAndValidate",
|
|
262
|
+
attempt
|
|
263
|
+
});
|
|
264
|
+
return output;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
ctx.emit({
|
|
268
|
+
type: "step:validation-failed",
|
|
269
|
+
step: "GenerateAndValidate",
|
|
270
|
+
attempt,
|
|
271
|
+
violations: violations.map((v) => v.name)
|
|
272
|
+
});
|
|
273
|
+
|
|
274
|
+
if (attempt < 3) {
|
|
275
|
+
const reflection = await this.reflectOnViolations(
|
|
276
|
+
requirement,
|
|
277
|
+
output,
|
|
278
|
+
violations
|
|
279
|
+
);
|
|
280
|
+
|
|
281
|
+
ctx.emit({
|
|
282
|
+
type: "step:reflection",
|
|
283
|
+
step: "GenerateAndValidate",
|
|
284
|
+
attempt,
|
|
285
|
+
reflection,
|
|
286
|
+
violationCount: violations.length
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
throw new Error("Validation failed after 3 attempts");
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
private validate(
|
|
295
|
+
output: string,
|
|
296
|
+
rules: Array<{ name: string; validate: (output: string) => boolean }>
|
|
297
|
+
): Array<{ name: string; description: string }> {
|
|
298
|
+
const violations = [];
|
|
299
|
+
for (const rule of rules) {
|
|
300
|
+
if (!rule.validate(output)) {
|
|
301
|
+
violations.push({
|
|
302
|
+
name: rule.name,
|
|
303
|
+
description: `Failed: ${rule.name}`
|
|
304
|
+
});
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
return violations;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
private async reflectOnViolations(
|
|
311
|
+
requirement: string,
|
|
312
|
+
output: string,
|
|
313
|
+
violations: Array<{ name: string; description: string }>
|
|
314
|
+
): Promise<string> {
|
|
315
|
+
const response = await client.messages.create({
|
|
316
|
+
model: "claude-opus-4.5",
|
|
317
|
+
max_tokens: 400,
|
|
318
|
+
messages: [
|
|
319
|
+
{
|
|
320
|
+
role: "user",
|
|
321
|
+
content: `Requirement: ${requirement}
|
|
322
|
+
|
|
323
|
+
Your output violated these rules:
|
|
324
|
+
${violations.map((v) => `- ${v.description}`).join("\n")}
|
|
325
|
+
|
|
326
|
+
Output: ${output}
|
|
327
|
+
|
|
328
|
+
How would you fix these violations?`,
|
|
329
|
+
},
|
|
330
|
+
],
|
|
331
|
+
});
|
|
332
|
+
|
|
333
|
+
return response.content[0].text || "";
|
|
334
|
+
}
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
### Pattern 3: Confidence-Based Reflection
|
|
338
|
+
|
|
339
|
+
```typescript
|
|
340
|
+
interface ConfidenceOutput {
|
|
341
|
+
content: string;
|
|
342
|
+
confidence: number;
|
|
343
|
+
uncertaintyAreas: string[];
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
@Step({
|
|
347
|
+
name: "HighConfidenceGeneration",
|
|
348
|
+
reflection: {
|
|
349
|
+
enabled: true,
|
|
350
|
+
trigger: "low-confidence",
|
|
351
|
+
confidenceThreshold: 0.75,
|
|
352
|
+
maxAttempts: 2
|
|
353
|
+
}
|
|
354
|
+
})
|
|
355
|
+
async highConfidenceGeneration(
|
|
356
|
+
ctx: WorkflowContext,
|
|
357
|
+
task: string
|
|
358
|
+
): Promise<string> {
|
|
359
|
+
// First pass with confidence assessment
|
|
360
|
+
const firstPass = await this.generateWithConfidence(task);
|
|
361
|
+
|
|
362
|
+
ctx.emit({
|
|
363
|
+
type: "step:confidence-assessment",
|
|
364
|
+
step: "HighConfidenceGeneration",
|
|
365
|
+
confidence: firstPass.confidence,
|
|
366
|
+
uncertaintyAreas: firstPass.uncertaintyAreas
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
if (firstPass.confidence >= 0.75) {
|
|
370
|
+
return firstPass.content;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
// Low confidence, reflect and improve
|
|
374
|
+
const improved = await this.reflectOnUncertainty(
|
|
375
|
+
task,
|
|
376
|
+
firstPass
|
|
377
|
+
);
|
|
378
|
+
|
|
379
|
+
ctx.emit({
|
|
380
|
+
type: "step:low-confidence-reflection",
|
|
381
|
+
step: "HighConfidenceGeneration",
|
|
382
|
+
initialConfidence: firstPass.confidence,
|
|
383
|
+
reflection: improved
|
|
384
|
+
});
|
|
385
|
+
|
|
386
|
+
return improved;
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
private async generateWithConfidence(task: string): Promise<ConfidenceOutput> {
|
|
390
|
+
const response = await client.messages.create({
|
|
391
|
+
model: "claude-opus-4.5",
|
|
392
|
+
max_tokens: 1200,
|
|
393
|
+
messages: [
|
|
394
|
+
{
|
|
395
|
+
role: "user",
|
|
396
|
+
content: `${task}
|
|
397
|
+
|
|
398
|
+
Assess your own confidence (respond with JSON after your answer):
|
|
399
|
+
{"confidence": 0-1, "uncertaintyAreas": ["area1", "area2"]}`,
|
|
400
|
+
},
|
|
401
|
+
],
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
const text = response.content[0].text || "";
|
|
405
|
+
const jsonMatch = text.match(/\{[\s\S]*\}$/);
|
|
406
|
+
|
|
407
|
+
if (jsonMatch) {
|
|
408
|
+
const meta = JSON.parse(jsonMatch[0]);
|
|
409
|
+
return {
|
|
410
|
+
content: text.substring(0, text.lastIndexOf("{")),
|
|
411
|
+
confidence: meta.confidence,
|
|
412
|
+
uncertaintyAreas: meta.uncertaintyAreas || []
|
|
413
|
+
};
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
return {
|
|
417
|
+
content: text,
|
|
418
|
+
confidence: 0.5,
|
|
419
|
+
uncertaintyAreas: []
|
|
420
|
+
};
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
private async reflectOnUncertainty(
|
|
424
|
+
task: string,
|
|
425
|
+
output: ConfidenceOutput
|
|
426
|
+
): Promise<string> {
|
|
427
|
+
const response = await client.messages.create({
|
|
428
|
+
model: "claude-opus-4.5",
|
|
429
|
+
max_tokens: 1024,
|
|
430
|
+
messages: [
|
|
431
|
+
{
|
|
432
|
+
role: "user",
|
|
433
|
+
content: `Task: ${task}
|
|
434
|
+
|
|
435
|
+
Your response (confidence: ${output.confidence}):
|
|
436
|
+
${output.content}
|
|
437
|
+
|
|
438
|
+
You identified uncertainty in: ${output.uncertaintyAreas.join(", ")}
|
|
439
|
+
|
|
440
|
+
Provide an improved response that addresses these uncertainty areas with more detail and evidence.`,
|
|
441
|
+
},
|
|
442
|
+
],
|
|
443
|
+
});
|
|
444
|
+
|
|
445
|
+
return response.content[0].text || "";
|
|
446
|
+
}
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
### Pattern 4: Multi-Agent Reflection in Workflow
|
|
450
|
+
|
|
451
|
+
```typescript
|
|
452
|
+
@Task({
|
|
453
|
+
name: "ReviewAndIterateWorkflow",
|
|
454
|
+
reflection: {
|
|
455
|
+
enabled: true,
|
|
456
|
+
multiAgentCritique: {
|
|
457
|
+
enabled: true,
|
|
458
|
+
generatorModel: "claude-opus-4.5",
|
|
459
|
+
criticModel: "claude-opus-4.5",
|
|
460
|
+
maxRounds: 3
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
})
|
|
464
|
+
class ReviewAndIterateWorkflow {
|
|
465
|
+
@Step()
|
|
466
|
+
async generateOutput(ctx: WorkflowContext, requirement: string): Promise<string> {
|
|
467
|
+
const response = await client.messages.create({
|
|
468
|
+
model: "claude-opus-4.5",
|
|
469
|
+
max_tokens: 1024,
|
|
470
|
+
messages: [{ role: "user", content: requirement }],
|
|
471
|
+
});
|
|
472
|
+
return response.content[0].text || "";
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
@Step()
|
|
476
|
+
async criticReview(
|
|
477
|
+
ctx: WorkflowContext,
|
|
478
|
+
output: string,
|
|
479
|
+
requirement: string
|
|
480
|
+
): Promise<{
|
|
481
|
+
isSatisfactory: boolean;
|
|
482
|
+
feedback: string;
|
|
483
|
+
suggestions: string[];
|
|
484
|
+
}> {
|
|
485
|
+
const response = await client.messages.create({
|
|
486
|
+
model: "claude-opus-4.5",
|
|
487
|
+
max_tokens: 600,
|
|
488
|
+
messages: [
|
|
489
|
+
{
|
|
490
|
+
role: "user",
|
|
491
|
+
content: `Requirement: ${requirement}
|
|
492
|
+
|
|
493
|
+
Response to review:
|
|
494
|
+
${output}
|
|
495
|
+
|
|
496
|
+
Provide JSON critique:
|
|
497
|
+
{"isSatisfactory": boolean, "feedback": string, "suggestions": ["s1", "s2"]}`,
|
|
498
|
+
},
|
|
499
|
+
],
|
|
500
|
+
});
|
|
501
|
+
|
|
502
|
+
const text = response.content[0].text || "{}";
|
|
503
|
+
return JSON.parse(text);
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
@Step()
|
|
507
|
+
async improveBasedOnCritique(
|
|
508
|
+
ctx: WorkflowContext,
|
|
509
|
+
originalOutput: string,
|
|
510
|
+
requirement: string,
|
|
511
|
+
critique: { feedback: string; suggestions: string[] }
|
|
512
|
+
): Promise<string> {
|
|
513
|
+
const response = await client.messages.create({
|
|
514
|
+
model: "claude-opus-4.5",
|
|
515
|
+
max_tokens: 1024,
|
|
516
|
+
messages: [
|
|
517
|
+
{
|
|
518
|
+
role: "user",
|
|
519
|
+
content: `Requirement: ${requirement}
|
|
520
|
+
|
|
521
|
+
Original: ${originalOutput}
|
|
522
|
+
|
|
523
|
+
Critique: ${critique.feedback}
|
|
524
|
+
|
|
525
|
+
Suggestions: ${critique.suggestions.join("; ")}
|
|
526
|
+
|
|
527
|
+
Provide improved version.`,
|
|
528
|
+
},
|
|
529
|
+
],
|
|
530
|
+
});
|
|
531
|
+
|
|
532
|
+
return response.content[0].text || "";
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
async execute(ctx: WorkflowContext, requirement: string): Promise<string> {
|
|
536
|
+
let output = await this.generateOutput(ctx, requirement);
|
|
537
|
+
|
|
538
|
+
for (let round = 1; round <= 3; round++) {
|
|
539
|
+
const critique = await this.criticReview(ctx, output, requirement);
|
|
540
|
+
|
|
541
|
+
ctx.emit({
|
|
542
|
+
type: "workflow:critique",
|
|
543
|
+
round,
|
|
544
|
+
isSatisfactory: critique.isSatisfactory,
|
|
545
|
+
feedback: critique.feedback
|
|
546
|
+
});
|
|
547
|
+
|
|
548
|
+
if (critique.isSatisfactory) {
|
|
549
|
+
return output;
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
output = await this.improveBasedOnCritique(
|
|
553
|
+
ctx,
|
|
554
|
+
output,
|
|
555
|
+
requirement,
|
|
556
|
+
critique
|
|
557
|
+
);
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
return output;
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
```
|
|
564
|
+
|
|
565
|
+
## State Capture & Observability
|
|
566
|
+
|
|
567
|
+
### State Capture Before Reflection
|
|
568
|
+
|
|
569
|
+
```typescript
|
|
570
|
+
interface ReflectionSnapshot {
|
|
571
|
+
timestamp: Date;
|
|
572
|
+
stepName: string;
|
|
573
|
+
attemptNumber: number;
|
|
574
|
+
input: any;
|
|
575
|
+
output: string;
|
|
576
|
+
error: Error | null;
|
|
577
|
+
validationViolations: string[];
|
|
578
|
+
executionTimeMs: number;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
async function captureStateBeforeReflection(
|
|
582
|
+
ctx: WorkflowContext,
|
|
583
|
+
step: string,
|
|
584
|
+
attempt: number,
|
|
585
|
+
input: any,
|
|
586
|
+
output: string,
|
|
587
|
+
error: Error | null,
|
|
588
|
+
violations: string[],
|
|
589
|
+
executionTimeMs: number
|
|
590
|
+
): Promise<void> {
|
|
591
|
+
const snapshot: ReflectionSnapshot = {
|
|
592
|
+
timestamp: new Date(),
|
|
593
|
+
stepName: step,
|
|
594
|
+
attemptNumber: attempt,
|
|
595
|
+
input,
|
|
596
|
+
output,
|
|
597
|
+
error,
|
|
598
|
+
validationViolations: violations,
|
|
599
|
+
executionTimeMs
|
|
600
|
+
};
|
|
601
|
+
|
|
602
|
+
// Store in context for downstream access
|
|
603
|
+
ctx.state.reflectionHistory = ctx.state.reflectionHistory || [];
|
|
604
|
+
ctx.state.reflectionHistory.push(snapshot);
|
|
605
|
+
|
|
606
|
+
// Emit event for observability
|
|
607
|
+
ctx.emit({
|
|
608
|
+
type: "reflection:state-captured",
|
|
609
|
+
snapshot
|
|
610
|
+
});
|
|
611
|
+
}
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
### Event-Based Observability
|
|
615
|
+
|
|
616
|
+
```typescript
|
|
617
|
+
// In your workflow/step implementation
|
|
618
|
+
ctx.on("step:error", (event) => {
|
|
619
|
+
console.log(`Step ${event.step} failed on attempt ${event.attempt}`);
|
|
620
|
+
console.log(`Error: ${event.error}`);
|
|
621
|
+
});
|
|
622
|
+
|
|
623
|
+
ctx.on("step:reflection", (event) => {
|
|
624
|
+
console.log(`Reflection triggered for ${event.step}`);
|
|
625
|
+
console.log(`Reflection content: ${event.reflection}`);
|
|
626
|
+
});
|
|
627
|
+
|
|
628
|
+
ctx.on("reflection:state-captured", (event) => {
|
|
629
|
+
console.log(`State captured for reflection at ${event.snapshot.timestamp}`);
|
|
630
|
+
console.log(`Violations: ${event.snapshot.validationViolations.join(", ")}`);
|
|
631
|
+
});
|
|
632
|
+
|
|
633
|
+
ctx.on("step:success", (event) => {
|
|
634
|
+
console.log(`Step ${event.step} succeeded on attempt ${event.attempt}`);
|
|
635
|
+
});
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
## Configuration Best Practices
|
|
639
|
+
|
|
640
|
+
### Development Configuration
|
|
641
|
+
|
|
642
|
+
```typescript
|
|
643
|
+
const devConfig = {
|
|
644
|
+
reflection: {
|
|
645
|
+
enabled: true,
|
|
646
|
+
verbose: true,
|
|
647
|
+
captureFullState: true,
|
|
648
|
+
maxAttempts: 5, // More attempts for testing
|
|
649
|
+
logAllAttempts: true,
|
|
650
|
+
saveReflectionHistory: true
|
|
651
|
+
}
|
|
652
|
+
};
|
|
653
|
+
```
|
|
654
|
+
|
|
655
|
+
### Production Configuration
|
|
656
|
+
|
|
657
|
+
```typescript
|
|
658
|
+
const prodConfig = {
|
|
659
|
+
reflection: {
|
|
660
|
+
enabled: true,
|
|
661
|
+
verbose: false,
|
|
662
|
+
captureFullState: false,
|
|
663
|
+
maxAttempts: 3, // Fewer attempts, lower cost
|
|
664
|
+
logAllAttempts: false,
|
|
665
|
+
saveReflectionHistory: false, // Save only on error
|
|
666
|
+
confidenceThreshold: 0.8, // Higher threshold
|
|
667
|
+
maxTokensPerReflection: 300,
|
|
668
|
+
timeoutMs: 5000
|
|
669
|
+
}
|
|
670
|
+
};
|
|
671
|
+
```
|
|
672
|
+
|
|
673
|
+
## Cost Optimization
|
|
674
|
+
|
|
675
|
+
### Cost Calculation
|
|
676
|
+
|
|
677
|
+
```
|
|
678
|
+
Base Cost = Input Tokens + Output Tokens
|
|
679
|
+
Reflection Cost = Base Cost * (Number of Attempts - 1)
|
|
680
|
+
|
|
681
|
+
Example:
|
|
682
|
+
- Initial: 500 input + 500 output = 1000 tokens
|
|
683
|
+
- Reflection (2 retries): 1000 * 2 = 2000 tokens
|
|
684
|
+
- Total: 3000 tokens = 3x cost
|
|
685
|
+
|
|
686
|
+
Rule of thumb: Each reflection attempt adds roughly 1x the base cost, so N total attempts cost ~Nx
|
|
687
|
+
```
|
|
688
|
+
|
|
689
|
+
### Strategies to Reduce Cost
|
|
690
|
+
|
|
691
|
+
1. **Use cheaper models for reflection**:
|
|
692
|
+
```typescript
|
|
693
|
+
const response = await client.messages.create({
|
|
694
|
+
model: state.attempt === 1
|
|
695
|
+
? "claude-opus-4.5" // First attempt with capable model
|
|
696
|
+
: "claude-haiku-4.5", // Reflection with cheaper model
|
|
697
|
+
max_tokens: 1024,
|
|
698
|
+
messages
|
|
699
|
+
});
|
|
700
|
+
```
|
|
701
|
+
|
|
702
|
+
2. **Limit reflection scope**:
|
|
703
|
+
```typescript
|
|
704
|
+
const response = await client.messages.create({
|
|
705
|
+
model: "claude-opus-4.5",
|
|
706
|
+
max_tokens: state.attempt === 1
|
|
707
|
+
? 2048 // First attempt, more tokens
|
|
708
|
+
: 400, // Reflection, fewer tokens
|
|
709
|
+
messages
|
|
710
|
+
});
|
|
711
|
+
```
|
|
712
|
+
|
|
713
|
+
3. **Confidence-based triggering**:
|
|
714
|
+
```typescript
|
|
715
|
+
if (output.confidence > 0.85) {
|
|
716
|
+
return output; // Skip reflection if confident
|
|
717
|
+
}
|
|
718
|
+
```
|
|
719
|
+
|
|
720
|
+
## Monitoring & Metrics
|
|
721
|
+
|
|
722
|
+
### Key Metrics to Track
|
|
723
|
+
|
|
724
|
+
```typescript
|
|
725
|
+
interface ReflectionMetrics {
|
|
726
|
+
stepName: string;
|
|
727
|
+
totalAttempts: number;
|
|
728
|
+
successOnFirstAttempt: number; // % that succeed without reflection
|
|
729
|
+
reflectionEffectiveness: number; // % improved by reflection
|
|
730
|
+
averageReflectionTime: number; // ms
|
|
731
|
+
totalTokensSpent: number;
|
|
732
|
+
costMultiplier: number; // How much more expensive than baseline
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
function calculateMetrics(history: ReflectionSnapshot[]): ReflectionMetrics {
|
|
736
|
+
const total = history.length;
|
|
737
|
+
const successful = history.filter((h) => h.error === null).length;
|
|
738
|
+
const successPercent = total === 0 ? 0 : (successful / total) * 100;
|
|
739
|
+
|
|
740
|
+
return {
|
|
741
|
+
stepName: history[0]?.stepName || "unknown",
|
|
742
|
+
totalAttempts: total,
|
|
743
|
+
successOnFirstAttempt: history.filter((h) => h.attemptNumber === 1).length,
|
|
744
|
+
reflectionEffectiveness: successPercent,
|
|
745
|
+
averageReflectionTime: 0, // Calculated from execution times
|
|
746
|
+
totalTokensSpent: 0, // Calculated from usage
|
|
747
|
+
costMultiplier: total // More attempts = higher multiplier
|
|
748
|
+
};
|
|
749
|
+
}
|
|
750
|
+
```
|
|
751
|
+
|
|
752
|
+
## Testing Reflection
|
|
753
|
+
|
|
754
|
+
### Unit Test Pattern
|
|
755
|
+
|
|
756
|
+
```typescript
|
|
757
|
+
describe("Step with Reflection", () => {
|
|
758
|
+
it("should succeed on first attempt", async () => {
|
|
759
|
+
const output = await step.execute(ctx, "simple task");
|
|
760
|
+
expect(output).toBeDefined();
|
|
761
|
+
expect(ctx.getEventCount("step:error")).toBe(0);
|
|
762
|
+
});
|
|
763
|
+
|
|
764
|
+
it("should retry on error", async () => {
|
|
765
|
+
// Mock to fail first, succeed second
|
|
766
|
+
const attempts: number[] = [];
|
|
767
|
+
step.generate = jest.fn(async () => {
|
|
768
|
+
attempts.push(1);
|
|
769
|
+
if (attempts.length === 1) throw new Error("First attempt fails");
|
|
770
|
+
return "success";
|
|
771
|
+
});
|
|
772
|
+
|
|
773
|
+
const output = await step.executeWithReflection(ctx, "task");
|
|
774
|
+
expect(output).toBe("success");
|
|
775
|
+
expect(attempts.length).toBe(2);
|
|
776
|
+
});
|
|
777
|
+
|
|
778
|
+
it("should fail after max attempts", async () => {
|
|
779
|
+
step.generate = jest.fn(async () => {
|
|
780
|
+
throw new Error("Always fails");
|
|
781
|
+
});
|
|
782
|
+
|
|
783
|
+
await expect(step.executeWithReflection(ctx, "task")).rejects.toThrow();
|
|
784
|
+
});
|
|
785
|
+
});
|
|
786
|
+
```
|
|
787
|
+
|
|
788
|
+
## Troubleshooting
|
|
789
|
+
|
|
790
|
+
### Infinite Reflection Loops
|
|
791
|
+
|
|
792
|
+
**Problem**: Reflection keeps suggesting the same fix without making progress
|
|
793
|
+
|
|
794
|
+
**Solution**:
|
|
795
|
+
```typescript
|
|
796
|
+
// Add state deduplication
|
|
797
|
+
const previousOutputs = new Set();
|
|
798
|
+
const output = await generate();
|
|
799
|
+
|
|
800
|
+
if (previousOutputs.has(output)) {
|
|
801
|
+
throw new Error("Stuck in loop, breaking out");
|
|
802
|
+
}
|
|
803
|
+
previousOutputs.add(output);
|
|
804
|
+
```
|
|
805
|
+
|
|
806
|
+
### Excessive Token Usage
|
|
807
|
+
|
|
808
|
+
**Problem**: Reflection consuming too many tokens
|
|
809
|
+
|
|
810
|
+
**Solution**:
|
|
811
|
+
```typescript
|
|
812
|
+
// Cap tokens per step
|
|
813
|
+
const maxTokensPerStep = 2000;
|
|
814
|
+
const maxTokensPerAttempt = maxTokensPerStep / maxAttempts;
|
|
815
|
+
|
|
816
|
+
messages.max_tokens = Math.min(
|
|
817
|
+
1024,
|
|
818
|
+
maxTokensPerAttempt - tokensSoFar
|
|
819
|
+
);
|
|
820
|
+
```
|
|
821
|
+
|
|
822
|
+
### Reflection Not Helping
|
|
823
|
+
|
|
824
|
+
**Problem**: Output quality not improving with reflection
|
|
825
|
+
|
|
826
|
+
**Solution**:
|
|
827
|
+
```typescript
|
|
828
|
+
// Track effectiveness
|
|
829
|
+
if (attempt > 1 && newOutput.quality <= previousOutput.quality) {
|
|
830
|
+
// Reflection didn't help, return original
|
|
831
|
+
return previousOutput;
|
|
832
|
+
}
|
|
833
|
+
```
|
|
834
|
+
|