@girardmedia/bootspring 3.3.2 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/assets/agents/accessibility-auditor.md +39 -0
  2. package/assets/agents/api-designer.md +40 -0
  3. package/assets/agents/auth-implementer.md +64 -0
  4. package/assets/agents/bug-hunter.md +42 -0
  5. package/assets/agents/bundle-analyzer.md +40 -0
  6. package/assets/agents/cache-optimizer.md +55 -0
  7. package/assets/agents/changelog-writer.md +55 -0
  8. package/assets/agents/ci-cd-builder.md +40 -0
  9. package/assets/agents/code-explainer.md +39 -0
  10. package/assets/agents/code-reviewer.md +39 -0
  11. package/assets/agents/cost-optimizer.md +57 -0
  12. package/assets/agents/cron-scheduler.md +51 -0
  13. package/assets/agents/data-seeder.md +56 -0
  14. package/assets/agents/database-architect.md +40 -0
  15. package/assets/agents/dependency-updater.md +40 -0
  16. package/assets/agents/deploy-checker.md +40 -0
  17. package/assets/agents/docker-optimizer.md +40 -0
  18. package/assets/agents/documentation-writer.md +40 -0
  19. package/assets/agents/email-builder.md +55 -0
  20. package/assets/agents/env-setup.md +40 -0
  21. package/assets/agents/error-handler.md +40 -0
  22. package/assets/agents/eslint-fixer.md +46 -0
  23. package/assets/agents/feature-flagger.md +69 -0
  24. package/assets/agents/git-detective.md +39 -0
  25. package/assets/agents/graphql-builder.md +60 -0
  26. package/assets/agents/incident-responder.md +59 -0
  27. package/assets/agents/log-analyzer.md +39 -0
  28. package/assets/agents/migration-planner.md +41 -0
  29. package/assets/agents/monorepo-navigator.md +39 -0
  30. package/assets/agents/nextjs-expert.md +57 -0
  31. package/assets/agents/notification-builder.md +56 -0
  32. package/assets/agents/onboarding-guide.md +39 -0
  33. package/assets/agents/performance-profiler.md +40 -0
  34. package/assets/agents/prisma-expert.md +57 -0
  35. package/assets/agents/rate-limiter.md +58 -0
  36. package/assets/agents/react-expert.md +58 -0
  37. package/assets/agents/refactorer.md +42 -0
  38. package/assets/agents/regex-builder.md +46 -0
  39. package/assets/agents/release-manager.md +40 -0
  40. package/assets/agents/s3-manager.md +58 -0
  41. package/assets/agents/schema-validator.md +40 -0
  42. package/assets/agents/search-builder.md +62 -0
  43. package/assets/agents/security-auditor.md +39 -0
  44. package/assets/agents/sitemap-generator.md +53 -0
  45. package/assets/agents/stripe-integrator.md +59 -0
  46. package/assets/agents/tailwind-expert.md +55 -0
  47. package/assets/agents/tech-debt-tracker.md +39 -0
  48. package/assets/agents/test-writer.md +42 -0
  49. package/assets/agents/type-fixer.md +45 -0
  50. package/assets/agents/webhook-builder.md +54 -0
  51. package/assets/rules/cpp.md +53 -0
  52. package/assets/rules/css.md +52 -0
  53. package/assets/rules/go.md +50 -0
  54. package/assets/rules/html.md +52 -0
  55. package/assets/rules/java.md +51 -0
  56. package/assets/rules/kotlin.md +50 -0
  57. package/assets/rules/php.md +51 -0
  58. package/assets/rules/python.md +51 -0
  59. package/assets/rules/ruby.md +51 -0
  60. package/assets/rules/rust.md +49 -0
  61. package/assets/rules/shell.md +52 -0
  62. package/assets/rules/sql.md +49 -0
  63. package/assets/rules/swift.md +50 -0
  64. package/assets/rules/typescript.md +52 -0
  65. package/assets/rules/yaml-json.md +51 -0
  66. package/assets/skills/accessibility.md +210 -0
  67. package/assets/skills/agent-patterns.md +387 -0
  68. package/assets/skills/ai-integration.md +263 -0
  69. package/assets/skills/animation-patterns.md +224 -0
  70. package/assets/skills/api-design.md +218 -0
  71. package/assets/skills/api-gateway.md +341 -0
  72. package/assets/skills/api-versioning.md +226 -0
  73. package/assets/skills/astro-patterns.md +233 -0
  74. package/assets/skills/auth-patterns.md +248 -0
  75. package/assets/skills/aws-patterns.md +171 -0
  76. package/assets/skills/background-jobs.md +162 -0
  77. package/assets/skills/browser-extensions.md +309 -0
  78. package/assets/skills/caching-patterns.md +253 -0
  79. package/assets/skills/ci-cd.md +251 -0
  80. package/assets/skills/cli-development.md +296 -0
  81. package/assets/skills/code-review.md +185 -0
  82. package/assets/skills/cron-patterns.md +327 -0
  83. package/assets/skills/data-fetching.md +231 -0
  84. package/assets/skills/database-migrations.md +346 -0
  85. package/assets/skills/database-patterns.md +219 -0
  86. package/assets/skills/debugging.md +281 -0
  87. package/assets/skills/design-system.md +289 -0
  88. package/assets/skills/django-patterns.md +182 -0
  89. package/assets/skills/docker-patterns.md +235 -0
  90. package/assets/skills/e2e-testing.md +287 -0
  91. package/assets/skills/edge-computing.md +268 -0
  92. package/assets/skills/electron-patterns.md +266 -0
  93. package/assets/skills/email-templates.md +206 -0
  94. package/assets/skills/error-handling.md +265 -0
  95. package/assets/skills/event-driven.md +232 -0
  96. package/assets/skills/express-patterns.md +239 -0
  97. package/assets/skills/fastapi-patterns.md +198 -0
  98. package/assets/skills/feature-flags.md +212 -0
  99. package/assets/skills/figma-to-code.md +298 -0
  100. package/assets/skills/file-upload.md +228 -0
  101. package/assets/skills/forms-patterns.md +264 -0
  102. package/assets/skills/gcp-patterns.md +189 -0
  103. package/assets/skills/git-workflow.md +187 -0
  104. package/assets/skills/golang-patterns.md +185 -0
  105. package/assets/skills/graphql-patterns.md +244 -0
  106. package/assets/skills/i18n-patterns.md +172 -0
  107. package/assets/skills/image-processing.md +350 -0
  108. package/assets/skills/java-springboot.md +226 -0
  109. package/assets/skills/kotlin-patterns.md +207 -0
  110. package/assets/skills/kubernetes-patterns.md +326 -0
  111. package/assets/skills/laravel-patterns.md +261 -0
  112. package/assets/skills/llm-fine-tuning.md +335 -0
  113. package/assets/skills/load-testing.md +303 -0
  114. package/assets/skills/logging-observability.md +228 -0
  115. package/assets/skills/markdown-processing.md +318 -0
  116. package/assets/skills/mcp-server-patterns.md +292 -0
  117. package/assets/skills/microservices.md +272 -0
  118. package/assets/skills/migration-patterns.md +239 -0
  119. package/assets/skills/mongodb-patterns.md +189 -0
  120. package/assets/skills/monorepo-patterns.md +287 -0
  121. package/assets/skills/nextjs-app-router.md +237 -0
  122. package/assets/skills/notification-patterns.md +348 -0
  123. package/assets/skills/oauth-patterns.md +246 -0
  124. package/assets/skills/payment-integration.md +222 -0
  125. package/assets/skills/pdf-generation.md +307 -0
  126. package/assets/skills/performance-optimization.md +277 -0
  127. package/assets/skills/php-patterns.md +210 -0
  128. package/assets/skills/prisma-patterns.md +241 -0
  129. package/assets/skills/prompt-engineering.md +193 -0
  130. package/assets/skills/pwa-patterns.md +247 -0
  131. package/assets/skills/python-patterns.md +158 -0
  132. package/assets/skills/python-testing.md +172 -0
  133. package/assets/skills/queue-patterns.md +295 -0
  134. package/assets/skills/rag-patterns.md +159 -0
  135. package/assets/skills/rate-limiting.md +319 -0
  136. package/assets/skills/react-components.md +201 -0
  137. package/assets/skills/react-native-patterns.md +299 -0
  138. package/assets/skills/real-time-patterns.md +181 -0
  139. package/assets/skills/redis-patterns.md +188 -0
  140. package/assets/skills/refactoring.md +218 -0
  141. package/assets/skills/regex-patterns.md +191 -0
  142. package/assets/skills/remix-patterns.md +262 -0
  143. package/assets/skills/responsive-design.md +199 -0
  144. package/assets/skills/ruby-rails-patterns.md +178 -0
  145. package/assets/skills/rust-patterns.md +211 -0
  146. package/assets/skills/search-patterns.md +227 -0
  147. package/assets/skills/security-hardening.md +237 -0
  148. package/assets/skills/seo-patterns.md +179 -0
  149. package/assets/skills/serverless-patterns.md +223 -0
  150. package/assets/skills/sql-optimization.md +154 -0
  151. package/assets/skills/state-management.md +254 -0
  152. package/assets/skills/storybook-patterns.md +330 -0
  153. package/assets/skills/svelte-patterns.md +258 -0
  154. package/assets/skills/swift-patterns.md +227 -0
  155. package/assets/skills/tailwind-patterns.md +272 -0
  156. package/assets/skills/tdd-workflow.md +199 -0
  157. package/assets/skills/terraform-patterns.md +270 -0
  158. package/assets/skills/testing-react.md +240 -0
  159. package/assets/skills/testing-vitest.md +232 -0
  160. package/assets/skills/typescript-strict.md +159 -0
  161. package/assets/skills/video-processing.md +340 -0
  162. package/assets/skills/vue-patterns.md +247 -0
  163. package/assets/skills/web-workers.md +327 -0
  164. package/assets/skills/webhooks-patterns.md +283 -0
  165. package/assets/skills/websocket-patterns.md +306 -0
  166. package/dist/cli/index.js +941 -958
  167. package/dist/core/index.d.ts +341 -11
  168. package/dist/core.js +58 -95
  169. package/dist/mcp/index.d.ts +33 -1
  170. package/dist/mcp-server.js +177 -255
  171. package/package.json +4 -1
@@ -0,0 +1,387 @@
1
+ ---
2
+ name: agent-patterns
3
+ description: AI agent patterns for tool use, multi-step reasoning, memory management, planning, guardrails, and evaluation.
4
+ ---
5
+
6
+ # AI Agent Patterns
7
+
8
+ ## When to Use
9
+ Build AI agents when tasks require multi-step reasoning, tool use, and dynamic decision-making that cannot be achieved with a single prompt. Agents are appropriate for research tasks, code generation with verification, data analysis pipelines, and customer support automation. Use these patterns to structure tool definitions, implement planning loops, manage conversation memory, and apply safety guardrails. Start simple and add complexity only when needed.
10
+
11
+ ## How It Works
12
+
13
+ ### Tool Definition Pattern
14
+
15
+ ```typescript
16
+ // src/agent/tools.ts
17
/** Primitive kinds a tool parameter may declare. */
type ToolParameterType = 'string' | 'number' | 'boolean' | 'array' | 'object';

/** Schema for one named parameter accepted by a tool. */
interface ToolParameterSpec {
  /** Kind of value expected for this parameter. */
  type: ToolParameterType;
  /** Human-readable explanation of the parameter. */
  description: string;
  /** Whether the parameter must be supplied. */
  required?: boolean;
  /** Optional closed list of allowed string values. */
  enum?: string[];
}

/**
 * A tool that can be offered to the agent: its name, a description,
 * the parameters it accepts (keyed by parameter name), and the async
 * function that runs it.
 */
export interface ToolDefinition {
  name: string;
  description: string;
  parameters: Record<string, ToolParameterSpec>;
  /** Runs the tool with the supplied parameter bag and resolves to its result. */
  execute: (params: Record<string, unknown>) => Promise<ToolResult>;
}
28
+
29
/** Outcome of a single tool execution. */
export interface ToolResult {
  /** Text produced by the tool. */
  output: string;
  /** Error text, present only when the tool reported a problem. */
  error?: string;
  /** Free-form extra data attached by the tool. */
  metadata?: Record<string, unknown>;
}
34
+
35
/**
 * Tools offered to the agent: web search, sandboxed Python execution,
 * and file reading.
 *
 * Numeric arguments come from the model, so they are clamped to the
 * ranges the parameter descriptions promise rather than trusted as-is.
 */
const tools: ToolDefinition[] = [
  {
    name: 'search_web',
    description: 'Search the web for current information. Use when the user asks about recent events or needs data not in your training.',
    parameters: {
      query: { type: 'string', description: 'Search query', required: true },
      maxResults: { type: 'number', description: 'Maximum results (1-10)', required: false },
    },
    execute: async ({ query, maxResults }) => {
      // Default to 5 and enforce the documented 1-10 range.
      const requested =
        typeof maxResults === 'number' && Number.isFinite(maxResults) ? Math.trunc(maxResults) : 5;
      const limit = Math.min(Math.max(requested, 1), 10);
      const results = await searchAPI(query as string, limit);
      return {
        output: results
          .map((r: { title: string; url: string; snippet: string }) => `[${r.title}](${r.url}): ${r.snippet}`)
          .join('\n'),
      };
    },
  },
  {
    name: 'run_code',
    description: 'Execute Python code in a sandboxed environment. Returns stdout and stderr.',
    parameters: {
      code: { type: 'string', description: 'Python code to execute', required: true },
      timeout: { type: 'number', description: 'Timeout in seconds (max 30)', required: false },
    },
    execute: async ({ code, timeout }) => {
      // Default to 10s; reject NaN/negative values and cap at the documented 30s.
      const requested = typeof timeout === 'number' && Number.isFinite(timeout) ? timeout : 10;
      const seconds = Math.min(Math.max(requested, 1), 30);
      const result = await sandbox.execute(code as string, { timeout: seconds });
      return {
        output: result.stdout || '(no output)',
        error: result.stderr || undefined,
        metadata: { exitCode: result.exitCode, executionTime: result.durationMs },
      };
    },
  },
  {
    name: 'read_file',
    description: 'Read the contents of a file. Use to examine code, configuration, or data files.',
    parameters: {
      path: { type: 'string', description: 'File path relative to workspace root', required: true },
    },
    execute: async ({ path: filePath }) => {
      if (typeof filePath !== 'string' || filePath.length === 0) {
        return { output: '', error: 'path must be a non-empty string' };
      }
      // The path is model-supplied: refuse absolute paths and parent
      // traversal so reads stay relative, as the parameter documents.
      // NOTE(review): this does not resolve symlinks — confirm whether
      // the workspace can contain links pointing outside of it.
      const isAbsolute = /^([\\/]|[a-zA-Z]:)/.test(filePath);
      const hasParentSegment = filePath.split(/[\\/]+/).includes('..');
      if (isAbsolute || hasParentSegment) {
        return { output: '', error: `Path escapes the workspace root: ${filePath}` };
      }
      const content = await fs.readFile(filePath, 'utf-8');
      return { output: content, metadata: { lines: content.split('\n').length } };
    },
  },
];
76
+ ```
77
+
78
+ ### ReAct Loop (Reason + Act)
79
+
80
+ ```typescript
81
+ // src/agent/react-loop.ts
82
+ import Anthropic from '@anthropic-ai/sdk';
83
+
84
/** Mutable bookkeeping for one agent run. */
interface AgentState {
  /** Conversation sent to the model on each step. */
  messages: Anthropic.MessageParam[];
  /** Tool results keyed by string id. */
  toolResults: Map<string, ToolResult>;
  /** Number of model calls made so far. */
  stepCount: number;
  /** Upper bound on model calls for this run. */
  maxSteps: number;
}
90
+
91
/** Converts a local tool definition into the Anthropic tool schema. */
function toAnthropicTool(tool: ToolDefinition): Anthropic.Tool {
  const properties: Record<string, unknown> = {};
  const required: string[] = [];
  for (const [key, spec] of Object.entries(tool.parameters)) {
    properties[key] = {
      type: spec.type,
      description: spec.description,
      // Forward the allowed values so the model sees the constraint.
      ...(spec.enum ? { enum: spec.enum } : {}),
    };
    if (spec.required) required.push(key);
  }
  return {
    name: tool.name,
    description: tool.description,
    input_schema: { type: 'object' as const, properties, required },
  };
}

/** Runs one requested tool call and packages the outcome as a tool_result block. */
async function runToolUse(
  toolUse: Anthropic.ToolUseBlock,
  tools: ToolDefinition[]
): Promise<Anthropic.ToolResultBlockParam> {
  const tool = tools.find((t) => t.name === toolUse.name);
  if (!tool) {
    return {
      type: 'tool_result',
      tool_use_id: toolUse.id,
      content: `Error: Unknown tool "${toolUse.name}"`,
      is_error: true,
    };
  }

  try {
    const result = await tool.execute(toolUse.input as Record<string, unknown>);
    return {
      type: 'tool_result',
      tool_use_id: toolUse.id,
      content: result.error ? `Error: ${result.error}\n\nOutput: ${result.output}` : result.output,
      is_error: !!result.error,
    };
  } catch (err: unknown) {
    // Tools may throw non-Error values; never report "undefined" to the model.
    const message = err instanceof Error ? err.message : String(err);
    return {
      type: 'tool_result',
      tool_use_id: toolUse.id,
      content: `Execution error: ${message}`,
      is_error: true,
    };
  }
}

/**
 * Runs a reason-and-act loop: call the model, execute any tools it
 * requests, feed the results back, and repeat until the model stops
 * asking for tools or the step budget is spent.
 *
 * @param client - Anthropic client used for every model call.
 * @param userMessage - The task, sent as the first user message.
 * @param tools - Tools the model may call.
 * @param maxSteps - Maximum number of model calls (default 10).
 * @returns The model's final text blocks joined by newlines, or a fixed
 *   notice when the step limit is reached first.
 */
export async function runAgent(
  client: Anthropic,
  userMessage: string,
  tools: ToolDefinition[],
  maxSteps: number = 10
): Promise<string> {
  const state: AgentState = {
    messages: [{ role: 'user', content: userMessage }],
    toolResults: new Map(),
    stepCount: 0,
    maxSteps,
  };

  const anthropicTools = tools.map(toAnthropicTool);

  while (state.stepCount < state.maxSteps) {
    state.stepCount++;

    const response = await client.messages.create({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 4096,
      system: AGENT_SYSTEM_PROMPT,
      tools: anthropicTools,
      messages: state.messages,
    });

    if (response.stop_reason !== 'tool_use') {
      // Model is done — return its text blocks.
      return response.content
        .filter((block): block is Anthropic.TextBlock => block.type === 'text')
        .map((block) => block.text)
        .join('\n');
    }

    // Echo the assistant turn, then answer every tool_use block in order.
    state.messages.push({ role: 'assistant', content: response.content });

    const toolUseBlocks = response.content.filter(
      (block): block is Anthropic.ToolUseBlock => block.type === 'tool_use'
    );

    const toolResults: Anthropic.ToolResultBlockParam[] = [];
    for (const toolUse of toolUseBlocks) {
      // Sequential on purpose: tools may have side effects that depend on order.
      toolResults.push(await runToolUse(toolUse, tools));
    }

    state.messages.push({ role: 'user', content: toolResults });
  }

  return 'Agent reached maximum step limit without completing the task.';
}
180
+ ```
181
+
182
+ ### Memory Management
183
+
184
+ ```typescript
185
+ // src/agent/memory.ts
186
/** Category of a stored memory. */
type MemoryEntryType = 'fact' | 'preference' | 'task_result' | 'error';

/** One remembered item kept by the agent. */
interface MemoryEntry {
  id: string;
  content: string;
  timestamp: Date;
  /** Relative weight in the range 0-1. */
  importance: number;
  type: MemoryEntryType;
}
193
+
194
/**
 * Two-tier memory for an agent: a bounded window of recent messages
 * (short-term) and a bounded, importance-ranked list of facts (long-term).
 */
export class AgentMemory {
  private shortTerm: Anthropic.MessageParam[] = [];
  private longTerm: MemoryEntry[] = [];
  private maxShortTermMessages = 20;
  private maxLongTermEntries = 100;

  /**
   * Appends a message. Once the window exceeds its limit, the two
   * messages after the first one are removed and summarized into
   * long-term memory.
   */
  addMessage(message: Anthropic.MessageParam) {
    this.shortTerm.push(message);

    if (this.shortTerm.length > this.maxShortTermMessages) {
      // Index 0 (the first message) is always retained; the pair
      // following it is the oldest material that gets dropped.
      const removed = this.shortTerm.splice(1, 2);
      this.summarizeToLongTerm(removed);
    }
  }

  /**
   * Stores a fact with the given importance (default 0.5). When the
   * store is over capacity, only the highest-importance entries are kept.
   */
  addFact(content: string, importance: number = 0.5) {
    this.longTerm.push({
      id: crypto.randomUUID(),
      content,
      timestamp: new Date(),
      importance,
      type: 'fact',
    });

    if (this.longTerm.length > this.maxLongTermEntries) {
      this.longTerm = [...this.longTerm]
        .sort((a, b) => b.importance - a.importance)
        .slice(0, this.maxLongTermEntries);
    }
  }

  /**
   * Renders up to ten of the most important long-term entries as a
   * Markdown section, or an empty string when nothing is stored.
   */
  getContext(): string {
    if (this.longTerm.length === 0) return '';

    // Sort a copy: a read-only accessor must not reorder stored entries.
    const relevant = [...this.longTerm]
      .sort((a, b) => b.importance - a.importance)
      .slice(0, 10);

    return `## Relevant Memory\n${relevant.map((e) => `- [${e.type}] ${e.content}`).join('\n')}`;
  }

  /** Returns a shallow copy of the short-term message window. */
  getMessages(): Anthropic.MessageParam[] {
    return [...this.shortTerm];
  }

  /**
   * Saves the first 200 characters of each removed message whose text
   * is longer than 50 characters as a low-importance fact.
   */
  private summarizeToLongTerm(messages: Anthropic.MessageParam[]) {
    for (const msg of messages) {
      // Messages may carry block arrays; keep their text blocks
      // instead of discarding the whole message.
      const content =
        typeof msg.content === 'string'
          ? msg.content
          : msg.content
              .filter((block): block is Anthropic.TextBlockParam => block.type === 'text')
              .map((block) => block.text)
              .join('\n');
      if (content.length > 50) {
        this.addFact(content.slice(0, 200), 0.3);
      }
    }
  }
}
250
+ ```
251
+
252
+ ### Planning Pattern
253
+
254
+ ```typescript
255
+ // src/agent/planner.ts
256
/** Lifecycle state of a plan as a whole. */
type PlanStatus = 'planning' | 'executing' | 'completed' | 'failed';

/** A goal broken into ordered steps, plus execution progress. */
interface Plan {
  goal: string;
  steps: PlanStep[];
  /** Index into `steps` of the step being worked on. */
  currentStep: number;
  status: PlanStatus;
}
262
+
263
/** Lifecycle state of one plan step. */
type PlanStepStatus = 'pending' | 'running' | 'done' | 'failed';

/** A single unit of work inside a plan. */
interface PlanStep {
  description: string;
  /** Name of the tool to use, when the step needs one. */
  tool?: string;
  /** Arguments for the tool, when provided. */
  params?: Record<string, unknown>;
  status: PlanStepStatus;
  /** Text recorded for the step after it has been attempted. */
  result?: string;
}
270
+
271
/**
 * Asks the model for a step-by-step plan and parses the JSON array it
 * returns.
 *
 * The model's reply is untrusted: text is gathered from every text
 * block (the first block is not guaranteed to be text), entries without
 * a usable `description` are dropped, and a `null` tool is normalized
 * to "no tool" so the result satisfies `PlanStep`.
 *
 * @param client - Anthropic client used for the planning call.
 * @param goal - What the plan should accomplish.
 * @param availableTools - Tool names the plan may reference.
 * @returns A plan in `executing` status whose steps are all `pending`;
 *   the step list is empty when the reply contains no JSON array.
 * @throws SyntaxError when the bracketed text in the reply is not valid JSON.
 */
export async function createPlan(client: Anthropic, goal: string, availableTools: string[]): Promise<Plan> {
  const prompt = [
    `Create a step-by-step plan to accomplish this goal. Available tools: ${availableTools.join(', ')}`,
    '',
    `Goal: ${goal}`,
    '',
    'Respond with a JSON array of steps: [{"description": "...", "tool": "tool_name_or_null"}]',
  ].join('\n');

  const response = await client.messages.create({
    model: 'claude-sonnet-4-20250514',
    max_tokens: 2048,
    messages: [{ role: 'user', content: prompt }],
  });

  const text = response.content
    .filter((block): block is Anthropic.TextBlock => block.type === 'text')
    .map((block) => block.text)
    .join('\n');

  // Greedy match from the first "[" to the last "]" to tolerate prose around the JSON.
  const parsed: unknown = JSON.parse(text.match(/\[[\s\S]*\]/)?.[0] ?? '[]');

  const steps: PlanStep[] = [];
  if (Array.isArray(parsed)) {
    for (const raw of parsed) {
      if (typeof raw !== 'object' || raw === null) continue;
      const { description, tool, params } = raw as Record<string, unknown>;
      if (typeof description !== 'string' || description.trim() === '') continue;
      steps.push({
        description,
        ...(typeof tool === 'string' && tool !== '' ? { tool } : {}),
        ...(typeof params === 'object' && params !== null && !Array.isArray(params)
          ? { params: params as Record<string, unknown> }
          : {}),
        status: 'pending',
      });
    }
  }

  return {
    goal,
    steps,
    currentStep: 0,
    status: 'executing',
  };
}
295
+
296
/**
 * Executes the plan's steps in order, starting at `plan.currentStep`,
 * and mutates the plan in place with each step's status and result.
 *
 * Each step is handed to `agent` as a single prompt containing the step
 * description and the results of earlier steps. `agent` is therefore
 * typed as a prompt-only function: wrap a multi-argument runner in a
 * closure that supplies its other arguments.
 *
 * @param plan - Plan to run; updated in place and also returned.
 * @param agent - Runs one prompt and resolves to the step's result text.
 * @returns The same plan, `completed` when every step succeeded, or
 *   `failed` at the first step whose agent call threw (later steps are
 *   left untouched).
 */
export async function executePlan(
  plan: Plan,
  agent: (prompt: string) => Promise<string>
): Promise<Plan> {
  for (let i = plan.currentStep; i < plan.steps.length; i++) {
    const step = plan.steps[i];
    step.status = 'running';
    plan.currentStep = i;

    // Only earlier steps that produced a non-empty result contribute context.
    const context = plan.steps
      .slice(0, i)
      .filter((s) => s.result)
      .map((s) => `- ${s.description}: ${s.result}`)
      .join('\n');

    try {
      step.result = await agent(
        `Execute this step: ${step.description}\n\nContext from previous steps:\n${context}`
      );
      step.status = 'done';
    } catch (err: unknown) {
      step.status = 'failed';
      // Non-Error throws still get a readable failure reason.
      step.result = err instanceof Error ? err.message : String(err);
      plan.status = 'failed';
      return plan;
    }
  }

  plan.status = 'completed';
  return plan;
}
321
+ ```
322
+
323
+ ### Guardrails
324
+
325
+ ```typescript
326
+ // src/agent/guardrails.ts
327
/** Verdict returned by a guardrail; `reason` accompanies unsafe verdicts. */
type GuardrailVerdict = { safe: boolean; reason?: string };

/** A named safety check applied to a piece of text. */
interface GuardrailCheck {
  name: string;
  check: (input: string) => GuardrailVerdict;
}
331
+
332
/** Patterns that suggest a credential or private key is present. */
const SECRET_PATTERNS: readonly RegExp[] = [
  /password\s*[:=]\s*\S+/i,
  /api[_-]?key\s*[:=]\s*\S+/i,
  /-----BEGIN.*KEY-----/,
];

/**
 * Destructive commands to flag. Patterns ignore case and tolerate extra
 * whitespace so trivial variations ("drop table", "rm  -rf /") are
 * still caught; `label` is the canonical spelling used in the reason.
 */
const DANGEROUS_COMMANDS: ReadonlyArray<{ label: string; pattern: RegExp }> = [
  { label: 'rm -rf /', pattern: /rm\s+-rf\s+\//i },
  { label: 'DROP TABLE', pattern: /drop\s+table/i },
  { label: 'FORMAT C:', pattern: /format\s+c:/i },
  { label: 'sudo rm', pattern: /sudo\s+rm/i },
  { label: ':(){:|:&};:', pattern: /:\(\)\s*\{\s*:\s*\|\s*:\s*&\s*\}\s*;\s*:/ },
];

/**
 * Safety checks run against agent output: leaked secrets, destructive
 * commands, and oversized output (more than 50,000 characters).
 */
const guardrails: GuardrailCheck[] = [
  {
    name: 'no-secrets',
    check: (input) => {
      const leaked = SECRET_PATTERNS.some((p) => p.test(input));
      return leaked ? { safe: false, reason: 'Output contains potential secrets' } : { safe: true };
    },
  },
  {
    name: 'no-dangerous-commands',
    check: (input) => {
      const found = DANGEROUS_COMMANDS.find(({ pattern }) => pattern.test(input));
      return found
        ? { safe: false, reason: `Dangerous command detected: ${found.label}` }
        : { safe: true };
    },
  },
  {
    name: 'output-length',
    check: (input) => {
      return input.length > 50_000
        ? { safe: false, reason: 'Output exceeds maximum length' }
        : { safe: true };
    },
  },
];
358
+
359
/**
 * Runs every configured guardrail against `output`.
 *
 * @param output - Text to vet.
 * @returns `safe` is true only when no guardrail objected; `violations`
 *   lists one "name: reason" entry per failed guardrail, in guardrail order.
 */
export function applyGuardrails(output: string): { safe: boolean; violations: string[] } {
  const violations = guardrails.flatMap((rule) => {
    const verdict = rule.check(output);
    return verdict.safe ? [] : [`${rule.name}: ${verdict.reason}`];
  });
  return { safe: violations.length === 0, violations };
}
367
+ ```
368
+
369
+ ## Examples
370
+
371
+ | Pattern | When to Use | Complexity |
372
+ |---------|-------------|------------|
373
+ | Single tool call | FAQ lookup, calculations | Low |
374
+ | ReAct loop | Research, multi-step tasks | Medium |
375
+ | Plan-then-execute | Complex workflows with dependencies | High |
376
+ | Memory + planning | Long-running sessions, personalization | High |
377
+ | Multi-agent | Specialized subtasks (research + code + review) | Very high |
378
+
379
+ ## Checklist
380
+ - [ ] Tools have clear descriptions explaining when and why to use them
381
+ - [ ] ReAct loop has a maximum step limit to prevent infinite loops
382
+ - [ ] Tool execution errors caught and returned as structured results
383
+ - [ ] Memory manager trims conversation history to stay within context window
384
+ - [ ] Long-term memory stores key facts with importance scoring
385
+ - [ ] Guardrails check for secrets, dangerous commands, and output length
386
+ - [ ] Agent evaluation suite measures task completion rate and quality
387
+ - [ ] Planning step validates feasibility before execution begins
@@ -0,0 +1,263 @@
1
+ ---
2
+ name: ai-integration
3
+ description: AI/LLM integration patterns for streaming, function calling, token management, fallbacks, and prompt caching.
4
+ ---
5
+
6
+ # AI Integration
7
+
8
+ ## When to Use
9
+ Apply these patterns when integrating LLM APIs into production applications. Use streaming for real-time UX, function calling for structured outputs, token management to control costs, fallbacks for reliability, and prompt caching to reduce latency and spend on repeated context.
10
+
11
+ ## How It Works
12
+
13
+ ### Streaming Responses
14
+
15
+ Stream tokens as they arrive to improve perceived latency:
16
+
17
+ ```typescript
18
+ import Anthropic from "@anthropic-ai/sdk";
19
+
20
+ const client = new Anthropic();
21
+
22
/**
 * Streams a single-turn reply, echoing each text delta to stdout as it
 * arrives.
 *
 * @param userMessage - The user's message.
 * @returns The full reply text, i.e. all text deltas concatenated.
 */
async function streamChat(userMessage: string): Promise<string> {
  const stream = client.messages.stream({
    model: "claude-sonnet-4-20250514",
    max_tokens: 1024,
    messages: [{ role: "user", content: userMessage }],
  });

  let reply = "";
  for await (const event of stream) {
    // Only text deltas carry displayable output; skip everything else.
    if (event.type !== "content_block_delta") continue;
    if (event.delta.type !== "text_delta") continue;

    const piece = event.delta.text;
    reply += piece;
    process.stdout.write(piece);
  }
  return reply;
}
42
+ ```
43
+
44
+ Server-Sent Events endpoint for web clients:
45
+
46
+ ```typescript
47
/**
 * POST /api/chat — streams the model's reply as Server-Sent Events.
 *
 * Each text delta is sent as `data: {"text": "..."}`; a successful
 * stream ends with `data: [DONE]`. A failure after streaming has begun
 * is reported as an `error` event, and the response is always closed.
 */
app.post("/api/chat", async (req, res) => {
  // Validate before committing to an event-stream response so bad
  // requests get a normal 400 instead of a broken stream.
  const messages = req.body?.messages;
  if (!Array.isArray(messages) || messages.length === 0) {
    res.status(400).json({ error: "messages must be a non-empty array" });
    return;
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");

  const stream = client.messages.stream({
    model: "claude-sonnet-4-20250514",
    max_tokens: 1024,
    messages,
  });

  // Stop generating when the client disconnects mid-stream.
  let finished = false;
  res.on("close", () => {
    if (!finished) stream.abort();
  });

  try {
    for await (const event of stream) {
      if (
        event.type === "content_block_delta" &&
        event.delta.type === "text_delta"
      ) {
        res.write(`data: ${JSON.stringify({ text: event.delta.text })}\n\n`);
      }
    }
    res.write("data: [DONE]\n\n");
  } catch (err: unknown) {
    // Log details server-side; send the client a generic message only.
    console.error("chat stream failed:", err);
    if (!res.writableEnded && !res.destroyed) {
      res.write(`event: error\ndata: ${JSON.stringify({ error: "Upstream model request failed" })}\n\n`);
    }
  } finally {
    finished = true;
    res.end();
  }
});
69
+ ```
70
+
71
+ ### Function Calling with Structured Output
72
+
73
+ Define tools so the model returns structured JSON you can dispatch:
74
+
75
+ ```typescript
76
+ import OpenAI from "openai";
77
+
78
+ const openai = new OpenAI();
79
+
80
/** Tool schema advertised to the model: a single weather lookup. */
const tools: OpenAI.ChatCompletionTool[] = [
  {
    type: "function",
    function: {
      name: "get_weather",
      description: "Get current weather for a location",
      parameters: {
        type: "object",
        properties: {
          location: { type: "string", description: "City name" },
          units: { type: "string", enum: ["celsius", "fahrenheit"] },
        },
        required: ["location"],
      },
    },
  },
];

const response = await openai.chat.completions.create({
  model: "gpt-4o",
  messages: [{ role: "user", content: "Weather in Paris?" }],
  tools,
  tool_choice: "auto",
});

// The model may request several tool calls in one turn; handle each
// one rather than only the first.
for (const toolCall of response.choices[0]?.message.tool_calls ?? []) {
  if (toolCall.type !== "function" || toolCall.function.name !== "get_weather") continue;

  // Arguments are model-generated text: they may be malformed JSON or
  // miss required fields, so parse defensively and validate.
  let args: unknown;
  try {
    args = JSON.parse(toolCall.function.arguments);
  } catch {
    console.warn(`Ignoring tool call ${toolCall.id}: arguments are not valid JSON`);
    continue;
  }
  if (typeof args !== "object" || args === null) continue;

  const { location, units } = args as { location?: unknown; units?: unknown };
  if (typeof location !== "string" || location === "") continue;
  const requestedUnits = units === "celsius" || units === "fahrenheit" ? units : undefined;

  const result = await getWeather(location, requestedUnits);
  // Send tool result back for the model to summarize
}
111
+ ```
112
+
113
+ ### Token Management
114
+
115
+ Count tokens before sending to avoid exceeding limits:
116
+
117
+ ```typescript
118
+ import { encoding_for_model } from "tiktoken";
119
+
120
/**
 * Counts the tokens `text` encodes to under the given model's tokenizer.
 *
 * @param text - Text to measure.
 * @param model - Model whose encoding is used (default "gpt-4o").
 * @returns Number of tokens in the encoded text.
 */
function countTokens(text: string, model = "gpt-4o"): number {
  const enc = encoding_for_model(model as Parameters<typeof encoding_for_model>[0]);
  try {
    return enc.encode(text).length;
  } finally {
    // Release the encoder even if encoding throws.
    enc.free();
  }
}
126
+
127
/** A chat message: who said it and what was said. */
interface Message {
  /** Speaker role, e.g. "system". */
  role: string;
  /** Plain-text body of the message. */
  content: string;
}
131
+
132
/**
 * Trims a conversation to a token budget, keeping the system message
 * and as many of the most recent messages as fit.
 *
 * The first system message is always kept (even if it alone exceeds the
 * budget) and is returned first. Remaining messages are considered
 * newest to oldest; the walk stops at the first one that does not fit,
 * so the kept messages are a contiguous tail in chronological order.
 *
 * @param messages - Full conversation, oldest first.
 * @param maxTokens - Token budget for the returned messages.
 * @param count - Token counter for a message body; defaults to `countTokens`.
 * @returns The system message (if any) followed by the retained tail.
 */
function truncateToFit(
  messages: Message[],
  maxTokens: number,
  count: (text: string) => number = countTokens
): Message[] {
  const system = messages.find((m) => m.role === "system");
  let total = system ? count(system.content) : 0;

  // Collected newest-first, reversed once at the end.
  const newestFirst: Message[] = [];
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (msg.role === "system") continue;
    const cost = count(msg.content);
    if (total + cost > maxTokens) break;
    total += cost;
    newestFirst.push(msg);
  }

  const tail = newestFirst.reverse();
  // The system message must lead the conversation, not trail it.
  return system ? [system, ...tail] : tail;
}
149
+ ```
150
+
151
+ ### Provider Fallback Chain
152
+
153
+ Retry across providers when one fails:
154
+
155
+ ```typescript
156
/** Sends a conversation to one LLM backend and resolves to its reply text. */
type ProviderCall = (messages: Message[]) => Promise<string>;

/** A named LLM backend that can take part in a fallback chain. */
interface LLMProvider {
  name: string;
  call: ProviderCall;
}
160
+
161
/**
 * Tries each provider in order and returns the first successful reply.
 *
 * A provider is retried only after an HTTP 429 error, waiting
 * 1s, 2s, 4s, ... between attempts, up to `maxRetries` attempts. Any
 * other error, or a 429 on the last attempt, moves on to the next provider.
 *
 * @param providers - Providers in priority order.
 * @param messages - Conversation to send.
 * @param maxRetries - Attempts per provider (default 2).
 * @returns The first reply obtained.
 * @throws Error "All LLM providers exhausted" when none succeed; the
 *   last underlying failure is attached as `cause`.
 */
async function callWithFallback(
  providers: LLMProvider[],
  messages: Message[],
  maxRetries = 2
): Promise<string> {
  let lastError: unknown;

  for (const provider of providers) {
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        return await provider.call(messages);
      } catch (err: unknown) {
        lastError = err;
        const status =
          typeof err === "object" && err !== null ? (err as { status?: unknown }).status : undefined;

        if (status === 429 && attempt < maxRetries - 1) {
          // Exponential backoff before retrying the same provider.
          await new Promise((r) => setTimeout(r, Math.pow(2, attempt) * 1000));
          continue;
        }

        const detail = err instanceof Error ? err.message : String(err);
        console.warn(`${provider.name} attempt ${attempt + 1} failed:`, detail);
        break;
      }
    }
  }

  // Keep the root cause so callers can tell why the chain failed.
  const exhausted: Error & { cause?: unknown } = new Error("All LLM providers exhausted");
  if (lastError !== undefined) exhausted.cause = lastError;
  throw exhausted;
}
183
+ ```
184
+
185
+ ### Prompt Caching with Anthropic
186
+
187
+ Use cache_control to avoid re-processing large system prompts:
188
+
189
+ ```typescript
190
const anthropic = new Anthropic();

// Mark the large system prompt as cacheable so repeat calls can reuse
// it instead of re-processing it.
const response = await anthropic.messages.create({
  model: "claude-sonnet-4-20250514",
  max_tokens: 1024,
  system: [
    {
      type: "text",
      text: largeSystemPrompt, // 10k+ tokens of instructions
      cache_control: { type: "ephemeral" },
    },
  ],
  messages: [{ role: "user", content: "Summarize the key points." }],
});
// cache_read_input_tokens shows tokens served from cache on subsequent calls.
// It can be null when nothing was read from the cache, so report 0 then.
console.log("Cache hit tokens:", response.usage.cache_read_input_tokens ?? 0);
206
+ ```
207
+
208
+ ### Cost Tracking and Rate Limiting
209
+
210
+ ```typescript
211
/** One LLM request's token usage and computed cost, attributed to a user. */
interface UsageRecord {
  model: string;
  inputTokens: number;
  outputTokens: number;
  /** Cost of the request in US dollars. */
  costUsd: number;
  userId: string;
  /** When the record was created. */
  timestamp: Date;
}
219
+
220
/** USD price per 1,000 tokens, by model, split into input and output rates. */
const COST_PER_1K: Record<string, { input: number; output: number }> = {
  "claude-sonnet-4-20250514": { input: 0.003, output: 0.015 },
  "gpt-4o": { input: 0.0025, output: 0.01 },
  "gpt-4o-mini": { input: 0.00015, output: 0.0006 },
};

/**
 * Builds a usage record for one request, pricing tokens with the
 * per-1K rates in `COST_PER_1K`.
 *
 * @param model - Model name used to look up rates; models missing from
 *   the table are priced at zero.
 * @param inputTokens - Tokens sent to the model.
 * @param outputTokens - Tokens produced by the model.
 * @param userId - User the request is attributed to.
 * @returns A record stamped with the current time.
 */
function trackUsage(
  model: string,
  inputTokens: number,
  outputTokens: number,
  userId: string
): UsageRecord {
  const { input, output } = COST_PER_1K[model] ?? { input: 0, output: 0 };
  const weightedTokens = inputTokens * input + outputTokens * output;

  return {
    model,
    inputTokens,
    outputTokens,
    costUsd: weightedTokens / 1000,
    userId,
    timestamp: new Date(),
  };
}
242
+ ```
243
+
244
+ ## Examples
245
+
246
+ | Pattern | When | Result |
247
+ |---------|------|--------|
248
+ | Streaming SSE | Chat UI, CLI output | First token in < 300ms, progressive display |
249
+ | Function calling | Data extraction, tool use | Structured JSON, no regex parsing needed |
250
+ | Token truncation | Long conversations | Stays within context window, drops oldest first |
251
+ | Provider fallback | Production reliability | 99.9% uptime across Anthropic + OpenAI |
252
+ | Prompt caching | Repeated system prompts | 90% cost reduction on cached prefix tokens |
253
+ | Cost tracking | Budget control | Per-user cost visibility, alerts at thresholds |
254
+
255
+ ## Checklist
256
+ - [ ] Streaming enabled for all user-facing chat responses
257
+ - [ ] Function calling schemas validated with Zod or JSON Schema
258
+ - [ ] Token counting runs before every API call to prevent context overflow
259
+ - [ ] Fallback chain configured with at least two providers
260
+ - [ ] Prompt caching enabled for system prompts over 1024 tokens
261
+ - [ ] Rate limiter in place for all LLM API calls
262
+ - [ ] Error handling covers 429, 500, 503, and timeout scenarios
263
+ - [ ] Usage metrics logged per request (tokens in, tokens out, latency, cost)