groundswell 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/system_prompts/task-breakdown.md +100 -0
  3. package/PRPs/001-hierarchical-workflow-engine.md +2438 -0
  4. package/PRPs/PRDs/001-hierarchical-workflow-engine.md +543 -0
  5. package/PRPs/PRDs/002-agent-prompt.md +390 -0
  6. package/PRPs/PRDs/003-agent-prompt.md +943 -0
  7. package/PRPs/PRDs/004-agent-prompt.md +1136 -0
  8. package/PRPs/PRDs/tasks-001.json +492 -0
  9. package/PRPs/README.md +83 -0
  10. package/PRPs/templates/prp_base.md +222 -0
  11. package/README.md +218 -0
  12. package/docs/agent.md +422 -0
  13. package/docs/prompt.md +419 -0
  14. package/docs/workflow.md +600 -0
  15. package/examples/README.md +244 -0
  16. package/examples/examples/01-basic-workflow.ts +100 -0
  17. package/examples/examples/02-decorator-options.ts +217 -0
  18. package/examples/examples/03-parent-child.ts +241 -0
  19. package/examples/examples/04-observers-debugger.ts +340 -0
  20. package/examples/examples/05-error-handling.ts +387 -0
  21. package/examples/examples/06-concurrent-tasks.ts +352 -0
  22. package/examples/examples/07-agent-loops.ts +432 -0
  23. package/examples/examples/08-sdk-features.ts +667 -0
  24. package/examples/examples/09-reflection.ts +573 -0
  25. package/examples/examples/10-introspection.ts +550 -0
  26. package/examples/index.ts +143 -0
  27. package/examples/utils/helpers.ts +57 -0
  28. package/llms_full.txt +5890 -0
  29. package/package.json +63 -0
  30. package/plan/P1P2/PRP.md +527 -0
  31. package/plan/P1P2/research/LRU_CACHE_BEST_PRACTICES.md +1929 -0
  32. package/plan/P1P2/research/LRU_CACHE_CODE_PATTERNS.md +857 -0
  33. package/plan/P1P2/research/LRU_CACHE_INTEGRATION_GUIDE.md +738 -0
  34. package/plan/P1P2/research/LRU_CACHE_RESEARCH_INDEX.md +424 -0
  35. package/plan/P1P2/research/REFLECTION_INDEX.md +291 -0
  36. package/plan/P1P2/research/REFLECTION_RESEARCH_REPORT.md +1342 -0
  37. package/plan/P1P2/research/RESEARCH_SUMMARY.md +342 -0
  38. package/plan/P1P2/research/anthropic-sdk.md +174 -0
  39. package/plan/P1P2/research/async-local-storage.md +200 -0
  40. package/plan/P1P2/research/reflection-code-patterns.md +1205 -0
  41. package/plan/P1P2/research/reflection-decision-matrix.md +421 -0
  42. package/plan/P1P2/research/reflection-implementation-guide.md +1341 -0
  43. package/plan/P1P2/research/reflection-integration-guide.md +834 -0
  44. package/plan/P1P2/research/reflection-patterns.md +1468 -0
  45. package/plan/P1P2/research/reflection-quick-reference.md +558 -0
  46. package/plan/P1P2/research/zod-schema.md +152 -0
  47. package/plan/P3P4/PRP.md +1388 -0
  48. package/plan/P3P4/research/caching-lru.md +116 -0
  49. package/plan/P3P4/research/introspection-tools.md +177 -0
  50. package/plan/P3P4/research/reflection-patterns.md +117 -0
  51. package/plan/P4P5/PRP.md +1136 -0
  52. package/plan/P4P5/research/RESEARCH_SUMMARY.md +151 -0
  53. package/plan/architecture/external_deps.md +358 -0
  54. package/plan/architecture/system_context.md +242 -0
  55. package/plan/backlog.json +867 -0
  56. package/plan/research/INTROSPECTION_RESEARCH_SUMMARY.md +378 -0
  57. package/plan/research/README-INTROSPECTION.md +352 -0
  58. package/plan/research/agent-introspection-patterns.md +1085 -0
  59. package/plan/research/introspection-security-guide.md +928 -0
  60. package/plan/research/introspection-tool-examples.md +875 -0
  61. package/scripts/generate-llms-full.ts +206 -0
  62. package/src/__tests__/integration/agent-workflow.test.ts +256 -0
  63. package/src/__tests__/integration/tree-mirroring.test.ts +114 -0
  64. package/src/__tests__/unit/agent.test.ts +169 -0
  65. package/src/__tests__/unit/cache-key.test.ts +182 -0
  66. package/src/__tests__/unit/cache.test.ts +172 -0
  67. package/src/__tests__/unit/context.test.ts +138 -0
  68. package/src/__tests__/unit/decorators.test.ts +100 -0
  69. package/src/__tests__/unit/introspection-tools.test.ts +277 -0
  70. package/src/__tests__/unit/prompt.test.ts +135 -0
  71. package/src/__tests__/unit/reflection.test.ts +210 -0
  72. package/src/__tests__/unit/tree-debugger.test.ts +85 -0
  73. package/src/__tests__/unit/workflow.test.ts +81 -0
  74. package/src/cache/cache-key.ts +244 -0
  75. package/src/cache/cache.ts +236 -0
  76. package/src/cache/index.ts +8 -0
  77. package/src/core/agent.ts +573 -0
  78. package/src/core/context.ts +119 -0
  79. package/src/core/event-tree.ts +260 -0
  80. package/src/core/factory.ts +123 -0
  81. package/src/core/index.ts +17 -0
  82. package/src/core/logger.ts +87 -0
  83. package/src/core/mcp-handler.ts +184 -0
  84. package/src/core/prompt.ts +150 -0
  85. package/src/core/workflow-context.ts +349 -0
  86. package/src/core/workflow.ts +302 -0
  87. package/src/debugger/index.ts +1 -0
  88. package/src/debugger/tree-debugger.ts +210 -0
  89. package/src/decorators/index.ts +3 -0
  90. package/src/decorators/observed-state.ts +95 -0
  91. package/src/decorators/step.ts +139 -0
  92. package/src/decorators/task.ts +96 -0
  93. package/src/examples/index.ts +2 -0
  94. package/src/examples/tdd-orchestrator.ts +65 -0
  95. package/src/examples/test-cycle-workflow.ts +64 -0
  96. package/src/index.ts +140 -0
  97. package/src/reflection/index.ts +5 -0
  98. package/src/reflection/reflection.ts +407 -0
  99. package/src/tools/index.ts +36 -0
  100. package/src/tools/introspection.ts +464 -0
  101. package/src/types/agent.ts +90 -0
  102. package/src/types/decorators.ts +25 -0
  103. package/src/types/error-strategy.ts +13 -0
  104. package/src/types/error.ts +20 -0
  105. package/src/types/events.ts +74 -0
  106. package/src/types/index.ts +55 -0
  107. package/src/types/logging.ts +24 -0
  108. package/src/types/observer.ts +18 -0
  109. package/src/types/prompt.ts +40 -0
  110. package/src/types/reflection.ts +117 -0
  111. package/src/types/sdk-primitives.ts +128 -0
  112. package/src/types/snapshot.ts +14 -0
  113. package/src/types/workflow-context.ts +163 -0
  114. package/src/types/workflow.ts +37 -0
  115. package/src/utils/id.ts +11 -0
  116. package/src/utils/index.ts +3 -0
  117. package/src/utils/observable.ts +77 -0
  118. package/tasks.json +0 -0
  119. package/tsconfig.json +22 -0
  120. package/vitest.config.ts +16 -0
@@ -0,0 +1,738 @@
1
+ # LRU Cache Integration Guide for Groundswell
2
+
3
+ **Framework Integration Patterns and Decision Matrix**
4
+
5
+ ---
6
+
7
+ ## Table of Contents
8
+
9
+ 1. [Quick Decision Matrix](#quick-decision-matrix)
10
+ 2. [Integration Strategies](#integration-strategies)
11
+ 3. [Framework-Specific Integration](#framework-specific-integration)
12
+ 4. [Migration Path](#migration-path)
13
+ 5. [Troubleshooting](#troubleshooting)
14
+
15
+ ---
16
+
17
+ ## Quick Decision Matrix
18
+
19
+ ### Choose Your Caching Strategy
20
+
21
+ | Requirement | Exact Cache | Semantic Cache | Hybrid | Comment |
22
+ |-------------|------------|----------------|--------|---------|
23
+ | **Hit Rate** | 30-40% | 60-70% | 65-75% | Semantic better for paraphrased queries |
24
+ | **Latency** | <1ms | 10-50ms | 5-30ms | Exact fastest, semantic requires embedding |
25
+ | **Memory** | Low | High | Medium | Semantic stores embeddings |
26
+ | **Setup** | Easy | Complex | Medium | Semantic needs embedding model |
27
+ | **Use Case** | Repeated identical queries | Similar meaning queries | Production | Choose based on user patterns |
28
+
29
+ ### Deployment Environment
30
+
31
+ | Environment | Recommendation | Config |
32
+ |-------------|-----------------|--------|
33
+ | **Development** | Single-process in-memory | max: 100, ttl: 10min |
34
+ | **Staging** | Single-process with monitoring | max: 1000, ttl: 1hr |
35
+ | **Production (Single Node)** | In-memory with persistence | max: 5000, maxSize: 500MB |
36
+ | **Production (Multi-Node)** | Redis + in-memory L1 | L1: max 1000, L2: Redis |
37
+ | **High-Load** | Redis + semantic cache | Consider GPTCache library |
38
+
39
+ ### Package Selection
40
+
41
+ | Package | When to Use | Performance | Notes |
42
+ |---------|------------|-------------|-------|
43
+ | **lru-cache** | v10+ for all cases | Excellent | Built-in types, recommended |
44
+ | **safe-stable-stringify** | Production LLM cache | Fast (~13k ops/sec) | Handles circular refs, zero deps |
45
+ | **fast-json-stable-stringify** | Performance-critical | Fastest (~17k ops/sec) | No circular ref support |
46
+ | **Redis (node-redis)** | Multi-node deployment | Depends on network | Shared cache across processes |
47
+ | **SQLite** | Persistent cache | Slow but durable | Good for offline testing |
48
+
49
+ ---
50
+
51
+ ## Integration Strategies
52
+
53
+ ### Strategy 1: Minimal Integration (Recommended for MVP)
54
+
55
+ **When:** Simple LLM query caching, single-node deployment, starting out
56
+
57
+ **Implementation:**
58
+
59
+ ```typescript
60
+ // src/services/llm-cache.ts
61
+ import { LRUCache } from 'lru-cache';
62
+ import { createHash } from 'node:crypto';
63
+ import safeStringify from 'safe-stable-stringify';
64
+
65
+ export class LLMCacheService {
66
+ private static instance: LLMCacheService;
67
+ private cache: LRUCache<string, any>;
68
+
69
+ private constructor() {
70
+ this.cache = new LRUCache({
71
+ max: 5000,
72
+ ttl: 24 * 3600 * 1000,
73
+ maxSize: 100 * 1024 * 1024 // 100 MB; lru-cache requires `sizeCalculation` (or a per-entry `size`) whenever maxSize is set
74
+ });
75
+ }
76
+
77
+ static getInstance(): LLMCacheService {
78
+ if (!LLMCacheService.instance) {
79
+ LLMCacheService.instance = new LLMCacheService();
80
+ }
81
+ return LLMCacheService.instance;
82
+ }
83
+
84
+ async get<T>(key: string, fetcher: () => Promise<T>): Promise<T> {
85
+ return this.cache.fetch(key, fetcher);
86
+ }
87
+
88
+ private generateKey(input: any): string {
89
+ const normalized = safeStringify(input);
90
+ return createHash('sha256').update(normalized).digest('hex');
91
+ }
92
+ }
93
+ ```
94
+
95
+ **Usage in workflow:**
96
+
97
+ ```typescript
98
+ // src/workflows/llm-query.workflow.ts
99
+ import { LLMCacheService } from '../services/llm-cache';
100
+
101
+ export class LLMQueryWorkflow {
102
+ private cacheService = LLMCacheService.getInstance();
103
+
104
+ async executeQuery(prompt: string, model: string): Promise<string> {
105
+ const cacheKey = this.generateKey(prompt, model);
106
+
107
+ return this.cacheService.get(
108
+ cacheKey,
109
+ async () => {
110
+ // Only called on cache miss
111
+ return this.callLLM(prompt, model);
112
+ }
113
+ );
114
+ }
115
+
116
+ private generateKey(prompt: string, model: string): string {
117
+ return createHash('sha256')
118
+ .update(JSON.stringify({ prompt, model }))
119
+ .digest('hex');
120
+ }
121
+
122
+ private async callLLM(prompt: string, model: string): Promise<string> {
123
+ // Your LLM API call
124
+ return 'response...';
125
+ }
126
+ }
127
+ ```
128
+
129
+ **Pros:**
130
+ - Simple, minimal dependencies
131
+ - Easy to test
132
+ - Singleton pattern ensures single cache instance
133
+
134
+ **Cons:**
135
+ - Only works with single process
136
+ - No distributed caching
137
+ - Manual key generation in each workflow
138
+
139
+ ---
140
+
141
+ ### Strategy 2: Service-Based Integration (Recommended for Production)
142
+
143
+ **When:** Complex caching needs, multiple workflows, monitoring required
144
+
145
+ **Implementation:**
146
+
147
+ ```typescript
148
+ // src/cache/cache.config.ts
149
+ import { LRUCache } from 'lru-cache';
150
+
151
+ export interface CacheOptions {
152
+ maxItems?: number;
153
+ maxSizeMB?: number;
154
+ ttlHours?: number;
155
+ enableMetrics?: boolean;
156
+ }
157
+
158
+ export const getCacheConfig = (env: string): CacheOptions => {
159
+ switch (env) {
160
+ case 'development':
161
+ return { maxItems: 100, maxSizeMB: 50, ttlHours: 1 };
162
+ case 'staging':
163
+ return { maxItems: 1000, maxSizeMB: 200, ttlHours: 4 };
164
+ case 'production':
165
+ return { maxItems: 5000, maxSizeMB: 500, ttlHours: 24, enableMetrics: true };
166
+ default:
167
+ return { maxItems: 500, maxSizeMB: 100, ttlHours: 2 };
168
+ }
169
+ };
170
+
171
+ // src/cache/cache.service.ts
172
+ import { createHash } from 'node:crypto';
173
+ import safeStringify from 'safe-stable-stringify';
174
+
175
+ export class CacheService {
176
+ private cache: LRUCache<string, any>;
177
+ private metrics = {
178
+ hits: 0,
179
+ misses: 0,
180
+ evictions: 0,
181
+ latencies: [] as number[]
182
+ };
183
+
184
+ constructor(options: CacheOptions) {
185
+ this.cache = new LRUCache({
186
+ max: options.maxItems || 1000,
187
+ maxSize: (options.maxSizeMB || 100) * 1024 * 1024,
188
+ sizeCalculation: (val) => {
189
+ const json = JSON.stringify(val);
190
+ return Buffer.byteLength(json, 'utf8') + 100;
191
+ },
192
+ ttl: (options.ttlHours || 24) * 3600 * 1000,
193
+ updateAgeOnGet: true,
194
+
195
+ dispose: (value, key, reason) => {
196
+ if (reason === 'evict' && options.enableMetrics) {
197
+ this.metrics.evictions++;
198
+ }
199
+ }
200
+ });
201
+ }
202
+
203
+ async fetch<T>(
204
+ input: Record<string, any>,
205
+ fetcher: () => Promise<T>
206
+ ): Promise<T> {
207
+ const start = performance.now();
208
+ const key = this.generateKey(input);
209
+
210
+ try {
211
+ const result = await this.cache.fetch(key, fetcher);
212
+ this.recordHit(performance.now() - start);
213
+ return result;
214
+ } catch (error) {
215
+ this.recordMiss(performance.now() - start);
216
+ throw error;
217
+ }
218
+ }
219
+
220
+ private generateKey(input: Record<string, any>): string {
221
+ const normalized = safeStringify(input);
222
+ const hash = createHash('sha256');
223
+ hash.update(normalized);
224
+ return hash.digest('hex');
225
+ }
226
+
227
+ private recordHit(latency: number): void {
228
+ this.metrics.hits++;
229
+ this.recordLatency(latency);
230
+ }
231
+
232
+ private recordMiss(latency: number): void {
233
+ this.metrics.misses++;
234
+ this.recordLatency(latency);
235
+ }
236
+
237
+ private recordLatency(latency: number): void {
238
+ this.metrics.latencies.push(latency);
239
+ if (this.metrics.latencies.length > 10000) {
240
+ this.metrics.latencies.shift();
241
+ }
242
+ }
243
+
244
+ getMetrics() {
245
+ const total = this.metrics.hits + this.metrics.misses;
246
+ const avgLatency = this.metrics.latencies.length > 0
247
+ ? this.metrics.latencies.reduce((a, b) => a + b, 0) / this.metrics.latencies.length
248
+ : 0;
249
+
250
+ return {
251
+ hits: this.metrics.hits,
252
+ misses: this.metrics.misses,
253
+ hitRate: total > 0 ? ((this.metrics.hits / total) * 100).toFixed(2) + '%' : '0%',
254
+ avgLatency: avgLatency.toFixed(3) + ' ms',
255
+ cacheSize: this.cache.size,
256
+ evictions: this.metrics.evictions
257
+ };
258
+ }
259
+
260
+ clear(): void {
261
+ this.cache.clear();
262
+ }
263
+ }
264
+ ```
265
+
266
+ **Usage in dependency injection:**
267
+
268
+ ```typescript
269
+ // src/index.ts (main entry point)
270
+ import { CacheService } from './cache/cache.service';
271
+ import { getCacheConfig } from './cache/cache.config';
272
+
273
+ // Setup
274
+ const env = process.env.NODE_ENV || 'development';
275
+ const cacheConfig = getCacheConfig(env);
276
+ const cacheService = new CacheService(cacheConfig);
277
+
278
+ // Export for dependency injection
279
+ export { cacheService };
280
+
281
+ // src/workflows/llm-query.workflow.ts
282
+ import { cacheService } from '../index';
283
+
284
+ export class LLMQueryWorkflow {
285
+ constructor(private cache = cacheService) {}
286
+
287
+ async query(input: any): Promise<string> {
288
+ return this.cache.fetch(
289
+ input,
290
+ async () => this.callLLMAPI(input)
291
+ );
292
+ }
293
+
294
+ private async callLLMAPI(input: any): Promise<string> {
295
+ // Implementation
296
+ return 'response...';
297
+ }
298
+ }
299
+ ```
300
+
301
+ **Pros:**
302
+ - Centralized cache configuration
303
+ - Easy to swap implementations (Redis, etc.)
304
+ - Metrics built-in
305
+ - Testable (can inject mock)
306
+
307
+ **Cons:**
308
+ - More boilerplate
309
+ - Slightly more complex setup
310
+
311
+ ---
312
+
313
+ ### Strategy 3: Multi-Layer Caching (For High Performance)
314
+
315
+ **When:** Multi-node deployment, critical performance requirements
316
+
317
+ **Implementation:**
318
+
319
+ ```typescript
320
+ // src/cache/multi-layer.cache.ts
321
+ import { LRUCache } from 'lru-cache';
322
+ import { createClient, RedisClientType } from 'redis';
323
+
324
+ export class MultiLayerCache {
325
+ private l1: LRUCache<string, any>; // In-process
326
+ private l2: RedisClientType | null = null; // Redis
327
+
328
+ constructor(
329
+ l1Config: any,
330
+ redisUrl?: string
331
+ ) {
332
+ this.l1 = new LRUCache(l1Config);
333
+
334
+ // Optional Redis
335
+ if (redisUrl) {
336
+ this.initRedis(redisUrl);
337
+ }
338
+ }
339
+
340
+ private async initRedis(url: string): Promise<void> {
341
+ this.l2 = createClient({ url });
342
+ await this.l2.connect();
343
+ }
344
+
345
+ async fetch<T>(
346
+ key: string,
347
+ fetcher: () => Promise<T>,
348
+ options: { ttl?: number } = {}
349
+ ): Promise<T> {
350
+ // L1: In-process (nanoseconds)
351
+ const l1Hit = this.l1.get(key);
352
+ if (l1Hit !== undefined) {
353
+ return l1Hit;
354
+ }
355
+
356
+ // L2: Redis (milliseconds)
357
+ if (this.l2) {
358
+ try {
359
+ const l2Data = await this.l2.get(key);
360
+ if (l2Data) {
361
+ const value = JSON.parse(l2Data);
362
+ this.l1.set(key, value); // Promote to L1
363
+ return value;
364
+ }
365
+ } catch (error) {
366
+ console.error('L2 cache error:', error);
367
+ // Fall through to fetcher
368
+ }
369
+ }
370
+
371
+ // Cache miss - fetch and store
372
+ const value = await fetcher();
373
+
374
+ this.l1.set(key, value);
375
+ if (this.l2) {
376
+ try {
377
+ const ttl = options.ttl || 24 * 3600 * 1000; // milliseconds (24 h); converted to seconds for Redis below
378
+ await this.l2.setEx(
379
+ key,
380
+ Math.floor(ttl / 1000),
381
+ JSON.stringify(value)
382
+ );
383
+ } catch (error) {
384
+ console.error('Failed to store in L2:', error);
385
+ }
386
+ }
387
+
388
+ return value;
389
+ }
390
+
391
+ async disconnect(): Promise<void> {
392
+ if (this.l2) {
393
+ await this.l2.disconnect();
394
+ }
395
+ }
396
+ }
397
+ ```
398
+
399
+ **Usage:**
400
+
401
+ ```typescript
402
+ // src/index.ts
403
+ const cache = new MultiLayerCache(
404
+ {
405
+ max: 1000,
406
+ maxSize: 100 * 1024 * 1024
407
+ },
408
+ process.env.REDIS_URL // Optional
409
+ );
410
+
411
+ export { cache };
412
+ ```
413
+
414
+ **Pros:**
415
+ - Fast L1 lookups
416
+ - Distributed caching with L2
417
+ - Transparent failover
418
+
419
+ **Cons:**
420
+ - Redis dependency
421
+ - Additional latency on L1 miss
422
+ - Network overhead for L2
423
+
424
+ ---
425
+
426
+ ## Framework-Specific Integration
427
+
428
+ ### Integration with Groundswell Workflow Engine
429
+
430
+ ```typescript
431
+ // src/plugins/cache.plugin.ts
432
+ import { WorkflowPlugin } from '@groundswell/core';
433
+ import { CacheService } from '../cache/cache.service';
434
+
435
+ export class CachePlugin implements WorkflowPlugin {
436
+ private cache: CacheService;
437
+
438
+ constructor(cacheService: CacheService) {
439
+ this.cache = cacheService;
440
+ }
441
+
442
+ async onTaskStart(context: any): Promise<void> {
443
+ context.cache = this.cache;
444
+ }
445
+
446
+ async onTaskComplete(context: any): Promise<void> {
447
+ // Optional: log metrics
448
+ if (context.taskName === 'llm-query') {
449
+ console.log('[LLM Cache]', this.cache.getMetrics());
450
+ }
451
+ }
452
+ }
453
+
454
+ // src/index.ts
455
+ import { createWorkflow } from '@groundswell/core';
456
+ import { CachePlugin } from './plugins/cache.plugin';
457
+
458
+ const workflow = createWorkflow()
459
+ .use(new CachePlugin(cacheService))
460
+ .define(/* ... */);
461
+ ```
462
+
463
+ ### Decorator-Based Integration
464
+
465
+ ```typescript
466
+ // src/decorators/cacheable.ts
467
+ import { CacheService } from '../cache/cache.service';
468
+
469
+ export function Cacheable(options: { ttlHours?: number } = {}) {
470
+ return function (
471
+ target: any,
472
+ propertyKey: string,
473
+ descriptor: PropertyDescriptor
474
+ ) {
475
+ const originalMethod = descriptor.value;
476
+
477
+ descriptor.value = async function (...args: any[]) {
478
+ const cache = (this as any).cache || CacheService.getInstance();
479
+
480
+ const cacheKey = {
481
+ method: propertyKey,
482
+ args: args
483
+ };
484
+
485
+ return cache.fetch(
486
+ cacheKey,
487
+ () => originalMethod.apply(this, args)
488
+ );
489
+ };
490
+
491
+ return descriptor;
492
+ };
493
+ }
494
+
495
+ // Usage
496
+ class LLMService {
497
+ @Cacheable({ ttlHours: 24 })
498
+ async query(prompt: string): Promise<string> {
499
+ // Implementation
500
+ return 'response...';
501
+ }
502
+ }
503
+ ```
504
+
505
+ ### OpenAI Integration Example
506
+
507
+ ```typescript
508
+ // src/services/openai-cached.service.ts
509
+ import { OpenAI } from 'openai';
510
+ import { CacheService } from '../cache/cache.service';
511
+
512
+ export class CachedOpenAIService {
513
+ private openai = new OpenAI();
514
+ private cache: CacheService;
515
+
516
+ constructor(cacheService: CacheService) {
517
+ this.cache = cacheService;
518
+ }
519
+
520
+ async chat(params: OpenAI.Chat.ChatCompletionCreateParams): Promise<string> {
521
+ return this.cache.fetch(
522
+ {
523
+ model: params.model,
524
+ messages: params.messages,
525
+ temperature: params.temperature
526
+ },
527
+ async () => {
528
+ const response = await this.openai.chat.completions.create(params);
529
+ return response.choices[0]?.message.content || '';
530
+ }
531
+ );
532
+ }
533
+
534
+ async embed(input: string | string[]): Promise<number[][]> {
535
+ return this.cache.fetch(
536
+ { input },
537
+ async () => {
538
+ const response = await this.openai.embeddings.create({
539
+ model: 'text-embedding-3-small',
540
+ input
541
+ });
542
+ return response.data.map(d => d.embedding);
543
+ }
544
+ );
545
+ }
546
+ }
547
+ ```
548
+
549
+ ---
550
+
551
+ ## Migration Path
552
+
553
+ ### Phase 1: Evaluation (Week 1)
554
+
555
+ ```typescript
556
+ // Minimal cache in test environment
557
+ const testCache = new LRUCache({
558
+ max: 100,
559
+ ttl: 600000 // 10 minutes
560
+ });
561
+
562
+ // Measure baseline metrics
563
+ // - API call latency
564
+ // - Query patterns
565
+ // - Cache hit rates
566
+ ```
567
+
568
+ ### Phase 2: Pilot (Week 2-3)
569
+
570
+ ```typescript
571
+ // Single workflow with caching enabled
572
+ // Monitor:
573
+ // - Hit rate (target: 30%+)
574
+ // - Memory usage
575
+ // - Performance improvement
576
+ ```
577
+
578
+ ### Phase 3: Rollout (Week 4-8)
579
+
580
+ ```typescript
581
+ // Progressive rollout to all LLM workflows
582
+ // Week 4: Critical paths only
583
+ // Week 5: 50% of workflows
584
+ // Week 6-8: 100% with monitoring
585
+ ```
586
+
587
+ ### Phase 4: Optimization (Week 8+)
588
+
589
+ ```typescript
590
+ // Add semantic caching
591
+ // Tune configuration
592
+ // Add distributed caching if needed
593
+ ```
594
+
595
+ ---
596
+
597
+ ## Troubleshooting
598
+
599
+ ### Issue 1: Low Cache Hit Rate (< 20%)
600
+
601
+ **Diagnosis:**
602
+
603
+ ```typescript
604
+ // Check if keys are deterministic
605
+ const key1 = objectKeyHash({ a: 1, b: 2 });
606
+ const key2 = objectKeyHash({ b: 2, a: 1 });
607
+ console.log(key1 === key2); // Should be true
608
+ ```
609
+
610
+ **Solutions:**
611
+ 1. Verify that keys are built with `safe-stable-stringify` (not `JSON.stringify`), so that property order does not change the key
612
+ 2. Check whether prompts vary slightly (whitespace differences, letter case)
613
+ 3. Consider semantic caching for variations
614
+ 4. Normalize prompts before caching
615
+
616
+ ### Issue 2: High Memory Usage
617
+
618
+ **Diagnosis:**
619
+
620
+ ```typescript
621
+ // Check actual cache memory consumption
622
+ const metrics = cache.getMetrics();
623
+ console.log(`Size: ${metrics.cacheSize} items`);
624
+ console.log(`Memory: ${metrics.estimatedMemory} MB`);
625
+ ```
626
+
627
+ **Solutions:**
628
+ 1. Reduce `maxSize` limit
629
+ 2. Reduce `ttl` for faster expiration
630
+ 3. Implement `sizeCalculation` correctly
631
+ 4. Add memory monitoring: `process.memoryUsage()`
632
+
633
+ ### Issue 3: Stale Data
634
+
635
+ **Diagnosis:**
636
+
637
+ ```typescript
638
+ // Expired items aren't removed until they are next accessed (unless `ttlAutopurge` is enabled)
639
+ // This is expected behavior
640
+ const cached = cache.get(key); // Triggers TTL check
641
+ ```
642
+
643
+ **Solutions:**
644
+ 1. Use a shorter TTL (e.g., 24 hours instead of 30 days)
645
+ 2. Implement active cache purging
646
+ 3. Use external validation (check API for updates)
647
+ 4. Version cache keys
648
+
649
+ ### Issue 4: Cache Not Working in Tests
650
+
651
+ **Solution:**
652
+
653
+ ```typescript
654
+ // Mock or provide clean cache instance for each test
655
+ beforeEach(() => {
656
+ cache = new LRUCache({ max: 100 });
657
+ });
658
+
659
+ afterEach(() => {
660
+ cache.clear();
661
+ });
662
+ ```
663
+
664
+ ### Issue 5: Race Conditions
665
+
666
+ **Diagnosis:**
667
+
668
+ ```typescript
669
+ // ❌ Wrong: Manual lookup allows duplicates
670
+ if (!cache.has(key)) {
671
+ const value = await expensiveOp();
672
+ cache.set(key, value);
673
+ }
674
+
675
+ // ✅ Correct: fetch() deduplicates
676
+ const value = await cache.fetch(key, expensiveOp);
677
+ ```
678
+
679
+ **Solution:**
680
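+ Always use the `fetch()` method instead of a manual `has()` check followed by `set()`. In `lru-cache`, the loader is supplied as the `fetchMethod` constructor option; concurrent `fetch()` calls for the same key then share a single in-flight load.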
681
+
682
+ ### Issue 6: Keys Too Long
683
+
684
+ **Problem:** 64-char SHA-256 keys add overhead
685
+
686
+ **Solution:**
687
+
688
+ ```typescript
689
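+ // Use shorter hash (8 chars) for readability; 8 hex chars is only 32 bits, so collisions (wrong cached responses) become plausible as the number of keys grows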
690
+ const key = hash.digest('hex').substring(0, 8);
691
+
692
+ // Or use composite key with prefix
693
+ const key = `gpt4:${hash.digest('hex').substring(0, 8)}`;
694
+ ```
695
+
696
+ ---
697
+
698
+ ## Performance Tuning Checklist
699
+
700
+ - [ ] Using `safe-stable-stringify` (not `JSON.stringify`)
701
+ - [ ] Using `fetch()` method (not manual lookup)
702
+ - [ ] `max` or `maxSize` configured
703
+ - [ ] Appropriate `ttl` set (24 hours for LLM responses)
704
+ - [ ] `sizeCalculation` implemented correctly
705
+ - [ ] Metrics collection enabled
706
+ - [ ] Hit rate > 30% (exact) or > 60% (semantic)
707
+ - [ ] Memory usage < budget
708
+ - [ ] Monitoring/alerting configured
709
+ - [ ] Cache warming for common queries
710
+
711
+ ---
712
+
713
+ ## Decision Flowchart
714
+
715
+ ```
716
+ Start: Need to cache LLM responses?
717
+
718
+ ├─ Single node, simple? → Strategy 1 (Minimal)
719
+
720
+ ├─ Multiple nodes?
721
+ │ ├─ Yes → Strategy 3 (Multi-layer)
722
+ │ └─ No → Strategy 2 (Service-based)
723
+
724
+ ├─ Hit rate < 30%?
725
+ │ ├─ Yes → Add semantic caching
726
+ │ └─ No → Optimize existing cache
727
+
728
+ ├─ Memory limited?
729
+ │ ├─ Yes → Use maxSize, reduce maxItems
730
+ │ └─ No → Configure generously
731
+
732
+ └─ Ready for production? → Add monitoring, metrics, alerts
733
+ ```
734
+
735
+ ---
736
+
737
+ **Document Version:** 1.0
738
+ **Last Updated:** 2025-12-08