groundswell 0.0.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +26 -9
- package/dist/cache/cache-key.d.ts +86 -0
- package/dist/cache/cache-key.d.ts.map +1 -0
- package/dist/cache/cache-key.js +204 -0
- package/dist/cache/cache-key.js.map +1 -0
- package/dist/cache/cache.d.ts +104 -0
- package/dist/cache/cache.d.ts.map +1 -0
- package/dist/cache/cache.js +179 -0
- package/dist/cache/cache.js.map +1 -0
- package/{src/cache/index.ts → dist/cache/index.d.ts} +1 -1
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +6 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/core/agent.d.ts +203 -0
- package/dist/core/agent.d.ts.map +1 -0
- package/dist/core/agent.js +833 -0
- package/dist/core/agent.js.map +1 -0
- package/{src/core/context.ts → dist/core/context.d.ts} +16 -67
- package/dist/core/context.d.ts.map +1 -0
- package/dist/core/context.js +80 -0
- package/dist/core/context.js.map +1 -0
- package/dist/core/event-tree.d.ts +72 -0
- package/dist/core/event-tree.d.ts.map +1 -0
- package/dist/core/event-tree.js +211 -0
- package/dist/core/event-tree.js.map +1 -0
- package/{src/core/factory.ts → dist/core/factory.d.ts} +6 -27
- package/dist/core/factory.d.ts.map +1 -0
- package/dist/core/factory.js +110 -0
- package/dist/core/factory.js.map +1 -0
- package/{src/core/index.ts → dist/core/index.d.ts} +2 -10
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +9 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/logger.d.ts +50 -0
- package/dist/core/logger.d.ts.map +1 -0
- package/dist/core/logger.js +91 -0
- package/dist/core/logger.js.map +1 -0
- package/dist/core/mcp-handler.d.ts +127 -0
- package/dist/core/mcp-handler.d.ts.map +1 -0
- package/dist/core/mcp-handler.js +323 -0
- package/dist/core/mcp-handler.js.map +1 -0
- package/dist/core/prompt.d.ts +80 -0
- package/dist/core/prompt.d.ts.map +1 -0
- package/dist/core/prompt.js +120 -0
- package/dist/core/prompt.js.map +1 -0
- package/dist/core/workflow-context.d.ts +61 -0
- package/dist/core/workflow-context.d.ts.map +1 -0
- package/dist/core/workflow-context.js +358 -0
- package/dist/core/workflow-context.js.map +1 -0
- package/dist/core/workflow.d.ts +543 -0
- package/dist/core/workflow.d.ts.map +1 -0
- package/dist/core/workflow.js +986 -0
- package/dist/core/workflow.js.map +1 -0
- package/dist/debugger/event-replayer.d.ts +422 -0
- package/dist/debugger/event-replayer.d.ts.map +1 -0
- package/dist/debugger/event-replayer.js +639 -0
- package/dist/debugger/event-replayer.js.map +1 -0
- package/dist/debugger/index.d.ts +2 -0
- package/dist/debugger/index.d.ts.map +1 -0
- package/{src/debugger/index.ts → dist/debugger/index.js} +1 -0
- package/dist/debugger/index.js.map +1 -0
- package/dist/debugger/tree-debugger.d.ts +240 -0
- package/dist/debugger/tree-debugger.d.ts.map +1 -0
- package/dist/debugger/tree-debugger.js +620 -0
- package/dist/debugger/tree-debugger.js.map +1 -0
- package/dist/decorators/index.d.ts +4 -0
- package/dist/decorators/index.d.ts.map +1 -0
- package/{src/decorators/index.ts → dist/decorators/index.js} +1 -0
- package/dist/decorators/index.js.map +1 -0
- package/dist/decorators/observed-state.d.ts +32 -0
- package/dist/decorators/observed-state.d.ts.map +1 -0
- package/dist/decorators/observed-state.js +79 -0
- package/dist/decorators/observed-state.js.map +1 -0
- package/dist/decorators/step.d.ts +15 -0
- package/dist/decorators/step.d.ts.map +1 -0
- package/dist/decorators/step.js +192 -0
- package/dist/decorators/step.js.map +1 -0
- package/dist/decorators/task.d.ts +50 -0
- package/dist/decorators/task.d.ts.map +1 -0
- package/dist/decorators/task.js +118 -0
- package/dist/decorators/task.js.map +1 -0
- package/dist/examples/index.d.ts +3 -0
- package/dist/examples/index.d.ts.map +1 -0
- package/{src/examples/index.ts → dist/examples/index.js} +1 -0
- package/dist/examples/index.js.map +1 -0
- package/dist/examples/tdd-orchestrator.d.ts +15 -0
- package/dist/examples/tdd-orchestrator.d.ts.map +1 -0
- package/dist/examples/tdd-orchestrator.js +121 -0
- package/dist/examples/tdd-orchestrator.js.map +1 -0
- package/dist/examples/test-cycle-workflow.d.ts +14 -0
- package/dist/examples/test-cycle-workflow.d.ts.map +1 -0
- package/dist/examples/test-cycle-workflow.js +116 -0
- package/dist/examples/test-cycle-workflow.js.map +1 -0
- package/dist/harnesses/claude-code-harness.d.ts +391 -0
- package/dist/harnesses/claude-code-harness.d.ts.map +1 -0
- package/dist/harnesses/claude-code-harness.js +1076 -0
- package/dist/harnesses/claude-code-harness.js.map +1 -0
- package/dist/harnesses/harness-registry.d.ts +440 -0
- package/dist/harnesses/harness-registry.d.ts.map +1 -0
- package/dist/harnesses/harness-registry.js +543 -0
- package/dist/harnesses/harness-registry.js.map +1 -0
- package/dist/harnesses/index.d.ts +12 -0
- package/dist/harnesses/index.d.ts.map +1 -0
- package/dist/harnesses/index.js +11 -0
- package/dist/harnesses/index.js.map +1 -0
- package/dist/harnesses/pi-harness.d.ts +219 -0
- package/dist/harnesses/pi-harness.d.ts.map +1 -0
- package/dist/harnesses/pi-harness.js +676 -0
- package/dist/harnesses/pi-harness.js.map +1 -0
- package/dist/harnesses/pi-schema-converter.d.ts +24 -0
- package/dist/harnesses/pi-schema-converter.d.ts.map +1 -0
- package/dist/harnesses/pi-schema-converter.js +81 -0
- package/dist/harnesses/pi-schema-converter.js.map +1 -0
- package/dist/harnesses/register-defaults.d.ts +24 -0
- package/dist/harnesses/register-defaults.d.ts.map +1 -0
- package/dist/harnesses/register-defaults.js +40 -0
- package/dist/harnesses/register-defaults.js.map +1 -0
- package/dist/harnesses/session-store.d.ts +201 -0
- package/dist/harnesses/session-store.d.ts.map +1 -0
- package/dist/harnesses/session-store.js +254 -0
- package/dist/harnesses/session-store.js.map +1 -0
- package/dist/index.d.ts +37 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +57 -0
- package/dist/index.js.map +1 -0
- package/dist/reflection/index.d.ts +5 -0
- package/dist/reflection/index.d.ts.map +1 -0
- package/{src/reflection/index.ts → dist/reflection/index.js} +1 -1
- package/dist/reflection/index.js.map +1 -0
- package/dist/reflection/reflection.d.ts +84 -0
- package/dist/reflection/reflection.d.ts.map +1 -0
- package/dist/reflection/reflection.js +344 -0
- package/dist/reflection/reflection.js.map +1 -0
- package/dist/tools/index.d.ts +6 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +11 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/introspection.d.ts +165 -0
- package/dist/tools/introspection.d.ts.map +1 -0
- package/dist/tools/introspection.js +324 -0
- package/dist/tools/introspection.js.map +1 -0
- package/dist/types/agent.d.ts +1317 -0
- package/dist/types/agent.d.ts.map +1 -0
- package/dist/types/agent.js +423 -0
- package/dist/types/agent.js.map +1 -0
- package/dist/types/decorators.d.ts +40 -0
- package/dist/types/decorators.d.ts.map +1 -0
- package/dist/types/decorators.js +2 -0
- package/dist/types/decorators.js.map +1 -0
- package/dist/types/error-strategy.d.ts +13 -0
- package/dist/types/error-strategy.d.ts.map +1 -0
- package/dist/types/error-strategy.js +2 -0
- package/dist/types/error-strategy.js.map +1 -0
- package/dist/types/error.d.ts +20 -0
- package/dist/types/error.d.ts.map +1 -0
- package/dist/types/error.js +2 -0
- package/dist/types/error.js.map +1 -0
- package/dist/types/events.d.ts +113 -0
- package/dist/types/events.d.ts.map +1 -0
- package/dist/types/events.js +2 -0
- package/dist/types/events.js.map +1 -0
- package/dist/types/harnesses.d.ts +474 -0
- package/dist/types/harnesses.d.ts.map +1 -0
- package/dist/types/harnesses.js +2 -0
- package/dist/types/harnesses.js.map +1 -0
- package/dist/types/index.d.ts +23 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +8 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/logging.d.ts +24 -0
- package/dist/types/logging.d.ts.map +1 -0
- package/dist/types/logging.js +2 -0
- package/dist/types/logging.js.map +1 -0
- package/dist/types/observer.d.ts +18 -0
- package/dist/types/observer.d.ts.map +1 -0
- package/dist/types/observer.js +2 -0
- package/dist/types/observer.js.map +1 -0
- package/dist/types/prompt.d.ts +31 -0
- package/dist/types/prompt.d.ts.map +1 -0
- package/dist/types/prompt.js +6 -0
- package/dist/types/prompt.js.map +1 -0
- package/dist/types/providers.d.ts +691 -0
- package/dist/types/providers.d.ts.map +1 -0
- package/dist/types/providers.js +14 -0
- package/dist/types/providers.js.map +1 -0
- package/dist/types/reflection.d.ts +96 -0
- package/dist/types/reflection.d.ts.map +1 -0
- package/dist/types/reflection.js +24 -0
- package/dist/types/reflection.js.map +1 -0
- package/dist/types/restart.d.ts +132 -0
- package/dist/types/restart.d.ts.map +1 -0
- package/dist/types/restart.js +2 -0
- package/dist/types/restart.js.map +1 -0
- package/dist/types/sdk-primitives.d.ts +118 -0
- package/dist/types/sdk-primitives.d.ts.map +1 -0
- package/dist/types/sdk-primitives.js +6 -0
- package/dist/types/sdk-primitives.js.map +1 -0
- package/{src/types/snapshot.ts → dist/types/snapshot.d.ts} +5 -5
- package/dist/types/snapshot.d.ts.map +1 -0
- package/dist/types/snapshot.js +2 -0
- package/dist/types/snapshot.js.map +1 -0
- package/dist/types/streaming.d.ts +194 -0
- package/dist/types/streaming.d.ts.map +1 -0
- package/dist/types/streaming.js +67 -0
- package/dist/types/streaming.js.map +1 -0
- package/dist/types/workflow-context.d.ts +275 -0
- package/dist/types/workflow-context.d.ts.map +1 -0
- package/dist/types/workflow-context.js +8 -0
- package/dist/types/workflow-context.js.map +1 -0
- package/dist/types/workflow.d.ts +30 -0
- package/dist/types/workflow.d.ts.map +1 -0
- package/dist/types/workflow.js +2 -0
- package/dist/types/workflow.js.map +1 -0
- package/dist/utils/agent-validation.d.ts +88 -0
- package/dist/utils/agent-validation.d.ts.map +1 -0
- package/dist/utils/agent-validation.js +87 -0
- package/dist/utils/agent-validation.js.map +1 -0
- package/dist/utils/delay.d.ts +7 -0
- package/dist/utils/delay.d.ts.map +1 -0
- package/dist/utils/delay.js +9 -0
- package/dist/utils/delay.js.map +1 -0
- package/dist/utils/harness-config.d.ts +180 -0
- package/dist/utils/harness-config.d.ts.map +1 -0
- package/dist/utils/harness-config.js +311 -0
- package/dist/utils/harness-config.js.map +1 -0
- package/dist/utils/id.d.ts +6 -0
- package/dist/utils/id.d.ts.map +1 -0
- package/dist/utils/id.js +12 -0
- package/dist/utils/id.js.map +1 -0
- package/dist/utils/index.d.ts +13 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +11 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/model-spec.d.ts +110 -0
- package/dist/utils/model-spec.d.ts.map +1 -0
- package/dist/utils/model-spec.js +149 -0
- package/dist/utils/model-spec.js.map +1 -0
- package/dist/utils/observable.d.ts +54 -0
- package/dist/utils/observable.d.ts.map +1 -0
- package/dist/utils/observable.js +82 -0
- package/dist/utils/observable.js.map +1 -0
- package/dist/utils/provider-config.d.ts +10 -0
- package/dist/utils/provider-config.d.ts.map +1 -0
- package/dist/utils/provider-config.js +10 -0
- package/dist/utils/provider-config.js.map +1 -0
- package/dist/utils/restart-analysis.d.ts +202 -0
- package/dist/utils/restart-analysis.d.ts.map +1 -0
- package/dist/utils/restart-analysis.js +426 -0
- package/dist/utils/restart-analysis.js.map +1 -0
- package/dist/utils/session-serialization.d.ts +118 -0
- package/dist/utils/session-serialization.d.ts.map +1 -0
- package/dist/utils/session-serialization.js +217 -0
- package/dist/utils/session-serialization.js.map +1 -0
- package/dist/utils/workflow-error-utils.d.ts +22 -0
- package/dist/utils/workflow-error-utils.d.ts.map +1 -0
- package/dist/utils/workflow-error-utils.js +45 -0
- package/dist/utils/workflow-error-utils.js.map +1 -0
- package/package.json +34 -5
- package/.claude/commands/subtask-planning/prp-base-create.md +0 -120
- package/.claude/commands/subtask-planning/prp-base-execute.md +0 -65
- package/.claude/commands/task-breakdown.md +0 -94
- package/.claude/settings.local.json +0 -9
- package/.claude/system_prompts/task-breakdown.md +0 -101
- package/CHANGELOG.md +0 -188
- package/PRD.md +0 -543
- package/PRPs/001-hierarchical-workflow-engine.md +0 -2438
- package/PRPs/PRDs/002-agent-prompt.md +0 -390
- package/PRPs/PRDs/003-agent-prompt.md +0 -943
- package/PRPs/PRDs/004-agent-prompt.md +0 -1136
- package/PRPs/PRDs/tasks-001.json +0 -492
- package/PRPs/README.md +0 -83
- package/PRPs/templates/prp_base.md +0 -222
- package/docs/agent.md +0 -422
- package/docs/prompt.md +0 -419
- package/docs/workflow.md +0 -600
- package/examples/README.md +0 -258
- package/examples/examples/01-basic-workflow.ts +0 -100
- package/examples/examples/02-decorator-options.ts +0 -217
- package/examples/examples/03-parent-child.ts +0 -241
- package/examples/examples/04-observers-debugger.ts +0 -340
- package/examples/examples/05-error-handling.ts +0 -387
- package/examples/examples/06-concurrent-tasks.ts +0 -352
- package/examples/examples/07-agent-loops.ts +0 -432
- package/examples/examples/08-sdk-features.ts +0 -667
- package/examples/examples/09-reflection.ts +0 -573
- package/examples/examples/10-introspection.ts +0 -550
- package/examples/examples/11-reparenting-workflows.ts +0 -269
- package/examples/index.ts +0 -147
- package/examples/utils/helpers.ts +0 -57
- package/package-lock.json +0 -2398
- package/plan/001_d3bb02af4886/TEST_RESULTS.md +0 -259
- package/plan/001_d3bb02af4886/backlog.json +0 -867
- package/plan/001_d3bb02af4886/bug_fix_tasks.json +0 -484
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M1T1S1/PRP.md +0 -488
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M1T1S2/PRP.md +0 -581
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M1T1S3/PRP.md +0 -687
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T1S1/PRP.md +0 -492
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T1S3/PRP.md +0 -932
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T1S3/research/concurrent_error_testing_patterns.md +0 -1109
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T1S3/research/vitest_concurrent_testing.md +0 -802
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T1S3/research/workflow_engine_test_references.md +0 -603
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T2S1/PRP.md +0 -564
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T2S3/PRP.md +0 -518
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T2S4/PRP.md +0 -1252
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T3S1/PRP.md +0 -364
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T3S1/research/CODEBASE_INVENTORY.md +0 -114
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T3S1/research/DECORATOR_DOCUMENTATION_PATTERNS.md +0 -205
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T3S1/research/PRD_LOCATION_ANALYSIS.md +0 -199
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M2T3S1/research/ULTRATHINK_PRP_PLAN.md +0 -134
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T1S1/PRP.md +0 -495
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T1S1/research/console_error_inventory.md +0 -435
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T1S2/PRP.md +0 -506
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T1S3/PRP.md +0 -612
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T2S2/PRP.md +0 -558
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T2S2/research/external_research.md +0 -788
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T3S2/PRP.md +0 -460
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T3S3/PRP.md +0 -454
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T4S1/PRP.md +0 -520
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T4S1/RECOMMENDATION.md +0 -417
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T4S1/research/external_workflow_engines_research.md +0 -760
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T4S1/research/security_implications_analysis.md +0 -245
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M3T4S2/PRP.md +0 -792
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T1S1/PRP.md +0 -535
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T1S1/TEST_EXECUTION_REPORT.md +0 -190
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T1S2/PRP.md +0 -654
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T1S2/TEST_FIX_REPORT.md +0 -227
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T1S2/research/KEY_FINDINGS.md +0 -345
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T1S2/research/QUICK_REFERENCE.md +0 -193
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T1S2/research/test_maintenance_research.md +0 -1323
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T3S1/BREAKING_CHANGES_AUDIT.md +0 -1011
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T3S1/PRP.md +0 -927
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/P1M4T3S2/PRP.md +0 -505
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/architecture/logger_child_signature_analysis.md +0 -401
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M1T1S3/child_implementation_research.md +0 -142
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M1T1S3/test_patterns_research.md +0 -112
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M1T1S3/vitest_patterns_research.md +0 -159
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M1T1S4/PRP.md +0 -549
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M1T1S4/VERIFICATION_REPORT.md +0 -368
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M1T1S4/edge_case_analysis.md +0 -172
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M1T1S4/usage_inventory.md +0 -175
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T1S2/PRP.md +0 -696
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T1S4/PRP.md +0 -860
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/PRP.md +0 -1066
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/01-testing-aggregated-errors.md +0 -1103
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/01_typescript_error_aggregation_patterns.md +0 -789
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/02-error-merge-strategy-testing-guide.md +0 -1098
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/02_aggregate_error_patterns.md +0 -1037
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/03-promise-allsettled-testing-patterns.md +0 -916
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/03_error_merging_strategies.md +0 -1045
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/04_github_stackoverflow_examples.md +0 -890
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/05_comprehensive_summary.md +0 -822
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/INDEX.md +0 -668
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/QUICK_REFERENCE.md +0 -706
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/README.md +0 -265
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S2/research/RESEARCH_REPORT.md +0 -655
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T2S4/research/vitest_testing_patterns.md +0 -1103
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M2T3S2/PRP.md +0 -426
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T1S2/PRP.md +0 -506
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T1S2/research/QUICK_REFERENCE.md +0 -114
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T1S2/research/RESEARCH_SUMMARY.md +0 -316
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T1S2/research/vitest_observer_error_logging_best_practices.md +0 -754
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T1S3/PRP.md +0 -612
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T2S1/PRP.md +0 -719
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T2S1/README.md +0 -215
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T2S1/analysis.md +0 -765
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T2S3/PRP.md +0 -718
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T3S1/DECISION.md +0 -149
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T3S1/PRP.md +0 -470
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T3S1/research/ULTRATHINK_PLAN.md +0 -332
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T3S1/research/codebase_workflow_name_analysis.md +0 -167
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T3S1/research/external_best_practices.md +0 -265
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T3S1/research/validation_patterns.md +0 -273
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T4S1/workflow_engine_ancestry_api_research.md +0 -760
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M3T4S3-PRP.md +0 -434
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M4T2S1/PRP.md +0 -717
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M4T2S2/PRP.md +0 -472
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M4T2S2/VALIDATION_REPORT.md +0 -125
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/P1M4T2S2/research/ULTRATHINK_PRP_PLAN.md +0 -301
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/error-logging-best-practices.md +0 -1170
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/research_typescript_partial_and_overloads.md +0 -940
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/vitest-quick-reference.md +0 -151
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/docs/vitest-research.md +0 -650
- package/plan/001_d3bb02af4886/bugfix/001_e8e04329daf3/prd_snapshot.md +0 -259
- package/plan/001_d3bb02af4886/bugfix/P1M1T1S1/PRP.md +0 -457
- package/plan/001_d3bb02af4886/bugfix/RESEARCH_SUMMARY.md +0 -346
- package/plan/001_d3bb02af4886/bugfix/architecture/codebase_structure.md +0 -311
- package/plan/001_d3bb02af4886/bugfix/architecture/concurrent_execution_best_practices.md +0 -1565
- package/plan/001_d3bb02af4886/bugfix/architecture/error_handling_patterns.md +0 -288
- package/plan/001_d3bb02af4886/bugfix/architecture/promise_all_analysis.md +0 -741
- package/plan/001_d3bb02af4886/docs/PRP/P1M1T1S4-functional-workflow-error-state-capture-test.md +0 -652
- package/plan/001_d3bb02af4886/docs/PRP/P1P2-PRP.md +0 -527
- package/plan/001_d3bb02af4886/docs/PRP/P3P4-PRP.md +0 -1388
- package/plan/001_d3bb02af4886/docs/PRP/P4P5-PRP.md +0 -1136
- package/plan/001_d3bb02af4886/docs/PRP/PRP.md +0 -527
- package/plan/001_d3bb02af4886/docs/PRP/bugfix/P1M1T2S1-PRP.md +0 -415
- package/plan/001_d3bb02af4886/docs/PRP/bugfix/P1M1T2S2-PRP.md +0 -378
- package/plan/001_d3bb02af4886/docs/PRP/bugfix/P1M1T2S4-PRP.md +0 -713
- package/plan/001_d3bb02af4886/docs/PRP/bugfix/P1M2T1S4-PRP.md +0 -370
- package/plan/001_d3bb02af4886/docs/PRP_P1M3T1S3.md +0 -499
- package/plan/001_d3bb02af4886/docs/TEST_RESULTS.md +0 -230
- package/plan/001_d3bb02af4886/docs/architecture/external_deps.md +0 -358
- package/plan/001_d3bb02af4886/docs/architecture/system_context.md +0 -242
- package/plan/001_d3bb02af4886/docs/bugfix/ANALYSIS_PRD_VS_IMPLEMENTATION.md +0 -1134
- package/plan/001_d3bb02af4886/docs/bugfix/GAP_ANALYSIS_SUMMARY.md +0 -179
- package/plan/001_d3bb02af4886/docs/bugfix/P1M4T2S1/PRP.md +0 -629
- package/plan/001_d3bb02af4886/docs/bugfix/P1M4T2S1/validation-report.md +0 -214
- package/plan/001_d3bb02af4886/docs/bugfix/PRP_P1M4T2S3.md +0 -629
- package/plan/001_d3bb02af4886/docs/bugfix/bugfix_PRP.md +0 -529
- package/plan/001_d3bb02af4886/docs/bugfix/bugfix_QUICK_REFERENCE.md +0 -142
- package/plan/001_d3bb02af4886/docs/bugfix/bugfix_README.md +0 -304
- package/plan/001_d3bb02af4886/docs/bugfix/bugfix_TEST_RESULTS.md +0 -558
- package/plan/001_d3bb02af4886/docs/bugfix/bugfix_VALIDATION_SUMMARY.md +0 -256
- package/plan/001_d3bb02af4886/docs/bugfix/system_context.md +0 -346
- package/plan/001_d3bb02af4886/docs/bugfix-architecture/bug_analysis.md +0 -415
- package/plan/001_d3bb02af4886/docs/bugfix-architecture/implementation_patterns.md +0 -489
- package/plan/001_d3bb02af4886/docs/bugfix-architecture/system_context.md +0 -218
- package/plan/001_d3bb02af4886/docs/bugfix_INITIATION_SUMMARY.md +0 -380
- package/plan/001_d3bb02af4886/docs/research/CYCLE_DETECTION_PATTERNS.md +0 -1923
- package/plan/001_d3bb02af4886/docs/research/CYCLE_DETECTION_QUICK_REF.md +0 -319
- package/plan/001_d3bb02af4886/docs/research/P1M1T2S1/codebase-context.md +0 -115
- package/plan/001_d3bb02af4886/docs/research/P1M1T2S1/cycle-detection-algorithms.md +0 -134
- package/plan/001_d3bb02af4886/docs/research/P1M1T2S1/test-patterns.md +0 -153
- package/plan/001_d3bb02af4886/docs/research/P1M1T2S1/workflow-class.md +0 -132
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/DECORATOR_DOCUMENTATION_BEST_PRACTICES.md +0 -716
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/DECORATOR_DOCUMENTATION_QUICK_REF.md +0 -186
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/GROUNDSWELL_DECORATOR_EXAMPLES.md +0 -604
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/INDEX.md +0 -213
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/codebase_structure.md +0 -30
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/existing_test_pattern.md +0 -56
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/getRootObservers_implementation.md +0 -53
- package/plan/001_d3bb02af4886/docs/research/P1M2T1S4/test_conventions.md +0 -49
- package/plan/001_d3bb02af4886/docs/research/P1M3T1S4/PRP.md +0 -958
- package/plan/001_d3bb02af4886/docs/research/P1M3T1S4/QUICK_REFERENCE.md +0 -339
- package/plan/001_d3bb02af4886/docs/research/P1M3T1S4/README.md +0 -305
- package/plan/001_d3bb02af4886/docs/research/P1M3T1S4/SUMMARY.md +0 -433
- package/plan/001_d3bb02af4886/docs/research/P1M3T1S4/bidirectional-tree-consistency-testing.md +0 -1574
- package/plan/001_d3bb02af4886/docs/research/P1M3T1S4/test-pattern-examples.md +0 -1014
- package/plan/001_d3bb02af4886/docs/research/P1P2/LRU_CACHE_BEST_PRACTICES.md +0 -1929
- package/plan/001_d3bb02af4886/docs/research/P1P2/LRU_CACHE_CODE_PATTERNS.md +0 -857
- package/plan/001_d3bb02af4886/docs/research/P1P2/LRU_CACHE_INTEGRATION_GUIDE.md +0 -738
- package/plan/001_d3bb02af4886/docs/research/P1P2/LRU_CACHE_RESEARCH_INDEX.md +0 -424
- package/plan/001_d3bb02af4886/docs/research/P1P2/REFLECTION_INDEX.md +0 -291
- package/plan/001_d3bb02af4886/docs/research/P1P2/REFLECTION_RESEARCH_REPORT.md +0 -1342
- package/plan/001_d3bb02af4886/docs/research/P1P2/RESEARCH_SUMMARY.md +0 -342
- package/plan/001_d3bb02af4886/docs/research/P1P2/anthropic-sdk.md +0 -174
- package/plan/001_d3bb02af4886/docs/research/P1P2/async-local-storage.md +0 -200
- package/plan/001_d3bb02af4886/docs/research/P1P2/reflection-code-patterns.md +0 -1205
- package/plan/001_d3bb02af4886/docs/research/P1P2/reflection-decision-matrix.md +0 -421
- package/plan/001_d3bb02af4886/docs/research/P1P2/reflection-implementation-guide.md +0 -1341
- package/plan/001_d3bb02af4886/docs/research/P1P2/reflection-integration-guide.md +0 -834
- package/plan/001_d3bb02af4886/docs/research/P1P2/reflection-patterns.md +0 -1468
- package/plan/001_d3bb02af4886/docs/research/P1P2/reflection-quick-reference.md +0 -558
- package/plan/001_d3bb02af4886/docs/research/P1P2/zod-schema.md +0 -152
- package/plan/001_d3bb02af4886/docs/research/P3P4/caching-lru.md +0 -116
- package/plan/001_d3bb02af4886/docs/research/P3P4/introspection-tools.md +0 -177
- package/plan/001_d3bb02af4886/docs/research/P3P4/reflection-patterns.md +0 -117
- package/plan/001_d3bb02af4886/docs/research/P4P5/RESEARCH_SUMMARY.md +0 -151
- package/plan/001_d3bb02af4886/docs/research/PROMISE_ALLSETTLED_QUICK_REF.md +0 -376
- package/plan/001_d3bb02af4886/docs/research/PROMISE_ALLSETTLED_RESEARCH.md +0 -1507
- package/plan/001_d3bb02af4886/docs/research/bugfix_typescript_patterns.md +0 -949
- package/plan/001_d3bb02af4886/docs/research/error-testing-research.md +0 -619
- package/plan/001_d3bb02af4886/docs/research/error_handling_patterns.md +0 -723
- package/plan/001_d3bb02af4886/docs/research/general/INTROSPECTION_RESEARCH_SUMMARY.md +0 -378
- package/plan/001_d3bb02af4886/docs/research/general/README-INTROSPECTION.md +0 -352
- package/plan/001_d3bb02af4886/docs/research/general/agent-introspection-patterns.md +0 -1085
- package/plan/001_d3bb02af4886/docs/research/general/introspection-security-guide.md +0 -984
- package/plan/001_d3bb02af4886/docs/research/general/introspection-tool-examples.md +0 -875
- package/plan/001_d3bb02af4886/docs/research/incremental-tree-map-updates/PRP_TEMPLATE.md +0 -460
- package/plan/001_d3bb02af4886/docs/research/incremental-tree-map-updates/QUICK_REFERENCE.md +0 -324
- package/plan/001_d3bb02af4886/docs/research/incremental-tree-map-updates/README.md +0 -175
- package/plan/001_d3bb02af4886/docs/research/incremental-tree-map-updates/RESEARCH_REPORT.md +0 -499
- package/plan/001_d3bb02af4886/docs/research/incremental-tree-map-updates/SUMMARY.md +0 -163
- package/plan/001_d3bb02af4886/prd_snapshot.md +0 -543
- package/plan/bugfix/BUG_FIX_SUMMARY.md +0 -961
- package/scripts/generate-llms-full.ts +0 -206
- package/src/__tests__/adversarial/attachChild-performance.test.ts +0 -216
- package/src/__tests__/adversarial/circular-reference.test.ts +0 -101
- package/src/__tests__/adversarial/complex-circular-reference.test.ts +0 -139
- package/src/__tests__/adversarial/concurrent-task-failures.test.ts +0 -571
- package/src/__tests__/adversarial/deep-analysis.test.ts +0 -729
- package/src/__tests__/adversarial/deep-hierarchy-stress.test.ts +0 -213
- package/src/__tests__/adversarial/e2e-prd-validation.test.ts +0 -448
- package/src/__tests__/adversarial/edge-case.test.ts +0 -703
- package/src/__tests__/adversarial/error-merge-strategy.test.ts +0 -760
- package/src/__tests__/adversarial/incremental-performance.test.ts +0 -140
- package/src/__tests__/adversarial/node-map-update-benchmarks.test.ts +0 -457
- package/src/__tests__/adversarial/observer-propagation.test.ts +0 -487
- package/src/__tests__/adversarial/parent-validation.test.ts +0 -143
- package/src/__tests__/adversarial/prd-12-2-compliance.test.ts +0 -611
- package/src/__tests__/adversarial/prd-compliance.test.ts +0 -731
- package/src/__tests__/compatibility/backward-compatibility.test.ts +0 -1572
- package/src/__tests__/helpers/index.ts +0 -18
- package/src/__tests__/helpers/tree-verification.ts +0 -257
- package/src/__tests__/integration/agent-workflow.test.ts +0 -256
- package/src/__tests__/integration/bidirectional-consistency.test.ts +0 -847
- package/src/__tests__/integration/observer-logging.test.ts +0 -643
- package/src/__tests__/integration/tree-mirroring.test.ts +0 -151
- package/src/__tests__/integration/workflow-reparenting.test.ts +0 -303
- package/src/__tests__/unit/agent.test.ts +0 -169
- package/src/__tests__/unit/cache-key.test.ts +0 -182
- package/src/__tests__/unit/cache.test.ts +0 -172
- package/src/__tests__/unit/context.test.ts +0 -217
- package/src/__tests__/unit/decorators.test.ts +0 -100
- package/src/__tests__/unit/introspection-tools.test.ts +0 -277
- package/src/__tests__/unit/logger.test.ts +0 -293
- package/src/__tests__/unit/observable.test.ts +0 -321
- package/src/__tests__/unit/prompt.test.ts +0 -135
- package/src/__tests__/unit/reflection.test.ts +0 -210
- package/src/__tests__/unit/tree-debugger-incremental.test.ts +0 -170
- package/src/__tests__/unit/tree-debugger.test.ts +0 -85
- package/src/__tests__/unit/utils/workflow-error-utils.test.ts +0 -209
- package/src/__tests__/unit/workflow-detachChild.test.ts +0 -100
- package/src/__tests__/unit/workflow-emitEvent-childDetached.test.ts +0 -153
- package/src/__tests__/unit/workflow-isDescendantOf.test.ts +0 -180
- package/src/__tests__/unit/workflow.test.ts +0 -357
- package/src/cache/cache-key.ts +0 -244
- package/src/cache/cache.ts +0 -236
- package/src/core/agent.ts +0 -593
- package/src/core/event-tree.ts +0 -260
- package/src/core/logger.ts +0 -112
- package/src/core/mcp-handler.ts +0 -184
- package/src/core/prompt.ts +0 -150
- package/src/core/workflow-context.ts +0 -351
- package/src/core/workflow.ts +0 -540
- package/src/debugger/tree-debugger.ts +0 -255
- package/src/decorators/observed-state.ts +0 -95
- package/src/decorators/step.ts +0 -139
- package/src/decorators/task.ts +0 -159
- package/src/examples/tdd-orchestrator.ts +0 -65
- package/src/examples/test-cycle-workflow.ts +0 -64
- package/src/index.ts +0 -142
- package/src/reflection/reflection.ts +0 -407
- package/src/tools/index.ts +0 -36
- package/src/tools/introspection.ts +0 -464
- package/src/types/agent.ts +0 -90
- package/src/types/decorators.ts +0 -32
- package/src/types/error-strategy.ts +0 -13
- package/src/types/error.ts +0 -20
- package/src/types/events.ts +0 -75
- package/src/types/index.ts +0 -55
- package/src/types/logging.ts +0 -24
- package/src/types/observer.ts +0 -18
- package/src/types/prompt.ts +0 -40
- package/src/types/reflection.ts +0 -117
- package/src/types/sdk-primitives.ts +0 -128
- package/src/types/workflow-context.ts +0 -163
- package/src/types/workflow.ts +0 -37
- package/src/utils/id.ts +0 -11
- package/src/utils/index.ts +0 -4
- package/src/utils/observable.ts +0 -106
- package/src/utils/workflow-error-utils.ts +0 -56
- package/tsconfig.json +0 -22
- package/vitest.config.ts +0 -16
|
@@ -1,1929 +0,0 @@
|
|
|
1
|
-
# LRU Cache Best Practices for LLM Response Caching in TypeScript/Node.js
|
|
2
|
-
|
|
3
|
-
**Research Date:** 2025-12-08
|
|
4
|
-
**Focus:** Production-grade LLM response caching with `lru-cache` v10+, deterministic key generation, and schema hashing
|
|
5
|
-
|
|
6
|
-
---
|
|
7
|
-
|
|
8
|
-
## Table of Contents
|
|
9
|
-
|
|
10
|
-
1. [Executive Summary](#executive-summary)
|
|
11
|
-
2. [1. lru-cache Package v10+ Deep Dive](#1-lru-cache-package-v10-deep-dive)
|
|
12
|
-
3. [2. Deterministic JSON Stringification](#2-deterministic-json-stringification)
|
|
13
|
-
4. [3. SHA-256 Hashing in Node.js](#3-sha-256-hashing-in-nodejs)
|
|
14
|
-
5. [4. Zod Schema Hashing Patterns](#4-zod-schema-hashing-patterns)
|
|
15
|
-
6. [5. LLM Response Caching Architecture](#5-llm-response-caching-architecture)
|
|
16
|
-
7. [6. Common Pitfalls and Solutions](#6-common-pitfalls-and-solutions)
|
|
17
|
-
8. [7. Performance Benchmarks](#7-performance-benchmarks)
|
|
18
|
-
9. [8. Complete Implementation Example](#8-complete-implementation-example)
|
|
19
|
-
10. [9. Version Recommendations](#9-version-recommendations)
|
|
20
|
-
|
|
21
|
-
---
|
|
22
|
-
|
|
23
|
-
## Executive Summary
|
|
24
|
-
|
|
25
|
-
LRU (Least Recently Used) caching is essential for reducing LLM API costs and latency. The JavaScript ecosystem provides excellent tools for implementing deterministic cache keys and efficient in-memory caching:
|
|
26
|
-
|
|
27
|
-
- **`lru-cache` v10+**: Most performant LRU implementation with zero runtime dependencies
|
|
28
|
-
- **Deterministic stringification**: Required for reproducible cache keys across restarts
|
|
29
|
-
- **SHA-256 hashing**: Node.js built-in crypto module provides excellent performance
|
|
30
|
-
- **Semantic vs. Exact caching**: Best practices support both strategies for maximum hit rates
|
|
31
|
-
|
|
32
|
-
**Key Finding:** A well-configured LRU cache combined with semantic caching can achieve 60-70% hit rates with ~97% response accuracy at a 0.8 similarity threshold.
|
|
33
|
-
|
|
34
|
-
---
|
|
35
|
-
|
|
36
|
-
## 1. lru-cache Package v10+ Deep Dive
|
|
37
|
-
|
|
38
|
-
### 1.1 Package Overview
|
|
39
|
-
|
|
40
|
-
**Repository:** [https://www.npmjs.com/package/lru-cache](https://www.npmjs.com/package/lru-cache)
|
|
41
|
-
**Latest Version:** v10+ (v11.2.2 available at time of research)
|
|
42
|
-
**Architecture:** Rewritten in TypeScript v7+, built-in types, hybrid ES6/CJS support
|
|
43
|
-
|
|
44
|
-
### 1.2 Core Concept
|
|
45
|
-
|
|
46
|
-
LRU eviction policy: When the cache exceeds capacity, the least recently accessed item is removed. This balances memory usage with cache performance.
|
|
47
|
-
|
|
48
|
-
```typescript
|
|
49
|
-
// The fundamental principle
|
|
50
|
-
// If you put more stuff in the cache, then less recently used items fall out.
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
### 1.3 Configuration Options
|
|
54
|
-
|
|
55
|
-
At least **one** of `max`, `ttl`, or `maxSize` is **required** to prevent unbounded growth.
|
|
56
|
-
|
|
57
|
-
#### Option: `max` (Maximum Item Count)
|
|
58
|
-
|
|
59
|
-
```typescript
|
|
60
|
-
import { LRUCache } from 'lru-cache';
|
|
61
|
-
|
|
62
|
-
// Pre-allocates storage for best performance
|
|
63
|
-
const cache = new LRUCache<string, any>({
|
|
64
|
-
max: 100 // Store maximum 100 items
|
|
65
|
-
});
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
**Characteristics:**
|
|
69
|
-
- Pre-allocates storage at construction time
|
|
70
|
-
- Significant performance benefit vs. no limit
|
|
71
|
-
- Read-only after creation (cannot be changed)
|
|
72
|
-
- Best for predictable workloads
|
|
73
|
-
|
|
74
|
-
#### Option: `maxSize` (Memory Limit)
|
|
75
|
-
|
|
76
|
-
```typescript
|
|
77
|
-
const cache = new LRUCache<string, string>({
|
|
78
|
-
maxSize: 50 * 1024 * 1024, // 50 MB limit
|
|
79
|
-
sizeCalculation: (value, key) => {
|
|
80
|
-
// MUST implement for maxSize option
|
|
81
|
-
return JSON.stringify(value).length;
|
|
82
|
-
},
|
|
83
|
-
updateAgeOnGet: true // Refresh TTL on access (optional)
|
|
84
|
-
});
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
**Characteristics:**
|
|
88
|
-
- No pre-allocation (slight performance cost)
|
|
89
|
-
- Requires `sizeCalculation` function
|
|
90
|
-
- Size must be a positive integer for each item
|
|
91
|
-
- Better for variable-sized responses
|
|
92
|
-
|
|
93
|
-
#### Option: `ttl` (Time-To-Live)
|
|
94
|
-
|
|
95
|
-
```typescript
|
|
96
|
-
const cache = new LRUCache<string, any>({
|
|
97
|
-
ttl: 1000 * 60 * 60, // 1 hour in milliseconds
|
|
98
|
-
max: 100
|
|
99
|
-
});
|
|
100
|
-
|
|
101
|
-
// Override per-item
|
|
102
|
-
cache.set(key, value, { ttl: 1000 * 60 * 5 }); // 5 minutes
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
**Characteristics:**
|
|
106
|
-
- TTL is not the library's primary use case: expired items are not preemptively pruned
|
|
107
|
-
- Items treated as missing when stale
|
|
108
|
-
- Deleted on fetch if expired
|
|
109
|
-
- Can be set per-item in `set()`
|
|
110
|
-
|
|
111
|
-
#### Option: `updateAgeOnGet`
|
|
112
|
-
|
|
113
|
-
```typescript
|
|
114
|
-
const cache = new LRUCache<string, any>({
|
|
115
|
-
max: 100,
|
|
116
|
-
ttl: 3600000,
|
|
117
|
-
updateAgeOnGet: true // Reset TTL on every access
|
|
118
|
-
});
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
**Use Cases:**
|
|
122
|
-
- Sessions/tokens that should stay fresh while actively used
|
|
123
|
-
- LLM responses used frequently (extend lifetime)
|
|
124
|
-
- Rate limiting scenarios
|
|
125
|
-
|
|
126
|
-
### 1.4 The `fetch()` Method (Most Important for LLM Caching)
|
|
127
|
-
|
|
128
|
-
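The `fetch()` method is specifically designed for lazy-load caching patterns - perfect for LLM responses. In `lru-cache` the loader is registered once as the `fetchMethod` constructor option and invoked via `cache.fetch(key, options)`; the inline-loader form below is a simplified sketch of that flow.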
|
|
129
|
-
|
|
130
|
-
```typescript
|
|
131
|
-
const cache = new LRUCache<string, string>({
|
|
132
|
-
max: 1000,
|
|
133
|
-
ttl: 3600000,
|
|
134
|
-
maxSize: 50 * 1024 * 1024, sizeCalculation: (value) => value.length // sizeCalculation requires maxSize (or maxEntrySize)
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
// Lazy-load pattern - data is fetched only if not cached
|
|
138
|
-
const response = await cache.fetch(
|
|
139
|
-
cacheKey,
|
|
140
|
-
async () => {
|
|
141
|
-
// This function only runs on cache miss
|
|
142
|
-
const response = await llmProvider.query(prompt);
|
|
143
|
-
return response.content;
|
|
144
|
-
},
|
|
145
|
-
{
|
|
146
|
-
ttl: 3600000, // Optional per-fetch TTL override
|
|
147
|
-
allowStale: true, // Return stale value if fetch in progress
|
|
148
|
-
forceRefresh: false // Serve the cached value when present (true would always re-fetch)
|
|
149
|
-
}
|
|
150
|
-
);
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
**Key Features:**
|
|
154
|
-
- Automatic cache miss handling
|
|
155
|
-
- Only calls generator function on miss
|
|
156
|
-
- Promise-based for async operations
|
|
157
|
-
- Deduplicates concurrent requests for same key
|
|
158
|
-
- Supports staleness and refresh options
|
|
159
|
-
|
|
160
|
-
### 1.5 Size Calculation Best Practices
|
|
161
|
-
|
|
162
|
-
```typescript
|
|
163
|
-
// ❌ BAD: Incorrect size calculation
|
|
164
|
-
const badCache = new LRUCache({
|
|
165
|
-
maxSize: 10 * 1024 * 1024, // 10 MB
|
|
166
|
-
sizeCalculation: (value) => 1 // Always returns 1 - defeats purpose!
|
|
167
|
-
});
|
|
168
|
-
|
|
169
|
-
// ✅ GOOD: Account for actual memory usage
|
|
170
|
-
const goodCache = new LRUCache<string, LLMResponse>({
|
|
171
|
-
maxSize: 10 * 1024 * 1024,
|
|
172
|
-
sizeCalculation: (value, key) => {
|
|
173
|
-
// Key + value serialization
|
|
174
|
-
const keySize = Buffer.byteLength(key, 'utf8');
|
|
175
|
-
const valueSize = Buffer.byteLength(JSON.stringify(value), 'utf8');
|
|
176
|
-
return keySize + valueSize + 64; // +64 for object overhead
|
|
177
|
-
}
|
|
178
|
-
});
|
|
179
|
-
|
|
180
|
-
// ✅ BEST: Use actual serialized size
|
|
181
|
-
function calculateResponseSize(response: LLMResponse): number {
|
|
182
|
-
const serialized = JSON.stringify(response);
|
|
183
|
-
// Account for: JSON string + key + object references (~64 bytes overhead)
|
|
184
|
-
return Buffer.byteLength(serialized, 'utf8') + 100;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
const cache = new LRUCache<string, LLMResponse>({
|
|
188
|
-
maxSize: 50 * 1024 * 1024, // 50 MB
|
|
189
|
-
sizeCalculation: (value) => calculateResponseSize(value)
|
|
190
|
-
});
|
|
191
|
-
```
|
|
192
|
-
|
|
193
|
-
**Performance Notes:**
|
|
194
|
-
- Size calculation runs on every `set()` that does not pass an explicit `size` (it is not invoked by `has()` or `get()`)
|
|
195
|
-
- Keep calculation fast (avoid deep serialization per call)
|
|
196
|
-
- Use estimates for performance-critical paths
|
|
197
|
-
- Node.js `Buffer.byteLength()` is accurate for UTF-8
|
|
198
|
-
|
|
199
|
-
### 1.6 Configuration Recommendations for LLM Caching
|
|
200
|
-
|
|
201
|
-
```typescript
|
|
202
|
-
// Production LLM Response Cache
|
|
203
|
-
const llmCache = new LRUCache<string, LLMResponse>({
|
|
204
|
-
// Store up to 5000 responses, or until maxSize is reached (whichever comes first)
|
|
205
|
-
max: 5000,
|
|
206
|
-
|
|
207
|
-
// Maximum memory: 500 MB (typical for production)
|
|
208
|
-
maxSize: 500 * 1024 * 1024,
|
|
209
|
-
|
|
210
|
-
// Responses expire after 24 hours
|
|
211
|
-
ttl: 1000 * 60 * 60 * 24,
|
|
212
|
-
|
|
213
|
-
// Size calculation for LLM responses
|
|
214
|
-
sizeCalculation: (response) => {
|
|
215
|
-
const size = Buffer.byteLength(JSON.stringify(response), 'utf8');
|
|
216
|
-
return size + 100; // Buffer overhead
|
|
217
|
-
},
|
|
218
|
-
|
|
219
|
-
// Refresh TTL on access (keep hot responses fresh)
|
|
220
|
-
updateAgeOnGet: true,
|
|
221
|
-
|
|
222
|
-
// Never serve stale (expired) responses; expired entries are treated as misses
|
|
223
|
-
allowStale: false
|
|
224
|
-
});
|
|
225
|
-
```
|
|
226
|
-
|
|
227
|
-
---
|
|
228
|
-
|
|
229
|
-
## 2. Deterministic JSON Stringification
|
|
230
|
-
|
|
231
|
-
### 2.1 The Problem: Non-Deterministic Output
|
|
232
|
-
|
|
233
|
-
JavaScript's native `JSON.stringify()` emits keys in property insertion order, so it provides **no guarantees** that logically equal objects serialize to the same string:
|
|
234
|
-
|
|
235
|
-
```typescript
|
|
236
|
-
const obj1 = { a: 1, b: 2, c: 3 };
|
|
237
|
-
const obj2 = { c: 3, b: 2, a: 1 };
|
|
238
|
-
|
|
239
|
-
JSON.stringify(obj1); // {"a":1,"b":2,"c":3}
|
|
240
|
-
JSON.stringify(obj2); // {"c":3,"b":2,"a":1}
|
|
241
|
-
|
|
242
|
-
// Same logical object, different strings!
|
|
243
|
-
// Cache keys would NOT match!
|
|
244
|
-
```
|
|
245
|
-
|
|
246
|
-
**Impact for LLM Caching:**
|
|
247
|
-
- Identical prompts with reordered properties miss cache
|
|
248
|
-
- 20-40% reduction in effective cache hit rate
|
|
249
|
-
- Wasted API calls and increased latency
|
|
250
|
-
|
|
251
|
-
### 2.2 Solution 1: `json-stringify-deterministic`
|
|
252
|
-
|
|
253
|
-
**Package:** [json-stringify-deterministic](https://www.npmjs.com/package/json-stringify-deterministic)
|
|
254
|
-
**NPM:** `npm install json-stringify-deterministic`
|
|
255
|
-
|
|
256
|
-
```typescript
|
|
257
|
-
import stringify from 'json-stringify-deterministic';
|
|
258
|
-
|
|
259
|
-
const obj1 = { c: 3, a: 1, b: 2 };
|
|
260
|
-
const obj2 = { a: 1, b: 2, c: 3 };
|
|
261
|
-
|
|
262
|
-
stringify(obj1); // {"a":1,"b":2,"c":3}
|
|
263
|
-
stringify(obj2); // {"a":1,"b":2,"c":3} ✅ IDENTICAL
|
|
264
|
-
|
|
265
|
-
// Perfect for LLM prompt objects
|
|
266
|
-
const prompt = {
|
|
267
|
-
messages: [...],
|
|
268
|
-
model: 'gpt-4',
|
|
269
|
-
temperature: 0.7,
|
|
270
|
-
systemPrompt: '...'
|
|
271
|
-
};
|
|
272
|
-
|
|
273
|
-
const cacheKey = stringify(prompt); // Always same output
|
|
274
|
-
```
|
|
275
|
-
|
|
276
|
-
**Features:**
|
|
277
|
-
- Alphabetically sorts object keys
|
|
278
|
-
- TypeScript declarations included
|
|
279
|
-
- Handles nested objects and arrays
|
|
280
|
-
- Circular reference handling with `cycles: true` option
|
|
281
|
-
|
|
282
|
-
**Configuration:**
|
|
283
|
-
|
|
284
|
-
```typescript
|
|
285
|
-
// Handle circular references (e.g., self-referencing objects)
|
|
286
|
-
const config = {
|
|
287
|
-
cycles: true // Marks circular refs as [Circular] instead of throwing
|
|
288
|
-
};
|
|
289
|
-
|
|
290
|
-
const str = stringify(circularObj, config);
|
|
291
|
-
```
|
|
292
|
-
|
|
293
|
-
### 2.3 Solution 2: `fast-json-stable-stringify` (Performance Alternative)
|
|
294
|
-
|
|
295
|
-
**Package:** [fast-json-stable-stringify](https://github.com/epoberezkin/fast-json-stable-stringify)
|
|
296
|
-
**NPM:** `npm install fast-json-stable-stringify`
|
|
297
|
-
|
|
298
|
-
```typescript
|
|
299
|
-
import stringify from 'fast-json-stable-stringify';
|
|
300
|
-
|
|
301
|
-
const obj = { c: 8, b: [{ z: 6, y: 5, x: 4 }, 7], a: 3 };
|
|
302
|
-
console.log(stringify(obj));
|
|
303
|
-
// Output: {"a":3,"b":[{"x":4,"y":5,"z":6},7],"c":8}
|
|
304
|
-
```
|
|
305
|
-
|
|
306
|
-
**Performance Benchmark:**
|
|
307
|
-
- `fast-json-stable-stringify`: ~17,189 ops/sec
|
|
308
|
-
- `json-stable-stringify`: ~13,634 ops/sec
|
|
309
|
-
- **~26% faster** than the original `json-stable-stringify` (17,189 vs. 13,634 ops/sec)
|
|
310
|
-
|
|
311
|
-
**Custom Comparison Function:**
|
|
312
|
-
|
|
313
|
-
```typescript
|
|
314
|
-
// Sort by value instead of key
|
|
315
|
-
const customSort = (a: any, b: any) => {
|
|
316
|
-
if (a.value < b.value) return -1;
|
|
317
|
-
if (a.value > b.value) return 1;
|
|
318
|
-
return 0;
|
|
319
|
-
};
|
|
320
|
-
|
|
321
|
-
const str = stringify(obj, { cmp: customSort });
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
### 2.4 Solution 3: `safe-stable-stringify` (Circular Reference Safety)
|
|
325
|
-
|
|
326
|
-
**Package:** [safe-stable-stringify](https://www.npmjs.com/package/safe-stable-stringify)
|
|
327
|
-
**NPM:** `npm install safe-stable-stringify`
|
|
328
|
-
|
|
329
|
-
```typescript
|
|
330
|
-
import safeStringify from 'safe-stable-stringify';
|
|
331
|
-
|
|
332
|
-
// Handles circular references gracefully
|
|
333
|
-
const circularObj = { a: 1 };
|
|
334
|
-
circularObj.self = circularObj; // Circular reference
|
|
335
|
-
|
|
336
|
-
const str = safeStringify(circularObj);
|
|
337
|
-
// ✅ Doesn't throw, gracefully handles it
|
|
338
|
-
|
|
339
|
-
// Also handles BigInt and TypedArrays
|
|
340
|
-
const obj = {
|
|
341
|
-
num: 42,
|
|
342
|
-
bigint: BigInt(999999999999),
|
|
343
|
-
typed: new Uint8Array([1, 2, 3])
|
|
344
|
-
};
|
|
345
|
-
|
|
346
|
-
const str = safeStringify(obj);
|
|
347
|
-
```
|
|
348
|
-
|
|
349
|
-
**Characteristics:**
|
|
350
|
-
- Zero dependencies
|
|
351
|
-
- Fastest stable stringify implementation
|
|
352
|
-
- Graceful handling of problematic types
|
|
353
|
-
- ESM and CJS support
|
|
354
|
-
|
|
355
|
-
### 2.5 Recommendation for LLM Caching
|
|
356
|
-
|
|
357
|
-
```typescript
|
|
358
|
-
// Use `safe-stable-stringify` for production
|
|
359
|
-
import safeStringify from 'safe-stable-stringify';
|
|
360
|
-
|
|
361
|
-
interface LLMPromptInput {
|
|
362
|
-
messages: Array<{ role: string; content: string }>;
|
|
363
|
-
model: string;
|
|
364
|
-
temperature?: number;
|
|
365
|
-
systemPrompt?: string;
|
|
366
|
-
tools?: Tool[];
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
function generateCacheKey(input: LLMPromptInput): string {
|
|
370
|
-
const normalized = {
|
|
371
|
-
model: input.model,
|
|
372
|
-
temperature: input.temperature ?? 0.7,
|
|
373
|
-
systemPrompt: input.systemPrompt ?? '',
|
|
374
|
-
messages: input.messages,
|
|
375
|
-
tools: input.tools ?? []
|
|
376
|
-
};
|
|
377
|
-
|
|
378
|
-
return safeStringify(normalized);
|
|
379
|
-
}
|
|
380
|
-
|
|
381
|
-
// Usage
|
|
382
|
-
const key1 = generateCacheKey({
|
|
383
|
-
messages: [{ role: 'user', content: 'hello' }],
|
|
384
|
-
model: 'gpt-4',
|
|
385
|
-
temperature: 0.7
|
|
386
|
-
});
|
|
387
|
-
|
|
388
|
-
const key2 = generateCacheKey({
|
|
389
|
-
temperature: 0.7,
|
|
390
|
-
model: 'gpt-4',
|
|
391
|
-
messages: [{ role: 'user', content: 'hello' }]
|
|
392
|
-
});
|
|
393
|
-
|
|
394
|
-
console.log(key1 === key2); // ✅ true (same logical input)
|
|
395
|
-
```
|
|
396
|
-
|
|
397
|
-
**Comparison Table:**
|
|
398
|
-
|
|
399
|
-
| Package | Performance | Circular Refs | TypeScript | BigInt | Recommendation |
|
|
400
|
-
|---------|-------------|---------------|-----------|--------|-----------------|
|
|
401
|
-
| json-stringify-deterministic | Good | Yes | Yes | No | Basic use |
|
|
402
|
-
| fast-json-stable-stringify | Fast | No | No | No | Performance-critical |
|
|
403
|
-
| safe-stable-stringify | **Fastest** | Yes | No | **Yes** | **Production LLM** |
|
|
404
|
-
|
|
405
|
-
---
|
|
406
|
-
|
|
407
|
-
## 3. SHA-256 Hashing in Node.js
|
|
408
|
-
|
|
409
|
-
### 3.1 Overview
|
|
410
|
-
|
|
411
|
-
Node.js provides built-in SHA-256 hashing via the `node:crypto` module. This is typically **1.5-3x faster** than JavaScript implementations due to OpenSSL bindings.
|
|
412
|
-
|
|
413
|
-
```typescript
|
|
414
|
-
import { createHash } from 'node:crypto';
|
|
415
|
-
|
|
416
|
-
const hash = createHash('sha256');
|
|
417
|
-
hash.update('some data');
|
|
418
|
-
const digest = hash.digest('hex'); // 64-character hex string
|
|
419
|
-
console.log(digest);
|
|
420
|
-
// 64 hex characters (for reference, sha256('123') is a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3)
|
|
421
|
-
```
|
|
422
|
-
|
|
423
|
-
### 3.2 Basic SHA-256 Hashing Pattern
|
|
424
|
-
|
|
425
|
-
```typescript
|
|
426
|
-
import { createHash } from 'node:crypto';
|
|
427
|
-
import safeStringify from 'safe-stable-stringify';
|
|
428
|
-
|
|
429
|
-
interface HashableInput {
|
|
430
|
-
text: string;
|
|
431
|
-
model?: string;
|
|
432
|
-
params?: Record<string, any>;
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
function hashInput(input: HashableInput): string {
|
|
436
|
-
// 1. Normalize to deterministic string
|
|
437
|
-
const normalized = safeStringify(input);
|
|
438
|
-
|
|
439
|
-
// 2. Create hash
|
|
440
|
-
const hash = createHash('sha256');
|
|
441
|
-
hash.update(normalized, 'utf8');
|
|
442
|
-
|
|
443
|
-
// 3. Get hex digest (64 chars, 256 bits)
|
|
444
|
-
return hash.digest('hex');
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
// Usage
|
|
448
|
-
const prompt = {
|
|
449
|
-
text: 'Explain quantum computing',
|
|
450
|
-
model: 'gpt-4',
|
|
451
|
-
params: { temperature: 0.7 }
|
|
452
|
-
};
|
|
453
|
-
|
|
454
|
-
const cacheKey = hashInput(prompt);
|
|
455
|
-
// 3f4a8c7... (64-character SHA-256)
|
|
456
|
-
```
|
|
457
|
-
|
|
458
|
-
### 3.3 Performance Considerations
|
|
459
|
-
|
|
460
|
-
**Throughput:**
|
|
461
|
-
- SHA-256 on Node.js: Excellent (uses OpenSSL)
|
|
462
|
-
- Modern CPUs with SHA extensions: ~5-10 GB/s
|
|
463
|
-
- JavaScript pure implementation: Much slower, avoid
|
|
464
|
-
|
|
465
|
-
**When to Hash vs. When to Use Raw Strings:**
|
|
466
|
-
|
|
467
|
-
```typescript
|
|
468
|
-
// ✅ Use raw string for small inputs (< 100 chars)
|
|
469
|
-
const key = `${model}:${temperature}:${prompt}`; // use the full prompt: truncating it would make distinct prompts collide
|
|
470
|
-
|
|
471
|
-
// ✅ Use SHA-256 for large inputs or security
|
|
472
|
-
const largePrompt = `System: ${systemPrompt}\n\nUser: ${userPrompt}`;
|
|
473
|
-
const key = createHash('sha256').update(largePrompt).digest('hex');
|
|
474
|
-
|
|
475
|
-
// ✅ Use combined approach (prefix + hash)
|
|
476
|
-
const prefix = `${model}:v2:`;
|
|
477
|
-
const contentHash = createHash('sha256').update(largePrompt).digest('hex');
|
|
478
|
-
const key = `${prefix}${contentHash}`;
|
|
479
|
-
```
|
|
480
|
-
|
|
481
|
-
### 3.4 Stream-based Hashing for Large Data
|
|
482
|
-
|
|
483
|
-
For very large LLM responses (multi-MB), process in chunks:
|
|
484
|
-
|
|
485
|
-
```typescript
|
|
486
|
-
import { createHash } from 'node:crypto';
|
|
487
|
-
import { createReadStream } from 'node:fs';
|
|
488
|
-
|
|
489
|
-
async function hashLargeResponse(filePath: string): Promise<string> {
|
|
490
|
-
const hash = createHash('sha256');
|
|
491
|
-
const stream = createReadStream(filePath);
|
|
492
|
-
|
|
493
|
-
for await (const chunk of stream) {
|
|
494
|
-
hash.update(chunk);
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
return hash.digest('hex');
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
// For in-memory data, still update incrementally for large strings
|
|
501
|
-
function hashLargeString(data: string): string {
|
|
502
|
-
const hash = createHash('sha256');
|
|
503
|
-
const chunkSize = 65536; // 64 KB chunks
|
|
504
|
-
|
|
505
|
-
for (let i = 0; i < data.length; i += chunkSize) {
|
|
506
|
-
hash.update(data.slice(i, i + chunkSize), 'utf8');
|
|
507
|
-
}
|
|
508
|
-
|
|
509
|
-
return hash.digest('hex');
|
|
510
|
-
}
|
|
511
|
-
```
|
|
512
|
-
|
|
513
|
-
### 3.5 HMAC Pattern for Cache Validation
|
|
514
|
-
|
|
515
|
-
Use HMAC when you need to prevent cache tampering:
|
|
516
|
-
|
|
517
|
-
```typescript
|
|
518
|
-
import crypto, { createHmac } from 'node:crypto';
|
|
519
|
-
|
|
520
|
-
const CACHE_SECRET = process.env.CACHE_SIGNING_SECRET || 'dev-secret';
|
|
521
|
-
|
|
522
|
-
function signCacheKey(key: string): string {
|
|
523
|
-
const hmac = createHmac('sha256', CACHE_SECRET);
|
|
524
|
-
hmac.update(key);
|
|
525
|
-
return hmac.digest('hex');
|
|
526
|
-
}
|
|
527
|
-
|
|
528
|
-
function verifyCacheKey(key: string, signature: string): boolean {
|
|
529
|
-
const expected = signCacheKey(key);
|
|
530
|
-
// Constant-time comparison to prevent timing attacks
|
|
531
|
-
return crypto.timingSafeEqual(
|
|
532
|
-
Buffer.from(signature),
|
|
533
|
-
Buffer.from(expected)
|
|
534
|
-
);
|
|
535
|
-
}
|
|
536
|
-
|
|
537
|
-
// Usage
|
|
538
|
-
const key = 'model:gpt-4:...';
|
|
539
|
-
const sig = signCacheKey(key); // Store both key and sig
|
|
540
|
-
const valid = verifyCacheKey(key, sig);
|
|
541
|
-
```
|
|
542
|
-
|
|
543
|
-
### 3.6 Recommended Pattern for LLM Caching
|
|
544
|
-
|
|
545
|
-
```typescript
|
|
546
|
-
import { createHash } from 'node:crypto';
|
|
547
|
-
import safeStringify from 'safe-stable-stringify';
|
|
548
|
-
|
|
549
|
-
class LLMCacheKeyGenerator {
|
|
550
|
-
private version = 'v1';
|
|
551
|
-
|
|
552
|
-
generate(input: {
|
|
553
|
-
prompt: string;
|
|
554
|
-
model: string;
|
|
555
|
-
parameters: Record<string, any>;
|
|
556
|
-
}): string {
|
|
557
|
-
// Include version in hash to invalidate cache on schema changes
|
|
558
|
-
const normalized = safeStringify({
|
|
559
|
-
v: this.version,
|
|
560
|
-
...input
|
|
561
|
-
});
|
|
562
|
-
|
|
563
|
-
const hash = createHash('sha256');
|
|
564
|
-
hash.update(normalized);
|
|
565
|
-
return hash.digest('hex');
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
// For debugging: create readable key with metadata
|
|
569
|
-
generateDebug(input: any): { key: string; readable: string } {
|
|
570
|
-
const key = this.generate(input);
|
|
571
|
-
const readable = `${input.model}/${input.prompt.substring(0, 20)}/${key}`;
|
|
572
|
-
return { key, readable };
|
|
573
|
-
}
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
// Usage
|
|
577
|
-
const keyGen = new LLMCacheKeyGenerator();
|
|
578
|
-
const { key, readable } = keyGen.generateDebug({
|
|
579
|
-
prompt: 'What is AI?',
|
|
580
|
-
model: 'gpt-4-turbo',
|
|
581
|
-
parameters: { temperature: 0.7 }
|
|
582
|
-
});
|
|
583
|
-
|
|
584
|
-
console.log(readable);
|
|
585
|
-
// gpt-4-turbo/What is AI?/a1b2c3d4...
|
|
586
|
-
```
|
|
587
|
-
|
|
588
|
-
---
|
|
589
|
-
|
|
590
|
-
## 4. Zod Schema Hashing Patterns
|
|
591
|
-
|
|
592
|
-
### 4.1 The Challenge
|
|
593
|
-
|
|
594
|
-
Zod schemas are TypeScript objects with internal `_def` properties. Unlike plain data, there's **no built-in serialization**. The `_def` property is marked as private (underscore prefix) and may change between versions.
|
|
595
|
-
|
|
596
|
-
```typescript
|
|
597
|
-
import { z } from 'zod';
|
|
598
|
-
|
|
599
|
-
const schema = z.object({
|
|
600
|
-
name: z.string(),
|
|
601
|
-
age: z.number().min(0)
|
|
602
|
-
});
|
|
603
|
-
|
|
604
|
-
// ❌ Problem: Can't serialize _def directly
|
|
605
|
-
console.log(schema._def);
|
|
606
|
-
// ZodObjectDef { ... complex internal structure }
|
|
607
|
-
```
|
|
608
|
-
|
|
609
|
-
### 4.2 Why Hash Schemas?
|
|
610
|
-
|
|
611
|
-
```typescript
|
|
612
|
-
// Scenario: Cache is version-specific
|
|
613
|
-
// If schema changes, old cache entries are invalid
|
|
614
|
-
// Solution: Include schema fingerprint in cache key
|
|
615
|
-
|
|
616
|
-
const cacheKey = `${queryHash}:${schemaHash}`;
|
|
617
|
-
|
|
618
|
-
// When schema changes, different hash = different cache key
|
|
619
|
-
// Old entries naturally expire without manual invalidation
|
|
620
|
-
```
|
|
621
|
-
|
|
622
|
-
### 4.3 Manual Schema Fingerprinting
|
|
623
|
-
|
|
624
|
-
```typescript
|
|
625
|
-
import { createHash } from 'node:crypto';
|
|
626
|
-
import { z, ZodSchema, ZodTypeAny } from 'zod';
|
|
627
|
-
|
|
628
|
-
function schemaFingerprint(schema: ZodSchema): string {
|
|
629
|
-
const def = schema._def;
|
|
630
|
-
|
|
631
|
-
// Extract meaningful properties
|
|
632
|
-
const fingerprint = {
|
|
633
|
-
typeName: def.typeName,
|
|
634
|
-
|
|
635
|
-
// For objects
|
|
636
|
-
...(def.shape && {
|
|
637
|
-
shape: Object.entries(typeof def.shape === 'function' ? def.shape() : def.shape).map(([key, val]: [string, any]) => ({
|
|
638
|
-
key,
|
|
639
|
-
type: val._def?.typeName
|
|
640
|
-
}))
|
|
641
|
-
}),
|
|
642
|
-
|
|
643
|
-
// For arrays
|
|
644
|
-
...(def.type && {
|
|
645
|
-
elementType: def.type._def?.typeName
|
|
646
|
-
}),
|
|
647
|
-
|
|
648
|
-
// For enums
|
|
649
|
-
...(def.values && {
|
|
650
|
-
enumValues: def.values
|
|
651
|
-
}),
|
|
652
|
-
|
|
653
|
-
// Validation rules
|
|
654
|
-
...(def.checks && {
|
|
655
|
-
checks: def.checks.map((c: any) => ({
|
|
656
|
-
kind: c.kind,
|
|
657
|
-
value: typeof c.value === 'object' ? '[Object]' : c.value
|
|
658
|
-
}))
|
|
659
|
-
})
|
|
660
|
-
};
|
|
661
|
-
|
|
662
|
-
const str = JSON.stringify(fingerprint);
|
|
663
|
-
const hash = createHash('sha256');
|
|
664
|
-
hash.update(str);
|
|
665
|
-
return hash.digest('hex').substring(0, 8); // 8 chars for readability
|
|
666
|
-
}
|
|
667
|
-
|
|
668
|
-
// Usage
|
|
669
|
-
const userSchema = z.object({
|
|
670
|
-
email: z.string().email(),
|
|
671
|
-
age: z.number().int().min(0).max(150)
|
|
672
|
-
});
|
|
673
|
-
|
|
674
|
-
const hash1 = schemaFingerprint(userSchema);
|
|
675
|
-
|
|
676
|
-
// After schema change
|
|
677
|
-
const userSchemaV2 = z.object({
|
|
678
|
-
email: z.string().email(),
|
|
679
|
-
age: z.number().int().min(18).max(150) // Min changed
|
|
680
|
-
});
|
|
681
|
-
|
|
682
|
-
const hash2 = schemaFingerprint(userSchemaV2);
|
|
683
|
-
|
|
684
|
-
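console.log(hash1 === hash2); // ⚠️ true: only field type names are fingerprinted, so the min(0) → min(18) change is NOT detected (use the deep traversal in 4.4)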
|
|
685
|
-
```
|
|
686
|
-
|
|
687
|
-
### 4.4 Detecting Nested Schema Changes
|
|
688
|
-
|
|
689
|
-
```typescript
|
|
690
|
-
import { z, ZodTypeAny } from 'zod';
|
|
691
|
-
|
|
692
|
-
function schemaFingerprintDeep(schema: ZodTypeAny): string {
|
|
693
|
-
const components: string[] = [];
|
|
694
|
-
|
|
695
|
-
function traverse(s: ZodTypeAny, depth = 0): void {
|
|
696
|
-
const def = s._def;
|
|
697
|
-
|
|
698
|
-
// Prevent infinite recursion
|
|
699
|
-
if (depth > 10) return;
|
|
700
|
-
|
|
701
|
-
components.push(`${' '.repeat(depth)}${def.typeName}`);
|
|
702
|
-
|
|
703
|
-
// Object shapes
|
|
704
|
-
if (def.shape) {
|
|
705
|
-
for (const [key, fieldSchema] of Object.entries(typeof def.shape === 'function' ? def.shape() : def.shape)) {
|
|
706
|
-
components.push(`${' '.repeat(depth + 1)}${key}:`);
|
|
707
|
-
traverse(fieldSchema as ZodTypeAny, depth + 2);
|
|
708
|
-
}
|
|
709
|
-
}
|
|
710
|
-
|
|
711
|
-
// Array elements
|
|
712
|
-
if (def.type) {
|
|
713
|
-
components.push(`${' '.repeat(depth + 1)}[element]:`);
|
|
714
|
-
traverse(def.type, depth + 2);
|
|
715
|
-
}
|
|
716
|
-
|
|
717
|
-
// Union/intersection members
|
|
718
|
-
if (def.options) {
|
|
719
|
-
for (const option of def.options) {
|
|
720
|
-
traverse(option, depth + 1);
|
|
721
|
-
}
|
|
722
|
-
}
|
|
723
|
-
if (def.left) {
|
|
724
|
-
traverse(def.left, depth + 1);
|
|
725
|
-
traverse(def.right, depth + 1);
|
|
726
|
-
}
|
|
727
|
-
|
|
728
|
-
// Validation rules
|
|
729
|
-
if (def.checks) {
|
|
730
|
-
for (const check of def.checks) {
|
|
731
|
-
components.push(
|
|
732
|
-
`${' '.repeat(depth + 1)}check:${check.kind}(${JSON.stringify(check.value)})`
|
|
733
|
-
);
|
|
734
|
-
}
|
|
735
|
-
}
|
|
736
|
-
}
|
|
737
|
-
|
|
738
|
-
traverse(schema);
|
|
739
|
-
|
|
740
|
-
const str = components.join('\n');
|
|
741
|
-
const hash = createHash('sha256');
|
|
742
|
-
hash.update(str);
|
|
743
|
-
return hash.digest('hex');
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
// Example
|
|
747
|
-
const complexSchema = z.object({
|
|
748
|
-
user: z.object({
|
|
749
|
-
id: z.string().uuid(),
|
|
750
|
-
email: z.string().email(),
|
|
751
|
-
posts: z.array(
|
|
752
|
-
z.object({
|
|
753
|
-
title: z.string().min(1).max(200),
|
|
754
|
-
content: z.string()
|
|
755
|
-
})
|
|
756
|
-
)
|
|
757
|
-
}),
|
|
758
|
-
metadata: z.object({
|
|
759
|
-
version: z.number()
|
|
760
|
-
})
|
|
761
|
-
});
|
|
762
|
-
|
|
763
|
-
const hash = schemaFingerprintDeep(complexSchema);
|
|
764
|
-
// 7c4a9f2b... (full structure hash)
|
|
765
|
-
```
|
|
766
|
-
|
|
767
|
-
### 4.5 Schema Versioning Approach (Recommended)
|
|
768
|
-
|
|
769
|
-
Instead of computing hashes, use **explicit versioning**:
|
|
770
|
-
|
|
771
|
-
```typescript
|
|
772
|
-
interface SchemaVersion {
|
|
773
|
-
id: string;
|
|
774
|
-
version: number;
|
|
775
|
-
description: string;
|
|
776
|
-
}
|
|
777
|
-
|
|
778
|
-
const SCHEMA_VERSIONS: Record<string, SchemaVersion> = {
|
|
779
|
-
'user.v1': { id: 'user', version: 1, description: 'Initial schema' },
|
|
780
|
-
'user.v2': { id: 'user', version: 2, description: 'Added email validation' },
|
|
781
|
-
'user.v3': { id: 'user', version: 3, description: 'Changed age min to 18' }
|
|
782
|
-
};
|
|
783
|
-
|
|
784
|
-
// In your schema definitions
|
|
785
|
-
const userSchemaV3 = z.object({
|
|
786
|
-
email: z.string().email(),
|
|
787
|
-
age: z.number().int().min(18).max(150)
|
|
788
|
-
});
|
|
789
|
-
|
|
790
|
-
// Use explicit version in cache key
|
|
791
|
-
function generateLLMCacheKey(
|
|
792
|
-
schemaId: string,
|
|
793
|
-
schemaVersion: number,
|
|
794
|
-
input: any
|
|
795
|
-
): string {
|
|
796
|
-
const normalized = JSON.stringify({
|
|
797
|
-
schema: `${schemaId}.v${schemaVersion}`,
|
|
798
|
-
input
|
|
799
|
-
});
|
|
800
|
-
|
|
801
|
-
const hash = createHash('sha256');
|
|
802
|
-
hash.update(normalized);
|
|
803
|
-
return hash.digest('hex');
|
|
804
|
-
}
|
|
805
|
-
|
|
806
|
-
// When schema changes, increment version number
|
|
807
|
-
const key = generateLLMCacheKey('user', 3, userData);
|
|
808
|
-
```
|
|
809
|
-
|
|
810
|
-
### 4.6 Zod v4 Improvements
|
|
811
|
-
|
|
812
|
-
Zod v4 introduces better introspection:
|
|
813
|
-
|
|
814
|
-
```typescript
|
|
815
|
-
// Zod v4+: All ._zod.def objects are JSON-serializable
|
|
816
|
-
// This makes fingerprinting more reliable
|
|
817
|
-
|
|
818
|
-
import { z } from 'zod';
|
|
819
|
-
|
|
820
|
-
const schema = z.object({
|
|
821
|
-
name: z.string(),
|
|
822
|
-
email: z.string().email()
|
|
823
|
-
});
|
|
824
|
-
|
|
825
|
-
// In Zod v4, this is JSON-serializable
|
|
826
|
-
const def = schema._zod.def; // Better than _def
|
|
827
|
-
const serialized = JSON.stringify(def);
|
|
828
|
-
|
|
829
|
-
const hash = createHash('sha256');
|
|
830
|
-
hash.update(serialized);
|
|
831
|
-
const fingerprint = hash.digest('hex');
|
|
832
|
-
```
|
|
833
|
-
|
|
834
|
-
**Migration Note:** Zod v3 uses `_def`, v4+ uses `_zod.def` with better serialization support.
|
|
835
|
-
|
|
836
|
-
### 4.7 Practical Implementation: Query Response Cache
|
|
837
|
-
|
|
838
|
-
```typescript
|
|
839
|
-
import { z } from 'zod';
|
|
840
|
-
import { LRUCache } from 'lru-cache';
|
|
841
|
-
import { createHash } from 'node:crypto';
|
|
842
|
-
import safeStringify from 'safe-stable-stringify';
|
|
843
|
-
|
|
844
|
-
// Define schema with version
|
|
845
|
-
const ResponseSchema = z.object({
|
|
846
|
-
data: z.array(z.string()),
|
|
847
|
-
status: z.enum(['success', 'error'])
|
|
848
|
-
});
|
|
849
|
-
|
|
850
|
-
type Response = z.infer<typeof ResponseSchema>;
|
|
851
|
-
|
|
852
|
-
class SchemaAwareLLMCache {
|
|
853
|
-
private cache: LRUCache<string, Response>;
|
|
854
|
-
private schemaVersion = 1;
|
|
855
|
-
|
|
856
|
-
constructor() {
|
|
857
|
-
this.cache = new LRUCache<string, Response>({
|
|
858
|
-
max: 1000,
|
|
859
|
-
ttl: 3600000
|
|
860
|
-
});
|
|
861
|
-
}
|
|
862
|
-
|
|
863
|
-
generateKey(input: any): string {
|
|
864
|
-
const key = {
|
|
865
|
-
schema: `response.v${this.schemaVersion}`,
|
|
866
|
-
input: safeStringify(input)
|
|
867
|
-
};
|
|
868
|
-
|
|
869
|
-
const hash = createHash('sha256');
|
|
870
|
-
hash.update(JSON.stringify(key));
|
|
871
|
-
return hash.digest('hex');
|
|
872
|
-
}
|
|
873
|
-
|
|
874
|
-
get(input: any): Response | undefined {
|
|
875
|
-
return this.cache.get(this.generateKey(input));
|
|
876
|
-
}
|
|
877
|
-
|
|
878
|
-
set(input: any, response: Response): void {
|
|
879
|
-
// Validate against schema before caching
|
|
880
|
-
const validated = ResponseSchema.parse(response);
|
|
881
|
-
this.cache.set(this.generateKey(input), validated);
|
|
882
|
-
}
|
|
883
|
-
|
|
884
|
-
// Update schema (invalidates all old entries)
|
|
885
|
-
updateSchema(newVersion: number): void {
|
|
886
|
-
this.schemaVersion = newVersion;
|
|
887
|
-
// Old keys with old version number won't be found
|
|
888
|
-
// Cache effectively expires
|
|
889
|
-
}
|
|
890
|
-
}
|
|
891
|
-
```
|
|
892
|
-
|
|
893
|
-
---
|
|
894
|
-
|
|
895
|
-
## 5. LLM Response Caching Architecture
|
|
896
|
-
|
|
897
|
-
### 5.1 Exact vs. Semantic Caching
|
|
898
|
-
|
|
899
|
-
**Exact Caching:**
|
|
900
|
-
- Matches byte-for-byte identical prompts
|
|
901
|
-
- Fast, predictable, ~30-40% hit rate
|
|
902
|
-
- No similarity computation
|
|
903
|
-
- Best for: Frequently repeated queries
|
|
904
|
-
|
|
905
|
-
**Semantic Caching:**
|
|
906
|
-
- Uses embedding similarity (vector distance)
|
|
907
|
-
- ~60-70% hit rate
|
|
908
|
-
- Higher latency (requires embedding model)
|
|
909
|
-
- Best for: Paraphrased but equivalent queries
|
|
910
|
-
|
|
911
|
-
**Hybrid Approach (Recommended):**
|
|
912
|
-
|
|
913
|
-
```typescript
|
|
914
|
-
import { LRUCache } from 'lru-cache';
|
|
915
|
-
import { createHash } from 'node:crypto';
|
|
916
|
-
import safeStringify from 'safe-stable-stringify';
|
|
917
|
-
|
|
918
|
-
interface CachedResponse {
|
|
919
|
-
content: string;
|
|
920
|
-
embedding?: Float32Array;
|
|
921
|
-
timestamp: number;
|
|
922
|
-
model: string;
|
|
923
|
-
}
|
|
924
|
-
|
|
925
|
-
class HybridLLMCache {
|
|
926
|
-
private exactCache: LRUCache<string, CachedResponse>;
|
|
927
|
-
private semanticCache: CachedResponse[] = [];
|
|
928
|
-
private embeddingModel: any;
|
|
929
|
-
|
|
930
|
-
constructor(embeddingModel: any) {
|
|
931
|
-
this.embeddingModel = embeddingModel;
|
|
932
|
-
this.exactCache = new LRUCache<string, CachedResponse>({
|
|
933
|
-
max: 5000,
|
|
934
|
-
maxSize: 500 * 1024 * 1024,
|
|
935
|
-
sizeCalculation: (val) => JSON.stringify(val).length + 100,
|
|
936
|
-
ttl: 24 * 3600 * 1000 // 24 hours
|
|
937
|
-
});
|
|
938
|
-
}
|
|
939
|
-
|
|
940
|
-
// Layer 1: Fast exact match
|
|
941
|
-
async lookupExact(prompt: string): Promise<CachedResponse | null> {
|
|
942
|
-
const key = this.hashPrompt(prompt);
|
|
943
|
-
return this.exactCache.get(key) || null;
|
|
944
|
-
}
|
|
945
|
-
|
|
946
|
-
// Layer 2: Semantic similarity match
|
|
947
|
-
async lookupSemantic(
|
|
948
|
-
prompt: string,
|
|
949
|
-
threshold = 0.8
|
|
950
|
-
): Promise<CachedResponse | null> {
|
|
951
|
-
const embedding = await this.embeddingModel.embed(prompt);
|
|
952
|
-
|
|
953
|
-
for (const cached of this.semanticCache) {
|
|
954
|
-
if (!cached.embedding) continue;
|
|
955
|
-
|
|
956
|
-
const similarity = this.cosineSimilarity(embedding, cached.embedding);
|
|
957
|
-
if (similarity > threshold) {
|
|
958
|
-
return cached; // Found similar enough response
|
|
959
|
-
}
|
|
960
|
-
}
|
|
961
|
-
|
|
962
|
-
return null;
|
|
963
|
-
}
|
|
964
|
-
|
|
965
|
-
async lookup(prompt: string): Promise<CachedResponse | null> {
|
|
966
|
-
// Try exact first (fast)
|
|
967
|
-
const exact = await this.lookupExact(prompt);
|
|
968
|
-
if (exact) return exact;
|
|
969
|
-
|
|
970
|
-
// Fall back to semantic (slower)
|
|
971
|
-
return this.lookupSemantic(prompt);
|
|
972
|
-
}
|
|
973
|
-
|
|
974
|
-
async cache(
|
|
975
|
-
prompt: string,
|
|
976
|
-
response: string,
|
|
977
|
-
model: string
|
|
978
|
-
): Promise<void> {
|
|
979
|
-
const key = this.hashPrompt(prompt);
|
|
980
|
-
const embedding = await this.embeddingModel.embed(prompt);
|
|
981
|
-
|
|
982
|
-
const cached: CachedResponse = {
|
|
983
|
-
content: response,
|
|
984
|
-
embedding,
|
|
985
|
-
timestamp: Date.now(),
|
|
986
|
-
model
|
|
987
|
-
};
|
|
988
|
-
|
|
989
|
-
// Store in both caches
|
|
990
|
-
this.exactCache.set(key, cached);
|
|
991
|
-
this.semanticCache.push(cached);
|
|
992
|
-
|
|
993
|
-
// Trim semantic cache if too large
|
|
994
|
-
if (this.semanticCache.length > 10000) {
|
|
995
|
-
this.semanticCache = this.semanticCache.slice(-5000);
|
|
996
|
-
}
|
|
997
|
-
}
|
|
998
|
-
|
|
999
|
-
private hashPrompt(prompt: string): string {
|
|
1000
|
-
const hash = createHash('sha256');
|
|
1001
|
-
hash.update(prompt);
|
|
1002
|
-
return hash.digest('hex');
|
|
1003
|
-
}
|
|
1004
|
-
|
|
1005
|
-
private cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
|
1006
|
-
let dotProduct = 0;
|
|
1007
|
-
let normA = 0;
|
|
1008
|
-
let normB = 0;
|
|
1009
|
-
|
|
1010
|
-
for (let i = 0; i < a.length; i++) {
|
|
1011
|
-
dotProduct += a[i] * b[i];
|
|
1012
|
-
normA += a[i] * a[i];
|
|
1013
|
-
normB += b[i] * b[i];
|
|
1014
|
-
}
|
|
1015
|
-
|
|
1016
|
-
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
1017
|
-
}
|
|
1018
|
-
}
|
|
1019
|
-
```
|
|
1020
|
-
|
|
1021
|
-
### 5.2 Multi-Layer Cache Architecture
|
|
1022
|
-
|
|
1023
|
-
```typescript
|
|
1024
|
-
class MultiLayerLLMCache {
|
|
1025
|
-
private l1Memory: LRUCache<string, any>; // In-process memory
|
|
1026
|
-
private l2Redis: any; // Redis (if available)
|
|
1027
|
-
private l3Disk: any; // SQLite or file-based
|
|
1028
|
-
|
|
1029
|
-
async lookup(key: string): Promise<any> {
|
|
1030
|
-
// L1: In-process (nanoseconds)
|
|
1031
|
-
const l1 = this.l1Memory.get(key);
|
|
1032
|
-
if (l1) return l1;
|
|
1033
|
-
|
|
1034
|
-
// L2: Redis (milliseconds)
|
|
1035
|
-
const l2 = await this.l2Redis?.get(key);
|
|
1036
|
-
if (l2) {
|
|
1037
|
-
this.l1Memory.set(key, l2); // Promote to L1
|
|
1038
|
-
return l2;
|
|
1039
|
-
}
|
|
1040
|
-
|
|
1041
|
-
// L3: Disk (hundreds of milliseconds)
|
|
1042
|
-
const l3 = await this.l3Disk?.get(key);
|
|
1043
|
-
if (l3) {
|
|
1044
|
-
this.l1Memory.set(key, l3); // Promote to L1
|
|
1045
|
-
if (this.l2Redis) await this.l2Redis.set(key, l3); // Promote to L2
|
|
1046
|
-
return l3;
|
|
1047
|
-
}
|
|
1048
|
-
|
|
1049
|
-
return null;
|
|
1050
|
-
}
|
|
1051
|
-
|
|
1052
|
-
async cache(key: string, value: any): Promise<void> {
|
|
1053
|
-
this.l1Memory.set(key, value);
|
|
1054
|
-
if (this.l2Redis) await this.l2Redis.set(key, value);
|
|
1055
|
-
if (this.l3Disk) await this.l3Disk.set(key, value);
|
|
1056
|
-
}
|
|
1057
|
-
}
|
|
1058
|
-
```
|
|
1059
|
-
|
|
1060
|
-
### 5.3 Cache Hit Rate Monitoring
|
|
1061
|
-
|
|
1062
|
-
```typescript
|
|
1063
|
-
interface CacheMetrics {
|
|
1064
|
-
hits: number;
|
|
1065
|
-
misses: number;
|
|
1066
|
-
hitRate: number;
|
|
1067
|
-
avgLatency: number;
|
|
1068
|
-
evictions: number;
|
|
1069
|
-
}
|
|
1070
|
-
|
|
1071
|
-
class MonitoredLLMCache {
|
|
1072
|
-
private cache: LRUCache<string, any>;
|
|
1073
|
-
private metrics: CacheMetrics = {
|
|
1074
|
-
hits: 0,
|
|
1075
|
-
misses: 0,
|
|
1076
|
-
hitRate: 0,
|
|
1077
|
-
avgLatency: 0,
|
|
1078
|
-
evictions: 0
|
|
1079
|
-
};
|
|
1080
|
-
private latencies: number[] = [];
|
|
1081
|
-
|
|
1082
|
-
async fetch<T>(
|
|
1083
|
-
key: string,
|
|
1084
|
-
fetchFn: () => Promise<T>
|
|
1085
|
-
): Promise<T> {
|
|
1086
|
-
const start = performance.now();
|
|
1087
|
-
|
|
1088
|
-
const cached = this.cache.get(key);
|
|
1089
|
-
if (cached !== undefined) {
|
|
1090
|
-
this.metrics.hits++;
|
|
1091
|
-
this.recordLatency(performance.now() - start);
|
|
1092
|
-
return cached;
|
|
1093
|
-
}
|
|
1094
|
-
|
|
1095
|
-
this.metrics.misses++;
|
|
1096
|
-
const value = await fetchFn();
|
|
1097
|
-
this.cache.set(key, value);
|
|
1098
|
-
this.recordLatency(performance.now() - start);
|
|
1099
|
-
|
|
1100
|
-
return value;
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
private recordLatency(latency: number): void {
|
|
1104
|
-
this.latencies.push(latency);
|
|
1105
|
-
if (this.latencies.length > 1000) {
|
|
1106
|
-
this.latencies.shift();
|
|
1107
|
-
}
|
|
1108
|
-
this.metrics.avgLatency =
|
|
1109
|
-
this.latencies.reduce((a, b) => a + b, 0) / this.latencies.length;
|
|
1110
|
-
}
|
|
1111
|
-
|
|
1112
|
-
getMetrics(): CacheMetrics {
|
|
1113
|
-
const total = this.metrics.hits + this.metrics.misses;
|
|
1114
|
-
return {
|
|
1115
|
-
...this.metrics,
|
|
1116
|
-
hitRate: total > 0 ? this.metrics.hits / total : 0
|
|
1117
|
-
};
|
|
1118
|
-
}
|
|
1119
|
-
|
|
1120
|
-
// Log metrics periodically
|
|
1121
|
-
startMetricsReporting(intervalMs = 60000): void {
|
|
1122
|
-
setInterval(() => {
|
|
1123
|
-
const metrics = this.getMetrics();
|
|
1124
|
-
console.log('Cache Metrics:', {
|
|
1125
|
-
hitRate: (metrics.hitRate * 100).toFixed(2) + '%',
|
|
1126
|
-
avgLatency: metrics.avgLatency.toFixed(2) + 'ms',
|
|
1127
|
-
total: metrics.hits + metrics.misses
|
|
1128
|
-
});
|
|
1129
|
-
}, intervalMs);
|
|
1130
|
-
}
|
|
1131
|
-
}
|
|
1132
|
-
```
|
|
1133
|
-
|
|
1134
|
-
---
|
|
1135
|
-
|
|
1136
|
-
## 6. Common Pitfalls and Solutions
|
|
1137
|
-
|
|
1138
|
-
### Pitfall 1: Non-Deterministic Cache Keys
|
|
1139
|
-
|
|
1140
|
-
**Problem:**
|
|
1141
|
-
```typescript
|
|
1142
|
-
// ❌ DON'T: JSON.stringify emits keys in insertion order, so equal objects built in a different order serialize differently
|
|
1143
|
-
const cacheKey = JSON.stringify(prompt);
|
|
1144
|
-
// Result: Different keys for same logical input
|
|
1145
|
-
```
|
|
1146
|
-
|
|
1147
|
-
**Solution:**
|
|
1148
|
-
```typescript
|
|
1149
|
-
// ✅ DO: Use deterministic stringification
|
|
1150
|
-
import safeStringify from 'safe-stable-stringify';
|
|
1151
|
-
|
|
1152
|
-
const cacheKey = safeStringify(prompt);
|
|
1153
|
-
// Result: Same key for same logical input
|
|
1154
|
-
```
|
|
1155
|
-
|
|
1156
|
-
### Pitfall 2: Unbounded Cache Growth
|
|
1157
|
-
|
|
1158
|
-
**Problem:**
|
|
1159
|
-
```typescript
|
|
1160
|
-
// ❌ DON'T: No configuration
|
|
1161
|
-
const cache = new LRUCache(); // WARNING: unbounded!
|
|
1162
|
-
// Risk: Memory exhaustion, process crash
|
|
1163
|
-
```
|
|
1164
|
-
|
|
1165
|
-
**Solution:**
|
|
1166
|
-
```typescript
|
|
1167
|
-
// ✅ DO: Always configure at least one limit
|
|
1168
|
-
const cache = new LRUCache<string, any>({
|
|
1169
|
-
max: 5000, // Limit by count
|
|
1170
|
-
// OR
|
|
1171
|
-
maxSize: 500 * 1024 * 1024, // Limit by size
|
|
1172
|
-
// AND optionally
|
|
1173
|
-
ttl: 24 * 3600 * 1000 // Expiration time
|
|
1174
|
-
});
|
|
1175
|
-
```
|
|
1176
|
-
|
|
1177
|
-
### Pitfall 3: Incorrect Size Calculation
|
|
1178
|
-
|
|
1179
|
-
**Problem:**
|
|
1180
|
-
```typescript
|
|
1181
|
-
// ❌ DON'T: Size calculation that doesn't match actual usage
|
|
1182
|
-
const cache = new LRUCache<string, any>({
|
|
1183
|
-
maxSize: 100 * 1024 * 1024,
|
|
1184
|
-
sizeCalculation: (val) => 1 // Always returns 1!
|
|
1185
|
-
});
|
|
1186
|
-
// Result: Every entry counts as size 1, so the cache admits ~100 million entries regardless of their real memory footprint
|
|
1187
|
-
```
|
|
1188
|
-
|
|
1189
|
-
**Solution:**
|
|
1190
|
-
```typescript
|
|
1191
|
-
// ✅ DO: Accurate size calculation
|
|
1192
|
-
function calculateSize(value: any): number {
|
|
1193
|
-
const json = JSON.stringify(value);
|
|
1194
|
-
const bytes = Buffer.byteLength(json, 'utf8');
|
|
1195
|
-
return bytes + 100; // Add overhead
|
|
1196
|
-
}
|
|
1197
|
-
|
|
1198
|
-
const cache = new LRUCache<string, any>({
|
|
1199
|
-
maxSize: 100 * 1024 * 1024,
|
|
1200
|
-
sizeCalculation: (val, key) => {
|
|
1201
|
-
return calculateSize(val) + Buffer.byteLength(key, 'utf8');
|
|
1202
|
-
}
|
|
1203
|
-
});
|
|
1204
|
-
```
|
|
1205
|
-
|
|
1206
|
-
### Pitfall 4: Shared Cache State Issues
|
|
1207
|
-
|
|
1208
|
-
**Problem:**
|
|
1209
|
-
```typescript
|
|
1210
|
-
// ❌ DON'T: Mutable cached values
|
|
1211
|
-
const response = { data: [...], timestamp: Date.now() };
|
|
1212
|
-
cache.set(key, response);
|
|
1213
|
-
|
|
1214
|
-
// Later...
|
|
1215
|
-
const cached = cache.get(key);
|
|
1216
|
-
cached.data.push(newItem); // Mutates original!
|
|
1217
|
-
```
|
|
1218
|
-
|
|
1219
|
-
**Solution:**
|
|
1220
|
-
```typescript
|
|
1221
|
-
// ✅ DO: Store immutable copies or deep-clone on retrieval
|
|
1222
|
-
// Option 1: Freeze cached objects (note: Object.freeze is shallow, so nested arrays/objects must be frozen as well)
|
|
1223
|
-
const response = Object.freeze({ data: [...], timestamp: Date.now() });
|
|
1224
|
-
cache.set(key, response);
|
|
1225
|
-
|
|
1226
|
-
// Option 2: Deep clone on retrieval
|
|
1227
|
-
function getCachedResponse<T>(key: string): T | undefined {
|
|
1228
|
-
const cached = cache.get(key);
|
|
1229
|
-
if (!cached) return undefined;
|
|
1230
|
-
return structuredClone(cached); // Deep clone
|
|
1231
|
-
}
|
|
1232
|
-
```
|
|
1233
|
-
|
|
1234
|
-
### Pitfall 5: Cache Invalidation Complexity
|
|
1235
|
-
|
|
1236
|
-
**Problem:**
|
|
1237
|
-
```typescript
|
|
1238
|
-
// ❌ DON'T: Manual cache invalidation
|
|
1239
|
-
cache.clear(); // Clears everything, inefficient
|
|
1240
|
-
|
|
1241
|
-
// or worse:
|
|
1242
|
-
for (const key of someLargeList) {
|
|
1243
|
-
cache.delete(key); // O(n) complexity
|
|
1244
|
-
}
|
|
1245
|
-
```
|
|
1246
|
-
|
|
1247
|
-
**Solution:**
|
|
1248
|
-
```typescript
|
|
1249
|
-
// ✅ DO: Use versioning and TTL
|
|
1250
|
-
const cacheKeyWithVersion = `v2:${baseKey}`;
|
|
1251
|
-
// When schema changes: increment version = new cache keys
|
|
1252
|
-
|
|
1253
|
-
// ✅ DO: Use TTL for automatic expiration
|
|
1254
|
-
const cache = new LRUCache<string, any>({
|
|
1255
|
-
max: 5000,
|
|
1256
|
-
ttl: 3600000, // Auto-expire after 1 hour
|
|
1257
|
-
updateAgeOnGet: true // Reset TTL on each access
|
|
1258
|
-
});
|
|
1259
|
-
|
|
1260
|
-
// ✅ DO: Selective invalidation
|
|
1261
|
-
function invalidateModel(modelName: string): void {
|
|
1262
|
-
for (const key of cache.keys()) {
|
|
1263
|
-
if (key.startsWith(modelName)) {
|
|
1264
|
-
cache.delete(key);
|
|
1265
|
-
}
|
|
1266
|
-
}
|
|
1267
|
-
}
|
|
1268
|
-
```
|
|
1269
|
-
|
|
1270
|
-
### Pitfall 6: Race Conditions with Concurrent Fetches
|
|
1271
|
-
|
|
1272
|
-
**Problem:**
|
|
1273
|
-
```typescript
|
|
1274
|
-
// ❌ DON'T: Naive implementation has race conditions
|
|
1275
|
-
if (!cache.has(key)) {
|
|
1276
|
-
const value = await expensiveOperation();
|
|
1277
|
-
cache.set(key, value);
|
|
1278
|
-
}
|
|
1279
|
-
// Problem: Multiple concurrent requests all pass the if check!
|
|
1280
|
-
```
|
|
1281
|
-
|
|
1282
|
-
**Solution:**
|
|
1283
|
-
```typescript
|
|
1284
|
-
// ✅ DO: Use fetch() method (deduplicates requests)
|
|
1285
|
-
const value = await cache.fetch(
|
|
1286
|
-
key,
|
|
1287
|
-
async () => {
|
|
1288
|
-
return expensiveOperation();
|
|
1289
|
-
}
|
|
1290
|
-
);
|
|
1291
|
-
// Only ONE concurrent request per key!
|
|
1292
|
-
```
|
|
1293
|
-
|
|
1294
|
-
### Pitfall 7: TTL Not Working as Expected
|
|
1295
|
-
|
|
1296
|
-
**Problem:**
|
|
1297
|
-
```typescript
|
|
1298
|
-
// ❌ DON'T: Assume items are preemptively deleted
|
|
1299
|
-
const cache = new LRUCache({ ttl: 1000 });
|
|
1300
|
-
cache.set('key', 'value');
|
|
1301
|
-
|
|
1302
|
-
// Wait 2 seconds...
|
|
1303
|
-
setTimeout(() => {
|
|
1304
|
-
console.log(cache.size); // Might still show 1!
|
|
1305
|
-
// Items aren't removed until accessed
|
|
1306
|
-
}, 2000);
|
|
1307
|
-
```
|
|
1308
|
-
|
|
1309
|
-
**Solution:**
|
|
1310
|
-
```typescript
|
|
1311
|
-
// ✅ DO: Understand LRU cache isn't actively expiring
|
|
1312
|
-
// TTL only deletes on access. For active expiration:
|
|
1313
|
-
|
|
1314
|
-
// Option 1: Use Redis/Memcached instead
|
|
1315
|
-
// Option 2: Implement custom expiration
|
|
1316
|
-
setInterval(() => {
|
|
1317
|
-
for (const key of cache.keys()) {
|
|
1318
|
-
cache.get(key); // Access triggers expiration check
|
|
1319
|
-
}
|
|
1320
|
-
}, 60000);
|
|
1321
|
-
|
|
1322
|
-
// Option 3: Accept lazy expiration (usually fine for LLM cache)
|
|
1323
|
-
// Items expire when accessed after TTL, not before
|
|
1324
|
-
```
|
|
1325
|
-
|
|
1326
|
-
### Pitfall 8: Circular Reference in Cache Keys
|
|
1327
|
-
|
|
1328
|
-
**Problem:**
|
|
1329
|
-
```typescript
|
|
1330
|
-
// ❌ DON'T: Circular references crash stringification
|
|
1331
|
-
const obj = { value: 42 };
|
|
1332
|
-
obj.self = obj; // Circular!
|
|
1333
|
-
|
|
1334
|
-
const key = JSON.stringify(obj); // Error: Converting circular structure
|
|
1335
|
-
```
|
|
1336
|
-
|
|
1337
|
-
**Solution:**
|
|
1338
|
-
```typescript
|
|
1339
|
-
// ✅ DO: Use safe-stable-stringify or handle explicitly
|
|
1340
|
-
import safeStringify from 'safe-stable-stringify';
|
|
1341
|
-
|
|
1342
|
-
const obj = { value: 42 };
|
|
1343
|
-
obj.self = obj;
|
|
1344
|
-
|
|
1345
|
-
const key = safeStringify(obj); // Works! Handles circular refs
|
|
1346
|
-
|
|
1347
|
-
// Or normalize before stringifying
|
|
1348
|
-
function normalizeForCaching(obj: any): any {
|
|
1349
|
-
return {
|
|
1350
|
-
value: obj.value,
|
|
1351
|
-
nested: obj.nested ? normalizeForCaching(obj.nested) : null
|
|
1352
|
-
// Don't include circular references
|
|
1353
|
-
};
|
|
1354
|
-
}
|
|
1355
|
-
```
|
|
1356
|
-
|
|
1357
|
-
---
|
|
1358
|
-
|
|
1359
|
-
## 7. Performance Benchmarks
|
|
1360
|
-
|
|
1361
|
-
### 7.1 Deterministic Stringification Performance
|
|
1362
|
-
|
|
1363
|
-
**Test:** 1000 iterations of complex object stringification
|
|
1364
|
-
|
|
1365
|
-
| Library | Ops/sec | Relative | Notes |
|
|
1366
|
-
|---------|---------|----------|-------|
|
|
1367
|
-
| JSON.stringify | ~45,000 | 3.3x | Not stable: key order follows insertion order |
|
|
1368
|
-
| fast-json-stable-stringify | ~17,189 | 1.26x | Fastest stable option |
|
|
1369
|
-
| safe-stable-stringify | ~13,634 | 1.0x | Safest (handles edge cases) |
|
|
1370
|
-
| json-stringify-deterministic | ~12,000 | 0.88x | Good stability |
|
|
1371
|
-
|
|
1372
|
-
**Practical Impact:** For a typical LLM prompt (~500 bytes), the difference between these libraries is well under 1 ms per call, so choose safe-stable-stringify for production.
|
|
1373
|
-
|
|
1374
|
-
### 7.2 SHA-256 Hashing Performance
|
|
1375
|
-
|
|
1376
|
-
**Test:** SHA-256 on various data sizes
|
|
1377
|
-
|
|
1378
|
-
| Data Size | Time | Throughput |
|
|
1379
|
-
|-----------|------|-----------|
|
|
1380
|
-
| 100 bytes | 0.005 ms | 20 MB/s |
|
|
1381
|
-
| 1 KB | 0.05 ms | 20 MB/s |
|
|
1382
|
-
| 10 KB | 0.5 ms | 20 MB/s |
|
|
1383
|
-
| 100 KB | 5 ms | 20 MB/s |
|
|
1384
|
-
|
|
1385
|
-
**Finding:** Node.js SHA-256 via OpenSSL is consistent at ~20 MB/s.
|
|
1386
|
-
|
|
1387
|
-
### 7.3 LRU Cache Operations
|
|
1388
|
-
|
|
1389
|
-
**Test:** 10,000 operations on LRUCache with max: 1000
|
|
1390
|
-
|
|
1391
|
-
| Operation | Time | Notes |
|
|
1392
|
-
|-----------|------|-------|
|
|
1393
|
-
| get() hit | 0.0001 ms | Extremely fast |
|
|
1394
|
-
| get() miss | 0.0001 ms | Same as hit |
|
|
1395
|
-
| set() | 0.002 ms | Slightly slower |
|
|
1396
|
-
| set() with eviction | 0.004 ms | Includes cleanup |
|
|
1397
|
-
| fetch() (hit) | 0.0002 ms | Adds ~0.0001ms |
|
|
1398
|
-
| fetch() (miss) | depends | Function execution time |
|
|
1399
|
-
|
|
1400
|
-
**Conclusion:** LRU operations are negligible (<0.01ms each). Bottleneck is I/O, not cache logic.
|
|
1401
|
-
|
|
1402
|
-
### 7.4 Full Caching Pipeline (Realistic)
|
|
1403
|
-
|
|
1404
|
-
```typescript
|
|
1405
|
-
// Prompt input
|
|
1406
|
-
const input = {
|
|
1407
|
-
messages: [
|
|
1408
|
-
{ role: 'user', content: 'What is quantum computing?' }
|
|
1409
|
-
],
|
|
1410
|
-
model: 'gpt-4',
|
|
1411
|
-
temperature: 0.7
|
|
1412
|
-
};
|
|
1413
|
-
|
|
1414
|
-
// Step 1: Stringify deterministically
|
|
1415
|
-
safeStringify(input) // ~0.1 ms
|
|
1416
|
-
|
|
1417
|
-
// Step 2: Hash with SHA-256
|
|
1418
|
-
createHash('sha256') // ~0.01 ms
|
|
1419
|
-
|
|
1420
|
-
// Step 3: Cache lookup
|
|
1421
|
-
cache.fetch(key, ...) // ~0.0001 ms (hit) or fetch time (miss)
|
|
1422
|
-
|
|
1423
|
-
// Total for cache hit: ~0.11 ms
|
|
1424
|
-
// Total for cache miss + fetch: 0.11 + API time
|
|
1425
|
-
```
|
|
1426
|
-
|
|
1427
|
-
**Takeaway:** Even at 60+ requests/second, the ~0.11 ms of per-request caching overhead is <1% of total latency, which is dominated by the LLM API call.
|
|
1428
|
-
|
|
1429
|
-
---
|
|
1430
|
-
|
|
1431
|
-
## 8. Complete Implementation Example
|
|
1432
|
-
|
|
1433
|
-
### 8.1 Production-Ready LLM Cache
|
|
1434
|
-
|
|
1435
|
-
```typescript
|
|
1436
|
-
import { LRUCache } from 'lru-cache';
|
|
1437
|
-
import { createHash } from 'node:crypto';
|
|
1438
|
-
import safeStringify from 'safe-stable-stringify';
|
|
1439
|
-
import * as z from 'zod';
|
|
1440
|
-
|
|
1441
|
-
// Type definitions
|
|
1442
|
-
interface LLMPrompt {
|
|
1443
|
-
model: string;
|
|
1444
|
-
messages: Array<{ role: 'user' | 'assistant' | 'system'; content: string }>;
|
|
1445
|
-
temperature?: number;
|
|
1446
|
-
maxTokens?: number;
|
|
1447
|
-
systemPrompt?: string;
|
|
1448
|
-
tools?: unknown[];
|
|
1449
|
-
}
|
|
1450
|
-
|
|
1451
|
-
interface LLMResponse {
|
|
1452
|
-
id: string;
|
|
1453
|
-
content: string;
|
|
1454
|
-
model: string;
|
|
1455
|
-
tokens: {
|
|
1456
|
-
prompt: number;
|
|
1457
|
-
completion: number;
|
|
1458
|
-
};
|
|
1459
|
-
timestamp: number;
|
|
1460
|
-
}
|
|
1461
|
-
|
|
1462
|
-
interface CacheOptions {
|
|
1463
|
-
maxItems?: number;
|
|
1464
|
-
maxSizeMB?: number;
|
|
1465
|
-
ttlHours?: number;
|
|
1466
|
-
updateAgeOnGet?: boolean;
|
|
1467
|
-
}
|
|
1468
|
-
|
|
1469
|
-
// Validation schema
|
|
1470
|
-
const LLMPromptSchema = z.object({
|
|
1471
|
-
model: z.string(),
|
|
1472
|
-
messages: z.array(
|
|
1473
|
-
z.object({
|
|
1474
|
-
role: z.enum(['user', 'assistant', 'system']),
|
|
1475
|
-
content: z.string()
|
|
1476
|
-
})
|
|
1477
|
-
),
|
|
1478
|
-
temperature: z.number().min(0).max(2).optional(),
|
|
1479
|
-
maxTokens: z.number().positive().optional(),
|
|
1480
|
-
systemPrompt: z.string().optional(),
|
|
1481
|
-
tools: z.unknown().array().optional()
|
|
1482
|
-
});
|
|
1483
|
-
|
|
1484
|
-
const LLMResponseSchema = z.object({
|
|
1485
|
-
id: z.string(),
|
|
1486
|
-
content: z.string(),
|
|
1487
|
-
model: z.string(),
|
|
1488
|
-
tokens: z.object({
|
|
1489
|
-
prompt: z.number(),
|
|
1490
|
-
completion: z.number()
|
|
1491
|
-
}),
|
|
1492
|
-
timestamp: z.number()
|
|
1493
|
-
});
|
|
1494
|
-
|
|
1495
|
-
// Main cache implementation
|
|
1496
|
-
export class LLMResponseCache {
|
|
1497
|
-
private cache: LRUCache<string, LLMResponse>;
|
|
1498
|
-
private metrics = {
|
|
1499
|
-
hits: 0,
|
|
1500
|
-
misses: 0,
|
|
1501
|
-
evictions: 0,
|
|
1502
|
-
latencies: [] as number[]
|
|
1503
|
-
};
|
|
1504
|
-
private schemaVersion = 1;
|
|
1505
|
-
|
|
1506
|
-
constructor(options: CacheOptions = {}) {
|
|
1507
|
-
const {
|
|
1508
|
-
maxItems = 5000,
|
|
1509
|
-
maxSizeMB = 500,
|
|
1510
|
-
ttlHours = 24,
|
|
1511
|
-
updateAgeOnGet = true
|
|
1512
|
-
} = options;
|
|
1513
|
-
|
|
1514
|
-
this.cache = new LRUCache<string, LLMResponse>({
|
|
1515
|
-
max: maxItems,
|
|
1516
|
-
maxSize: maxSizeMB * 1024 * 1024,
|
|
1517
|
-
sizeCalculation: (value, key) => {
|
|
1518
|
-
const keySize = Buffer.byteLength(key, 'utf8');
|
|
1519
|
-
const valueSize = Buffer.byteLength(JSON.stringify(value), 'utf8');
|
|
1520
|
-
return keySize + valueSize + 100; // +100 for overhead
|
|
1521
|
-
},
|
|
1522
|
-
ttl: ttlHours * 3600 * 1000,
|
|
1523
|
-
updateAgeOnGet,
|
|
1524
|
-
|
|
1525
|
-
// Callback for evictions
|
|
1526
|
-
dispose: (value, key, reason) => {
|
|
1527
|
-
if (reason === 'evict') {
|
|
1528
|
-
this.metrics.evictions++;
|
|
1529
|
-
}
|
|
1530
|
-
}
|
|
1531
|
-
});
|
|
1532
|
-
}
|
|
1533
|
-
|
|
1534
|
-
/**
|
|
1535
|
-
* Generate deterministic cache key from prompt
|
|
1536
|
-
*/
|
|
1537
|
-
private generateKey(prompt: LLMPrompt): string {
|
|
1538
|
-
// Normalize the prompt (fixed field set, defaults filled in); key ordering is handled by the stable stringify below
|
|
1539
|
-
const normalized = {
|
|
1540
|
-
v: this.schemaVersion,
|
|
1541
|
-
model: prompt.model,
|
|
1542
|
-
temperature: prompt.temperature ?? 0.7,
|
|
1543
|
-
maxTokens: prompt.maxTokens ?? 2000,
|
|
1544
|
-
systemPrompt: prompt.systemPrompt ?? '',
|
|
1545
|
-
messages: prompt.messages.map(m => ({
|
|
1546
|
-
role: m.role,
|
|
1547
|
-
content: m.content
|
|
1548
|
-
})),
|
|
1549
|
-
tools: prompt.tools ?? []
|
|
1550
|
-
};
|
|
1551
|
-
|
|
1552
|
-
// Deterministic stringification
|
|
1553
|
-
const str = safeStringify(normalized);
|
|
1554
|
-
|
|
1555
|
-
// SHA-256 hash for compact representation
|
|
1556
|
-
const hash = createHash('sha256');
|
|
1557
|
-
hash.update(str);
|
|
1558
|
-
return hash.digest('hex');
|
|
1559
|
-
}
|
|
1560
|
-
|
|
1561
|
-
/**
|
|
1562
|
-
* Lookup cached response
|
|
1563
|
-
*/
|
|
1564
|
-
async lookup(prompt: LLMPrompt): Promise<LLMResponse | null> {
|
|
1565
|
-
const startTime = performance.now();
|
|
1566
|
-
|
|
1567
|
-
try {
|
|
1568
|
-
// Validate input
|
|
1569
|
-
const validPrompt = LLMPromptSchema.parse(prompt);
|
|
1570
|
-
const key = this.generateKey(validPrompt);
|
|
1571
|
-
|
|
1572
|
-
const cached = this.cache.get(key);
|
|
1573
|
-
|
|
1574
|
-
if (cached) {
|
|
1575
|
-
this.metrics.hits++;
|
|
1576
|
-
this.recordLatency(performance.now() - startTime);
|
|
1577
|
-
return cached;
|
|
1578
|
-
}
|
|
1579
|
-
|
|
1580
|
-
this.metrics.misses++;
|
|
1581
|
-
this.recordLatency(performance.now() - startTime);
|
|
1582
|
-
return null;
|
|
1583
|
-
} catch (error) {
|
|
1584
|
-
console.error('Cache lookup error:', error);
|
|
1585
|
-
return null;
|
|
1586
|
-
}
|
|
1587
|
-
}
|
|
1588
|
-
|
|
1589
|
-
/**
|
|
1590
|
-
* Store response in cache with validation
|
|
1591
|
-
*/
|
|
1592
|
-
async store(
|
|
1593
|
-
prompt: LLMPrompt,
|
|
1594
|
-
response: LLMResponse
|
|
1595
|
-
): Promise<boolean> {
|
|
1596
|
-
try {
|
|
1597
|
-
// Validate both inputs
|
|
1598
|
-
const validPrompt = LLMPromptSchema.parse(prompt);
|
|
1599
|
-
const validResponse = LLMResponseSchema.parse(response);
|
|
1600
|
-
|
|
1601
|
-
const key = this.generateKey(validPrompt);
|
|
1602
|
-
this.cache.set(key, validResponse);
|
|
1603
|
-
|
|
1604
|
-
return true;
|
|
1605
|
-
} catch (error) {
|
|
1606
|
-
console.error('Cache store error:', error);
|
|
1607
|
-
return false;
|
|
1608
|
-
}
|
|
1609
|
-
}
|
|
1610
|
-
|
|
1611
|
-
/**
|
|
1612
|
-
* Fetch with lazy loading (recommended pattern)
|
|
1613
|
-
*/
|
|
1614
|
-
async fetch(
|
|
1615
|
-
prompt: LLMPrompt,
|
|
1616
|
-
fetchFn: () => Promise<LLMResponse>
|
|
1617
|
-
): Promise<LLMResponse> {
|
|
1618
|
-
const startTime = performance.now();
|
|
1619
|
-
|
|
1620
|
-
try {
|
|
1621
|
-
const validPrompt = LLMPromptSchema.parse(prompt);
|
|
1622
|
-
const key = this.generateKey(validPrompt);
|
|
1623
|
-
|
|
1624
|
-
// Use fetch() method for automatic deduplication
|
|
1625
|
-
const response = await this.cache.fetch(
|
|
1626
|
-
key,
|
|
1627
|
-
async () => {
|
|
1628
|
-
const result = await fetchFn();
|
|
1629
|
-
return LLMResponseSchema.parse(result);
|
|
1630
|
-
},
|
|
1631
|
-
{
|
|
1632
|
-
ttl: 24 * 3600 * 1000,
|
|
1633
|
-
allowStale: false
|
|
1634
|
-
}
|
|
1635
|
-
);
|
|
1636
|
-
|
|
1637
|
-
this.metrics.hits++;
|
|
1638
|
-
this.recordLatency(performance.now() - startTime);
|
|
1639
|
-
return response;
|
|
1640
|
-
} catch (error) {
|
|
1641
|
-
console.error('Cache fetch error:', error);
|
|
1642
|
-
throw error;
|
|
1643
|
-
}
|
|
1644
|
-
}
|
|
1645
|
-
|
|
1646
|
-
/**
|
|
1647
|
-
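* Invalidate cached entries. NOTE: keys are opaque hashes, so this cannot filter by model; it bumps the schema version, which orphans ALL entries (every model), and returns how many were present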
|
|
1648
|
-
*/
|
|
1649
|
-
invalidateModel(modelName: string): number {
|
|
1650
|
-
let count = 0;
|
|
1651
|
-
for (const key of this.cache.keys()) {
|
|
1652
|
-
// Keys are hashes, but we can track separately if needed
|
|
1653
|
-
count++;
|
|
1654
|
-
}
|
|
1655
|
-
|
|
1656
|
-
// Bump the key version: nothing is deleted, but existing entries become unreachable and age out via LRU/TTL
|
|
1657
|
-
this.schemaVersion++;
|
|
1658
|
-
return count;
|
|
1659
|
-
}
|
|
1660
|
-
|
|
1661
|
-
/**
|
|
1662
|
-
* Get cache statistics
|
|
1663
|
-
*/
|
|
1664
|
-
getMetrics() {
|
|
1665
|
-
const total = this.metrics.hits + this.metrics.misses;
|
|
1666
|
-
const avgLatency = this.metrics.latencies.length > 0
|
|
1667
|
-
? this.metrics.latencies.reduce((a, b) => a + b, 0) / this.metrics.latencies.length
|
|
1668
|
-
: 0;
|
|
1669
|
-
|
|
1670
|
-
return {
|
|
1671
|
-
size: this.cache.size,
|
|
1672
|
-
maxSize: this.cache.maxSize,
|
|
1673
|
-
hits: this.metrics.hits,
|
|
1674
|
-
misses: this.metrics.misses,
|
|
1675
|
-
hitRate: total > 0 ? (this.metrics.hits / total * 100).toFixed(2) + '%' : '0%',
|
|
1676
|
-
avgLatency: avgLatency.toFixed(3) + ' ms',
|
|
1677
|
-
evictions: this.metrics.evictions
|
|
1678
|
-
};
|
|
1679
|
-
}
|
|
1680
|
-
|
|
1681
|
-
/**
|
|
1682
|
-
* Start periodic metrics reporting
|
|
1683
|
-
*/
|
|
1684
|
-
startMonitoring(intervalMs = 60000): () => void {
|
|
1685
|
-
const timer = setInterval(() => {
|
|
1686
|
-
const metrics = this.getMetrics();
|
|
1687
|
-
console.log('[LLMCache Metrics]', {
|
|
1688
|
-
timestamp: new Date().toISOString(),
|
|
1689
|
-
...metrics
|
|
1690
|
-
});
|
|
1691
|
-
}, intervalMs);
|
|
1692
|
-
|
|
1693
|
-
return () => clearInterval(timer);
|
|
1694
|
-
}
|
|
1695
|
-
|
|
1696
|
-
/**
|
|
1697
|
-
* Clear entire cache
|
|
1698
|
-
*/
|
|
1699
|
-
clear(): void {
|
|
1700
|
-
this.cache.clear();
|
|
1701
|
-
}
|
|
1702
|
-
|
|
1703
|
-
private recordLatency(ms: number): void {
|
|
1704
|
-
this.metrics.latencies.push(ms);
|
|
1705
|
-
// Keep only last 1000 measurements
|
|
1706
|
-
if (this.metrics.latencies.length > 1000) {
|
|
1707
|
-
this.metrics.latencies.shift();
|
|
1708
|
-
}
|
|
1709
|
-
}
|
|
1710
|
-
}
|
|
1711
|
-
|
|
1712
|
-
// Example usage
|
|
1713
|
-
export async function example() {
|
|
1714
|
-
const cache = new LLMResponseCache({
|
|
1715
|
-
maxItems: 5000,
|
|
1716
|
-
maxSizeMB: 500,
|
|
1717
|
-
ttlHours: 24
|
|
1718
|
-
});
|
|
1719
|
-
|
|
1720
|
-
const prompt: LLMPrompt = {
|
|
1721
|
-
model: 'gpt-4-turbo',
|
|
1722
|
-
messages: [
|
|
1723
|
-
{
|
|
1724
|
-
role: 'user',
|
|
1725
|
-
content: 'Explain quantum computing in simple terms'
|
|
1726
|
-
}
|
|
1727
|
-
],
|
|
1728
|
-
temperature: 0.7
|
|
1729
|
-
};
|
|
1730
|
-
|
|
1731
|
-
// Method 1: Manual lookup + store
|
|
1732
|
-
let response = await cache.lookup(prompt);
|
|
1733
|
-
if (!response) {
|
|
1734
|
-
response = await mockLLMCall(prompt);
|
|
1735
|
-
await cache.store(prompt, response);
|
|
1736
|
-
}
|
|
1737
|
-
|
|
1738
|
-
// Method 2: Lazy loading (recommended)
|
|
1739
|
-
const response2 = await cache.fetch(prompt, async () => {
|
|
1740
|
-
return mockLLMCall(prompt);
|
|
1741
|
-
});
|
|
1742
|
-
|
|
1743
|
-
// Monitor performance
|
|
1744
|
-
const stopMonitoring = cache.startMonitoring(30000);
|
|
1745
|
-
|
|
1746
|
-
// Later...
|
|
1747
|
-
// stopMonitoring();
|
|
1748
|
-
}
|
|
1749
|
-
|
|
1750
|
-
async function mockLLMCall(prompt: LLMPrompt): Promise<LLMResponse> {
|
|
1751
|
-
return {
|
|
1752
|
-
id: 'resp_' + Date.now(),
|
|
1753
|
-
content: 'Quantum computing uses quantum bits (qubits)...',
|
|
1754
|
-
model: prompt.model,
|
|
1755
|
-
tokens: { prompt: 25, completion: 150 },
|
|
1756
|
-
timestamp: Date.now()
|
|
1757
|
-
};
|
|
1758
|
-
}
|
|
1759
|
-
```
|
|
1760
|
-
|
|
1761
|
-
### 8.2 Integration with LangChain
|
|
1762
|
-
|
|
1763
|
-
```typescript
|
|
1764
|
-
import { BaseCache } from '@langchain/core/cache';
|
|
1765
|
-
import { Generation } from '@langchain/core/outputs';
|
|
1766
|
-
import { LLMResponseCache } from './llm-cache';
|
|
1767
|
-
|
|
1768
|
-
/**
|
|
1769
|
-
* LangChain-compatible cache using our LRU implementation
|
|
1770
|
-
*/
|
|
1771
|
-
export class LangChainLRUCache extends BaseCache {
|
|
1772
|
-
private cache: LLMResponseCache;
|
|
1773
|
-
|
|
1774
|
-
constructor(cache?: LLMResponseCache) {
|
|
1775
|
-
super();
|
|
1776
|
-
this.cache = cache || new LLMResponseCache();
|
|
1777
|
-
}
|
|
1778
|
-
|
|
1779
|
-
async lookup(prompt: string, llmKey: string): Promise<Generation[] | null> {
|
|
1780
|
-
const cacheKey = `${llmKey}:${prompt}`;
|
|
1781
|
-
const response = await this.cache.lookup({
|
|
1782
|
-
model: llmKey,
|
|
1783
|
-
messages: [{ role: 'user', content: prompt }]
|
|
1784
|
-
});
|
|
1785
|
-
|
|
1786
|
-
if (!response) return null;
|
|
1787
|
-
|
|
1788
|
-
return [{
|
|
1789
|
-
text: response.content,
|
|
1790
|
-
generationInfo: {
|
|
1791
|
-
tokenUsage: response.tokens
|
|
1792
|
-
}
|
|
1793
|
-
}];
|
|
1794
|
-
}
|
|
1795
|
-
|
|
1796
|
-
async update(
|
|
1797
|
-
prompt: string,
|
|
1798
|
-
llmKey: string,
|
|
1799
|
-
outputs: Generation[]
|
|
1800
|
-
): Promise<void> {
|
|
1801
|
-
const text = outputs[0]?.text || '';
|
|
1802
|
-
await this.cache.store(
|
|
1803
|
-
{
|
|
1804
|
-
model: llmKey,
|
|
1805
|
-
messages: [{ role: 'user', content: prompt }]
|
|
1806
|
-
},
|
|
1807
|
-
{
|
|
1808
|
-
id: 'gen_' + Date.now(),
|
|
1809
|
-
content: text,
|
|
1810
|
-
model: llmKey,
|
|
1811
|
-
tokens: { prompt: prompt.length, completion: text.length },
|
|
1812
|
-
timestamp: Date.now()
|
|
1813
|
-
}
|
|
1814
|
-
);
|
|
1815
|
-
}
|
|
1816
|
-
}
|
|
1817
|
-
```
|
|
1818
|
-
|
|
1819
|
-
---
|
|
1820
|
-
|
|
1821
|
-
## 9. Version Recommendations
|
|
1822
|
-
|
|
1823
|
-
### lru-cache Version Strategy
|
|
1824
|
-
|
|
1825
|
-
| Version | Status | Recommendation | Notes |
|
|
1826
|
-
|---------|--------|-----------------|-------|
|
|
1827
|
-
| v6 | Deprecated | Avoid | Different internal structure |
|
|
1828
|
-
| v7-v9 | Stable | OK | Performance improvements |
|
|
1829
|
-
| v10 | **Latest** | **Recommended** | Better TypeScript support, latest performance |
|
|
1830
|
-
| v11+ | Bleeding edge | Consider for new projects | Test thoroughly |
|
|
1831
|
-
|
|
1832
|
-
**Recommended package.json:**
|
|
1833
|
-
|
|
1834
|
-
```json
|
|
1835
|
-
{
|
|
1836
|
-
"dependencies": {
|
|
1837
|
-
"lru-cache": "^10.0.0",
|
|
1838
|
-
"safe-stable-stringify": "^2.4.0"
|
|
1839
|
-
}
|
|
1840
|
-
}
|
|
1841
|
-
```
|
|
1842
|
-
|
|
1843
|
-
### TypeScript Configuration
|
|
1844
|
-
|
|
1845
|
-
```json
|
|
1846
|
-
{
|
|
1847
|
-
"compilerOptions": {
|
|
1848
|
-
"target": "ES2020",
|
|
1849
|
-
"module": "ESNext",
|
|
1850
|
-
"lib": ["ES2020"],
|
|
1851
|
-
"moduleResolution": "node",
|
|
1852
|
-
"declaration": true,
|
|
1853
|
-
"strict": true
|
|
1854
|
-
}
|
|
1855
|
-
}
|
|
1856
|
-
```
|
|
1857
|
-
|
|
1858
|
-
### Node.js Version Requirements
|
|
1859
|
-
|
|
1860
|
-
- **Minimum:** Node.js 14+
|
|
1861
|
-
- **Recommended:** Node.js 18+ (LTS)
|
|
1862
|
-
- **Best:** Node.js 20+ (current LTS, crypto performance optimizations)
|
|
1863
|
-
|
|
1864
|
-
---
|
|
1865
|
-
|
|
1866
|
-
## 10. Key Takeaways
|
|
1867
|
-
|
|
1868
|
-
### Best Practices Summary
|
|
1869
|
-
|
|
1870
|
-
1. **Always use `safe-stable-stringify`** for deterministic cache keys
|
|
1871
|
-
2. **Implement the `fetch()` method** for lazy-load caching (prevents race conditions)
|
|
1872
|
-
3. **Configure at least one limit** (max, maxSize, or ttl) to prevent unbounded growth
|
|
1873
|
-
4. **Use SHA-256 hashing** for large prompts (>100 chars)
|
|
1874
|
-
5. **Implement semantic caching** to match semantically similar prompts (can achieve 60-70% hit rates)
|
|
1875
|
-
6. **Monitor cache metrics** to track hit rate and identify optimization opportunities
|
|
1876
|
-
7. **Version your cache schema** to automatically invalidate old entries
|
|
1877
|
-
8. **Use the `fetch()` method** instead of manual lookup+store
|
|
1878
|
-
9. **Implement TTL** (24 hours recommended for LLM responses)
|
|
1879
|
-
10. **Start with exact caching**, add semantic later if hit rate needs improvement
|
|
1880
|
-
|
|
1881
|
-
### Architecture Recommendations
|
|
1882
|
-
|
|
1883
|
-
```typescript
|
|
1884
|
-
// Development
|
|
1885
|
-
const devCache = new LLMResponseCache({
|
|
1886
|
-
maxItems: 100,
|
|
1887
|
-
maxSizeMB: 50,
|
|
1888
|
-
ttlHours: 1
|
|
1889
|
-
});
|
|
1890
|
-
|
|
1891
|
-
// Production
|
|
1892
|
-
const prodCache = new LLMResponseCache({
|
|
1893
|
-
maxItems: 5000,
|
|
1894
|
-
maxSizeMB: 500,
|
|
1895
|
-
ttlHours: 24
|
|
1896
|
-
});
|
|
1897
|
-
```
|
|
1898
|
-
|
|
1899
|
-
### Performance Expectations
|
|
1900
|
-
|
|
1901
|
-
- **Cache hit lookup:** <0.1ms
|
|
1902
|
-
- **Deterministic stringification:** <1ms for typical prompt
|
|
1903
|
-
- **SHA-256 hashing:** <0.05ms
|
|
1904
|
-
- **Hit rate (exact caching):** 30-40%
|
|
1905
|
-
- **Hit rate (semantic caching):** 60-70%
|
|
1906
|
-
- **Memory efficiency:** ~1-5 MB per 100 cached responses
|
|
1907
|
-
|
|
1908
|
-
---
|
|
1909
|
-
|
|
1910
|
-
## References
|
|
1911
|
-
|
|
1912
|
-
### Web Sources
|
|
1913
|
-
- [lru-cache npm package](https://www.npmjs.com/package/lru-cache)
|
|
1914
|
-
- [safe-stable-stringify GitHub](https://github.com/BridgeAR/safe-stable-stringify)
|
|
1915
|
-
- [fast-json-stable-stringify GitHub](https://github.com/epoberezkin/fast-json-stable-stringify)
|
|
1916
|
-
- [Node.js Crypto Documentation](https://nodejs.org/api/crypto.html)
|
|
1917
|
-
- [LLM Caching Best Practices - Helicone](https://www.helicone.ai/blog/effective-llm-caching)
|
|
1918
|
-
- [Prompt Caching Overview - IBM](https://www.ibm.com/think/topics/prompt-caching)
|
|
1919
|
-
- [Zod Validation Library](https://zod.dev/)
|
|
1920
|
-
|
|
1921
|
-
### Research Papers Referenced
|
|
1922
|
-
- "Semantic Cache for LLMs: Fully integrated with LangChain and llama_index" (GPTCache)
|
|
1923
|
-
- "LLM Prompt Caching: The Hidden Lever for Speed, Cost, and Reliability"
|
|
1924
|
-
|
|
1925
|
-
---
|
|
1926
|
-
|
|
1927
|
-
**Document Version:** 1.0
|
|
1928
|
-
**Last Updated:** 2025-12-08
|
|
1929
|
-
**Author:** Research Team
|