@revealui/ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/LICENSE.commercial +112 -0
- package/README.md +314 -0
- package/dist/a2a/card.d.ts +26 -0
- package/dist/a2a/card.d.ts.map +1 -0
- package/dist/a2a/card.js +173 -0
- package/dist/a2a/handler.d.ts +26 -0
- package/dist/a2a/handler.d.ts.map +1 -0
- package/dist/a2a/handler.js +170 -0
- package/dist/a2a/index.d.ts +10 -0
- package/dist/a2a/index.d.ts.map +1 -0
- package/dist/a2a/index.js +9 -0
- package/dist/a2a/task-store.d.ts +42 -0
- package/dist/a2a/task-store.d.ts.map +1 -0
- package/dist/a2a/task-store.js +99 -0
- package/dist/audit/emitter.d.ts +34 -0
- package/dist/audit/emitter.d.ts.map +1 -0
- package/dist/audit/emitter.js +34 -0
- package/dist/audit/index.d.ts +44 -0
- package/dist/audit/index.d.ts.map +1 -0
- package/dist/audit/index.js +48 -0
- package/dist/audit/observer.d.ts +108 -0
- package/dist/audit/observer.d.ts.map +1 -0
- package/dist/audit/observer.js +271 -0
- package/dist/audit/policy.d.ts +70 -0
- package/dist/audit/policy.d.ts.map +1 -0
- package/dist/audit/policy.js +209 -0
- package/dist/audit/store.d.ts +42 -0
- package/dist/audit/store.d.ts.map +1 -0
- package/dist/audit/store.js +80 -0
- package/dist/audit/types.d.ts +169 -0
- package/dist/audit/types.d.ts.map +1 -0
- package/dist/audit/types.js +80 -0
- package/dist/client/hooks/index.d.ts +22 -0
- package/dist/client/hooks/index.d.ts.map +1 -0
- package/dist/client/hooks/index.js +21 -0
- package/dist/client/hooks/useAgentContext.d.ts +30 -0
- package/dist/client/hooks/useAgentContext.d.ts.map +1 -0
- package/dist/client/hooks/useAgentContext.js +161 -0
- package/dist/client/hooks/useAgentEvents.d.ts +126 -0
- package/dist/client/hooks/useAgentEvents.d.ts.map +1 -0
- package/dist/client/hooks/useAgentEvents.js +232 -0
- package/dist/client/hooks/useAgentStream.d.ts +44 -0
- package/dist/client/hooks/useAgentStream.d.ts.map +1 -0
- package/dist/client/hooks/useAgentStream.js +101 -0
- package/dist/client/hooks/useEpisodicMemory.d.ts +25 -0
- package/dist/client/hooks/useEpisodicMemory.d.ts.map +1 -0
- package/dist/client/hooks/useEpisodicMemory.js +174 -0
- package/dist/client/hooks/useWorkingMemory.d.ts +57 -0
- package/dist/client/hooks/useWorkingMemory.d.ts.map +1 -0
- package/dist/client/hooks/useWorkingMemory.js +276 -0
- package/dist/client/index.d.ts +14 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +13 -0
- package/dist/embeddings/index.d.ts +51 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +73 -0
- package/dist/index.d.ts +83 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +103 -0
- package/dist/inference/context-assembly.d.ts +27 -0
- package/dist/inference/context-assembly.d.ts.map +1 -0
- package/dist/inference/context-assembly.js +81 -0
- package/dist/inference/overflow-compressor.d.ts +17 -0
- package/dist/inference/overflow-compressor.d.ts.map +1 -0
- package/dist/inference/overflow-compressor.js +40 -0
- package/dist/inference/runRag.d.ts +35 -0
- package/dist/inference/runRag.d.ts.map +1 -0
- package/dist/inference/runRag.js +53 -0
- package/dist/ingestion/bm25.d.ts +29 -0
- package/dist/ingestion/bm25.d.ts.map +1 -0
- package/dist/ingestion/bm25.js +161 -0
- package/dist/ingestion/cms-indexer.d.ts +39 -0
- package/dist/ingestion/cms-indexer.d.ts.map +1 -0
- package/dist/ingestion/cms-indexer.js +74 -0
- package/dist/ingestion/file-parsers.d.ts +51 -0
- package/dist/ingestion/file-parsers.d.ts.map +1 -0
- package/dist/ingestion/file-parsers.js +247 -0
- package/dist/ingestion/hybrid-search.d.ts +22 -0
- package/dist/ingestion/hybrid-search.d.ts.map +1 -0
- package/dist/ingestion/hybrid-search.js +63 -0
- package/dist/ingestion/index.d.ts +9 -0
- package/dist/ingestion/index.d.ts.map +1 -0
- package/dist/ingestion/index.js +8 -0
- package/dist/ingestion/pipeline.d.ts +35 -0
- package/dist/ingestion/pipeline.d.ts.map +1 -0
- package/dist/ingestion/pipeline.js +114 -0
- package/dist/ingestion/rag-vector-service.d.ts +34 -0
- package/dist/ingestion/rag-vector-service.d.ts.map +1 -0
- package/dist/ingestion/rag-vector-service.js +98 -0
- package/dist/ingestion/reranker.d.ts +10 -0
- package/dist/ingestion/reranker.d.ts.map +1 -0
- package/dist/ingestion/reranker.js +41 -0
- package/dist/ingestion/text-splitter.d.ts +25 -0
- package/dist/ingestion/text-splitter.d.ts.map +1 -0
- package/dist/ingestion/text-splitter.js +119 -0
- package/dist/llm/cache-utils.d.ts +146 -0
- package/dist/llm/cache-utils.d.ts.map +1 -0
- package/dist/llm/cache-utils.js +204 -0
- package/dist/llm/client.d.ts +134 -0
- package/dist/llm/client.d.ts.map +1 -0
- package/dist/llm/client.js +497 -0
- package/dist/llm/key-validator.d.ts +25 -0
- package/dist/llm/key-validator.d.ts.map +1 -0
- package/dist/llm/key-validator.js +101 -0
- package/dist/llm/provider-health.d.ts +40 -0
- package/dist/llm/provider-health.d.ts.map +1 -0
- package/dist/llm/provider-health.js +97 -0
- package/dist/llm/providers/anthropic.d.ts +31 -0
- package/dist/llm/providers/anthropic.d.ts.map +1 -0
- package/dist/llm/providers/anthropic.js +248 -0
- package/dist/llm/providers/base.d.ts +111 -0
- package/dist/llm/providers/base.d.ts.map +1 -0
- package/dist/llm/providers/base.js +6 -0
- package/dist/llm/providers/groq.d.ts +23 -0
- package/dist/llm/providers/groq.d.ts.map +1 -0
- package/dist/llm/providers/groq.js +27 -0
- package/dist/llm/providers/ollama.d.ts +27 -0
- package/dist/llm/providers/ollama.d.ts.map +1 -0
- package/dist/llm/providers/ollama.js +48 -0
- package/dist/llm/providers/openai.d.ts +19 -0
- package/dist/llm/providers/openai.d.ts.map +1 -0
- package/dist/llm/providers/openai.js +245 -0
- package/dist/llm/providers/vultr.d.ts +18 -0
- package/dist/llm/providers/vultr.d.ts.map +1 -0
- package/dist/llm/providers/vultr.js +168 -0
- package/dist/llm/response-cache.d.ts +166 -0
- package/dist/llm/response-cache.d.ts.map +1 -0
- package/dist/llm/response-cache.js +233 -0
- package/dist/llm/semantic-cache.d.ts +179 -0
- package/dist/llm/semantic-cache.d.ts.map +1 -0
- package/dist/llm/semantic-cache.js +306 -0
- package/dist/llm/server.d.ts +14 -0
- package/dist/llm/server.d.ts.map +1 -0
- package/dist/llm/server.js +15 -0
- package/dist/llm/token-counter.d.ts +48 -0
- package/dist/llm/token-counter.d.ts.map +1 -0
- package/dist/llm/token-counter.js +77 -0
- package/dist/llm/workspace-provider-config.d.ts +38 -0
- package/dist/llm/workspace-provider-config.d.ts.map +1 -0
- package/dist/llm/workspace-provider-config.js +47 -0
- package/dist/memory/agent/context-manager.d.ts +148 -0
- package/dist/memory/agent/context-manager.d.ts.map +1 -0
- package/dist/memory/agent/context-manager.js +284 -0
- package/dist/memory/agent/index.d.ts +7 -0
- package/dist/memory/agent/index.d.ts.map +1 -0
- package/dist/memory/agent/index.js +6 -0
- package/dist/memory/crdt/index.d.ts +13 -0
- package/dist/memory/crdt/index.d.ts.map +1 -0
- package/dist/memory/crdt/index.js +12 -0
- package/dist/memory/crdt/lww-register.d.ts +108 -0
- package/dist/memory/crdt/lww-register.d.ts.map +1 -0
- package/dist/memory/crdt/lww-register.js +169 -0
- package/dist/memory/crdt/or-set.d.ts +141 -0
- package/dist/memory/crdt/or-set.d.ts.map +1 -0
- package/dist/memory/crdt/or-set.js +291 -0
- package/dist/memory/crdt/pn-counter.d.ts +116 -0
- package/dist/memory/crdt/pn-counter.d.ts.map +1 -0
- package/dist/memory/crdt/pn-counter.js +174 -0
- package/dist/memory/crdt/vector-clock.d.ts +115 -0
- package/dist/memory/crdt/vector-clock.d.ts.map +1 -0
- package/dist/memory/crdt/vector-clock.js +179 -0
- package/dist/memory/errors/index.d.ts +56 -0
- package/dist/memory/errors/index.d.ts.map +1 -0
- package/dist/memory/errors/index.js +85 -0
- package/dist/memory/index.d.ts +21 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +20 -0
- package/dist/memory/persistence/crdt-persistence.d.ts +85 -0
- package/dist/memory/persistence/crdt-persistence.d.ts.map +1 -0
- package/dist/memory/persistence/crdt-persistence.js +204 -0
- package/dist/memory/persistence/index.d.ts +7 -0
- package/dist/memory/persistence/index.d.ts.map +1 -0
- package/dist/memory/persistence/index.js +6 -0
- package/dist/memory/preferences/index.d.ts +7 -0
- package/dist/memory/preferences/index.d.ts.map +1 -0
- package/dist/memory/preferences/index.js +6 -0
- package/dist/memory/preferences/user-preferences-manager.d.ts +133 -0
- package/dist/memory/preferences/user-preferences-manager.d.ts.map +1 -0
- package/dist/memory/preferences/user-preferences-manager.js +342 -0
- package/dist/memory/services/index.d.ts +8 -0
- package/dist/memory/services/index.d.ts.map +1 -0
- package/dist/memory/services/index.js +6 -0
- package/dist/memory/services/node-id-service.d.ts +75 -0
- package/dist/memory/services/node-id-service.d.ts.map +1 -0
- package/dist/memory/services/node-id-service.js +190 -0
- package/dist/memory/stores/episodic-memory.d.ts +182 -0
- package/dist/memory/stores/episodic-memory.d.ts.map +1 -0
- package/dist/memory/stores/episodic-memory.js +378 -0
- package/dist/memory/stores/index.d.ts +16 -0
- package/dist/memory/stores/index.d.ts.map +1 -0
- package/dist/memory/stores/index.js +15 -0
- package/dist/memory/stores/procedural-memory.d.ts +89 -0
- package/dist/memory/stores/procedural-memory.d.ts.map +1 -0
- package/dist/memory/stores/procedural-memory.js +152 -0
- package/dist/memory/stores/semantic-memory.d.ts +92 -0
- package/dist/memory/stores/semantic-memory.d.ts.map +1 -0
- package/dist/memory/stores/semantic-memory.js +155 -0
- package/dist/memory/stores/working-memory.d.ts +225 -0
- package/dist/memory/stores/working-memory.d.ts.map +1 -0
- package/dist/memory/stores/working-memory.js +336 -0
- package/dist/memory/utils/deep-clone.d.ts +10 -0
- package/dist/memory/utils/deep-clone.d.ts.map +1 -0
- package/dist/memory/utils/deep-clone.js +9 -0
- package/dist/memory/utils/index.d.ts +8 -0
- package/dist/memory/utils/index.d.ts.map +1 -0
- package/dist/memory/utils/index.js +7 -0
- package/dist/memory/utils/logger.d.ts +21 -0
- package/dist/memory/utils/logger.d.ts.map +1 -0
- package/dist/memory/utils/logger.js +62 -0
- package/dist/memory/utils/sql-helpers.d.ts +97 -0
- package/dist/memory/utils/sql-helpers.d.ts.map +1 -0
- package/dist/memory/utils/sql-helpers.js +214 -0
- package/dist/memory/utils/validation.d.ts +62 -0
- package/dist/memory/utils/validation.d.ts.map +1 -0
- package/dist/memory/utils/validation.js +244 -0
- package/dist/memory/vector/index.d.ts +12 -0
- package/dist/memory/vector/index.d.ts.map +1 -0
- package/dist/memory/vector/index.js +14 -0
- package/dist/memory/vector/vector-memory-service.d.ts +88 -0
- package/dist/memory/vector/vector-memory-service.d.ts.map +1 -0
- package/dist/memory/vector/vector-memory-service.js +335 -0
- package/dist/observability/logger.d.ts +79 -0
- package/dist/observability/logger.d.ts.map +1 -0
- package/dist/observability/logger.js +165 -0
- package/dist/observability/metrics.d.ts +43 -0
- package/dist/observability/metrics.d.ts.map +1 -0
- package/dist/observability/metrics.js +197 -0
- package/dist/observability/query.d.ts +150 -0
- package/dist/observability/query.d.ts.map +1 -0
- package/dist/observability/query.js +339 -0
- package/dist/observability/types.d.ts +140 -0
- package/dist/observability/types.d.ts.map +1 -0
- package/dist/observability/types.js +6 -0
- package/dist/orchestration/agent.d.ts +98 -0
- package/dist/orchestration/agent.d.ts.map +1 -0
- package/dist/orchestration/agent.js +6 -0
- package/dist/orchestration/defaults.d.ts +21 -0
- package/dist/orchestration/defaults.d.ts.map +1 -0
- package/dist/orchestration/defaults.js +22 -0
- package/dist/orchestration/memory-integration.d.ts +58 -0
- package/dist/orchestration/memory-integration.d.ts.map +1 -0
- package/dist/orchestration/memory-integration.js +130 -0
- package/dist/orchestration/orchestrator.d.ts +67 -0
- package/dist/orchestration/orchestrator.d.ts.map +1 -0
- package/dist/orchestration/orchestrator.js +174 -0
- package/dist/orchestration/runtime.d.ts +82 -0
- package/dist/orchestration/runtime.d.ts.map +1 -0
- package/dist/orchestration/runtime.js +251 -0
- package/dist/orchestration/streaming-runtime.d.ts +36 -0
- package/dist/orchestration/streaming-runtime.d.ts.map +1 -0
- package/dist/orchestration/streaming-runtime.js +175 -0
- package/dist/orchestration/ticket-agent.d.ts +70 -0
- package/dist/orchestration/ticket-agent.d.ts.map +1 -0
- package/dist/orchestration/ticket-agent.js +146 -0
- package/dist/skills/activation/index.d.ts +7 -0
- package/dist/skills/activation/index.d.ts.map +1 -0
- package/dist/skills/activation/index.js +6 -0
- package/dist/skills/activation/skill-activator.d.ts +68 -0
- package/dist/skills/activation/skill-activator.d.ts.map +1 -0
- package/dist/skills/activation/skill-activator.js +224 -0
- package/dist/skills/catalog/catalog-search.d.ts +55 -0
- package/dist/skills/catalog/catalog-search.d.ts.map +1 -0
- package/dist/skills/catalog/catalog-search.js +111 -0
- package/dist/skills/catalog/catalog-types.d.ts +81 -0
- package/dist/skills/catalog/catalog-types.d.ts.map +1 -0
- package/dist/skills/catalog/catalog-types.js +66 -0
- package/dist/skills/catalog/index.d.ts +9 -0
- package/dist/skills/catalog/index.d.ts.map +1 -0
- package/dist/skills/catalog/index.js +7 -0
- package/dist/skills/catalog/vercel-catalog.d.ts +42 -0
- package/dist/skills/catalog/vercel-catalog.d.ts.map +1 -0
- package/dist/skills/catalog/vercel-catalog.js +189 -0
- package/dist/skills/compat/index.d.ts +9 -0
- package/dist/skills/compat/index.d.ts.map +1 -0
- package/dist/skills/compat/index.js +8 -0
- package/dist/skills/compat/skill-enhancer.d.ts +37 -0
- package/dist/skills/compat/skill-enhancer.d.ts.map +1 -0
- package/dist/skills/compat/skill-enhancer.js +76 -0
- package/dist/skills/compat/tool-mapper.d.ts +61 -0
- package/dist/skills/compat/tool-mapper.d.ts.map +1 -0
- package/dist/skills/compat/tool-mapper.js +168 -0
- package/dist/skills/compat/vercel-compat.d.ts +33 -0
- package/dist/skills/compat/vercel-compat.d.ts.map +1 -0
- package/dist/skills/compat/vercel-compat.js +132 -0
- package/dist/skills/index.d.ts +40 -0
- package/dist/skills/index.d.ts.map +1 -0
- package/dist/skills/index.js +47 -0
- package/dist/skills/integration/agent-skill-provider.d.ts +94 -0
- package/dist/skills/integration/agent-skill-provider.d.ts.map +1 -0
- package/dist/skills/integration/agent-skill-provider.js +161 -0
- package/dist/skills/integration/index.d.ts +7 -0
- package/dist/skills/integration/index.d.ts.map +1 -0
- package/dist/skills/integration/index.js +6 -0
- package/dist/skills/loader/github-loader.d.ts +61 -0
- package/dist/skills/loader/github-loader.d.ts.map +1 -0
- package/dist/skills/loader/github-loader.js +176 -0
- package/dist/skills/loader/index.d.ts +10 -0
- package/dist/skills/loader/index.d.ts.map +1 -0
- package/dist/skills/loader/index.js +9 -0
- package/dist/skills/loader/local-loader.d.ts +56 -0
- package/dist/skills/loader/local-loader.d.ts.map +1 -0
- package/dist/skills/loader/local-loader.js +186 -0
- package/dist/skills/loader/vercel-loader.d.ts +64 -0
- package/dist/skills/loader/vercel-loader.d.ts.map +1 -0
- package/dist/skills/loader/vercel-loader.js +313 -0
- package/dist/skills/loader/vercel-types.d.ts +64 -0
- package/dist/skills/loader/vercel-types.d.ts.map +1 -0
- package/dist/skills/loader/vercel-types.js +55 -0
- package/dist/skills/parser/index.d.ts +7 -0
- package/dist/skills/parser/index.d.ts.map +1 -0
- package/dist/skills/parser/index.js +6 -0
- package/dist/skills/parser/skill-md-parser.d.ts +64 -0
- package/dist/skills/parser/skill-md-parser.d.ts.map +1 -0
- package/dist/skills/parser/skill-md-parser.js +242 -0
- package/dist/skills/registry/index.d.ts +7 -0
- package/dist/skills/registry/index.d.ts.map +1 -0
- package/dist/skills/registry/index.js +6 -0
- package/dist/skills/registry/skill-registry.d.ts +133 -0
- package/dist/skills/registry/skill-registry.d.ts.map +1 -0
- package/dist/skills/registry/skill-registry.js +373 -0
- package/dist/skills/types.d.ts +216 -0
- package/dist/skills/types.d.ts.map +1 -0
- package/dist/skills/types.js +176 -0
- package/dist/templates/agent-spec.d.ts +138 -0
- package/dist/templates/agent-spec.d.ts.map +1 -0
- package/dist/templates/agent-spec.js +138 -0
- package/dist/templates/index.d.ts +56 -0
- package/dist/templates/index.d.ts.map +1 -0
- package/dist/templates/index.js +58 -0
- package/dist/templates/prompt-spec.d.ts +140 -0
- package/dist/templates/prompt-spec.d.ts.map +1 -0
- package/dist/templates/prompt-spec.js +210 -0
- package/dist/templates/skill-spec.d.ts +106 -0
- package/dist/templates/skill-spec.d.ts.map +1 -0
- package/dist/templates/skill-spec.js +119 -0
- package/dist/tools/base.d.ts +74 -0
- package/dist/tools/base.d.ts.map +1 -0
- package/dist/tools/base.js +6 -0
- package/dist/tools/cms/collection-tools.d.ts +36 -0
- package/dist/tools/cms/collection-tools.d.ts.map +1 -0
- package/dist/tools/cms/collection-tools.js +178 -0
- package/dist/tools/cms/factory.d.ts +89 -0
- package/dist/tools/cms/factory.d.ts.map +1 -0
- package/dist/tools/cms/factory.js +462 -0
- package/dist/tools/cms/global-tools.d.ts +21 -0
- package/dist/tools/cms/global-tools.d.ts.map +1 -0
- package/dist/tools/cms/global-tools.js +92 -0
- package/dist/tools/cms/index.d.ts +11 -0
- package/dist/tools/cms/index.d.ts.map +1 -0
- package/dist/tools/cms/index.js +11 -0
- package/dist/tools/cms/media-tools.d.ts +31 -0
- package/dist/tools/cms/media-tools.d.ts.map +1 -0
- package/dist/tools/cms/media-tools.js +140 -0
- package/dist/tools/cms/user-tools.d.ts +31 -0
- package/dist/tools/cms/user-tools.d.ts.map +1 -0
- package/dist/tools/cms/user-tools.js +135 -0
- package/dist/tools/deduplicator.d.ts +19 -0
- package/dist/tools/deduplicator.d.ts.map +1 -0
- package/dist/tools/deduplicator.js +53 -0
- package/dist/tools/document-summarizer.d.ts +11 -0
- package/dist/tools/document-summarizer.d.ts.map +1 -0
- package/dist/tools/document-summarizer.js +82 -0
- package/dist/tools/mcp-adapter.d.ts +66 -0
- package/dist/tools/mcp-adapter.d.ts.map +1 -0
- package/dist/tools/mcp-adapter.js +152 -0
- package/dist/tools/memory/index.d.ts +3 -0
- package/dist/tools/memory/index.d.ts.map +1 -0
- package/dist/tools/memory/index.js +1 -0
- package/dist/tools/memory/store-memory.d.ts +39 -0
- package/dist/tools/memory/store-memory.d.ts.map +1 -0
- package/dist/tools/memory/store-memory.js +94 -0
- package/dist/tools/registry.d.ts +14 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +48 -0
- package/dist/tools/ticket-tools.d.ts +31 -0
- package/dist/tools/ticket-tools.d.ts.map +1 -0
- package/dist/tools/ticket-tools.js +74 -0
- package/dist/tools/web/duck-duck-go.d.ts +52 -0
- package/dist/tools/web/duck-duck-go.d.ts.map +1 -0
- package/dist/tools/web/duck-duck-go.js +202 -0
- package/dist/tools/web/exa.d.ts +34 -0
- package/dist/tools/web/exa.d.ts.map +1 -0
- package/dist/tools/web/exa.js +80 -0
- package/dist/tools/web/index.d.ts +6 -0
- package/dist/tools/web/index.d.ts.map +1 -0
- package/dist/tools/web/index.js +4 -0
- package/dist/tools/web/scraper.d.ts +9 -0
- package/dist/tools/web/scraper.d.ts.map +1 -0
- package/dist/tools/web/scraper.js +118 -0
- package/dist/tools/web/tavily.d.ts +32 -0
- package/dist/tools/web/tavily.d.ts.map +1 -0
- package/dist/tools/web/tavily.js +73 -0
- package/dist/tools/web/types.d.ts +31 -0
- package/dist/tools/web/types.d.ts.map +1 -0
- package/dist/tools/web/types.js +9 -0
- package/package.json +143 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * LLM-based Re-ranker
 *
 * Scores chunks 0–10 for relevance to the query using the configured LLMClient.
 * No cross-encoder model required — uses the existing LLM.
 */
const RERANK_SYSTEM_PROMPT = `You are a relevance judge. Given a query and a list of passages, score each passage for relevance to the query on a scale of 0 to 10 (0=not relevant, 10=perfectly relevant). Return ONLY a JSON array of numbers, one score per passage, in the same order as the input. Example: [8, 3, 10, 1]`;

/**
 * Re-rank retrieval results by LLM-judged relevance to the query.
 *
 * @param {string} query - The user query to judge relevance against.
 * @param {Array<{chunk: {content: string}}>} chunks - Retrieval results; only
 *   the first 400 characters of each chunk are sent to the judge.
 * @param {{chat(messages): Promise<{content: string}>}} llmClient - Client used
 *   to score passages.
 * @param {number} [topK] - Max results to return; defaults to all chunks.
 * @returns {Promise<Array>} Chunks sorted by descending score, truncated to
 *   topK. On any failure (LLM error, unparseable or mismatched score array)
 *   the original order is returned, truncated to topK.
 */
export async function rerankChunks(query, chunks, llmClient, topK) {
    if (chunks.length === 0)
        return [];
    const passageList = chunks
        .map((r, i) => `[${i + 1}] ${r.chunk.content.slice(0, 400)}`)
        .join('\n\n');
    const userMessage = `Query: ${query}\n\nPassages:\n${passageList}\n\nReturn a JSON array of ${chunks.length} relevance scores (0-10).`;
    try {
        const response = await llmClient.chat([
            { role: 'system', content: RERANK_SYSTEM_PROMPT },
            { role: 'user', content: userMessage },
        ]);
        // Extract the first JSON array of numbers from the response.
        // The class includes '.' so fractional scores (e.g. [7.5, 2]) — which
        // models frequently return despite the 0-10 instruction — still parse.
        const match = response.content.match(/\[[\d\s.,]+\]/);
        if (!match)
            return chunks.slice(0, topK);
        const scores = JSON.parse(match[0]);
        if (!Array.isArray(scores) || scores.length !== chunks.length) {
            return chunks.slice(0, topK);
        }
        // Non-numeric or non-finite entries fall back to 0 rather than poisoning the sort.
        const scored = chunks.map((chunk, i) => ({
            chunk,
            score: typeof scores[i] === 'number' && Number.isFinite(scores[i]) ? scores[i] : 0,
        }));
        return scored
            .sort((a, b) => b.score - a.score)
            .slice(0, topK ?? chunks.length)
            .map((s) => s.chunk);
    }
    catch {
        // On LLM failure, return original order
        return chunks.slice(0, topK);
    }
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Recursive Character Text Splitter
 *
 * Splits text into chunks using a hierarchy of separators:
 * double-newline → single-newline → period+space → space
 * Token count is estimated as Math.ceil(length / 4) — sufficient for chunking.
 */
/** One chunk produced by the splitter. */
export interface Chunk {
    /** Chunk text; may be prefixed with overlap carried from the previous chunk. */
    content: string;
    /** Estimated token count: Math.ceil(content.length / 4). */
    tokenCount: number;
    /** Index of the underlying raw chunk in the split sequence (gaps possible when empty raw chunks are skipped). */
    index: number;
    /** Caller-supplied metadata; present only when a non-empty object was given. */
    metadata?: Record<string, unknown>;
}
/** Splitter options; sizes are expressed in estimated tokens (1 token ≈ 4 chars). */
export interface SplitOptions {
    /** Target chunk size in tokens (default 512). */
    chunkSize?: number;
    /** Overlap pulled from the tail of the previous chunk, in tokens (default 64). */
    overlap?: number;
    /** Metadata attached to every emitted chunk. */
    metadata?: Record<string, unknown>;
}
export declare class RecursiveCharacterSplitter {
    private chunkSize;
    private overlap;
    /** Per-instance defaults; any option omitted falls back to 512/64. */
    constructor(options?: SplitOptions);
    /**
     * Split `text` into Chunk records; per-call options override the instance
     * defaults. Returns [] for empty or whitespace-only input.
     */
    split(text: string, options?: SplitOptions): Chunk[];
}
//# sourceMappingURL=text-splitter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text-splitter.d.ts","sourceRoot":"","sources":["../../src/ingestion/text-splitter.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,WAAW,KAAK;IACpB,OAAO,EAAE,MAAM,CAAA;IACf,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,YAAY;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AA4ED,qBAAa,0BAA0B;IACrC,OAAO,CAAC,SAAS,CAAQ;IACzB,OAAO,CAAC,OAAO,CAAQ;gBAEX,OAAO,GAAE,YAAiB;IAKtC,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,KAAK,EAAE;CAoCzD"}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recursive Character Text Splitter
|
|
3
|
+
*
|
|
4
|
+
* Splits text into chunks using a hierarchy of separators:
|
|
5
|
+
* double-newline → single-newline → period+space → space
|
|
6
|
+
* Token count is estimated as Math.ceil(length / 4) — sufficient for chunking.
|
|
7
|
+
*/
|
|
8
|
+
// Separator hierarchy, coarsest first: paragraph, line, sentence, word.
const SEPARATORS = ['\n\n', '\n', '. ', ' '];

// Rough token estimate: ~4 characters per token.
function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    return Math.ceil(text.length / CHARS_PER_TOKEN);
}

// Split `text` on `separator`, re-attaching the separator to the end of every
// piece except the last (keeps context). A trailing empty piece — produced
// when the text ends with the separator — is dropped.
function splitOnSeparator(text, separator) {
    if (!text.includes(separator)) {
        return [text];
    }
    const pieces = text.split(separator);
    const lastIndex = pieces.length - 1;
    const segments = [];
    pieces.forEach((piece, idx) => {
        if (idx < lastIndex) {
            segments.push(piece + separator);
        }
        else if (piece.length > 0) {
            segments.push(piece);
        }
    });
    return segments;
}

// Greedily pack consecutive segments into buffers of at most `chunkSize`
// characters; each emitted chunk has trailing whitespace trimmed.
function mergeSegments(segments, chunkSize) {
    const merged = [];
    let buffer = '';
    for (const segment of segments) {
        if (buffer.length + segment.length > chunkSize) {
            if (buffer.length > 0) {
                merged.push(buffer.trimEnd());
            }
            // A single oversized segment is carried as-is; recursion splits it later.
            buffer = segment;
        }
        else {
            buffer += segment;
        }
    }
    if (buffer.trim().length > 0) {
        merged.push(buffer.trimEnd());
    }
    return merged;
}

// Recursively split `text` into pieces of at most `chunkSize` characters,
// walking down the SEPARATORS hierarchy; hard-splits at fixed offsets once
// no separator remains.
function recursiveSplit(text, chunkSize, sepIndex = 0) {
    if (text.length <= chunkSize) {
        return [text];
    }
    const separator = SEPARATORS[sepIndex];
    if (separator === undefined) {
        // Out of separators — slice at fixed chunkSize boundaries.
        const pieces = [];
        for (let start = 0; start < text.length; start += chunkSize) {
            pieces.push(text.slice(start, start + chunkSize));
        }
        return pieces;
    }
    const segments = splitOnSeparator(text, separator);
    if (segments.length <= 1) {
        // This separator did not divide the text; try the next, finer one.
        return recursiveSplit(text, chunkSize, sepIndex + 1);
    }
    const merged = mergeSegments(segments, chunkSize);
    // Any merged chunk still over budget is split again with finer separators.
    return merged.flatMap((chunk) =>
        chunk.length > chunkSize ? recursiveSplit(chunk, chunkSize, sepIndex + 1) : [chunk]);
}
|
|
80
|
+
/**
 * Recursive character splitter with configurable chunk size and overlap.
 * Both options are given in estimated tokens; character budgets are derived
 * as tokens * 4 to match estimateTokens().
 */
export class RecursiveCharacterSplitter {
    chunkSize;
    overlap;
    /**
     * @param {SplitOptions} [options] - chunkSize defaults to 512 tokens,
     *   overlap to 64 tokens.
     */
    constructor(options = {}) {
        this.chunkSize = options.chunkSize ?? 512;
        this.overlap = options.overlap ?? 64;
    }
    /**
     * Split `text` into Chunk records. Per-call options override the instance
     * defaults. Empty or whitespace-only input yields [].
     */
    split(text, options = {}) {
        const chunkSize = options.chunkSize ?? this.chunkSize;
        const overlap = options.overlap ?? this.overlap;
        const metadata = options.metadata ?? {};
        if (!text || text.trim().length === 0) {
            return [];
        }
        // recursiveSplit works in characters, so scale the token budget by 4.
        const rawChunks = recursiveSplit(text.trim(), chunkSize * 4); // chars, not tokens
        const hasMetadata = Object.keys(metadata).length > 0;
        const chunks = [];
        for (let i = 0; i < rawChunks.length; i++) {
            const raw = rawChunks[i];
            if (!raw) {
                continue;
            }
            let content = raw.trim();
            // Prefix the tail of the previous raw chunk to give overlap context.
            if (i > 0 && overlap > 0) {
                const previous = rawChunks[i - 1];
                if (previous) {
                    const tail = previous.slice(Math.max(0, previous.length - overlap * 4));
                    content = `${tail.trim()} ${content}`;
                }
            }
            if (content.length === 0) {
                continue;
            }
            chunks.push({
                content,
                tokenCount: estimateTokens(content),
                index: i,
                metadata: hasMetadata ? metadata : undefined,
            });
        }
        return chunks;
    }
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
 * Prompt Caching Utilities
 *
 * Helper functions for optimizing Anthropic prompt caching usage.
 * Cache hits provide up to 90% cost reduction on input tokens.
 *
 * Cache TTL: 5 minutes
 * Minimum cacheable content: ~1024 tokens (~300 words)
 *
 * Best practices:
 * - Cache stable content that repeats across requests
 * - Place cached content at message boundaries
 * - Cache system prompts, tools, and large context documents
 * - Order matters: place most stable content first
 *
 * @see https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
 */
import type { Message } from './providers/base.js';
/**
 * Mark a message for caching
 * Use this for system prompts, tool definitions, or large context that repeats
 */
export declare function withCache(message: Message): Message;
/**
 * Create a cacheable system prompt message
 *
 * @example
 * ```ts
 * const systemPrompt = cacheableSystemPrompt(
 *   'You are a helpful AI assistant with expertise in TypeScript and React.'
 * )
 * ```
 */
export declare function cacheableSystemPrompt(content: string): Message;
/**
 * Calculate potential cost savings from caching
 *
 * @param inputTokens - Total input tokens
 * @param cacheHitRate - Percentage of requests that hit cache (0-1)
 * @param cachedTokenPercentage - Percentage of input that's cached (0-1)
 * @returns Estimated cost reduction percentage
 *
 * @example
 * ```ts
 * // If 50% of requests hit cache and 70% of input is cached content
 * const savings = estimateCacheSavings(10000, 0.5, 0.7)
 * console.log(`${savings}% cost reduction`) // ~31.5%
 * ```
 */
export declare function estimateCacheSavings(inputTokens: number, cacheHitRate: number, cachedTokenPercentage: number): number;
/**
 * Format cache statistics from response
 *
 * Returns null when the usage object carries no cache counters.
 *
 * @example
 * ```ts
 * const response = await client.chat(messages, { enableCache: true })
 * const stats = formatCacheStats(response.usage)
 * console.log(stats)
 * // "Cache: 45% read (2,500 tokens), 10% created (500 tokens)"
 * ```
 */
export declare function formatCacheStats(usage: {
    promptTokens: number;
    cacheCreationTokens?: number;
    cacheReadTokens?: number;
}): string | null;
/**
 * Check if caching is beneficial for the given content
 * Anthropic recommends caching content >1024 tokens
 */
export declare function shouldCache(content: string, minTokens?: number): boolean;
/**
 * Create a conversation with optimal caching structure
 *
 * @example
 * ```ts
 * const conversation = createCachedConversation({
 *   systemPrompt: 'You are a helpful assistant...',
 *   contextDocs: ['# Documentation\n...'], // Large context documents
 *   messages: [
 *     { role: 'user', content: 'What is TypeScript?' },
 *   ],
 * })
 *
 * const response = await client.chat(conversation, { enableCache: true })
 * ```
 */
export declare function createCachedConversation(config: {
    systemPrompt?: string;
    contextDocs?: string[];
    messages: Message[];
}): Message[];
/**
 * Pricing information for Anthropic Claude models (as of 2024)
 * Prices are per million tokens
 */
export declare const ANTHROPIC_PRICING: {
    readonly 'claude-3-5-sonnet-20241022': {
        readonly input: 3;
        readonly output: 15;
        readonly cacheWrite: 3.75;
        readonly cacheRead: 0.3;
    };
    readonly 'claude-3-5-haiku-20241022': {
        readonly input: 1;
        readonly output: 5;
        readonly cacheWrite: 1.25;
        readonly cacheRead: 0.1;
    };
    readonly 'claude-3-opus-20240229': {
        readonly input: 15;
        readonly output: 75;
        readonly cacheWrite: 18.75;
        readonly cacheRead: 1.5;
    };
};
/**
 * Calculate actual cost of a request with caching
 *
 * @example
 * ```ts
 * const cost = calculateCacheCost({
 *   model: 'claude-3-5-sonnet-20241022',
 *   promptTokens: 10000,
 *   completionTokens: 500,
 *   cacheCreationTokens: 3000,
 *   cacheReadTokens: 5000,
 * })
 *
 * console.log(`Request cost: $${cost.total.toFixed(4)}`)
 * console.log(`Savings vs no cache: $${cost.savings.toFixed(4)}`)
 * ```
 */
export declare function calculateCacheCost(usage: {
    model: keyof typeof ANTHROPIC_PRICING;
    promptTokens: number;
    completionTokens: number;
    cacheCreationTokens?: number;
    cacheReadTokens?: number;
}): {
    total: number;
    breakdown: Record<string, number>;
    savings: number;
};
//# sourceMappingURL=cache-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache-utils.d.ts","sourceRoot":"","sources":["../../src/llm/cache-utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAA;AAElD;;;GAGG;AACH,wBAAgB,SAAS,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAKnD;AAED;;;;;;;;;GASG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAK9D;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,oBAAoB,CAClC,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM,EACpB,qBAAqB,EAAE,MAAM,GAC5B,MAAM,CAeR;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE;IACtC,YAAY,EAAE,MAAM,CAAA;IACpB,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAA;CACzB,GAAG,MAAM,GAAG,IAAI,CAqBhB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,SAAO,GAAG,OAAO,CAItE;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE;IAC/C,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,QAAQ,EAAE,OAAO,EAAE,CAAA;CACpB,GAAG,OAAO,EAAE,CAwBZ;AAED;;;GAGG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;CAmBpB,CAAA;AAEV;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE;IACxC,KAAK,EAAE,MAAM,OAAO,iBAAiB,CAAA;IACrC,YAAY,EAAE,MAAM,CAAA;IACpB,gBAAgB,EAAE,MAAM,CAAA;IACxB,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAA;CACzB,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAwBxE"}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt Caching Utilities
|
|
3
|
+
*
|
|
4
|
+
* Helper functions for optimizing Anthropic prompt caching usage.
|
|
5
|
+
* Cache hits provide up to 90% cost reduction on input tokens.
|
|
6
|
+
*
|
|
7
|
+
* Cache TTL: 5 minutes
|
|
8
|
+
 * Minimum cacheable content: ~1024 tokens (roughly 700-800 words)
|
|
9
|
+
*
|
|
10
|
+
* Best practices:
|
|
11
|
+
* - Cache stable content that repeats across requests
|
|
12
|
+
* - Place cached content at message boundaries
|
|
13
|
+
* - Cache system prompts, tools, and large context documents
|
|
14
|
+
* - Order matters: place most stable content first
|
|
15
|
+
*
|
|
16
|
+
* @see https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Mark a message for caching.
 * Use this for system prompts, tool definitions, or large context that repeats.
 *
 * @param {object} message - Chat message to annotate.
 * @returns {object} A shallow copy of `message` with `cacheControl: { type: 'ephemeral' }` set.
 */
export function withCache(message) {
  const cached = { ...message };
  cached.cacheControl = { type: 'ephemeral' };
  return cached;
}
|
|
28
|
+
/**
 * Create a cacheable system prompt message.
 *
 * @example
 * ```ts
 * const systemPrompt = cacheableSystemPrompt(
 *   'You are a helpful AI assistant with expertise in TypeScript and React.'
 * )
 * ```
 *
 * @param {string} content - System prompt text.
 * @returns {object} A system-role message carrying the ephemeral cache marker.
 */
export function cacheableSystemPrompt(content) {
  // Same shape withCache() produces, built directly.
  return {
    role: 'system',
    content,
    cacheControl: { type: 'ephemeral' },
  };
}
|
|
44
|
+
/**
 * Calculate potential cost savings from caching.
 *
 * @param inputTokens - Total input tokens
 * @param cacheHitRate - Percentage of requests that hit cache (0-1)
 * @param cachedTokenPercentage - Percentage of input that's cached (0-1)
 * @returns Estimated cost reduction percentage (0 when `inputTokens` is not positive)
 *
 * @example
 * ```ts
 * // If 50% of requests hit cache and 70% of input is cached content
 * const savings = estimateCacheSavings(10000, 0.5, 0.7)
 * console.log(`${savings}% cost reduction`) // ~31.5%
 * ```
 */
export function estimateCacheSavings(inputTokens, cacheHitRate, cachedTokenPercentage) {
  // Guard: with no input tokens the ratio below is 0/0 → NaN; report 0% instead.
  if (inputTokens <= 0) {
    return 0;
  }
  const cachedTokens = inputTokens * cachedTokenPercentage;
  const uncachedTokens = inputTokens - cachedTokens;
  // Cache creation cost: full price for every input token.
  const firstRequestCost = inputTokens;
  // Cache hit cost: 10% for cached tokens + full price for uncached tokens.
  const cachedRequestCost = cachedTokens * 0.1 + uncachedTokens;
  // Average cost weighted by the hit rate.
  const avgCost = firstRequestCost * (1 - cacheHitRate) + cachedRequestCost * cacheHitRate;
  // Savings expressed as a percentage of the uncached cost.
  return ((inputTokens - avgCost) / inputTokens) * 100;
}
|
|
71
|
+
/**
 * Format cache statistics from response.
 *
 * @example
 * ```ts
 * const response = await client.chat(messages, { enableCache: true })
 * const stats = formatCacheStats(response.usage)
 * console.log(stats)
 * // "Cache: 45% read (2,500 tokens), 10% created (500 tokens)"
 * ```
 *
 * @param {{promptTokens: number, cacheCreationTokens?: number, cacheReadTokens?: number}} usage
 * @returns {string | null} Human-readable summary, or null when no cache activity occurred.
 */
export function formatCacheStats(usage) {
  const { promptTokens, cacheCreationTokens = 0, cacheReadTokens = 0 } = usage;
  if (cacheCreationTokens === 0 && cacheReadTokens === 0) {
    return null;
  }
  // Guard against promptTokens === 0, which would render "Infinity%"/"NaN%".
  const pct = (tokens) =>
    promptTokens > 0 ? ((tokens / promptTokens) * 100).toFixed(0) : '0';
  const parts = [];
  if (cacheReadTokens > 0) {
    parts.push(`${pct(cacheReadTokens)}% read (${cacheReadTokens.toLocaleString()} tokens)`);
  }
  if (cacheCreationTokens > 0) {
    parts.push(`${pct(cacheCreationTokens)}% created (${cacheCreationTokens.toLocaleString()} tokens)`);
  }
  return `Cache: ${parts.join(', ')}`;
}
|
|
98
|
+
/**
 * Check if caching is beneficial for the given content.
 * Anthropic recommends caching content >1024 tokens.
 *
 * @param {string} content - Text that would be cached.
 * @param {number} [minTokens=1024] - Minimum estimated tokens to justify caching.
 * @returns {boolean} True when the content is estimated to meet the threshold.
 */
export function shouldCache(content, minTokens = 1024) {
  // Heuristic: roughly four characters per token.
  const CHARS_PER_TOKEN = 4;
  return content.length / CHARS_PER_TOKEN >= minTokens;
}
|
|
107
|
+
/**
 * Create a conversation with optimal caching structure.
 *
 * The system prompt and the LAST context document are marked with
 * `cacheControl: { type: 'ephemeral' }`; everything before the last marker is
 * covered by that cache breakpoint. User/assistant messages are left uncached.
 *
 * NOTE: the previous doc example showed a `tools:` field — the config accepts
 * only `systemPrompt`, `contextDocs`, and `messages`.
 *
 * @example
 * ```ts
 * const conversation = createCachedConversation({
 *   systemPrompt: 'You are a helpful assistant...',
 *   contextDocs: ['# Documentation\n...'], // Large context documents
 *   messages: [
 *     { role: 'user', content: 'What is TypeScript?' },
 *   ],
 * })
 *
 * const response = await client.chat(conversation, { enableCache: true })
 * ```
 *
 * @param {{systemPrompt?: string, contextDocs?: string[], messages: object[]}} config
 * @returns {object[]} Ordered message list ready to send with `enableCache: true`.
 */
export function createCachedConversation(config) {
  const result = [];
  // System prompt (always cached if present).
  if (config.systemPrompt) {
    result.push({
      role: 'system',
      content: config.systemPrompt,
      cacheControl: { type: 'ephemeral' },
    });
  }
  // Context documents as system messages; only the last one carries the marker.
  const docs = config.contextDocs ?? [];
  docs.forEach((doc, index) => {
    const isLast = index === docs.length - 1;
    result.push({
      role: 'system',
      content: doc,
      ...(isLast ? { cacheControl: { type: 'ephemeral' } } : {}),
    });
  });
  // User/assistant messages (not cached by default).
  result.push(...config.messages);
  return result;
}
|
|
145
|
+
/**
 * Pricing information for Anthropic Claude models (as of 2024).
 * Prices are USD per million tokens.
 *
 * Deeply frozen so the shared constant cannot be mutated at runtime — the
 * published type declarations already mark every field `readonly`.
 */
export const ANTHROPIC_PRICING = Object.freeze({
  'claude-3-5-sonnet-20241022': Object.freeze({
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75, // 25% markup for cache creation
    cacheRead: 0.3, // 90% discount for cache hits
  }),
  'claude-3-5-haiku-20241022': Object.freeze({
    input: 1.0,
    output: 5.0,
    cacheWrite: 1.25,
    cacheRead: 0.1,
  }),
  'claude-3-opus-20240229': Object.freeze({
    input: 15.0,
    output: 75.0,
    cacheWrite: 18.75,
    cacheRead: 1.5,
  }),
});
/**
 * Calculate actual cost of a request with caching.
 *
 * @example
 * ```ts
 * const cost = calculateCacheCost({
 *   model: 'claude-3-5-sonnet-20241022',
 *   promptTokens: 10000,
 *   completionTokens: 500,
 *   cacheCreationTokens: 3000,
 *   cacheReadTokens: 5000,
 * })
 *
 * console.log(`Request cost: $${cost.total.toFixed(4)}`)
 * console.log(`Savings vs no cache: $${cost.savings.toFixed(4)}`)
 * ```
 *
 * @param {{model: string, promptTokens: number, completionTokens: number,
 *          cacheCreationTokens?: number, cacheReadTokens?: number}} usage
 * @returns {{total: number, breakdown: Record<string, number>, savings: number}}
 * @throws {Error} If `usage.model` has no entry in ANTHROPIC_PRICING
 *   (previously this surfaced as an opaque TypeError on `pricing.input`).
 */
export function calculateCacheCost(usage) {
  const pricing = ANTHROPIC_PRICING[usage.model];
  if (!pricing) {
    throw new Error(`Unknown model for cache cost calculation: ${usage.model}`);
  }
  const cacheCreationTokens = usage.cacheCreationTokens || 0;
  const cacheReadTokens = usage.cacheReadTokens || 0;
  // Tokens billed at the plain input rate (not written to or read from cache).
  const uncachedTokens = usage.promptTokens - cacheCreationTokens - cacheReadTokens;
  const PER_MILLION = 1_000_000;
  const costs = {
    input: (uncachedTokens / PER_MILLION) * pricing.input,
    output: (usage.completionTokens / PER_MILLION) * pricing.output,
    cacheWrite: (cacheCreationTokens / PER_MILLION) * pricing.cacheWrite,
    cacheRead: (cacheReadTokens / PER_MILLION) * pricing.cacheRead,
  };
  const total = costs.input + costs.output + costs.cacheWrite + costs.cacheRead;
  // Baseline: the same request priced with no caching at all.
  const noCacheCost =
    (usage.promptTokens / PER_MILLION) * pricing.input +
    (usage.completionTokens / PER_MILLION) * pricing.output;
  return {
    total,
    breakdown: costs,
    savings: noCacheCost - total,
  };
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified LLM Client
|
|
3
|
+
*
|
|
4
|
+
* Single interface for all LLM providers with fallback and rate limiting
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Redact sensitive fields before passing an object to a logger.
|
|
8
|
+
* Replaces API keys, tokens, and authorization headers with `[REDACTED]`.
|
|
9
|
+
* Recurses into nested plain objects; leaves arrays and primitives as-is.
|
|
10
|
+
*/
|
|
11
|
+
export declare function redactSensitiveFields(obj: Record<string, unknown>): Record<string, unknown>;
|
|
12
|
+
import type { Database } from '@revealui/db/client';
|
|
13
|
+
import type { AuditStore } from '../audit/store.js';
|
|
14
|
+
import type { ProviderHealthMonitor } from './provider-health.js';
|
|
15
|
+
import type { Embedding, LLMChatOptions, LLMChunk, LLMEmbedOptions, LLMResponse, LLMStreamOptions, Message } from './providers/base.js';
|
|
16
|
+
import { type CacheStats, type ResponseCacheOptions } from './response-cache.js';
|
|
17
|
+
import { type SemanticCacheOptions, type SemanticCacheStats } from './semantic-cache.js';
|
|
18
|
+
export type LLMProviderType = 'openai' | 'anthropic' | 'vultr' | 'groq' | 'ollama' | 'huggingface';
|
|
19
|
+
export interface LLMClientConfig {
|
|
20
|
+
provider: LLMProviderType;
|
|
21
|
+
apiKey: string;
|
|
22
|
+
/**
|
|
23
|
+
* Dynamic API key resolver — called before every LLM request.
|
|
24
|
+
* When set, the resolved key replaces `apiKey` on each call.
|
|
25
|
+
* Use this for OAuth tokens or any credential that expires between requests.
|
|
26
|
+
*/
|
|
27
|
+
apiKeyFn?: () => Promise<string>;
|
|
28
|
+
baseURL?: string;
|
|
29
|
+
model?: string;
|
|
30
|
+
temperature?: number;
|
|
31
|
+
maxTokens?: number;
|
|
32
|
+
fallbackProvider?: LLMProviderType;
|
|
33
|
+
rateLimit?: {
|
|
34
|
+
requestsPerMinute?: number;
|
|
35
|
+
requestsPerDay?: number;
|
|
36
|
+
};
|
|
37
|
+
/** Enable Anthropic prompt caching by default (90% cost reduction on cache hits) */
|
|
38
|
+
enableCacheByDefault?: boolean;
|
|
39
|
+
/** Enable response caching (100% cost savings on duplicate requests) */
|
|
40
|
+
enableResponseCache?: boolean;
|
|
41
|
+
/** Response cache options */
|
|
42
|
+
responseCacheOptions?: ResponseCacheOptions;
|
|
43
|
+
/** Enable semantic caching (73% cost reduction, 65% hit rate) */
|
|
44
|
+
enableSemanticCache?: boolean;
|
|
45
|
+
/** Semantic cache options */
|
|
46
|
+
semanticCacheOptions?: SemanticCacheOptions;
|
|
47
|
+
/** Optional health monitor — records latency + error rate per provider */
|
|
48
|
+
healthMonitor?: ProviderHealthMonitor;
|
|
49
|
+
}
|
|
50
|
+
export declare class LLMClient {
|
|
51
|
+
private provider;
|
|
52
|
+
private fallbackProvider?;
|
|
53
|
+
private config;
|
|
54
|
+
private rateLimitState;
|
|
55
|
+
private responseCache?;
|
|
56
|
+
private semanticCache?;
|
|
57
|
+
private healthMonitor?;
|
|
58
|
+
/** Tracks the last resolved API key so we only recreate the provider when it changes */
|
|
59
|
+
private currentApiKey;
|
|
60
|
+
constructor(config: LLMClientConfig);
|
|
61
|
+
private createProvider;
|
|
62
|
+
/**
|
|
63
|
+
* Re-resolve the API key via apiKeyFn (if configured) and recreate the provider
|
|
64
|
+
* when the key has changed. No-op if apiKeyFn is not set.
|
|
65
|
+
*/
|
|
66
|
+
private refreshProviderIfNeeded;
|
|
67
|
+
private checkRateLimit;
|
|
68
|
+
private recordRequest;
|
|
69
|
+
chat(messages: Message[], options?: LLMChatOptions): Promise<LLMResponse>;
|
|
70
|
+
embed(text: string | string[], options?: LLMEmbedOptions): Promise<Embedding | Embedding[]>;
|
|
71
|
+
stream(messages: Message[], options?: LLMStreamOptions): AsyncIterable<LLMChunk>;
|
|
72
|
+
/**
|
|
73
|
+
* Estimate token count and cost for a set of messages using the configured model.
|
|
74
|
+
* Uses a heuristic (~4 chars/token). Useful for pre-flight cost checks.
|
|
75
|
+
*/
|
|
76
|
+
estimateRequest(messages: Message[]): {
|
|
77
|
+
tokens: number;
|
|
78
|
+
estimatedCostUsd: number;
|
|
79
|
+
};
|
|
80
|
+
/**
|
|
81
|
+
* Get the provider health monitor if one was configured.
|
|
82
|
+
*/
|
|
83
|
+
getHealthMonitor(): ProviderHealthMonitor | undefined;
|
|
84
|
+
/**
|
|
85
|
+
* Get response cache statistics
|
|
86
|
+
*
|
|
87
|
+
* @returns Cache stats or undefined if caching is disabled
|
|
88
|
+
*/
|
|
89
|
+
getResponseCacheStats(): CacheStats | undefined;
|
|
90
|
+
/**
|
|
91
|
+
* Clear response cache
|
|
92
|
+
*/
|
|
93
|
+
clearResponseCache(): void;
|
|
94
|
+
/**
|
|
95
|
+
* Get semantic cache statistics
|
|
96
|
+
*
|
|
97
|
+
* @returns Semantic cache stats or undefined if caching is disabled
|
|
98
|
+
*/
|
|
99
|
+
getSemanticCacheStats(): SemanticCacheStats | undefined;
|
|
100
|
+
/**
|
|
101
|
+
* Clear semantic cache
|
|
102
|
+
*/
|
|
103
|
+
clearSemanticCache(): void;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Create an LLM client from environment variables.
|
|
107
|
+
*
|
|
108
|
+
* When LLM_PROVIDER is not set, auto-detects the provider by checking env vars
|
|
109
|
+
* in priority order: GROQ_API_KEY → OLLAMA_BASE_URL → ANTHROPIC_API_KEY.
|
|
110
|
+
*
|
|
111
|
+
* GROQ and Ollama are preferred — they are free-tier and BYOK-friendly.
|
|
112
|
+
* OpenAI is not in the auto-detection chain (no revenue yet — see LLM provider policy).
|
|
113
|
+
* To use OpenAI, set LLM_PROVIDER=openai explicitly.
|
|
114
|
+
*
|
|
115
|
+
* Provider defaults:
|
|
116
|
+
* groq → llama-3.3-70b-versatile
|
|
117
|
+
* ollama → llama3.2:3b
|
|
118
|
+
*/
|
|
119
|
+
export declare function createLLMClientFromEnv(): LLMClient;
|
|
120
|
+
/**
|
|
121
|
+
* Create an LLM client using a user's stored BYOK API key.
|
|
122
|
+
*
|
|
123
|
+
* Looks up the user's preferred provider from `tenant_provider_configs`
|
|
124
|
+
* (falling back to the first key in `user_api_keys`), decrypts the key
|
|
125
|
+
* with AES-256-GCM, and returns a configured LLMClient.
|
|
126
|
+
*
|
|
127
|
+
* Returns `null` if the user has no stored keys (callers should fall back
|
|
128
|
+
* to `createLLMClientFromEnv()` or return a 402/feature-unavailable error).
|
|
129
|
+
*
|
|
130
|
+
* @param userId - The user's ID from the `users` table
|
|
131
|
+
* @param db - A Drizzle NeonDB client instance
|
|
132
|
+
*/
|
|
133
|
+
export declare function createLLMClientForUser(userId: string, db: Database, auditStore?: AuditStore): Promise<LLMClient | null>;
|
|
134
|
+
//# sourceMappingURL=client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/llm/client.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAoBH;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAY3F;AAID,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;AAInD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AACnD,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAA;AAEjE,OAAO,KAAK,EACV,SAAS,EACT,cAAc,EACd,QAAQ,EACR,eAAe,EAEf,WAAW,EACX,gBAAgB,EAChB,OAAO,EACR,MAAM,qBAAqB,CAAA;AAK5B,OAAO,EAAE,KAAK,UAAU,EAAiB,KAAK,oBAAoB,EAAE,MAAM,qBAAqB,CAAA;AAC/F,OAAO,EAEL,KAAK,oBAAoB,EACzB,KAAK,kBAAkB,EACxB,MAAM,qBAAqB,CAAA;AAG5B,MAAM,MAAM,eAAe,GAAG,QAAQ,GAAG,WAAW,GAAG,OAAO,GAAG,MAAM,GAAG,QAAQ,GAAG,aAAa,CAAA;AAElG,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,eAAe,CAAA;IACzB,MAAM,EAAE,MAAM,CAAA;IACd;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,OAAO,CAAC,MAAM,CAAC,CAAA;IAChC,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,gBAAgB,CAAC,EAAE,eAAe,CAAA;IAClC,SAAS,CAAC,EAAE;QACV,iBAAiB,CAAC,EAAE,MAAM,CAAA;QAC1B,cAAc,CAAC,EAAE,MAAM,CAAA;KACxB,CAAA;IACD,oFAAoF;IACpF,oBAAoB,CAAC,EAAE,OAAO,CAAA;IAC9B,wEAAwE;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAA;IAC7B,6BAA6B;IAC7B,oBAAoB,CAAC,EAAE,oBAAoB,CAAA;IAC3C,iEAAiE;IACjE,mBAAmB,CAAC,EAAE,OAAO,CAAA;IAC7B,6BAA6B;IAC7B,oBAAoB,CAAC,EAAE,oBAAoB,CAAA;IAC3C,0EAA0E;IAC1E,aAAa,CAAC,EAAE,qBAAqB,CAAA;CACtC;AAQD,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,gBAAgB,CAAC,CAAa;IACtC,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,cAAc,CAAgB;IACtC,OAAO,CAAC,aAAa,CAAC,CAAe;IACrC,OAAO,CAAC,aAAa,CAAC,CAAe;IACrC,OAAO,CAAC,aAAa,CAAC,CAAuB;IAC7C,wFAAwF;IACxF,OAAO,CAAC,aAAa,CAAQ;gBAEjB,MAAM,EAAE,eAAe;IA2CnC,OAAO,CAAC,cAAc;IA4BtB;;;OAGG;YACW,uBAAuB;IAmBrC,OAAO,CAAC,cAAc;IAoCtB,OAAO,CAAC,aAAa;IAMf,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IAmHzE,KAAK,CACT,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,EACvB,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;IAwB5B,MAAM,CAAC,QAAQ,EAAE,OAAO,E
AAE,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC;IA0BvF;;;OAGG;IACH,eAAe,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAIlF;;OAEG;IACH,gBAAgB,IAAI,qBAAqB,GAAG,SAAS;IAIrD;;;;OAIG;IACH,qBAAqB,IAAI,UAAU,GAAG,SAAS;IAI/C;;OAEG;IACH,kBAAkB,IAAI,IAAI;IAI1B;;;;OAIG;IACH,qBAAqB,IAAI,kBAAkB,GAAG,SAAS;IAIvD;;OAEG;IACH,kBAAkB,IAAI,IAAI;CAG3B;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,sBAAsB,IAAI,SAAS,CAsElD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,MAAM,EACd,EAAE,EAAE,QAAQ,EACZ,UAAU,CAAC,EAAE,UAAU,GACtB,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAiD3B"}
|