@revealui/ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/LICENSE.commercial +112 -0
- package/README.md +314 -0
- package/dist/a2a/card.d.ts +26 -0
- package/dist/a2a/card.d.ts.map +1 -0
- package/dist/a2a/card.js +173 -0
- package/dist/a2a/handler.d.ts +26 -0
- package/dist/a2a/handler.d.ts.map +1 -0
- package/dist/a2a/handler.js +170 -0
- package/dist/a2a/index.d.ts +10 -0
- package/dist/a2a/index.d.ts.map +1 -0
- package/dist/a2a/index.js +9 -0
- package/dist/a2a/task-store.d.ts +42 -0
- package/dist/a2a/task-store.d.ts.map +1 -0
- package/dist/a2a/task-store.js +99 -0
- package/dist/audit/emitter.d.ts +34 -0
- package/dist/audit/emitter.d.ts.map +1 -0
- package/dist/audit/emitter.js +34 -0
- package/dist/audit/index.d.ts +44 -0
- package/dist/audit/index.d.ts.map +1 -0
- package/dist/audit/index.js +48 -0
- package/dist/audit/observer.d.ts +108 -0
- package/dist/audit/observer.d.ts.map +1 -0
- package/dist/audit/observer.js +271 -0
- package/dist/audit/policy.d.ts +70 -0
- package/dist/audit/policy.d.ts.map +1 -0
- package/dist/audit/policy.js +209 -0
- package/dist/audit/store.d.ts +42 -0
- package/dist/audit/store.d.ts.map +1 -0
- package/dist/audit/store.js +80 -0
- package/dist/audit/types.d.ts +169 -0
- package/dist/audit/types.d.ts.map +1 -0
- package/dist/audit/types.js +80 -0
- package/dist/client/hooks/index.d.ts +22 -0
- package/dist/client/hooks/index.d.ts.map +1 -0
- package/dist/client/hooks/index.js +21 -0
- package/dist/client/hooks/useAgentContext.d.ts +30 -0
- package/dist/client/hooks/useAgentContext.d.ts.map +1 -0
- package/dist/client/hooks/useAgentContext.js +161 -0
- package/dist/client/hooks/useAgentEvents.d.ts +126 -0
- package/dist/client/hooks/useAgentEvents.d.ts.map +1 -0
- package/dist/client/hooks/useAgentEvents.js +232 -0
- package/dist/client/hooks/useAgentStream.d.ts +44 -0
- package/dist/client/hooks/useAgentStream.d.ts.map +1 -0
- package/dist/client/hooks/useAgentStream.js +101 -0
- package/dist/client/hooks/useEpisodicMemory.d.ts +25 -0
- package/dist/client/hooks/useEpisodicMemory.d.ts.map +1 -0
- package/dist/client/hooks/useEpisodicMemory.js +174 -0
- package/dist/client/hooks/useWorkingMemory.d.ts +57 -0
- package/dist/client/hooks/useWorkingMemory.d.ts.map +1 -0
- package/dist/client/hooks/useWorkingMemory.js +276 -0
- package/dist/client/index.d.ts +14 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +13 -0
- package/dist/embeddings/index.d.ts +51 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +73 -0
- package/dist/index.d.ts +83 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +103 -0
- package/dist/inference/context-assembly.d.ts +27 -0
- package/dist/inference/context-assembly.d.ts.map +1 -0
- package/dist/inference/context-assembly.js +81 -0
- package/dist/inference/overflow-compressor.d.ts +17 -0
- package/dist/inference/overflow-compressor.d.ts.map +1 -0
- package/dist/inference/overflow-compressor.js +40 -0
- package/dist/inference/runRag.d.ts +35 -0
- package/dist/inference/runRag.d.ts.map +1 -0
- package/dist/inference/runRag.js +53 -0
- package/dist/ingestion/bm25.d.ts +29 -0
- package/dist/ingestion/bm25.d.ts.map +1 -0
- package/dist/ingestion/bm25.js +161 -0
- package/dist/ingestion/cms-indexer.d.ts +39 -0
- package/dist/ingestion/cms-indexer.d.ts.map +1 -0
- package/dist/ingestion/cms-indexer.js +74 -0
- package/dist/ingestion/file-parsers.d.ts +51 -0
- package/dist/ingestion/file-parsers.d.ts.map +1 -0
- package/dist/ingestion/file-parsers.js +247 -0
- package/dist/ingestion/hybrid-search.d.ts +22 -0
- package/dist/ingestion/hybrid-search.d.ts.map +1 -0
- package/dist/ingestion/hybrid-search.js +63 -0
- package/dist/ingestion/index.d.ts +9 -0
- package/dist/ingestion/index.d.ts.map +1 -0
- package/dist/ingestion/index.js +8 -0
- package/dist/ingestion/pipeline.d.ts +35 -0
- package/dist/ingestion/pipeline.d.ts.map +1 -0
- package/dist/ingestion/pipeline.js +114 -0
- package/dist/ingestion/rag-vector-service.d.ts +34 -0
- package/dist/ingestion/rag-vector-service.d.ts.map +1 -0
- package/dist/ingestion/rag-vector-service.js +98 -0
- package/dist/ingestion/reranker.d.ts +10 -0
- package/dist/ingestion/reranker.d.ts.map +1 -0
- package/dist/ingestion/reranker.js +41 -0
- package/dist/ingestion/text-splitter.d.ts +25 -0
- package/dist/ingestion/text-splitter.d.ts.map +1 -0
- package/dist/ingestion/text-splitter.js +119 -0
- package/dist/llm/cache-utils.d.ts +146 -0
- package/dist/llm/cache-utils.d.ts.map +1 -0
- package/dist/llm/cache-utils.js +204 -0
- package/dist/llm/client.d.ts +134 -0
- package/dist/llm/client.d.ts.map +1 -0
- package/dist/llm/client.js +497 -0
- package/dist/llm/key-validator.d.ts +25 -0
- package/dist/llm/key-validator.d.ts.map +1 -0
- package/dist/llm/key-validator.js +101 -0
- package/dist/llm/provider-health.d.ts +40 -0
- package/dist/llm/provider-health.d.ts.map +1 -0
- package/dist/llm/provider-health.js +97 -0
- package/dist/llm/providers/anthropic.d.ts +31 -0
- package/dist/llm/providers/anthropic.d.ts.map +1 -0
- package/dist/llm/providers/anthropic.js +248 -0
- package/dist/llm/providers/base.d.ts +111 -0
- package/dist/llm/providers/base.d.ts.map +1 -0
- package/dist/llm/providers/base.js +6 -0
- package/dist/llm/providers/groq.d.ts +23 -0
- package/dist/llm/providers/groq.d.ts.map +1 -0
- package/dist/llm/providers/groq.js +27 -0
- package/dist/llm/providers/ollama.d.ts +27 -0
- package/dist/llm/providers/ollama.d.ts.map +1 -0
- package/dist/llm/providers/ollama.js +48 -0
- package/dist/llm/providers/openai.d.ts +19 -0
- package/dist/llm/providers/openai.d.ts.map +1 -0
- package/dist/llm/providers/openai.js +245 -0
- package/dist/llm/providers/vultr.d.ts +18 -0
- package/dist/llm/providers/vultr.d.ts.map +1 -0
- package/dist/llm/providers/vultr.js +168 -0
- package/dist/llm/response-cache.d.ts +166 -0
- package/dist/llm/response-cache.d.ts.map +1 -0
- package/dist/llm/response-cache.js +233 -0
- package/dist/llm/semantic-cache.d.ts +179 -0
- package/dist/llm/semantic-cache.d.ts.map +1 -0
- package/dist/llm/semantic-cache.js +306 -0
- package/dist/llm/server.d.ts +14 -0
- package/dist/llm/server.d.ts.map +1 -0
- package/dist/llm/server.js +15 -0
- package/dist/llm/token-counter.d.ts +48 -0
- package/dist/llm/token-counter.d.ts.map +1 -0
- package/dist/llm/token-counter.js +77 -0
- package/dist/llm/workspace-provider-config.d.ts +38 -0
- package/dist/llm/workspace-provider-config.d.ts.map +1 -0
- package/dist/llm/workspace-provider-config.js +47 -0
- package/dist/memory/agent/context-manager.d.ts +148 -0
- package/dist/memory/agent/context-manager.d.ts.map +1 -0
- package/dist/memory/agent/context-manager.js +284 -0
- package/dist/memory/agent/index.d.ts +7 -0
- package/dist/memory/agent/index.d.ts.map +1 -0
- package/dist/memory/agent/index.js +6 -0
- package/dist/memory/crdt/index.d.ts +13 -0
- package/dist/memory/crdt/index.d.ts.map +1 -0
- package/dist/memory/crdt/index.js +12 -0
- package/dist/memory/crdt/lww-register.d.ts +108 -0
- package/dist/memory/crdt/lww-register.d.ts.map +1 -0
- package/dist/memory/crdt/lww-register.js +169 -0
- package/dist/memory/crdt/or-set.d.ts +141 -0
- package/dist/memory/crdt/or-set.d.ts.map +1 -0
- package/dist/memory/crdt/or-set.js +291 -0
- package/dist/memory/crdt/pn-counter.d.ts +116 -0
- package/dist/memory/crdt/pn-counter.d.ts.map +1 -0
- package/dist/memory/crdt/pn-counter.js +174 -0
- package/dist/memory/crdt/vector-clock.d.ts +115 -0
- package/dist/memory/crdt/vector-clock.d.ts.map +1 -0
- package/dist/memory/crdt/vector-clock.js +179 -0
- package/dist/memory/errors/index.d.ts +56 -0
- package/dist/memory/errors/index.d.ts.map +1 -0
- package/dist/memory/errors/index.js +85 -0
- package/dist/memory/index.d.ts +21 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +20 -0
- package/dist/memory/persistence/crdt-persistence.d.ts +85 -0
- package/dist/memory/persistence/crdt-persistence.d.ts.map +1 -0
- package/dist/memory/persistence/crdt-persistence.js +204 -0
- package/dist/memory/persistence/index.d.ts +7 -0
- package/dist/memory/persistence/index.d.ts.map +1 -0
- package/dist/memory/persistence/index.js +6 -0
- package/dist/memory/preferences/index.d.ts +7 -0
- package/dist/memory/preferences/index.d.ts.map +1 -0
- package/dist/memory/preferences/index.js +6 -0
- package/dist/memory/preferences/user-preferences-manager.d.ts +133 -0
- package/dist/memory/preferences/user-preferences-manager.d.ts.map +1 -0
- package/dist/memory/preferences/user-preferences-manager.js +342 -0
- package/dist/memory/services/index.d.ts +8 -0
- package/dist/memory/services/index.d.ts.map +1 -0
- package/dist/memory/services/index.js +6 -0
- package/dist/memory/services/node-id-service.d.ts +75 -0
- package/dist/memory/services/node-id-service.d.ts.map +1 -0
- package/dist/memory/services/node-id-service.js +190 -0
- package/dist/memory/stores/episodic-memory.d.ts +182 -0
- package/dist/memory/stores/episodic-memory.d.ts.map +1 -0
- package/dist/memory/stores/episodic-memory.js +378 -0
- package/dist/memory/stores/index.d.ts +16 -0
- package/dist/memory/stores/index.d.ts.map +1 -0
- package/dist/memory/stores/index.js +15 -0
- package/dist/memory/stores/procedural-memory.d.ts +89 -0
- package/dist/memory/stores/procedural-memory.d.ts.map +1 -0
- package/dist/memory/stores/procedural-memory.js +152 -0
- package/dist/memory/stores/semantic-memory.d.ts +92 -0
- package/dist/memory/stores/semantic-memory.d.ts.map +1 -0
- package/dist/memory/stores/semantic-memory.js +155 -0
- package/dist/memory/stores/working-memory.d.ts +225 -0
- package/dist/memory/stores/working-memory.d.ts.map +1 -0
- package/dist/memory/stores/working-memory.js +336 -0
- package/dist/memory/utils/deep-clone.d.ts +10 -0
- package/dist/memory/utils/deep-clone.d.ts.map +1 -0
- package/dist/memory/utils/deep-clone.js +9 -0
- package/dist/memory/utils/index.d.ts +8 -0
- package/dist/memory/utils/index.d.ts.map +1 -0
- package/dist/memory/utils/index.js +7 -0
- package/dist/memory/utils/logger.d.ts +21 -0
- package/dist/memory/utils/logger.d.ts.map +1 -0
- package/dist/memory/utils/logger.js +62 -0
- package/dist/memory/utils/sql-helpers.d.ts +97 -0
- package/dist/memory/utils/sql-helpers.d.ts.map +1 -0
- package/dist/memory/utils/sql-helpers.js +214 -0
- package/dist/memory/utils/validation.d.ts +62 -0
- package/dist/memory/utils/validation.d.ts.map +1 -0
- package/dist/memory/utils/validation.js +244 -0
- package/dist/memory/vector/index.d.ts +12 -0
- package/dist/memory/vector/index.d.ts.map +1 -0
- package/dist/memory/vector/index.js +14 -0
- package/dist/memory/vector/vector-memory-service.d.ts +88 -0
- package/dist/memory/vector/vector-memory-service.d.ts.map +1 -0
- package/dist/memory/vector/vector-memory-service.js +335 -0
- package/dist/observability/logger.d.ts +79 -0
- package/dist/observability/logger.d.ts.map +1 -0
- package/dist/observability/logger.js +165 -0
- package/dist/observability/metrics.d.ts +43 -0
- package/dist/observability/metrics.d.ts.map +1 -0
- package/dist/observability/metrics.js +197 -0
- package/dist/observability/query.d.ts +150 -0
- package/dist/observability/query.d.ts.map +1 -0
- package/dist/observability/query.js +339 -0
- package/dist/observability/types.d.ts +140 -0
- package/dist/observability/types.d.ts.map +1 -0
- package/dist/observability/types.js +6 -0
- package/dist/orchestration/agent.d.ts +98 -0
- package/dist/orchestration/agent.d.ts.map +1 -0
- package/dist/orchestration/agent.js +6 -0
- package/dist/orchestration/defaults.d.ts +21 -0
- package/dist/orchestration/defaults.d.ts.map +1 -0
- package/dist/orchestration/defaults.js +22 -0
- package/dist/orchestration/memory-integration.d.ts +58 -0
- package/dist/orchestration/memory-integration.d.ts.map +1 -0
- package/dist/orchestration/memory-integration.js +130 -0
- package/dist/orchestration/orchestrator.d.ts +67 -0
- package/dist/orchestration/orchestrator.d.ts.map +1 -0
- package/dist/orchestration/orchestrator.js +174 -0
- package/dist/orchestration/runtime.d.ts +82 -0
- package/dist/orchestration/runtime.d.ts.map +1 -0
- package/dist/orchestration/runtime.js +251 -0
- package/dist/orchestration/streaming-runtime.d.ts +36 -0
- package/dist/orchestration/streaming-runtime.d.ts.map +1 -0
- package/dist/orchestration/streaming-runtime.js +175 -0
- package/dist/orchestration/ticket-agent.d.ts +70 -0
- package/dist/orchestration/ticket-agent.d.ts.map +1 -0
- package/dist/orchestration/ticket-agent.js +146 -0
- package/dist/skills/activation/index.d.ts +7 -0
- package/dist/skills/activation/index.d.ts.map +1 -0
- package/dist/skills/activation/index.js +6 -0
- package/dist/skills/activation/skill-activator.d.ts +68 -0
- package/dist/skills/activation/skill-activator.d.ts.map +1 -0
- package/dist/skills/activation/skill-activator.js +224 -0
- package/dist/skills/catalog/catalog-search.d.ts +55 -0
- package/dist/skills/catalog/catalog-search.d.ts.map +1 -0
- package/dist/skills/catalog/catalog-search.js +111 -0
- package/dist/skills/catalog/catalog-types.d.ts +81 -0
- package/dist/skills/catalog/catalog-types.d.ts.map +1 -0
- package/dist/skills/catalog/catalog-types.js +66 -0
- package/dist/skills/catalog/index.d.ts +9 -0
- package/dist/skills/catalog/index.d.ts.map +1 -0
- package/dist/skills/catalog/index.js +7 -0
- package/dist/skills/catalog/vercel-catalog.d.ts +42 -0
- package/dist/skills/catalog/vercel-catalog.d.ts.map +1 -0
- package/dist/skills/catalog/vercel-catalog.js +189 -0
- package/dist/skills/compat/index.d.ts +9 -0
- package/dist/skills/compat/index.d.ts.map +1 -0
- package/dist/skills/compat/index.js +8 -0
- package/dist/skills/compat/skill-enhancer.d.ts +37 -0
- package/dist/skills/compat/skill-enhancer.d.ts.map +1 -0
- package/dist/skills/compat/skill-enhancer.js +76 -0
- package/dist/skills/compat/tool-mapper.d.ts +61 -0
- package/dist/skills/compat/tool-mapper.d.ts.map +1 -0
- package/dist/skills/compat/tool-mapper.js +168 -0
- package/dist/skills/compat/vercel-compat.d.ts +33 -0
- package/dist/skills/compat/vercel-compat.d.ts.map +1 -0
- package/dist/skills/compat/vercel-compat.js +132 -0
- package/dist/skills/index.d.ts +40 -0
- package/dist/skills/index.d.ts.map +1 -0
- package/dist/skills/index.js +47 -0
- package/dist/skills/integration/agent-skill-provider.d.ts +94 -0
- package/dist/skills/integration/agent-skill-provider.d.ts.map +1 -0
- package/dist/skills/integration/agent-skill-provider.js +161 -0
- package/dist/skills/integration/index.d.ts +7 -0
- package/dist/skills/integration/index.d.ts.map +1 -0
- package/dist/skills/integration/index.js +6 -0
- package/dist/skills/loader/github-loader.d.ts +61 -0
- package/dist/skills/loader/github-loader.d.ts.map +1 -0
- package/dist/skills/loader/github-loader.js +176 -0
- package/dist/skills/loader/index.d.ts +10 -0
- package/dist/skills/loader/index.d.ts.map +1 -0
- package/dist/skills/loader/index.js +9 -0
- package/dist/skills/loader/local-loader.d.ts +56 -0
- package/dist/skills/loader/local-loader.d.ts.map +1 -0
- package/dist/skills/loader/local-loader.js +186 -0
- package/dist/skills/loader/vercel-loader.d.ts +64 -0
- package/dist/skills/loader/vercel-loader.d.ts.map +1 -0
- package/dist/skills/loader/vercel-loader.js +313 -0
- package/dist/skills/loader/vercel-types.d.ts +64 -0
- package/dist/skills/loader/vercel-types.d.ts.map +1 -0
- package/dist/skills/loader/vercel-types.js +55 -0
- package/dist/skills/parser/index.d.ts +7 -0
- package/dist/skills/parser/index.d.ts.map +1 -0
- package/dist/skills/parser/index.js +6 -0
- package/dist/skills/parser/skill-md-parser.d.ts +64 -0
- package/dist/skills/parser/skill-md-parser.d.ts.map +1 -0
- package/dist/skills/parser/skill-md-parser.js +242 -0
- package/dist/skills/registry/index.d.ts +7 -0
- package/dist/skills/registry/index.d.ts.map +1 -0
- package/dist/skills/registry/index.js +6 -0
- package/dist/skills/registry/skill-registry.d.ts +133 -0
- package/dist/skills/registry/skill-registry.d.ts.map +1 -0
- package/dist/skills/registry/skill-registry.js +373 -0
- package/dist/skills/types.d.ts +216 -0
- package/dist/skills/types.d.ts.map +1 -0
- package/dist/skills/types.js +176 -0
- package/dist/templates/agent-spec.d.ts +138 -0
- package/dist/templates/agent-spec.d.ts.map +1 -0
- package/dist/templates/agent-spec.js +138 -0
- package/dist/templates/index.d.ts +56 -0
- package/dist/templates/index.d.ts.map +1 -0
- package/dist/templates/index.js +58 -0
- package/dist/templates/prompt-spec.d.ts +140 -0
- package/dist/templates/prompt-spec.d.ts.map +1 -0
- package/dist/templates/prompt-spec.js +210 -0
- package/dist/templates/skill-spec.d.ts +106 -0
- package/dist/templates/skill-spec.d.ts.map +1 -0
- package/dist/templates/skill-spec.js +119 -0
- package/dist/tools/base.d.ts +74 -0
- package/dist/tools/base.d.ts.map +1 -0
- package/dist/tools/base.js +6 -0
- package/dist/tools/cms/collection-tools.d.ts +36 -0
- package/dist/tools/cms/collection-tools.d.ts.map +1 -0
- package/dist/tools/cms/collection-tools.js +178 -0
- package/dist/tools/cms/factory.d.ts +89 -0
- package/dist/tools/cms/factory.d.ts.map +1 -0
- package/dist/tools/cms/factory.js +462 -0
- package/dist/tools/cms/global-tools.d.ts +21 -0
- package/dist/tools/cms/global-tools.d.ts.map +1 -0
- package/dist/tools/cms/global-tools.js +92 -0
- package/dist/tools/cms/index.d.ts +11 -0
- package/dist/tools/cms/index.d.ts.map +1 -0
- package/dist/tools/cms/index.js +11 -0
- package/dist/tools/cms/media-tools.d.ts +31 -0
- package/dist/tools/cms/media-tools.d.ts.map +1 -0
- package/dist/tools/cms/media-tools.js +140 -0
- package/dist/tools/cms/user-tools.d.ts +31 -0
- package/dist/tools/cms/user-tools.d.ts.map +1 -0
- package/dist/tools/cms/user-tools.js +135 -0
- package/dist/tools/deduplicator.d.ts +19 -0
- package/dist/tools/deduplicator.d.ts.map +1 -0
- package/dist/tools/deduplicator.js +53 -0
- package/dist/tools/document-summarizer.d.ts +11 -0
- package/dist/tools/document-summarizer.d.ts.map +1 -0
- package/dist/tools/document-summarizer.js +82 -0
- package/dist/tools/mcp-adapter.d.ts +66 -0
- package/dist/tools/mcp-adapter.d.ts.map +1 -0
- package/dist/tools/mcp-adapter.js +152 -0
- package/dist/tools/memory/index.d.ts +3 -0
- package/dist/tools/memory/index.d.ts.map +1 -0
- package/dist/tools/memory/index.js +1 -0
- package/dist/tools/memory/store-memory.d.ts +39 -0
- package/dist/tools/memory/store-memory.d.ts.map +1 -0
- package/dist/tools/memory/store-memory.js +94 -0
- package/dist/tools/registry.d.ts +14 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +48 -0
- package/dist/tools/ticket-tools.d.ts +31 -0
- package/dist/tools/ticket-tools.d.ts.map +1 -0
- package/dist/tools/ticket-tools.js +74 -0
- package/dist/tools/web/duck-duck-go.d.ts +52 -0
- package/dist/tools/web/duck-duck-go.d.ts.map +1 -0
- package/dist/tools/web/duck-duck-go.js +202 -0
- package/dist/tools/web/exa.d.ts +34 -0
- package/dist/tools/web/exa.d.ts.map +1 -0
- package/dist/tools/web/exa.js +80 -0
- package/dist/tools/web/index.d.ts +6 -0
- package/dist/tools/web/index.d.ts.map +1 -0
- package/dist/tools/web/index.js +4 -0
- package/dist/tools/web/scraper.d.ts +9 -0
- package/dist/tools/web/scraper.d.ts.map +1 -0
- package/dist/tools/web/scraper.js +118 -0
- package/dist/tools/web/tavily.d.ts +32 -0
- package/dist/tools/web/tavily.d.ts.map +1 -0
- package/dist/tools/web/tavily.js +73 -0
- package/dist/tools/web/types.d.ts +31 -0
- package/dist/tools/web/types.d.ts.map +1 -0
- package/dist/tools/web/types.js +9 -0
- package/package.json +143 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Cache for LLM Responses
|
|
3
|
+
*
|
|
4
|
+
* Caches responses based on semantic similarity rather than exact matches.
|
|
5
|
+
* Provides 73% cost reduction vs 20% from exact match caching.
|
|
6
|
+
*
|
|
7
|
+
* Key Features:
|
|
8
|
+
* - Vector-based similarity search (cosine similarity)
|
|
9
|
+
* - Configurable similarity threshold (default: 0.95)
|
|
10
|
+
* - Automatic cache warming
|
|
11
|
+
* - TTL-based expiration
|
|
12
|
+
* - Hit/miss statistics
|
|
13
|
+
*
|
|
14
|
+
* Real-world impact:
|
|
15
|
+
* - 73% cost reduction (vs 20% for exact matches)
|
|
16
|
+
* - 96.9% latency reduction on cache hits
|
|
17
|
+
* - 65% cache hit rate (vs 18% for exact matches)
|
|
18
|
+
*
|
|
19
|
+
* Examples of semantic matches:
|
|
20
|
+
* - "How do I reset my password?" ✅ cached
|
|
21
|
+
* - "What's the process to reset my password?" ✅ also cached (same meaning)
|
|
22
|
+
* - "Help me reset my password" ✅ also cached (same meaning)
|
|
23
|
+
*
|
|
24
|
+
* @see https://redis.io/blog/what-is-semantic-caching/
|
|
25
|
+
*/
|
|
26
|
+
import { generateEmbedding } from '../embeddings/index.js';
|
|
27
|
+
import { createLogger } from '../memory/utils/logger.js';
|
|
28
|
+
import { VectorMemoryService } from '../memory/vector/vector-memory-service.js';
|
|
29
|
+
/**
|
|
30
|
+
* Semantic cache that uses vector similarity for intelligent caching
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```typescript
|
|
34
|
+
* const cache = new SemanticCache({ similarityThreshold: 0.95 })
|
|
35
|
+
*
|
|
36
|
+
* // Check cache
|
|
37
|
+
* const cached = await cache.get("How do I reset my password?")
|
|
38
|
+
* if (cached) {
|
|
39
|
+
* return cached.response // 100% cost savings!
|
|
40
|
+
* }
|
|
41
|
+
*
|
|
42
|
+
* // Cache miss - call LLM
|
|
43
|
+
* const response = await llm.chat(messages)
|
|
44
|
+
*
|
|
45
|
+
* // Store in cache
|
|
46
|
+
* await cache.set("How do I reset my password?", response)
|
|
47
|
+
* ```
|
|
48
|
+
*/
|
|
49
|
+
export class SemanticCache {
|
|
50
|
+
vectorService;
|
|
51
|
+
options;
|
|
52
|
+
stats;
|
|
53
|
+
logger = createLogger('[SemanticCache]');
|
|
54
|
+
constructor(options = {}) {
|
|
55
|
+
this.vectorService = new VectorMemoryService();
|
|
56
|
+
this.options = {
|
|
57
|
+
similarityThreshold: options.similarityThreshold ?? 0.95,
|
|
58
|
+
ttl: options.ttl ?? 60 * 60 * 1000, // 1 hour default
|
|
59
|
+
enableStats: options.enableStats ?? true,
|
|
60
|
+
userId: options.userId ?? 'global',
|
|
61
|
+
siteId: options.siteId ?? 'global',
|
|
62
|
+
};
|
|
63
|
+
this.stats = {
|
|
64
|
+
hits: 0,
|
|
65
|
+
misses: 0,
|
|
66
|
+
similarityScores: [],
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Get cached response for semantically similar query
|
|
71
|
+
*
|
|
72
|
+
* @param query - User query to search for
|
|
73
|
+
* @returns Cached response if similar query found, undefined otherwise
|
|
74
|
+
*/
|
|
75
|
+
async get(query) {
|
|
76
|
+
try {
|
|
77
|
+
// Generate embedding for query
|
|
78
|
+
const queryEmbedding = await generateEmbedding(query);
|
|
79
|
+
// Search for similar cached responses
|
|
80
|
+
const results = await this.vectorService.searchSimilar(queryEmbedding.vector, {
|
|
81
|
+
limit: 1,
|
|
82
|
+
threshold: this.options.similarityThreshold,
|
|
83
|
+
userId: this.options.userId,
|
|
84
|
+
siteId: this.options.siteId,
|
|
85
|
+
type: 'semantic_cache',
|
|
86
|
+
});
|
|
87
|
+
if (results.length === 0) {
|
|
88
|
+
// Cache miss
|
|
89
|
+
if (this.options.enableStats) {
|
|
90
|
+
this.stats.misses++;
|
|
91
|
+
}
|
|
92
|
+
return undefined;
|
|
93
|
+
}
|
|
94
|
+
const result = results[0];
|
|
95
|
+
if (!result) {
|
|
96
|
+
// Should not happen since we checked length, but be safe
|
|
97
|
+
if (this.options.enableStats) {
|
|
98
|
+
this.stats.misses++;
|
|
99
|
+
}
|
|
100
|
+
return undefined;
|
|
101
|
+
}
|
|
102
|
+
const memory = result.memory;
|
|
103
|
+
// Check TTL
|
|
104
|
+
const age = Date.now() - new Date(memory.createdAt).getTime();
|
|
105
|
+
if (age > this.options.ttl) {
|
|
106
|
+
// Expired - treat as miss
|
|
107
|
+
if (this.options.enableStats) {
|
|
108
|
+
this.stats.misses++;
|
|
109
|
+
}
|
|
110
|
+
return undefined;
|
|
111
|
+
}
|
|
112
|
+
// Cache hit!
|
|
113
|
+
if (this.options.enableStats) {
|
|
114
|
+
this.stats.hits++;
|
|
115
|
+
this.stats.similarityScores.push(result.similarity);
|
|
116
|
+
}
|
|
117
|
+
// Retrieve cached response from metadata.custom
|
|
118
|
+
const customData = memory.metadata?.custom;
|
|
119
|
+
if (!customData?.response) {
|
|
120
|
+
// Invalid cache entry - treat as miss
|
|
121
|
+
if (this.options.enableStats) {
|
|
122
|
+
this.stats.misses++;
|
|
123
|
+
}
|
|
124
|
+
return undefined;
|
|
125
|
+
}
|
|
126
|
+
return {
|
|
127
|
+
query: memory.content,
|
|
128
|
+
response: customData.response,
|
|
129
|
+
embedding: memory.embedding?.vector || [],
|
|
130
|
+
similarity: result.similarity,
|
|
131
|
+
timestamp: new Date(memory.createdAt).getTime(),
|
|
132
|
+
usage: customData.usage,
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
catch (error) {
|
|
136
|
+
// Fail gracefully - return undefined on error
|
|
137
|
+
this.logger.error('Semantic cache error:', error);
|
|
138
|
+
if (this.options.enableStats) {
|
|
139
|
+
this.stats.misses++;
|
|
140
|
+
}
|
|
141
|
+
return undefined;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Store response in semantic cache
|
|
146
|
+
*
|
|
147
|
+
* @param query - Original user query
|
|
148
|
+
* @param response - LLM response to cache
|
|
149
|
+
* @param usage - Optional token usage stats
|
|
150
|
+
*/
|
|
151
|
+
async set(query, response, usage) {
|
|
152
|
+
try {
|
|
153
|
+
// Generate embedding for query
|
|
154
|
+
const queryEmbedding = await generateEmbedding(query);
|
|
155
|
+
// Store in vector database
|
|
156
|
+
await this.vectorService.create({
|
|
157
|
+
version: 1,
|
|
158
|
+
content: query, // Store query as content for embedding similarity
|
|
159
|
+
embedding: queryEmbedding,
|
|
160
|
+
type: 'fact', // Cached LLM responses are facts
|
|
161
|
+
source: {
|
|
162
|
+
type: 'system',
|
|
163
|
+
id: 'semantic-cache',
|
|
164
|
+
context: 'semantic_cache',
|
|
165
|
+
confidence: 1,
|
|
166
|
+
},
|
|
167
|
+
metadata: {
|
|
168
|
+
siteId: this.options.siteId,
|
|
169
|
+
importance: 0.5,
|
|
170
|
+
tags: ['cache', 'llm_response'],
|
|
171
|
+
custom: {
|
|
172
|
+
response, // Store actual response in custom metadata
|
|
173
|
+
usage,
|
|
174
|
+
cachedAt: Date.now(),
|
|
175
|
+
cacheType: 'semantic',
|
|
176
|
+
},
|
|
177
|
+
},
|
|
178
|
+
accessCount: 0,
|
|
179
|
+
verified: false,
|
|
180
|
+
});
|
|
181
|
+
}
|
|
182
|
+
catch (error) {
|
|
183
|
+
// Fail gracefully - log error but don't throw
|
|
184
|
+
this.logger.error('Failed to store in semantic cache:', error);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
/**
|
|
188
|
+
* Extract query text from messages
|
|
189
|
+
*
|
|
190
|
+
* Combines all user messages into a single query string for caching
|
|
191
|
+
*/
|
|
192
|
+
extractQuery(messages) {
|
|
193
|
+
return messages
|
|
194
|
+
.filter((m) => m.role === 'user')
|
|
195
|
+
.map((m) => m.content)
|
|
196
|
+
.join(' ');
|
|
197
|
+
}
|
|
198
|
+
/**
|
|
199
|
+
* Get cache statistics
|
|
200
|
+
*/
|
|
201
|
+
getStats() {
|
|
202
|
+
const totalQueries = this.stats.hits + this.stats.misses;
|
|
203
|
+
const hitRate = totalQueries > 0 ? (this.stats.hits / totalQueries) * 100 : 0;
|
|
204
|
+
const avgSimilarity = this.stats.similarityScores.length > 0
|
|
205
|
+
? this.stats.similarityScores.reduce((a, b) => a + b, 0) /
|
|
206
|
+
this.stats.similarityScores.length
|
|
207
|
+
: 0;
|
|
208
|
+
return {
|
|
209
|
+
hits: this.stats.hits,
|
|
210
|
+
misses: this.stats.misses,
|
|
211
|
+
hitRate: Math.round(hitRate * 100) / 100,
|
|
212
|
+
avgSimilarity: Math.round(avgSimilarity * 100) / 100,
|
|
213
|
+
totalQueries,
|
|
214
|
+
};
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Reset statistics
|
|
218
|
+
*/
|
|
219
|
+
resetStats() {
|
|
220
|
+
this.stats = {
|
|
221
|
+
hits: 0,
|
|
222
|
+
misses: 0,
|
|
223
|
+
similarityScores: [],
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Clear expired cache entries
|
|
228
|
+
*
|
|
229
|
+
* @returns Number of entries cleared
|
|
230
|
+
*/
|
|
231
|
+
async clearExpired() {
|
|
232
|
+
// This would require a custom database query
|
|
233
|
+
// For now, expired entries are filtered in get()
|
|
234
|
+
return 0;
|
|
235
|
+
}
|
|
236
|
+
/**
|
|
237
|
+
* Warm cache with common queries
|
|
238
|
+
*
|
|
239
|
+
* Pre-populate cache with FAQ responses
|
|
240
|
+
*
|
|
241
|
+
* @example
|
|
242
|
+
* ```typescript
|
|
243
|
+
* await cache.warmCache([
|
|
244
|
+
* { query: 'How do I reset my password?', response: 'Go to...' },
|
|
245
|
+
* { query: 'What are your hours?', response: 'We are open...' },
|
|
246
|
+
* ])
|
|
247
|
+
* ```
|
|
248
|
+
*/
|
|
249
|
+
async warmCache(entries) {
|
|
250
|
+
for (const entry of entries) {
|
|
251
|
+
await this.set(entry.query, entry.response);
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* Calculate cost savings from semantic caching
|
|
257
|
+
*
|
|
258
|
+
* @example
|
|
259
|
+
* ```typescript
|
|
260
|
+
* const stats = cache.getStats()
|
|
261
|
+
* const savings = calculateSemanticCacheSavings(stats, {
|
|
262
|
+
* avgTokensPerQuery: 3500,
|
|
263
|
+
* costPerMTokens: 3.0,
|
|
264
|
+
* })
|
|
265
|
+
*
|
|
266
|
+
* // Saved: $X.XX, Avoided: N API calls
|
|
267
|
+
* logger.info('Cache savings', { saved: savings.totalSaved, avoided: savings.queriesAvoided })
|
|
268
|
+
* ```
|
|
269
|
+
*/
|
|
270
|
+
export function calculateSemanticCacheSavings(stats, pricing) {
|
|
271
|
+
const queriesAvoided = stats.hits;
|
|
272
|
+
const tokensAvoided = queriesAvoided * pricing.avgTokensPerQuery;
|
|
273
|
+
const totalSaved = (tokensAvoided * pricing.costPerMTokens) / 1_000_000;
|
|
274
|
+
return {
|
|
275
|
+
totalSaved,
|
|
276
|
+
queriesAvoided,
|
|
277
|
+
tokensAvoided,
|
|
278
|
+
};
|
|
279
|
+
}
|
|
280
|
+
/**
|
|
281
|
+
* Global semantic cache instance
|
|
282
|
+
* Shared across all LLM client instances in the process
|
|
283
|
+
*/
|
|
284
|
+
let globalSemanticCache = null;
|
|
285
|
+
/**
|
|
286
|
+
* Get or create global semantic cache
|
|
287
|
+
*
|
|
288
|
+
* @example
|
|
289
|
+
* ```typescript
|
|
290
|
+
* const cache = getGlobalSemanticCache({ similarityThreshold: 0.95 })
|
|
291
|
+
* const stats = cache.getStats()
|
|
292
|
+
* // Access stats.hitRate, stats.hits, stats.misses, etc.
|
|
293
|
+
* ```
|
|
294
|
+
*/
|
|
295
|
+
export function getGlobalSemanticCache(options) {
|
|
296
|
+
if (!globalSemanticCache) {
|
|
297
|
+
globalSemanticCache = new SemanticCache(options);
|
|
298
|
+
}
|
|
299
|
+
return globalSemanticCache;
|
|
300
|
+
}
|
|
301
|
+
/**
|
|
302
|
+
* Clear global semantic cache
|
|
303
|
+
*/
|
|
304
|
+
export function clearGlobalSemanticCache() {
|
|
305
|
+
globalSemanticCache = null;
|
|
306
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server-only LLM exports
|
|
3
|
+
*
|
|
4
|
+
* This module exports only server-side LLM functionality without React hooks.
|
|
5
|
+
* Use this in API routes and server-side code.
|
|
6
|
+
*/
|
|
7
|
+
export * from './client.js';
|
|
8
|
+
export * from './providers/anthropic.js';
|
|
9
|
+
export * from './providers/base.js';
|
|
10
|
+
export * from './providers/groq.js';
|
|
11
|
+
export * from './providers/ollama.js';
|
|
12
|
+
export * from './providers/openai.js';
|
|
13
|
+
export * from './providers/vultr.js';
|
|
14
|
+
//# sourceMappingURL=server.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/llm/server.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,cAAc,aAAa,CAAA;AAG3B,cAAc,0BAA0B,CAAA;AACxC,cAAc,qBAAqB,CAAA;AACnC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,uBAAuB,CAAA;AACrC,cAAc,sBAAsB,CAAA"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server-only LLM exports
|
|
3
|
+
*
|
|
4
|
+
* This module exports only server-side LLM functionality without React hooks.
|
|
5
|
+
* Use this in API routes and server-side code.
|
|
6
|
+
*/
|
|
7
|
+
// Export LLM client and factory functions
|
|
8
|
+
export * from './client.js';
|
|
9
|
+
// Export provider implementations
|
|
10
|
+
export * from './providers/anthropic.js';
|
|
11
|
+
export * from './providers/base.js';
|
|
12
|
+
export * from './providers/groq.js';
|
|
13
|
+
export * from './providers/ollama.js';
|
|
14
|
+
export * from './providers/openai.js';
|
|
15
|
+
export * from './providers/vultr.js';
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Counter + Cost Estimator
|
|
3
|
+
*
|
|
4
|
+
* Heuristic-based (no tiktoken dep).
|
|
5
|
+
* Anthropic/OpenAI/Groq: ~4 chars/token
|
|
6
|
+
* Ollama: ~3.5 chars/token
|
|
7
|
+
*
|
|
8
|
+
* Limitation: actual token counts differ by model tokenizer. This is
|
|
9
|
+
* accurate enough for budget tracking and context window management.
|
|
10
|
+
*/
|
|
11
|
+
import type { Message } from './providers/base.js';
|
|
12
|
+
export interface TokenCountResult {
|
|
13
|
+
tokens: number;
|
|
14
|
+
method: 'estimated';
|
|
15
|
+
}
|
|
16
|
+
export interface CostEstimate {
|
|
17
|
+
estimatedCostUsd: number;
|
|
18
|
+
tokens: number;
|
|
19
|
+
model: string;
|
|
20
|
+
direction: 'input' | 'output';
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Estimate token count for a string.
|
|
24
|
+
*/
|
|
25
|
+
export declare function countTokens(text: string, options?: {
|
|
26
|
+
model?: string;
|
|
27
|
+
}): TokenCountResult;
|
|
28
|
+
/**
|
|
29
|
+
* Estimate total token count for a messages array.
|
|
30
|
+
* Adds 4 tokens per message for role/formatting overhead.
|
|
31
|
+
*/
|
|
32
|
+
export declare function countMessages(messages: Message[], options?: {
|
|
33
|
+
model?: string;
|
|
34
|
+
}): TokenCountResult;
|
|
35
|
+
/**
|
|
36
|
+
* Estimate cost in USD for a given token count, model, and direction.
|
|
37
|
+
* Returns 0 for unknown models.
|
|
38
|
+
*/
|
|
39
|
+
export declare function estimateCost(tokens: number, model: string, direction: 'input' | 'output'): CostEstimate;
|
|
40
|
+
/**
|
|
41
|
+
* Estimate input token count and cost for a set of messages.
|
|
42
|
+
* Convenience function for pre-flight cost checking.
|
|
43
|
+
*/
|
|
44
|
+
export declare function estimateRequest(messages: Message[], model: string): {
|
|
45
|
+
tokens: number;
|
|
46
|
+
estimatedCostUsd: number;
|
|
47
|
+
};
|
|
48
|
+
//# sourceMappingURL=token-counter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-counter.d.ts","sourceRoot":"","sources":["../../src/llm/token-counter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAA;AAElD,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,WAAW,CAAA;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,gBAAgB,EAAE,MAAM,CAAA;IACxB,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,OAAO,GAAG,QAAQ,CAAA;CAC9B;AA4BD;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,CAGxF;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,CAQjG;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,OAAO,GAAG,QAAQ,GAC5B,YAAY,CASd;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,OAAO,EAAE,EACnB,KAAK,EAAE,MAAM,GACZ;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,gBAAgB,EAAE,MAAM,CAAA;CAAE,CAI9C"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Counter + Cost Estimator
|
|
3
|
+
*
|
|
4
|
+
* Heuristic-based (no tiktoken dep).
|
|
5
|
+
* Anthropic/OpenAI/Groq: ~4 chars/token
|
|
6
|
+
* Ollama: ~3.5 chars/token
|
|
7
|
+
*
|
|
8
|
+
* Limitation: actual token counts differ by model tokenizer. This is
|
|
9
|
+
* accurate enough for budget tracking and context window management.
|
|
10
|
+
*/
|
|
11
|
+
// Cost per 1M tokens (USD) — input/output pricing
|
|
12
|
+
const MODEL_PRICING = {
|
|
13
|
+
// Anthropic
|
|
14
|
+
'claude-opus-4-6': { input: 15.0, output: 75.0 },
|
|
15
|
+
'claude-sonnet-4-6': { input: 3.0, output: 15.0 },
|
|
16
|
+
'claude-haiku-4-5-20251001': { input: 0.25, output: 1.25 },
|
|
17
|
+
// OpenAI
|
|
18
|
+
'gpt-4o': { input: 5.0, output: 15.0 },
|
|
19
|
+
'gpt-4o-mini': { input: 0.15, output: 0.6 },
|
|
20
|
+
// Groq (Llama)
|
|
21
|
+
'llama-3.3-70b-versatile': { input: 0.59, output: 0.79 },
|
|
22
|
+
'llama-3.1-8b-instant': { input: 0.05, output: 0.08 },
|
|
23
|
+
// Ollama (self-hosted — no cost)
|
|
24
|
+
'llama3.2:3b': { input: 0, output: 0 },
|
|
25
|
+
'llama3.2:1b': { input: 0, output: 0 },
|
|
26
|
+
'nomic-embed-text': { input: 0, output: 0 },
|
|
27
|
+
};
|
|
28
|
+
function charsPerToken(model) {
|
|
29
|
+
const lower = model.toLowerCase();
|
|
30
|
+
if (lower.includes('ollama') || lower.includes('llama') || lower.includes('nomic')) {
|
|
31
|
+
return 3.5;
|
|
32
|
+
}
|
|
33
|
+
return 4.0;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Estimate token count for a string.
|
|
37
|
+
*/
|
|
38
|
+
export function countTokens(text, options) {
|
|
39
|
+
const ratio = charsPerToken(options?.model ?? '');
|
|
40
|
+
return { tokens: Math.ceil(text.length / ratio), method: 'estimated' };
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Estimate total token count for a messages array.
|
|
44
|
+
* Adds 4 tokens per message for role/formatting overhead.
|
|
45
|
+
*/
|
|
46
|
+
export function countMessages(messages, options) {
|
|
47
|
+
let total = 0;
|
|
48
|
+
for (const msg of messages) {
|
|
49
|
+
const content = typeof msg.content === 'string' ? msg.content : '';
|
|
50
|
+
total += countTokens(content, options).tokens + 4; // overhead per message
|
|
51
|
+
}
|
|
52
|
+
total += 2; // reply primer
|
|
53
|
+
return { tokens: total, method: 'estimated' };
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Estimate cost in USD for a given token count, model, and direction.
|
|
57
|
+
* Returns 0 for unknown models.
|
|
58
|
+
*/
|
|
59
|
+
export function estimateCost(tokens, model, direction) {
|
|
60
|
+
const pricing = MODEL_PRICING[model];
|
|
61
|
+
const perMillion = pricing?.[direction] ?? 0;
|
|
62
|
+
return {
|
|
63
|
+
estimatedCostUsd: (tokens / 1_000_000) * perMillion,
|
|
64
|
+
tokens,
|
|
65
|
+
model,
|
|
66
|
+
direction,
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Estimate input token count and cost for a set of messages.
|
|
71
|
+
* Convenience function for pre-flight cost checking.
|
|
72
|
+
*/
|
|
73
|
+
export function estimateRequest(messages, model) {
|
|
74
|
+
const { tokens } = countMessages(messages, { model });
|
|
75
|
+
const { estimatedCostUsd } = estimateCost(tokens, model, 'input');
|
|
76
|
+
return { tokens, estimatedCostUsd };
|
|
77
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-Workspace LLM Provider Configuration
|
|
3
|
+
*
|
|
4
|
+
* Allows workspaces to override the default LLM provider/model.
|
|
5
|
+
* Extends the existing createLLMClientForUser() BYOK pattern.
|
|
6
|
+
*/
|
|
7
|
+
import { LLMClient, type LLMProviderType } from './client.js';
|
|
8
|
+
export interface WorkspaceProviderConfig {
|
|
9
|
+
workspaceId: string;
|
|
10
|
+
provider: LLMProviderType;
|
|
11
|
+
apiKey: string;
|
|
12
|
+
model?: string;
|
|
13
|
+
baseURL?: string;
|
|
14
|
+
temperature?: number;
|
|
15
|
+
maxTokens?: number;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* In-memory registry of workspace-level LLM provider overrides.
|
|
19
|
+
* In production, these should be loaded from the database on startup
|
|
20
|
+
* and refreshed when workspace settings change.
|
|
21
|
+
*/
|
|
22
|
+
export declare class WorkspaceProviderRegistry {
|
|
23
|
+
private configs;
|
|
24
|
+
set(config: WorkspaceProviderConfig): void;
|
|
25
|
+
get(workspaceId: string): WorkspaceProviderConfig | undefined;
|
|
26
|
+
delete(workspaceId: string): void;
|
|
27
|
+
/**
|
|
28
|
+
* Create an LLMClient for the given workspace.
|
|
29
|
+
* If no workspace-specific config exists, returns the fallback client.
|
|
30
|
+
*
|
|
31
|
+
* @param workspaceId - Workspace ID to look up
|
|
32
|
+
* @param fallback - Client to use if no workspace override is set
|
|
33
|
+
*/
|
|
34
|
+
createClientForWorkspace(workspaceId: string, fallback: LLMClient): LLMClient;
|
|
35
|
+
}
|
|
36
|
+
/** Singleton registry — use this in API route handlers */
|
|
37
|
+
export declare const workspaceProviderRegistry: WorkspaceProviderRegistry;
|
|
38
|
+
//# sourceMappingURL=workspace-provider-config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workspace-provider-config.d.ts","sourceRoot":"","sources":["../../src/llm/workspace-provider-config.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,SAAS,EAAwB,KAAK,eAAe,EAAE,MAAM,aAAa,CAAA;AAEnF,MAAM,WAAW,uBAAuB;IACtC,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,eAAe,CAAA;IACzB,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAED;;;;GAIG;AACH,qBAAa,yBAAyB;IACpC,OAAO,CAAC,OAAO,CAAkD;IAEjE,GAAG,CAAC,MAAM,EAAE,uBAAuB,GAAG,IAAI;IAI1C,GAAG,CAAC,WAAW,EAAE,MAAM,GAAG,uBAAuB,GAAG,SAAS;IAI7D,MAAM,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAIjC;;;;;;OAMG;IACH,wBAAwB,CAAC,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,GAAG,SAAS;CAe9E;AAED,0DAA0D;AAC1D,eAAO,MAAM,yBAAyB,2BAAkC,CAAA"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-Workspace LLM Provider Configuration
|
|
3
|
+
*
|
|
4
|
+
* Allows workspaces to override the default LLM provider/model.
|
|
5
|
+
* Extends the existing createLLMClientForUser() BYOK pattern.
|
|
6
|
+
*/
|
|
7
|
+
import { LLMClient } from './client.js';
|
|
8
|
+
/**
|
|
9
|
+
* In-memory registry of workspace-level LLM provider overrides.
|
|
10
|
+
* In production, these should be loaded from the database on startup
|
|
11
|
+
* and refreshed when workspace settings change.
|
|
12
|
+
*/
|
|
13
|
+
export class WorkspaceProviderRegistry {
|
|
14
|
+
configs = new Map();
|
|
15
|
+
set(config) {
|
|
16
|
+
this.configs.set(config.workspaceId, config);
|
|
17
|
+
}
|
|
18
|
+
get(workspaceId) {
|
|
19
|
+
return this.configs.get(workspaceId);
|
|
20
|
+
}
|
|
21
|
+
delete(workspaceId) {
|
|
22
|
+
this.configs.delete(workspaceId);
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Create an LLMClient for the given workspace.
|
|
26
|
+
* If no workspace-specific config exists, returns the fallback client.
|
|
27
|
+
*
|
|
28
|
+
* @param workspaceId - Workspace ID to look up
|
|
29
|
+
* @param fallback - Client to use if no workspace override is set
|
|
30
|
+
*/
|
|
31
|
+
createClientForWorkspace(workspaceId, fallback) {
|
|
32
|
+
const config = this.configs.get(workspaceId);
|
|
33
|
+
if (!config)
|
|
34
|
+
return fallback;
|
|
35
|
+
const clientConfig = {
|
|
36
|
+
provider: config.provider,
|
|
37
|
+
apiKey: config.apiKey,
|
|
38
|
+
model: config.model,
|
|
39
|
+
baseURL: config.baseURL,
|
|
40
|
+
temperature: config.temperature,
|
|
41
|
+
maxTokens: config.maxTokens,
|
|
42
|
+
};
|
|
43
|
+
return new LLMClient(clientConfig);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
/** Singleton registry — use this in API route handlers */
|
|
47
|
+
export const workspaceProviderRegistry = new WorkspaceProviderRegistry();
|