@gzoo/cortex 0.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/CLAUDE.md +180 -0
- package/CONTRIBUTING.md +52 -0
- package/LICENSE +21 -0
- package/README.md +259 -0
- package/dist/cortex-mcp.mjs +3153 -0
- package/dist/cortex.mjs +8213 -0
- package/icon.png +0 -0
- package/logo.png +0 -0
- package/package.json +86 -0
- package/packages/cli/dist/commands/config.d.ts +4 -0
- package/packages/cli/dist/commands/config.d.ts.map +1 -0
- package/packages/cli/dist/commands/config.js +419 -0
- package/packages/cli/dist/commands/config.js.map +1 -0
- package/packages/cli/dist/commands/contradictions.d.ts +3 -0
- package/packages/cli/dist/commands/contradictions.d.ts.map +1 -0
- package/packages/cli/dist/commands/contradictions.js +74 -0
- package/packages/cli/dist/commands/contradictions.js.map +1 -0
- package/packages/cli/dist/commands/costs.d.ts +3 -0
- package/packages/cli/dist/commands/costs.d.ts.map +1 -0
- package/packages/cli/dist/commands/costs.js +168 -0
- package/packages/cli/dist/commands/costs.js.map +1 -0
- package/packages/cli/dist/commands/db.d.ts +3 -0
- package/packages/cli/dist/commands/db.d.ts.map +1 -0
- package/packages/cli/dist/commands/db.js +139 -0
- package/packages/cli/dist/commands/db.js.map +1 -0
- package/packages/cli/dist/commands/find.d.ts +3 -0
- package/packages/cli/dist/commands/find.d.ts.map +1 -0
- package/packages/cli/dist/commands/find.js +139 -0
- package/packages/cli/dist/commands/find.js.map +1 -0
- package/packages/cli/dist/commands/ingest.d.ts +3 -0
- package/packages/cli/dist/commands/ingest.d.ts.map +1 -0
- package/packages/cli/dist/commands/ingest.js +179 -0
- package/packages/cli/dist/commands/ingest.js.map +1 -0
- package/packages/cli/dist/commands/init.d.ts +3 -0
- package/packages/cli/dist/commands/init.d.ts.map +1 -0
- package/packages/cli/dist/commands/init.js +285 -0
- package/packages/cli/dist/commands/init.js.map +1 -0
- package/packages/cli/dist/commands/mcp.d.ts +3 -0
- package/packages/cli/dist/commands/mcp.d.ts.map +1 -0
- package/packages/cli/dist/commands/mcp.js +65 -0
- package/packages/cli/dist/commands/mcp.js.map +1 -0
- package/packages/cli/dist/commands/models.d.ts +3 -0
- package/packages/cli/dist/commands/models.d.ts.map +1 -0
- package/packages/cli/dist/commands/models.js +245 -0
- package/packages/cli/dist/commands/models.js.map +1 -0
- package/packages/cli/dist/commands/privacy.d.ts +3 -0
- package/packages/cli/dist/commands/privacy.d.ts.map +1 -0
- package/packages/cli/dist/commands/privacy.js +140 -0
- package/packages/cli/dist/commands/privacy.js.map +1 -0
- package/packages/cli/dist/commands/projects.d.ts +3 -0
- package/packages/cli/dist/commands/projects.d.ts.map +1 -0
- package/packages/cli/dist/commands/projects.js +142 -0
- package/packages/cli/dist/commands/projects.js.map +1 -0
- package/packages/cli/dist/commands/query.d.ts +3 -0
- package/packages/cli/dist/commands/query.d.ts.map +1 -0
- package/packages/cli/dist/commands/query.js +153 -0
- package/packages/cli/dist/commands/query.js.map +1 -0
- package/packages/cli/dist/commands/report.d.ts +3 -0
- package/packages/cli/dist/commands/report.d.ts.map +1 -0
- package/packages/cli/dist/commands/report.js +144 -0
- package/packages/cli/dist/commands/report.js.map +1 -0
- package/packages/cli/dist/commands/resolve.d.ts +3 -0
- package/packages/cli/dist/commands/resolve.d.ts.map +1 -0
- package/packages/cli/dist/commands/resolve.js +119 -0
- package/packages/cli/dist/commands/resolve.js.map +1 -0
- package/packages/cli/dist/commands/serve.d.ts +3 -0
- package/packages/cli/dist/commands/serve.d.ts.map +1 -0
- package/packages/cli/dist/commands/serve.js +108 -0
- package/packages/cli/dist/commands/serve.js.map +1 -0
- package/packages/cli/dist/commands/status.d.ts +3 -0
- package/packages/cli/dist/commands/status.d.ts.map +1 -0
- package/packages/cli/dist/commands/status.js +230 -0
- package/packages/cli/dist/commands/status.js.map +1 -0
- package/packages/cli/dist/commands/stop.d.ts +5 -0
- package/packages/cli/dist/commands/stop.d.ts.map +1 -0
- package/packages/cli/dist/commands/stop.js +80 -0
- package/packages/cli/dist/commands/stop.js.map +1 -0
- package/packages/cli/dist/commands/watch.d.ts +3 -0
- package/packages/cli/dist/commands/watch.d.ts.map +1 -0
- package/packages/cli/dist/commands/watch.js +235 -0
- package/packages/cli/dist/commands/watch.js.map +1 -0
- package/packages/cli/dist/index.d.ts +9 -0
- package/packages/cli/dist/index.d.ts.map +1 -0
- package/packages/cli/dist/index.js +68 -0
- package/packages/cli/dist/index.js.map +1 -0
- package/packages/cli/package.json +37 -0
- package/packages/cli/tsconfig.json +16 -0
- package/packages/core/dist/config/loader.d.ts +11 -0
- package/packages/core/dist/config/loader.d.ts.map +1 -0
- package/packages/core/dist/config/loader.js +133 -0
- package/packages/core/dist/config/loader.js.map +1 -0
- package/packages/core/dist/config/project-registry.d.ts +71 -0
- package/packages/core/dist/config/project-registry.d.ts.map +1 -0
- package/packages/core/dist/config/project-registry.js +89 -0
- package/packages/core/dist/config/project-registry.js.map +1 -0
- package/packages/core/dist/config/schema.d.ts +909 -0
- package/packages/core/dist/config/schema.d.ts.map +1 -0
- package/packages/core/dist/config/schema.js +125 -0
- package/packages/core/dist/config/schema.js.map +1 -0
- package/packages/core/dist/errors/cortex-error.d.ts +58 -0
- package/packages/core/dist/errors/cortex-error.d.ts.map +1 -0
- package/packages/core/dist/errors/cortex-error.js +68 -0
- package/packages/core/dist/errors/cortex-error.js.map +1 -0
- package/packages/core/dist/events/event-bus.d.ts +10 -0
- package/packages/core/dist/events/event-bus.d.ts.map +1 -0
- package/packages/core/dist/events/event-bus.js +42 -0
- package/packages/core/dist/events/event-bus.js.map +1 -0
- package/packages/core/dist/index.d.ts +8 -0
- package/packages/core/dist/index.d.ts.map +1 -0
- package/packages/core/dist/index.js +22 -0
- package/packages/core/dist/index.js.map +1 -0
- package/packages/core/dist/logger.d.ts +16 -0
- package/packages/core/dist/logger.d.ts.map +1 -0
- package/packages/core/dist/logger.js +57 -0
- package/packages/core/dist/logger.js.map +1 -0
- package/packages/core/dist/types/config.d.ts +107 -0
- package/packages/core/dist/types/config.d.ts.map +1 -0
- package/packages/core/dist/types/config.js +2 -0
- package/packages/core/dist/types/config.js.map +1 -0
- package/packages/core/dist/types/entity.d.ts +35 -0
- package/packages/core/dist/types/entity.d.ts.map +1 -0
- package/packages/core/dist/types/entity.js +2 -0
- package/packages/core/dist/types/entity.js.map +1 -0
- package/packages/core/dist/types/events.d.ts +76 -0
- package/packages/core/dist/types/events.d.ts.map +1 -0
- package/packages/core/dist/types/events.js +2 -0
- package/packages/core/dist/types/events.js.map +1 -0
- package/packages/core/dist/types/file.d.ts +15 -0
- package/packages/core/dist/types/file.d.ts.map +1 -0
- package/packages/core/dist/types/file.js +2 -0
- package/packages/core/dist/types/file.js.map +1 -0
- package/packages/core/dist/types/graph.d.ts +93 -0
- package/packages/core/dist/types/graph.d.ts.map +1 -0
- package/packages/core/dist/types/graph.js +2 -0
- package/packages/core/dist/types/graph.js.map +1 -0
- package/packages/core/dist/types/index.d.ts +10 -0
- package/packages/core/dist/types/index.d.ts.map +1 -0
- package/packages/core/dist/types/index.js +2 -0
- package/packages/core/dist/types/index.js.map +1 -0
- package/packages/core/dist/types/llm.d.ts +95 -0
- package/packages/core/dist/types/llm.d.ts.map +1 -0
- package/packages/core/dist/types/llm.js +10 -0
- package/packages/core/dist/types/llm.js.map +1 -0
- package/packages/core/dist/types/project.d.ts +11 -0
- package/packages/core/dist/types/project.d.ts.map +1 -0
- package/packages/core/dist/types/project.js +2 -0
- package/packages/core/dist/types/project.js.map +1 -0
- package/packages/core/dist/types/relationship.d.ts +26 -0
- package/packages/core/dist/types/relationship.d.ts.map +1 -0
- package/packages/core/dist/types/relationship.js +2 -0
- package/packages/core/dist/types/relationship.js.map +1 -0
- package/packages/core/package.json +22 -0
- package/packages/core/tsconfig.json +9 -0
- package/packages/graph/dist/index.d.ts +4 -0
- package/packages/graph/dist/index.d.ts.map +1 -0
- package/packages/graph/dist/index.js +4 -0
- package/packages/graph/dist/index.js.map +1 -0
- package/packages/graph/dist/migrations/001-initial.d.ts +4 -0
- package/packages/graph/dist/migrations/001-initial.d.ts.map +1 -0
- package/packages/graph/dist/migrations/001-initial.js +134 -0
- package/packages/graph/dist/migrations/001-initial.js.map +1 -0
- package/packages/graph/dist/query-engine.d.ts +35 -0
- package/packages/graph/dist/query-engine.d.ts.map +1 -0
- package/packages/graph/dist/query-engine.js +185 -0
- package/packages/graph/dist/query-engine.js.map +1 -0
- package/packages/graph/dist/sqlite-store.d.ts +125 -0
- package/packages/graph/dist/sqlite-store.d.ts.map +1 -0
- package/packages/graph/dist/sqlite-store.js +632 -0
- package/packages/graph/dist/sqlite-store.js.map +1 -0
- package/packages/graph/dist/vector-store.d.ts +27 -0
- package/packages/graph/dist/vector-store.d.ts.map +1 -0
- package/packages/graph/dist/vector-store.js +85 -0
- package/packages/graph/dist/vector-store.js.map +1 -0
- package/packages/graph/package.json +27 -0
- package/packages/graph/tsconfig.json +12 -0
- package/packages/ingest/dist/chunker.d.ts +21 -0
- package/packages/ingest/dist/chunker.d.ts.map +1 -0
- package/packages/ingest/dist/chunker.js +118 -0
- package/packages/ingest/dist/chunker.js.map +1 -0
- package/packages/ingest/dist/index.d.ts +11 -0
- package/packages/ingest/dist/index.d.ts.map +1 -0
- package/packages/ingest/dist/index.js +14 -0
- package/packages/ingest/dist/index.js.map +1 -0
- package/packages/ingest/dist/parsers/conversation.d.ts +10 -0
- package/packages/ingest/dist/parsers/conversation.d.ts.map +1 -0
- package/packages/ingest/dist/parsers/conversation.js +150 -0
- package/packages/ingest/dist/parsers/conversation.js.map +1 -0
- package/packages/ingest/dist/parsers/index.d.ts +11 -0
- package/packages/ingest/dist/parsers/index.d.ts.map +1 -0
- package/packages/ingest/dist/parsers/index.js +42 -0
- package/packages/ingest/dist/parsers/index.js.map +1 -0
- package/packages/ingest/dist/parsers/json-parser.d.ts +6 -0
- package/packages/ingest/dist/parsers/json-parser.d.ts.map +1 -0
- package/packages/ingest/dist/parsers/json-parser.js +114 -0
- package/packages/ingest/dist/parsers/json-parser.js.map +1 -0
- package/packages/ingest/dist/parsers/markdown.d.ts +6 -0
- package/packages/ingest/dist/parsers/markdown.d.ts.map +1 -0
- package/packages/ingest/dist/parsers/markdown.js +116 -0
- package/packages/ingest/dist/parsers/markdown.js.map +1 -0
- package/packages/ingest/dist/parsers/types.d.ts +18 -0
- package/packages/ingest/dist/parsers/types.d.ts.map +1 -0
- package/packages/ingest/dist/parsers/types.js +2 -0
- package/packages/ingest/dist/parsers/types.js.map +1 -0
- package/packages/ingest/dist/parsers/typescript.d.ts +11 -0
- package/packages/ingest/dist/parsers/typescript.d.ts.map +1 -0
- package/packages/ingest/dist/parsers/typescript.js +197 -0
- package/packages/ingest/dist/parsers/typescript.js.map +1 -0
- package/packages/ingest/dist/parsers/yaml-parser.d.ts +6 -0
- package/packages/ingest/dist/parsers/yaml-parser.d.ts.map +1 -0
- package/packages/ingest/dist/parsers/yaml-parser.js +52 -0
- package/packages/ingest/dist/parsers/yaml-parser.js.map +1 -0
- package/packages/ingest/dist/pipeline.d.ts +30 -0
- package/packages/ingest/dist/pipeline.d.ts.map +1 -0
- package/packages/ingest/dist/pipeline.js +311 -0
- package/packages/ingest/dist/pipeline.js.map +1 -0
- package/packages/ingest/dist/post-ingest.d.ts +25 -0
- package/packages/ingest/dist/post-ingest.d.ts.map +1 -0
- package/packages/ingest/dist/post-ingest.js +171 -0
- package/packages/ingest/dist/post-ingest.js.map +1 -0
- package/packages/ingest/dist/watcher.d.ts +26 -0
- package/packages/ingest/dist/watcher.d.ts.map +1 -0
- package/packages/ingest/dist/watcher.js +142 -0
- package/packages/ingest/dist/watcher.js.map +1 -0
- package/packages/ingest/package.json +30 -0
- package/packages/ingest/tsconfig.json +14 -0
- package/packages/llm/dist/cache.d.ts +26 -0
- package/packages/llm/dist/cache.d.ts.map +1 -0
- package/packages/llm/dist/cache.js +60 -0
- package/packages/llm/dist/cache.js.map +1 -0
- package/packages/llm/dist/index.d.ts +15 -0
- package/packages/llm/dist/index.d.ts.map +1 -0
- package/packages/llm/dist/index.js +19 -0
- package/packages/llm/dist/index.js.map +1 -0
- package/packages/llm/dist/output-parser.d.ts +4 -0
- package/packages/llm/dist/output-parser.d.ts.map +1 -0
- package/packages/llm/dist/output-parser.js +207 -0
- package/packages/llm/dist/output-parser.js.map +1 -0
- package/packages/llm/dist/prompts/context-ranking.d.ts +33 -0
- package/packages/llm/dist/prompts/context-ranking.d.ts.map +1 -0
- package/packages/llm/dist/prompts/context-ranking.js +30 -0
- package/packages/llm/dist/prompts/context-ranking.js.map +1 -0
- package/packages/llm/dist/prompts/contradiction-detection.d.ts +46 -0
- package/packages/llm/dist/prompts/contradiction-detection.d.ts.map +1 -0
- package/packages/llm/dist/prompts/contradiction-detection.js +45 -0
- package/packages/llm/dist/prompts/contradiction-detection.js.map +1 -0
- package/packages/llm/dist/prompts/conversational-query.d.ts +29 -0
- package/packages/llm/dist/prompts/conversational-query.d.ts.map +1 -0
- package/packages/llm/dist/prompts/conversational-query.js +34 -0
- package/packages/llm/dist/prompts/conversational-query.js.map +1 -0
- package/packages/llm/dist/prompts/entity-extraction.d.ts +67 -0
- package/packages/llm/dist/prompts/entity-extraction.d.ts.map +1 -0
- package/packages/llm/dist/prompts/entity-extraction.js +76 -0
- package/packages/llm/dist/prompts/entity-extraction.js.map +1 -0
- package/packages/llm/dist/prompts/follow-up-generation.d.ts +25 -0
- package/packages/llm/dist/prompts/follow-up-generation.d.ts.map +1 -0
- package/packages/llm/dist/prompts/follow-up-generation.js +25 -0
- package/packages/llm/dist/prompts/follow-up-generation.js.map +1 -0
- package/packages/llm/dist/prompts/merge-detection.d.ts +41 -0
- package/packages/llm/dist/prompts/merge-detection.d.ts.map +1 -0
- package/packages/llm/dist/prompts/merge-detection.js +31 -0
- package/packages/llm/dist/prompts/merge-detection.js.map +1 -0
- package/packages/llm/dist/prompts/relationship-inference.d.ts +60 -0
- package/packages/llm/dist/prompts/relationship-inference.d.ts.map +1 -0
- package/packages/llm/dist/prompts/relationship-inference.js +66 -0
- package/packages/llm/dist/prompts/relationship-inference.js.map +1 -0
- package/packages/llm/dist/providers/anthropic.d.ts +39 -0
- package/packages/llm/dist/providers/anthropic.d.ts.map +1 -0
- package/packages/llm/dist/providers/anthropic.js +157 -0
- package/packages/llm/dist/providers/anthropic.js.map +1 -0
- package/packages/llm/dist/providers/ollama.d.ts +52 -0
- package/packages/llm/dist/providers/ollama.d.ts.map +1 -0
- package/packages/llm/dist/providers/ollama.js +297 -0
- package/packages/llm/dist/providers/ollama.js.map +1 -0
- package/packages/llm/dist/providers/openai-compatible.d.ts +40 -0
- package/packages/llm/dist/providers/openai-compatible.d.ts.map +1 -0
- package/packages/llm/dist/providers/openai-compatible.js +164 -0
- package/packages/llm/dist/providers/openai-compatible.js.map +1 -0
- package/packages/llm/dist/router.d.ts +87 -0
- package/packages/llm/dist/router.d.ts.map +1 -0
- package/packages/llm/dist/router.js +399 -0
- package/packages/llm/dist/router.js.map +1 -0
- package/packages/llm/dist/token-tracker.d.ts +24 -0
- package/packages/llm/dist/token-tracker.d.ts.map +1 -0
- package/packages/llm/dist/token-tracker.js +114 -0
- package/packages/llm/dist/token-tracker.js.map +1 -0
- package/packages/llm/package.json +25 -0
- package/packages/llm/tsconfig.json +12 -0
- package/packages/mcp/dist/index.d.ts +10 -0
- package/packages/mcp/dist/index.d.ts.map +1 -0
- package/packages/mcp/dist/index.js +33 -0
- package/packages/mcp/dist/index.js.map +1 -0
- package/packages/mcp/dist/server.d.ts +5 -0
- package/packages/mcp/dist/server.d.ts.map +1 -0
- package/packages/mcp/dist/server.js +87 -0
- package/packages/mcp/dist/server.js.map +1 -0
- package/packages/mcp/dist/store-factory.d.ts +8 -0
- package/packages/mcp/dist/store-factory.d.ts.map +1 -0
- package/packages/mcp/dist/store-factory.js +24 -0
- package/packages/mcp/dist/store-factory.js.map +1 -0
- package/packages/mcp/dist/tools/contradictions.d.ts +14 -0
- package/packages/mcp/dist/tools/contradictions.d.ts.map +1 -0
- package/packages/mcp/dist/tools/contradictions.js +33 -0
- package/packages/mcp/dist/tools/contradictions.js.map +1 -0
- package/packages/mcp/dist/tools/find.d.ts +37 -0
- package/packages/mcp/dist/tools/find.d.ts.map +1 -0
- package/packages/mcp/dist/tools/find.js +52 -0
- package/packages/mcp/dist/tools/find.js.map +1 -0
- package/packages/mcp/dist/tools/projects.d.ts +16 -0
- package/packages/mcp/dist/tools/projects.d.ts.map +1 -0
- package/packages/mcp/dist/tools/projects.js +17 -0
- package/packages/mcp/dist/tools/projects.js.map +1 -0
- package/packages/mcp/dist/tools/query.d.ts +20 -0
- package/packages/mcp/dist/tools/query.d.ts.map +1 -0
- package/packages/mcp/dist/tools/query.js +65 -0
- package/packages/mcp/dist/tools/query.js.map +1 -0
- package/packages/mcp/dist/tools/status.d.ts +15 -0
- package/packages/mcp/dist/tools/status.d.ts.map +1 -0
- package/packages/mcp/dist/tools/status.js +17 -0
- package/packages/mcp/dist/tools/status.js.map +1 -0
- package/packages/mcp/package.json +27 -0
- package/packages/mcp/tsconfig.json +14 -0
- package/packages/server/dist/index.d.ts +17 -0
- package/packages/server/dist/index.d.ts.map +1 -0
- package/packages/server/dist/index.js +170 -0
- package/packages/server/dist/index.js.map +1 -0
- package/packages/server/dist/middleware/auth.d.ts +9 -0
- package/packages/server/dist/middleware/auth.d.ts.map +1 -0
- package/packages/server/dist/middleware/auth.js +94 -0
- package/packages/server/dist/middleware/auth.js.map +1 -0
- package/packages/server/dist/routes/contradictions.d.ts +4 -0
- package/packages/server/dist/routes/contradictions.d.ts.map +1 -0
- package/packages/server/dist/routes/contradictions.js +63 -0
- package/packages/server/dist/routes/contradictions.js.map +1 -0
- package/packages/server/dist/routes/entities.d.ts +4 -0
- package/packages/server/dist/routes/entities.d.ts.map +1 -0
- package/packages/server/dist/routes/entities.js +61 -0
- package/packages/server/dist/routes/entities.js.map +1 -0
- package/packages/server/dist/routes/projects.d.ts +4 -0
- package/packages/server/dist/routes/projects.d.ts.map +1 -0
- package/packages/server/dist/routes/projects.js +35 -0
- package/packages/server/dist/routes/projects.js.map +1 -0
- package/packages/server/dist/routes/query.d.ts +4 -0
- package/packages/server/dist/routes/query.d.ts.map +1 -0
- package/packages/server/dist/routes/query.js +93 -0
- package/packages/server/dist/routes/query.js.map +1 -0
- package/packages/server/dist/routes/relationships.d.ts +4 -0
- package/packages/server/dist/routes/relationships.d.ts.map +1 -0
- package/packages/server/dist/routes/relationships.js +52 -0
- package/packages/server/dist/routes/relationships.js.map +1 -0
- package/packages/server/dist/routes/status.d.ts +4 -0
- package/packages/server/dist/routes/status.d.ts.map +1 -0
- package/packages/server/dist/routes/status.js +85 -0
- package/packages/server/dist/routes/status.js.map +1 -0
- package/packages/server/dist/ws/event-relay.d.ts +10 -0
- package/packages/server/dist/ws/event-relay.d.ts.map +1 -0
- package/packages/server/dist/ws/event-relay.js +83 -0
- package/packages/server/dist/ws/event-relay.js.map +1 -0
- package/packages/server/package.json +32 -0
- package/packages/server/tsconfig.json +15 -0
- package/packages/web/dist/assets/index-Bxjfq4I0.css +1 -0
- package/packages/web/dist/assets/index-MqwNS5FD.js +248 -0
- package/packages/web/dist/assets/index-MqwNS5FD.js.map +1 -0
- package/packages/web/dist/cortex-icon.svg +9 -0
- package/packages/web/dist/icon.png +0 -0
- package/packages/web/dist/index.html +14 -0
- package/packages/web/dist/logo.png +0 -0
- package/packages/web/index.html +13 -0
- package/packages/web/package.json +34 -0
- package/packages/web/public/cortex-icon.svg +9 -0
- package/packages/web/public/icon.png +0 -0
- package/packages/web/public/logo.png +0 -0
- package/packages/web/tsconfig.json +22 -0
- package/packages/web/vite.config.ts +20 -0
- package/tsconfig.base.json +18 -0
- package/tsconfig.json +12 -0
- package/vitest.config.ts +14 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
import { LLMTask, CortexError, LLM_AUTH_FAILED, LLM_PROVIDER_UNAVAILABLE, LLM_RATE_LIMITED, LLM_TIMEOUT, createLogger, } from '@cortex/core';
|
|
3
|
+
const logger = createLogger('llm:openai-compatible');
|
|
4
|
+
/**
 * LLM provider for any service that exposes an OpenAI-style chat-completions
 * API, reached through the `openai` SDK client.
 *
 * Two models are configured: a "primary" one and a "fast" one; callers pick
 * between them with a `'primary' | 'fast'` preference. Embeddings are not
 * supported by this provider (`embed` always throws).
 */
export class OpenAICompatibleProvider {
    name = 'openai-compatible';
    type = 'cloud';
    client;
    primaryModel;
    fastModel;
    isGemini;
    capabilities = {
        supportedTasks: [
            LLMTask.ENTITY_EXTRACTION,
            LLMTask.RELATIONSHIP_INFERENCE,
            LLMTask.CONTRADICTION_DETECTION,
            LLMTask.CONVERSATIONAL_QUERY,
            LLMTask.CONTEXT_RANKING,
        ],
        maxContextTokens: 128_000,
        supportsStructuredOutput: true,
        supportsStreaming: true,
        estimatedTokensPerSecond: 80,
        // Set to 0 — pricing varies by provider; budget tracking is approximate
        costPerMillionInputTokens: 0,
        costPerMillionOutputTokens: 0,
    };

    /**
     * @param {object} options
     * @param {string} options.apiKey - Required; a missing key throws immediately.
     * @param {string} [options.baseUrl] - Forwarded to the SDK as `baseURL`.
     * @param {number} [options.timeoutMs=60000] - Forwarded to the SDK as `timeout`.
     * @param {number} [options.maxRetries=3] - Forwarded to the SDK as `maxRetries`.
     * @param {string} [options.primaryModel='gpt-4o']
     * @param {string} [options.fastModel='gpt-4o-mini']
     * @throws {CortexError} LLM_AUTH_FAILED when `options.apiKey` is falsy.
     */
    constructor(options) {
        if (!options.apiKey) {
            throw new CortexError(LLM_AUTH_FAILED, 'critical', 'llm', 'OpenAI-compatible API key not found. Check llm.cloud.apiKeySource in your config.', undefined, 'Set the environment variable specified in llm.cloud.apiKeySource.', false, 401);
        }
        this.client = new OpenAI({
            apiKey: options.apiKey,
            baseURL: options.baseUrl,
            timeout: options.timeoutMs ?? 60_000,
            maxRetries: options.maxRetries ?? 3,
        });
        this.primaryModel = options.primaryModel ?? 'gpt-4o';
        this.fastModel = options.fastModel ?? 'gpt-4o-mini';
        // baseUrl is passed to the SDK unchecked above, so it may be absent;
        // an absent URL is simply "not Gemini" rather than a TypeError.
        this.isGemini = options.baseUrl?.includes('generativelanguage.googleapis.com') ?? false;
        logger.info('OpenAI-compatible provider initialized', {
            baseUrl: options.baseUrl,
            primaryModel: this.primaryModel,
            fastModel: this.fastModel,
        });
    }

    /**
     * Resolve a model preference to a configured model name.
     * @param {'primary'|'fast'} [preference='primary']
     * @returns {string} `fastModel` for `'fast'`, otherwise `primaryModel`.
     */
    getModel(preference = 'primary') {
        return preference === 'fast' ? this.fastModel : this.primaryModel;
    }

    /** Gemini uses max_completion_tokens; others use max_tokens */
    tokenLimitParams(maxTokens) {
        return this.isGemini
            ? { max_completion_tokens: maxTokens }
            : { max_tokens: maxTokens };
    }

    /**
     * Build the chat message list: an optional system message followed by the
     * user message. A falsy system prompt (undefined or '') is omitted.
     * @param {string|undefined} systemPrompt
     * @param {string} userPrompt
     * @returns {Array<{role: string, content: string}>}
     */
    buildMessages(systemPrompt, userPrompt) {
        const messages = [];
        if (systemPrompt) {
            messages.push({ role: 'system', content: systemPrompt });
        }
        messages.push({ role: 'user', content: userPrompt });
        return messages;
    }

    /**
     * Run a completion without a system prompt, using the primary model.
     * @returns {Promise<string>} The completion text ('' when the response has none).
     * @throws {CortexError} See `mapError`.
     */
    async complete(prompt, options) {
        const result = await this.completeWithSystem(undefined, prompt, options);
        return result.content;
    }

    /**
     * Run a single non-streaming chat completion.
     * @param {string|undefined} systemPrompt
     * @param {string} userPrompt
     * @param {object} [options] - Reads `maxTokens` (default 4096),
     *   `temperature` (default 0.7) and `stopSequences` (sent only when non-empty).
     * @param {'primary'|'fast'} [modelPreference='primary']
     * @returns {Promise<{content: string, inputTokens: number, outputTokens: number, model: string}>}
     *   Token counts fall back to 0 when the response carries no usage data.
     * @throws {CortexError} Any failure is converted by `mapError`.
     */
    async completeWithSystem(systemPrompt, userPrompt, options, modelPreference = 'primary') {
        const model = this.getModel(modelPreference);
        try {
            const response = await this.client.chat.completions.create({
                model,
                ...this.tokenLimitParams(options?.maxTokens ?? 4096),
                temperature: options?.temperature ?? 0.7,
                messages: this.buildMessages(systemPrompt, userPrompt),
                ...(options?.stopSequences?.length && { stop: options.stopSequences }),
            });
            const content = response.choices?.[0]?.message?.content ?? '';
            return {
                content,
                inputTokens: response.usage?.prompt_tokens ?? 0,
                outputTokens: response.usage?.completion_tokens ?? 0,
                model,
            };
        }
        catch (err) {
            throw this.mapError(err);
        }
    }

    /**
     * Remove a surrounding Markdown code fence (```lang ... ```) from a model
     * reply. Text that is not a string, is not fenced, or has its content on
     * the same line as the opening fence is returned unchanged.
     * @param {*} text
     * @returns {*} The fenced content, or `text` itself.
     */
    stripCodeFence(text) {
        if (typeof text !== 'string') {
            return text;
        }
        const trimmed = text.trim();
        if (trimmed.length < 6 || !trimmed.startsWith('```') || !trimmed.endsWith('```')) {
            return text;
        }
        // The first line holds the opening fence plus an optional language tag.
        const firstNewline = trimmed.indexOf('\n');
        if (firstNewline === -1) {
            return text;
        }
        return trimmed.slice(firstNewline + 1, -3);
    }

    /**
     * Run a completion and parse the reply as JSON. `_schema` is accepted for
     * interface compatibility but is not used for validation here.
     *
     * A reply wrapped in a Markdown code fence is unwrapped before parsing;
     * valid JSON never starts with a fence, so replies that parsed before
     * still parse to the same value.
     * @returns {Promise<*>} The parsed JSON value.
     * @throws {SyntaxError} When the reply is not valid JSON.
     * @throws {CortexError} When the underlying completion fails.
     */
    async completeStructured(prompt, _schema, options) {
        const result = await this.complete(prompt, options);
        return JSON.parse(this.stripCodeFence(result));
    }

    /**
     * Stream a completion without a system prompt, using the primary model.
     * Yields text deltas; the usage summary produced by `streamWithSystem`
     * is not propagated as this generator's return value.
     */
    async *stream(prompt, options) {
        yield* this.streamWithSystem(undefined, prompt, options);
    }

    /**
     * Stream a chat completion, yielding each non-empty text delta.
     * @param {string|undefined} systemPrompt
     * @param {string} userPrompt
     * @param {object} [options] - Same fields as `completeWithSystem`.
     * @param {'primary'|'fast'} [modelPreference='primary']
     * @returns {AsyncGenerator<string, {inputTokens: number, outputTokens: number, model: string}>}
     *   The generator's return value carries the token usage (0 when unreported).
     * @throws {CortexError} Any failure is converted by `mapError`.
     */
    async *streamWithSystem(systemPrompt, userPrompt, options, modelPreference = 'primary') {
        const model = this.getModel(modelPreference);
        try {
            const stream = await this.client.chat.completions.create({
                model,
                ...this.tokenLimitParams(options?.maxTokens ?? 4096),
                temperature: options?.temperature ?? 0.7,
                messages: this.buildMessages(systemPrompt, userPrompt),
                stream: true,
                stream_options: { include_usage: true },
                ...(options?.stopSequences?.length && { stop: options.stopSequences }),
            });
            let inputTokens = 0;
            let outputTokens = 0;
            for await (const chunk of stream) {
                // Usage-only chunks may carry an empty or absent `choices` list.
                const delta = chunk.choices?.[0]?.delta?.content;
                if (delta) {
                    yield delta;
                }
                // Usage is reported in the final chunk when stream_options.include_usage = true
                if (chunk.usage) {
                    inputTokens = chunk.usage.prompt_tokens ?? 0;
                    outputTokens = chunk.usage.completion_tokens ?? 0;
                }
            }
            return { inputTokens, outputTokens, model };
        }
        catch (err) {
            throw this.mapError(err);
        }
    }

    /**
     * Embeddings are not provided by this provider.
     * @throws {CortexError} Always (LLM_PROVIDER_UNAVAILABLE).
     */
    async embed(_texts) {
        throw new CortexError(LLM_PROVIDER_UNAVAILABLE, 'medium', 'llm', 'OpenAI-compatible provider does not handle embeddings. Use local embedding model.');
    }

    /**
     * Probe the service by sending a one-token "ping" request to the fast model.
     * @returns {Promise<boolean>} true when the request succeeds, false on any error.
     */
    async isAvailable() {
        try {
            await this.client.chat.completions.create({
                model: this.fastModel,
                ...this.tokenLimitParams(1),
                messages: [{ role: 'user', content: 'ping' }],
            });
            return true;
        }
        catch (err) {
            // Best-effort probe: never throw, but keep the reason discoverable.
            logger.debug('OpenAI-compatible availability check failed', {
                error: err instanceof Error ? err.message : String(err),
            });
            return false;
        }
    }

    /**
     * Convert an error thrown by the SDK (or anything else) into a CortexError.
     * Checks run from most to least specific: authentication, rate limit,
     * connection timeout, generic API error, then any other value.
     * @param {unknown} err
     * @returns {CortexError} The mapped error; callers are expected to throw it.
     */
    mapError(err) {
        if (err instanceof OpenAI.AuthenticationError) {
            return new CortexError(LLM_AUTH_FAILED, 'critical', 'llm', 'OpenAI-compatible API authentication failed. Check your API key.', undefined, 'Verify the environment variable in llm.cloud.apiKeySource is correct.', false, 401);
        }
        if (err instanceof OpenAI.RateLimitError) {
            return new CortexError(LLM_RATE_LIMITED, 'medium', 'llm', 'OpenAI-compatible API rate limit exceeded.', undefined, 'Wait and retry with backoff.', true, 429);
        }
        if (err instanceof OpenAI.APIConnectionTimeoutError) {
            return new CortexError(LLM_TIMEOUT, 'medium', 'llm', 'OpenAI-compatible API request timed out.', undefined, 'Retry the request or increase llm.cloud.timeoutMs.', true, 504);
        }
        if (err instanceof OpenAI.APIError) {
            const detail = err.error;
            // `typeof null === 'object'`, so null must be excluded explicitly or
            // the message would end in " — null".
            let body = '';
            if (detail != null) {
                body = typeof detail === 'object' ? JSON.stringify(detail) : String(detail);
            }
            logger.debug('API error details', { status: err.status, body, headers: err.headers });
            return new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', `OpenAI-compatible API error: ${err.status} ${err.message}${body ? ` — ${body}` : ''}`, { status: err.status }, 'Retry or check your provider status page.', true, err.status);
        }
        const message = err instanceof Error ? err.message : String(err);
        return new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', `OpenAI-compatible provider error: ${message}`, undefined, 'Check network connectivity and llm.cloud.baseUrl configuration.', true);
    }
}
|
|
164
|
+
//# sourceMappingURL=openai-compatible.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-compatible.js","sourceRoot":"","sources":["../../src/providers/openai-compatible.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAIL,OAAO,EACP,WAAW,EACX,eAAe,EACf,wBAAwB,EACxB,gBAAgB,EAChB,WAAW,EACX,YAAY,GACb,MAAM,cAAc,CAAC;AAEtB,MAAM,MAAM,GAAG,YAAY,CAAC,uBAAuB,CAAC,CAAC;AAWrD,MAAM,OAAO,wBAAwB;IAC1B,IAAI,GAAG,mBAAmB,CAAC;IAC3B,IAAI,GAAG,OAAgB,CAAC;IAEzB,MAAM,CAAS;IACf,YAAY,CAAS;IACrB,SAAS,CAAS;IAClB,QAAQ,CAAU;IAEjB,YAAY,GAAyB;QAC5C,cAAc,EAAE;YACd,OAAO,CAAC,iBAAiB;YACzB,OAAO,CAAC,sBAAsB;YAC9B,OAAO,CAAC,uBAAuB;YAC/B,OAAO,CAAC,oBAAoB;YAC5B,OAAO,CAAC,eAAe;SACxB;QACD,gBAAgB,EAAE,OAAO;QACzB,wBAAwB,EAAE,IAAI;QAC9B,iBAAiB,EAAE,IAAI;QACvB,wBAAwB,EAAE,EAAE;QAC5B,wEAAwE;QACxE,yBAAyB,EAAE,CAAC;QAC5B,0BAA0B,EAAE,CAAC;KAC9B,CAAC;IAEF,YAAY,OAAwC;QAClD,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YACpB,MAAM,IAAI,WAAW,CACnB,eAAe,EACf,UAAU,EACV,KAAK,EACL,mFAAmF,EACnF,SAAS,EACT,mEAAmE,EACnE,KAAK,EACL,GAAG,CACJ,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,CAAC;YACvB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,OAAO,EAAE,OAAO,CAAC,SAAS,IAAI,MAAM;YACpC,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,CAAC;SACpC,CAAC,CAAC;QAEH,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,QAAQ,CAAC;QACrD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,aAAa,CAAC;QACpD,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,mCAAmC,CAAC,CAAC;QAE9E,MAAM,CAAC,IAAI,CAAC,wCAAwC,EAAE;YACpD,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;IACL,CAAC;IAED,QAAQ,CAAC,aAAiC,SAAS;QACjD,OAAO,UAAU,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC;IACpE,CAAC;IAED,+DAA+D;IACvD,gBAAgB,CAAC,SAAiB;QACxC,OAAO,IAAI,CAAC,QAAQ;YAClB,CAAC,CAAC,EAAE,qBAAqB,EAAE,SAAS,EAAE;YACtC,CAAC,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,OAA2B;QACxD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QACzE,OAAO,MAAM,CAAC,OAAO,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,kBAAkB,CACtB,YAAgC,EAChC,UAA
kB,EAClB,OAA2B,EAC3B,kBAAsC,SAAS;QAE/C,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;QAE7C,IAAI,CAAC;YACH,MAAM,QAAQ,GAA6C,EAAE,CAAC;YAC9D,IAAI,YAAY,EAAE,CAAC;gBACjB,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;YAC3D,CAAC;YACD,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;gBACzD,KAAK;gBACL,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,SAAS,IAAI,IAAI,CAAC;gBACpD,WAAW,EAAE,OAAO,EAAE,WAAW,IAAI,GAAG;gBACxC,QAAQ;gBACR,GAAG,CAAC,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,aAAa,EAAE,CAAC;aACvE,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;YAE5D,OAAO;gBACL,OAAO;gBACP,WAAW,EAAE,QAAQ,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC;gBAC/C,YAAY,EAAE,QAAQ,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC;gBACpD,KAAK;aACN,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,kBAAkB,CACtB,MAAc,EACd,OAAgC,EAChC,OAA2B;QAE3B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACpD,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAM,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,CAAC,MAAM,CACX,MAAc,EACd,OAA2B;QAE3B,KAAK,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IAC3D,CAAC;IAED,KAAK,CAAC,CAAC,gBAAgB,CACrB,YAAgC,EAChC,UAAkB,EAClB,OAA2B,EAC3B,kBAAsC,SAAS;QAE/C,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;QAE7C,IAAI,CAAC;YACH,MAAM,QAAQ,GAA6C,EAAE,CAAC;YAC9D,IAAI,YAAY,EAAE,CAAC;gBACjB,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;YAC3D,CAAC;YACD,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;gBACvD,KAAK;gBACL,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,SAAS,IAAI,IAAI,CAAC;gBACpD,WAAW,EAAE,OAAO,EAAE,WAAW,IAAI,GAAG;gBACxC,QAAQ;gBACR,MAAM,EAAE,IAAI;gBACZ,cAAc,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE;gBACvC,GAAG,CAAC,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,EAAE,IA
AI,EAAE,OAAO,CAAC,aAAa,EAAE,CAAC;aACvE,CAAC,CAAC;YAEH,IAAI,WAAW,GAAG,CAAC,CAAC;YACpB,IAAI,YAAY,GAAG,CAAC,CAAC;YAErB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBACjC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC;gBAC/C,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,KAAK,CAAC;gBACd,CAAC;gBACD,gFAAgF;gBAChF,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;oBAChB,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC;oBACxC,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC;gBAC/C,CAAC;YACH,CAAC;YAED,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC;QAC9C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,MAAgB;QAC1B,MAAM,IAAI,WAAW,CACnB,wBAAwB,EACxB,QAAQ,EACR,KAAK,EACL,mFAAmF,CACpF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,WAAW;QACf,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;gBACxC,KAAK,EAAE,IAAI,CAAC,SAAS;gBACrB,GAAG,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC;gBAC3B,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;aAC9C,CAAC,CAAC;YACH,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAEO,QAAQ,CAAC,GAAY;QAC3B,IAAI,GAAG,YAAY,MAAM,CAAC,mBAAmB,EAAE,CAAC;YAC9C,OAAO,IAAI,WAAW,CACpB,eAAe,EAAE,UAAU,EAAE,KAAK,EAClC,kEAAkE,EAClE,SAAS,EAAE,uEAAuE,EAAE,KAAK,EAAE,GAAG,CAC/F,CAAC;QACJ,CAAC;QACD,IAAI,GAAG,YAAY,MAAM,CAAC,cAAc,EAAE,CAAC;YACzC,OAAO,IAAI,WAAW,CACpB,gBAAgB,EAAE,QAAQ,EAAE,KAAK,EACjC,4CAA4C,EAC5C,SAAS,EAAE,8BAA8B,EAAE,IAAI,EAAE,GAAG,CACrD,CAAC;QACJ,CAAC;QACD,IAAI,GAAG,YAAY,MAAM,CAAC,yBAAyB,EAAE,CAAC;YACpD,OAAO,IAAI,WAAW,CACpB,WAAW,EAAE,QAAQ,EAAE,KAAK,EAC5B,0CAA0C,EAC1C,SAAS,EAAE,oDAAoD,EAAE,IAAI,EAAE,GAAG,CAC3E,CAAC;QACJ,CAAC;QACD,IAAI,GAAG,YAAY,MAAM,CAAC,QAAQ,EAAE,CAAC;YACnC,MAAM,IAAI,GAAG,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;YACjG,MAAM,CAAC,KAAK,CAAC,mBAAmB,EAAE,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YACtF,OAAO,IAAI,WAAW,CACpB,wBAAwB,EAA
E,MAAM,EAAE,KAAK,EACvC,gCAAgC,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,EACtF,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,EACtB,2CAA2C,EAAE,IAAI,EACjD,GAAG,CAAC,MAAM,CACX,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,OAAO,IAAI,WAAW,CACpB,wBAAwB,EAAE,MAAM,EAAE,KAAK,EACvC,qCAAqC,OAAO,EAAE,EAC9C,SAAS,EAAE,iEAAiE,EAAE,IAAI,CACnF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import type { ZodSchema } from 'zod';
|
|
2
|
+
import { LLMTask, type CortexConfig } from '@cortex/core';
|
|
3
|
+
import { AnthropicProvider } from './providers/anthropic.js';
|
|
4
|
+
import { OllamaProvider } from './providers/ollama.js';
|
|
5
|
+
import { OpenAICompatibleProvider } from './providers/openai-compatible.js';
|
|
6
|
+
import { TokenTracker } from './token-tracker.js';
|
|
7
|
+
import { ResponseCache } from './cache.js';
|
|
8
|
+
/** Provider-routing mode of the Router; this is the type returned by `Router.getMode()`. */
export type LLMMode = 'cloud-first' | 'hybrid' | 'local-first' | 'local-only';
|
|
9
|
+
/** Routing choice for a task: 'auto', or a fixed 'local' / 'cloud' target. */
export type TaskRouting = 'auto' | 'local' | 'cloud';
|
|
10
|
+
/** Options accepted by the `Router` constructor. */
export interface RouterOptions {
|
|
11
|
+
/** Cortex configuration object the router is built from. */
config: CortexConfig;
|
|
12
|
+
/** Optional API key supplied directly by the caller. */
apiKey?: string;
|
|
13
|
+
}
|
|
14
|
+
/** Input to `Router.complete()` / `Router.completeStructured()` (and, minus `contentHash`, `Router.stream()`). */
export interface CompleteRequest {
|
|
15
|
+
/** Optional system prompt; forwarded to the provider as-is (may be undefined). */
systemPrompt?: string;
|
|
16
|
+
/** User prompt text sent to the provider. */
userPrompt: string;
|
|
17
|
+
/** Identifier of the prompt template. */
promptId: string;
|
|
18
|
+
/** Version of the prompt template. */
promptVersion: string;
|
|
19
|
+
/** Task the request belongs to. */
task: LLMTask;
|
|
20
|
+
/** Which of the provider's models to use. */
modelPreference?: 'primary' | 'fast';
|
|
21
|
+
/** Optional sampling temperature passed through to the provider. */
temperature?: number;
|
|
22
|
+
/** Optional output token limit passed through to the provider. */
maxTokens?: number;
|
|
23
|
+
/** Optional content hash of the request. */
contentHash?: string;
|
|
24
|
+
/** Optionally pin the request to the local or the cloud provider. */
forceProvider?: 'local' | 'cloud';
|
|
25
|
+
}
|
|
26
|
+
/** Result of a completion request, including usage and bookkeeping fields. */
export interface CompleteResult {
|
|
27
|
+
/** Text returned by the model. */
content: string;
|
|
28
|
+
/** Name of the model that produced the content. */
model: string;
|
|
29
|
+
/** Number of input tokens reported for the request. */
inputTokens: number;
|
|
30
|
+
/** Number of output tokens reported for the request. */
outputTokens: number;
|
|
31
|
+
/** Whether the result was served from the response cache. */
cached: boolean;
|
|
32
|
+
/** Latency of the request in milliseconds. */
latencyMs: number;
|
|
33
|
+
/** Cost of the request in USD. */
costUsd: number;
|
|
34
|
+
/** Name of the provider the result is attributed to. */
provider: 'anthropic' | 'ollama' | 'openai-compatible';
|
|
35
|
+
}
|
|
36
|
+
/** Minimal provider surface exposed through the `Router.provider` getter. */
interface ProviderWithSystem {
|
|
37
|
+
/** One-shot completion; resolves with the content, token counts and model name. */
completeWithSystem(systemPrompt: string | undefined, userPrompt: string, options?: {
|
|
38
|
+
temperature?: number;
|
|
39
|
+
maxTokens?: number;
|
|
40
|
+
}, modelPreference?: 'primary' | 'fast'): Promise<{
|
|
41
|
+
content: string;
|
|
42
|
+
inputTokens: number;
|
|
43
|
+
outputTokens: number;
|
|
44
|
+
model: string;
|
|
45
|
+
}>;
|
|
46
|
+
/** Streaming completion; yields string chunks and returns token counts and model name when done. */
streamWithSystem(systemPrompt: string | undefined, userPrompt: string, options?: {
|
|
47
|
+
temperature?: number;
|
|
48
|
+
maxTokens?: number;
|
|
49
|
+
}, modelPreference?: 'primary' | 'fast'): AsyncGenerator<string, {
|
|
50
|
+
inputTokens: number;
|
|
51
|
+
outputTokens: number;
|
|
52
|
+
model: string;
|
|
53
|
+
}>;
|
|
54
|
+
/** Model name for the given preference. */
getModel(preference?: 'primary' | 'fast'): string;
|
|
55
|
+
/** Resolves to whether the provider can currently be used. */
isAvailable(): Promise<boolean>;
|
|
56
|
+
}
|
|
57
|
+
/** Cloud-side provider implementations; this is the type returned by `Router.getCloudProvider()`. */
type CloudProvider = AnthropicProvider | OpenAICompatibleProvider;
|
|
58
|
+
/** Declaration of the LLM router: holds a cloud and a local provider plus a token tracker and a response cache. */
export declare class Router {
|
|
59
|
+
private cloudProvider;
|
|
60
|
+
private localProvider;
|
|
61
|
+
private mode;
|
|
62
|
+
private taskRouting;
|
|
63
|
+
private tracker;
|
|
64
|
+
private cache;
|
|
65
|
+
private config;
|
|
66
|
+
private availabilityCache;
|
|
67
|
+
private static readonly AVAILABILITY_TTL_MS;
|
|
68
|
+
/** Builds the router from the given options. */
constructor(options: RouterOptions);
|
|
69
|
+
/**
|
|
70
|
+
* Select provider based on mode, task routing, and availability
|
|
71
|
+
*/
|
|
72
|
+
private selectProvider;
|
|
73
|
+
/** Provider accessor typed as the minimal `ProviderWithSystem` surface. */
get provider(): ProviderWithSystem;
|
|
74
|
+
/** Runs a completion request and resolves with its result. */
complete(request: CompleteRequest): Promise<CompleteResult>;
|
|
75
|
+
/** Like `complete()`, but also returns the output as `data`, typed by the Zod schema's `T`. */
completeStructured<T>(request: CompleteRequest, schema: ZodSchema<T>): Promise<{
|
|
76
|
+
data: T;
|
|
77
|
+
} & CompleteResult>;
|
|
78
|
+
/** Streaming variant: yields string chunks and returns the final `CompleteResult`; takes no `contentHash`. */
stream(request: Omit<CompleteRequest, 'contentHash'>): AsyncGenerator<string, CompleteResult>;
|
|
79
|
+
/** Returns the router's token tracker. */
getTracker(): TokenTracker;
|
|
80
|
+
/** Returns the router's response cache. */
getCache(): ResponseCache;
|
|
81
|
+
/** Returns the local (Ollama) provider, or null when none is configured. */
getLocalProvider(): OllamaProvider | null;
|
|
82
|
+
/** Returns the cloud provider, or null when none is configured. */
getCloudProvider(): CloudProvider | null;
|
|
83
|
+
/** Returns the routing mode. */
getMode(): LLMMode;
|
|
84
|
+
/** Resolves to whether the router currently has a usable provider. */
isAvailable(): Promise<boolean>;
|
|
85
|
+
}
|
|
86
|
+
export {};
|
|
87
|
+
//# sourceMappingURL=router.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../src/router.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,KAAK,CAAC;AACrC,OAAO,EACL,OAAO,EACP,KAAK,YAAY,EAMlB,MAAM,cAAc,CAAC;AACtB,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,wBAAwB,EAAE,MAAM,kCAAkC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAK3C,MAAM,MAAM,OAAO,GAAG,aAAa,GAAG,QAAQ,GAAG,aAAa,GAAG,YAAY,CAAC;AAC9E,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;AAErD,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,YAAY,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,OAAO,CAAC;IACd,eAAe,CAAC,EAAE,SAAS,GAAG,MAAM,CAAC;IACrC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,GAAG,QAAQ,GAAG,mBAAmB,CAAC;CACxD;AAED,UAAU,kBAAkB;IAC1B,kBAAkB,CAChB,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,EACtD,eAAe,CAAC,EAAE,SAAS,GAAG,MAAM,GACnC,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC1F,gBAAgB,CACd,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,EACtD,eAAe,CAAC,EAAE,SAAS,GAAG,MAAM,GACnC,cAAc,CAAC,MAAM,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACxF,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;IAClD,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;CACjC;AAED,KAAK,aAAa,GAAG,iBAAiB,GAAG,wBAA
wB,CAAC;AAiBlE,qBAAa,MAAM;IACjB,OAAO,CAAC,aAAa,CAA8B;IACnD,OAAO,CAAC,aAAa,CAA+B;IACpD,OAAO,CAAC,IAAI,CAAU;IACtB,OAAO,CAAC,WAAW,CAA8B;IACjD,OAAO,CAAC,OAAO,CAAe;IAC9B,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,iBAAiB,CAAuD;IAChF,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,mBAAmB,CAAU;gBAEzC,OAAO,EAAE,aAAa;IAyElC;;OAEG;YACW,cAAc;IAoJ5B,IAAI,QAAQ,IAAI,kBAAkB,CAEjC;IAEK,QAAQ,CAAC,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,cAAc,CAAC;IA0G3D,kBAAkB,CAAC,CAAC,EACxB,OAAO,EAAE,eAAe,EACxB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,GACnB,OAAO,CAAC;QAAE,IAAI,EAAE,CAAC,CAAA;KAAE,GAAG,cAAc,CAAC;IAiEjC,MAAM,CACX,OAAO,EAAE,IAAI,CAAC,eAAe,EAAE,aAAa,CAAC,GAC5C,cAAc,CAAC,MAAM,EAAE,cAAc,CAAC;IAmFzC,UAAU,IAAI,YAAY;IAI1B,QAAQ,IAAI,aAAa;IAIzB,gBAAgB,IAAI,cAAc,GAAG,IAAI;IAIzC,gBAAgB,IAAI,aAAa,GAAG,IAAI;IAIxC,OAAO,IAAI,OAAO;IAIZ,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;CA0BtC"}
|
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
import { LLMTask, CortexError, LLM_BUDGET_EXHAUSTED, LLM_PROVIDER_UNAVAILABLE, createLogger, eventBus, } from '@cortex/core';
|
|
2
|
+
import { AnthropicProvider } from './providers/anthropic.js';
|
|
3
|
+
import { OllamaProvider } from './providers/ollama.js';
|
|
4
|
+
import { OpenAICompatibleProvider } from './providers/openai-compatible.js';
|
|
5
|
+
import { TokenTracker } from './token-tracker.js';
|
|
6
|
+
import { ResponseCache } from './cache.js';
|
|
7
|
+
import { parseStructuredOutput, buildCorrectionPrompt } from './output-parser.js';
|
|
8
|
+
// Module-level logger created via createLogger with the name 'llm:router'.
const logger = createLogger('llm:router');
|
|
9
|
+
/**
 * Resolve an `apiKeySource` configuration value to an API key.
 *
 * - `env:VAR_NAME` returns `process.env.VAR_NAME` (undefined when the variable is unset).
 * - `keychain:` and `file:` references are not resolved here and yield undefined.
 * - Any other non-empty string is treated as a raw key: a warning is logged and
 *   the string is returned unchanged.
 * - A missing, empty or non-string value yields undefined instead of throwing,
 *   so an absent `apiKeySource` no longer surfaces as a TypeError.
 *
 * @param {string | undefined | null} source - Configured key reference.
 * @returns {string | undefined} The resolved key, or undefined when none can be resolved.
 */
function resolveApiKeySource(source) {
    if (typeof source !== 'string' || source === '') {
        return undefined;
    }
    if (source.startsWith('env:')) {
        return process.env[source.slice(4)];
    }
    if (source.startsWith('keychain:') || source.startsWith('file:')) {
        return undefined;
    }
    // Warn if a raw key was provided instead of a reference format
    logger.warn('apiKeySource appears to be a raw key. Use "env:VAR_NAME" format instead. ' +
        'Raw keys in config files are a security risk.');
    return source;
}
|
|
21
|
+
/**
 * Median of the numeric `confidence` fields found on a list of entities.
 * Entries without a numeric confidence are ignored.
 *
 * @param {unknown} entities - Candidate entity list (anything that is not an array yields null).
 * @returns {number | null} The median, or null when no numeric confidence is present.
 */
function medianEntityConfidence(entities) {
    if (!Array.isArray(entities)) {
        return null;
    }
    const scores = entities
        .map((entity) => entity?.confidence)
        .filter((score) => typeof score === 'number')
        .sort((a, b) => a - b);
    if (scores.length === 0) {
        return null;
    }
    const mid = Math.floor(scores.length / 2);
    return scores.length % 2 !== 0
        ? scores[mid]
        : (scores[mid - 1] + scores[mid]) / 2;
}

/**
 * Routes LLM requests to a cloud provider (Anthropic or OpenAI-compatible) or
 * to the local Ollama provider, depending on the configured mode, the per-task
 * routing table and provider availability. Records token usage through the
 * tracker and serves/stores responses through the response cache.
 */
export class Router {
    cloudProvider = null;
    localProvider = null;
    mode;
    taskRouting;
    tracker;
    cache;
    config;
    availabilityCache = null;
    static AVAILABILITY_TTL_MS = 60_000; // 1 minute

    /**
     * @param {{ config: object, apiKey?: string }} options - Router options.
     * @throws Rethrows the cloud provider's construction error in 'cloud-first' mode.
     */
    constructor(options) {
        const { config } = options;
        this.config = config;
        this.mode = config.llm.mode;
        this.taskRouting = config.llm.taskRouting;
        // Initialize providers based on mode
        if (this.mode !== 'local-only') {
            try {
                if (config.llm.cloud.provider === 'openai-compatible') {
                    const baseUrl = config.llm.cloud.baseUrl;
                    if (!baseUrl) {
                        logger.warn('openai-compatible provider requires llm.cloud.baseUrl — skipping cloud');
                    }
                    else {
                        this.cloudProvider = new OpenAICompatibleProvider({
                            baseUrl,
                            apiKey: options.apiKey ?? resolveApiKeySource(config.llm.cloud.apiKeySource),
                            primaryModel: config.llm.cloud.models.primary,
                            fastModel: config.llm.cloud.models.fast,
                            timeoutMs: config.llm.cloud.timeoutMs,
                            maxRetries: config.llm.cloud.maxRetries,
                        });
                    }
                }
                else {
                    this.cloudProvider = new AnthropicProvider({
                        apiKey: options.apiKey,
                        primaryModel: config.llm.cloud.models.primary,
                        fastModel: config.llm.cloud.models.fast,
                        timeoutMs: config.llm.cloud.timeoutMs,
                        maxRetries: config.llm.cloud.maxRetries,
                        promptCaching: config.llm.cloud.promptCaching,
                    });
                }
            }
            catch (err) {
                // Cloud provider initialization failed (e.g., no API key)
                if (this.mode === 'cloud-first') {
                    throw err; // Cloud is required in cloud-first mode
                }
                logger.warn('Cloud provider unavailable, falling back to local-only', {
                    error: err instanceof Error ? err.message : String(err),
                });
            }
        }
        // The cloud-first branch of selectProvider() falls back to the local
        // provider when the budget is exhausted and enforcementAction is
        // 'fallback-local'. That only works if the local provider exists, so it
        // must also be created in cloud-first mode when that action is configured.
        const budgetFallsBackToLocal = config.llm.budget?.enforcementAction === 'fallback-local';
        if (this.mode !== 'cloud-first' || !this.cloudProvider || budgetFallsBackToLocal) {
            this.localProvider = new OllamaProvider({
                host: config.llm.local.host,
                model: config.llm.local.model,
                embeddingModel: config.llm.local.embeddingModel,
                numCtx: config.llm.local.numCtx,
                numGpu: config.llm.local.numGpu,
                timeoutMs: config.llm.local.timeoutMs,
                keepAlive: config.llm.local.keepAlive,
            });
        }
        this.tracker = new TokenTracker(config.llm.budget.monthlyLimitUsd, config.llm.budget.warningThresholds);
        this.cache = new ResponseCache({
            enabled: config.llm.cache.enabled,
            ttlMs: config.llm.cache.ttlDays * 24 * 60 * 60 * 1000,
        });
        logger.info('Router initialized', {
            mode: this.mode,
            hasCloud: !!this.cloudProvider,
            hasLocal: !!this.localProvider,
        });
    }

    /**
     * Select provider based on mode, task routing, and availability.
     *
     * Precedence: explicit `forceProvider`, then the per-task routing entry
     * (only when the requested provider exists), then the mode-based rules.
     *
     * @param {string} task - Task identifier used for the routing-table lookup.
     * @param {'local' | 'cloud' | undefined} forceProvider - Optional hard override.
     * @returns {Promise<{ provider: object, name: string }>} The chosen provider and its name.
     * @throws {CortexError} When no suitable provider exists or the budget is exhausted.
     */
    async selectProvider(task, forceProvider) {
        const cloudName = () => (this.cloudProvider?.name ?? 'anthropic');
        // Handle forced provider
        if (forceProvider === 'cloud') {
            if (!this.cloudProvider) {
                throw new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', 'Cloud provider requested but not available.', { mode: this.mode }, 'Set your cloud API key or change LLM mode.', false);
            }
            return { provider: this.cloudProvider, name: cloudName() };
        }
        if (forceProvider === 'local') {
            if (!this.localProvider) {
                throw new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', 'Local provider requested but not configured.', { mode: this.mode }, 'Change LLM mode to include local provider.', false);
            }
            return { provider: this.localProvider, name: 'ollama' };
        }
        // Check task-specific routing (a missing table or entry means 'auto')
        const taskRoute = this.taskRouting?.[task] ?? 'auto';
        if (taskRoute === 'cloud' && this.cloudProvider) {
            return { provider: this.cloudProvider, name: cloudName() };
        }
        if (taskRoute === 'local' && this.localProvider) {
            return { provider: this.localProvider, name: 'ollama' };
        }
        // Auto routing based on mode
        switch (this.mode) {
            case 'local-only': {
                if (!this.localProvider) {
                    throw new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', 'Local-only mode but Ollama provider not available.', undefined, 'Ensure Ollama is running with `ollama serve`.', false);
                }
                return { provider: this.localProvider, name: 'ollama' };
            }
            case 'local-first': {
                // Try local first, fall back to cloud
                if (this.localProvider && await this.localProvider.isAvailable()) {
                    return { provider: this.localProvider, name: 'ollama' };
                }
                if (this.cloudProvider) {
                    logger.info('Local provider unavailable, falling back to cloud');
                    return { provider: this.cloudProvider, name: cloudName() };
                }
                throw new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', 'No LLM provider available.', { mode: this.mode }, 'Start Ollama or configure cloud API key.', false);
            }
            case 'hybrid': {
                // Use local for cheap/high-volume tasks, cloud for reasoning-heavy tasks
                const cheapTasks = [
                    LLMTask.ENTITY_EXTRACTION,
                    LLMTask.CONTEXT_RANKING,
                    LLMTask.EMBEDDING_GENERATION,
                ];
                if (cheapTasks.includes(task) && this.localProvider && await this.localProvider.isAvailable()) {
                    logger.debug('Hybrid routing to local provider', { task });
                    return { provider: this.localProvider, name: 'ollama' };
                }
                if (this.cloudProvider) {
                    logger.debug('Hybrid routing to cloud provider', { task });
                    return { provider: this.cloudProvider, name: cloudName() };
                }
                // Cloud unavailable — fall back to local for everything
                if (this.localProvider) {
                    logger.warn('Cloud provider unavailable in hybrid mode, falling back to local', { task });
                    return { provider: this.localProvider, name: 'ollama' };
                }
                throw new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', 'No LLM provider available.', { mode: this.mode }, 'Configure cloud API key or start Ollama.', false);
            }
            case 'cloud-first':
            default: {
                // Prefer cloud, fall back to local if budget exhausted
                if (this.cloudProvider && !this.tracker.isBudgetExhausted()) {
                    return { provider: this.cloudProvider, name: cloudName() };
                }
                if (this.localProvider && this.config.llm.budget.enforcementAction === 'fallback-local') {
                    logger.info('Budget exhausted or cloud unavailable, falling back to local');
                    return { provider: this.localProvider, name: 'ollama' };
                }
                if (!this.cloudProvider) {
                    throw new CortexError(LLM_PROVIDER_UNAVAILABLE, 'high', 'llm', 'Cloud provider not available.', { mode: this.mode }, 'Set your cloud API key or change LLM mode.', false);
                }
                // Cloud available but budget exhausted and no fallback
                throw new CortexError(LLM_BUDGET_EXHAUSTED, 'high', 'llm', 'Monthly budget exhausted.', { spent: this.tracker.getCurrentMonthSpend() }, 'Increase budget, wait for next month, or enable local fallback.', false, 402);
            }
        }
    }

    // Keep legacy provider getter for backward compatibility
    get provider() {
        return this.cloudProvider ?? this.localProvider;
    }

    /**
     * Run a single completion.
     *
     * When `request.contentHash` is set, the response cache is consulted first
     * (keyed by contentHash, promptId and promptVersion) and a fresh response is
     * stored afterwards. Emits 'llm.request.start' / 'llm.request.complete'
     * events and records token usage for non-cached requests.
     *
     * @param {object} request - Completion request.
     * @returns {Promise<object>} Content, model, token counts, latency, cost and provider name.
     */
    async complete(request) {
        // Select provider based on mode and task
        const { provider, name: providerName } = await this.selectProvider(request.task, request.forceProvider);
        // Check cache
        if (request.contentHash) {
            const cached = this.cache.get(request.contentHash, request.promptId, request.promptVersion);
            if (cached) {
                return {
                    content: cached.response,
                    model: cached.model,
                    inputTokens: cached.inputTokens,
                    outputTokens: cached.outputTokens,
                    cached: true,
                    latencyMs: 0,
                    costUsd: 0,
                    provider: providerName,
                };
            }
        }
        // Emit start event
        const requestId = crypto.randomUUID();
        eventBus.emit({
            type: 'llm.request.start',
            payload: { requestId, task: request.task, provider: providerName },
            timestamp: new Date().toISOString(),
            source: 'llm:router',
        });
        const startMs = performance.now();
        const result = await provider.completeWithSystem(request.systemPrompt, request.userPrompt, {
            temperature: request.temperature,
            maxTokens: request.maxTokens,
        }, request.modelPreference ?? 'primary');
        const latencyMs = Math.round(performance.now() - startMs);
        // Track tokens
        const usageRecord = this.tracker.record(requestId, request.task, providerName, result.model, result.inputTokens, result.outputTokens, latencyMs);
        // Cache the result
        if (request.contentHash) {
            this.cache.set(request.contentHash, request.promptId, request.promptVersion, result.content, result.model, result.inputTokens, result.outputTokens);
        }
        // Emit complete event
        eventBus.emit({
            type: 'llm.request.complete',
            payload: {
                requestId,
                task: request.task,
                provider: providerName,
                model: result.model,
                usage: {
                    inputTokens: result.inputTokens,
                    outputTokens: result.outputTokens,
                    estimatedCostUsd: usageRecord.estimatedCostUsd,
                },
                latencyMs,
            },
            timestamp: new Date().toISOString(),
            source: 'llm:router',
        });
        return {
            content: result.content,
            model: result.model,
            inputTokens: result.inputTokens,
            outputTokens: result.outputTokens,
            cached: false,
            latencyMs,
            costUsd: usageRecord.estimatedCostUsd,
            provider: providerName,
        };
    }

    /**
     * Run a completion and parse its output against `schema`.
     *
     * - If the first output does not parse, retry once with a correction prompt
     *   (uncached); a second parse failure propagates to the caller.
     * - In 'local-first' mode, when the local provider answered and the median
     *   entity confidence is below 0.6, the request is escalated to the cloud
     *   provider. If the escalation itself fails (request error or unparseable
     *   cloud output), the already-valid local result is returned instead of
     *   being mistaken for a parse failure of the local output.
     *
     * @param {object} request - Completion request.
     * @param {object} schema - Schema handed to parseStructuredOutput.
     * @returns {Promise<object>} The completion result plus the parsed `data`.
     */
    async completeStructured(request, schema) {
        const result = await this.complete(request);
        let data;
        try {
            data = parseStructuredOutput(result.content, schema);
        }
        catch (firstErr) {
            // Retry once with correction prompt
            const reason = firstErr instanceof Error ? firstErr.message : String(firstErr);
            logger.warn('Structured output parse failed, retrying with correction', {
                promptId: request.promptId,
                error: reason,
            });
            const correctedPrompt = buildCorrectionPrompt(request.userPrompt, result.content, reason);
            const retryResult = await this.complete({
                ...request,
                userPrompt: correctedPrompt,
                contentHash: undefined, // Don't cache correction attempts
            });
            const retryData = parseStructuredOutput(retryResult.content, schema);
            return { ...retryResult, data: retryData };
        }
        // Step 9: local-first confidence escalation
        // If the local provider returned low-confidence entities, retry with cloud
        const canEscalate = this.mode === 'local-first' &&
            result.provider === 'ollama' &&
            this.cloudProvider &&
            !request.forceProvider;
        if (canEscalate) {
            const median = medianEntityConfidence(data?.entities);
            if (median !== null && median < 0.6) {
                logger.info('Local confidence below threshold, escalating to cloud', {
                    median: Math.round(median * 100) / 100,
                    task: request.task,
                });
                try {
                    const cloudResult = await this.complete({ ...request, forceProvider: 'cloud', contentHash: undefined });
                    const cloudData = parseStructuredOutput(cloudResult.content, schema);
                    return { ...cloudResult, data: cloudData };
                }
                catch (escalationErr) {
                    // The local output parsed fine; keep it rather than failing the request.
                    logger.warn('Cloud escalation failed, keeping local result', {
                        promptId: request.promptId,
                        error: escalationErr instanceof Error ? escalationErr.message : String(escalationErr),
                    });
                }
            }
        }
        return { ...result, data };
    }

    /**
     * Stream a completion: yields text chunks as they arrive and returns the
     * aggregated result once the provider's stream ends.
     *
     * The provider's generator is closed when the consumer stops iterating early
     * or an error interrupts the loop, so the upstream stream is not left open.
     * Usage is recorded and the complete event emitted only when the stream
     * runs to its end.
     *
     * @param {object} request - Completion request (contentHash is not used).
     * @returns {AsyncGenerator<string, object>} Chunks, then the final result.
     */
    async *stream(request) {
        // Select provider based on mode and task
        const { provider, name: providerName } = await this.selectProvider(request.task, request.forceProvider);
        const requestId = crypto.randomUUID();
        eventBus.emit({
            type: 'llm.request.start',
            payload: { requestId, task: request.task, provider: providerName },
            timestamp: new Date().toISOString(),
            source: 'llm:router',
        });
        const startMs = performance.now();
        let fullContent = '';
        const gen = provider.streamWithSystem(request.systemPrompt, request.userPrompt, {
            temperature: request.temperature,
            maxTokens: request.maxTokens,
        }, request.modelPreference ?? 'primary');
        let streamResult;
        let finished = false;
        try {
            while (true) {
                const { value, done } = await gen.next();
                if (done) {
                    streamResult = value;
                    finished = true;
                    break;
                }
                fullContent += value;
                yield value;
            }
        }
        finally {
            if (!finished) {
                // Early exit (consumer break/return/throw or upstream error):
                // release the provider's generator. A no-op if it already ended.
                await gen.return?.(undefined);
            }
        }
        const latencyMs = Math.round(performance.now() - startMs);
        const tokens = streamResult ?? { inputTokens: 0, outputTokens: 0, model: provider.getModel() };
        const usageRecord = this.tracker.record(requestId, request.task, providerName, tokens.model, tokens.inputTokens, tokens.outputTokens, latencyMs);
        eventBus.emit({
            type: 'llm.request.complete',
            payload: {
                requestId,
                task: request.task,
                provider: providerName,
                model: tokens.model,
                usage: {
                    inputTokens: tokens.inputTokens,
                    outputTokens: tokens.outputTokens,
                    estimatedCostUsd: usageRecord.estimatedCostUsd,
                },
                latencyMs,
            },
            timestamp: new Date().toISOString(),
            source: 'llm:router',
        });
        return {
            content: fullContent,
            model: tokens.model,
            inputTokens: tokens.inputTokens,
            outputTokens: tokens.outputTokens,
            cached: false,
            latencyMs,
            costUsd: usageRecord.estimatedCostUsd,
            provider: providerName,
        };
    }

    /** @returns {object} The token tracker. */
    getTracker() {
        return this.tracker;
    }

    /** @returns {object} The response cache. */
    getCache() {
        return this.cache;
    }

    /** @returns {object | null} The local provider, or null when not configured. */
    getLocalProvider() {
        return this.localProvider;
    }

    /** @returns {object | null} The cloud provider, or null when not configured. */
    getCloudProvider() {
        return this.cloudProvider;
    }

    /** @returns {string} The routing mode. */
    getMode() {
        return this.mode;
    }

    /**
     * Whether a provider suitable for the current mode is reachable. The answer
     * is memoized for AVAILABILITY_TTL_MS to avoid probing on every call.
     *
     * @returns {Promise<boolean>} True when a usable provider is available.
     */
    async isAvailable() {
        // Return cached result if still valid
        if (this.availabilityCache && Date.now() < this.availabilityCache.expiresAt) {
            return this.availabilityCache.result;
        }
        let result;
        switch (this.mode) {
            case 'local-only':
                result = await this.localProvider?.isAvailable() ?? false;
                break;
            case 'cloud-first':
                result = await this.cloudProvider?.isAvailable() ?? false;
                break;
            default: {
                // hybrid or local-first: either provider works.
                // The two probes are independent, so run them in parallel.
                const [localAvailable, cloudAvailable] = await Promise.all([
                    this.localProvider?.isAvailable(),
                    this.cloudProvider?.isAvailable(),
                ]);
                result = (localAvailable ?? false) || (cloudAvailable ?? false);
                break;
            }
        }
        this.availabilityCache = { result, expiresAt: Date.now() + Router.AVAILABILITY_TTL_MS };
        return result;
    }
}
|
|
399
|
+
//# sourceMappingURL=router.js.map
|