whale-code 6.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -0
- package/bin/swag-agent.js +9 -0
- package/bin/swagmanager-mcp.js +321 -0
- package/dist/cli/app.d.ts +26 -0
- package/dist/cli/app.js +64 -0
- package/dist/cli/chat/AgentSelector.d.ts +14 -0
- package/dist/cli/chat/AgentSelector.js +14 -0
- package/dist/cli/chat/ChatApp.d.ts +9 -0
- package/dist/cli/chat/ChatApp.js +267 -0
- package/dist/cli/chat/ChatInput.d.ts +39 -0
- package/dist/cli/chat/ChatInput.js +509 -0
- package/dist/cli/chat/MarkdownText.d.ts +10 -0
- package/dist/cli/chat/MarkdownText.js +20 -0
- package/dist/cli/chat/MessageList.d.ts +37 -0
- package/dist/cli/chat/MessageList.js +80 -0
- package/dist/cli/chat/ModelSelector.d.ts +20 -0
- package/dist/cli/chat/ModelSelector.js +73 -0
- package/dist/cli/chat/RewindViewer.d.ts +26 -0
- package/dist/cli/chat/RewindViewer.js +185 -0
- package/dist/cli/chat/StoreSelector.d.ts +14 -0
- package/dist/cli/chat/StoreSelector.js +24 -0
- package/dist/cli/chat/StreamingText.d.ts +12 -0
- package/dist/cli/chat/StreamingText.js +12 -0
- package/dist/cli/chat/SubagentPanel.d.ts +45 -0
- package/dist/cli/chat/SubagentPanel.js +110 -0
- package/dist/cli/chat/TeamPanel.d.ts +21 -0
- package/dist/cli/chat/TeamPanel.js +42 -0
- package/dist/cli/chat/ToolIndicator.d.ts +25 -0
- package/dist/cli/chat/ToolIndicator.js +436 -0
- package/dist/cli/chat/hooks/useAgentLoop.d.ts +39 -0
- package/dist/cli/chat/hooks/useAgentLoop.js +382 -0
- package/dist/cli/chat/hooks/useSlashCommands.d.ts +37 -0
- package/dist/cli/chat/hooks/useSlashCommands.js +387 -0
- package/dist/cli/commands/config-cmd.d.ts +10 -0
- package/dist/cli/commands/config-cmd.js +99 -0
- package/dist/cli/commands/doctor.d.ts +14 -0
- package/dist/cli/commands/doctor.js +172 -0
- package/dist/cli/commands/init.d.ts +16 -0
- package/dist/cli/commands/init.js +278 -0
- package/dist/cli/commands/mcp.d.ts +12 -0
- package/dist/cli/commands/mcp.js +162 -0
- package/dist/cli/login/LoginApp.d.ts +7 -0
- package/dist/cli/login/LoginApp.js +157 -0
- package/dist/cli/print-mode.d.ts +31 -0
- package/dist/cli/print-mode.js +202 -0
- package/dist/cli/serve-mode.d.ts +37 -0
- package/dist/cli/serve-mode.js +636 -0
- package/dist/cli/services/agent-definitions.d.ts +25 -0
- package/dist/cli/services/agent-definitions.js +91 -0
- package/dist/cli/services/agent-events.d.ts +178 -0
- package/dist/cli/services/agent-events.js +175 -0
- package/dist/cli/services/agent-loop.d.ts +90 -0
- package/dist/cli/services/agent-loop.js +762 -0
- package/dist/cli/services/agent-worker-base.d.ts +97 -0
- package/dist/cli/services/agent-worker-base.js +220 -0
- package/dist/cli/services/auth-service.d.ts +30 -0
- package/dist/cli/services/auth-service.js +160 -0
- package/dist/cli/services/background-processes.d.ts +126 -0
- package/dist/cli/services/background-processes.js +318 -0
- package/dist/cli/services/browser-auth.d.ts +24 -0
- package/dist/cli/services/browser-auth.js +180 -0
- package/dist/cli/services/claude-md-loader.d.ts +16 -0
- package/dist/cli/services/claude-md-loader.js +58 -0
- package/dist/cli/services/config-store.d.ts +47 -0
- package/dist/cli/services/config-store.js +79 -0
- package/dist/cli/services/debug-log.d.ts +10 -0
- package/dist/cli/services/debug-log.js +52 -0
- package/dist/cli/services/error-logger.d.ts +58 -0
- package/dist/cli/services/error-logger.js +269 -0
- package/dist/cli/services/file-history.d.ts +21 -0
- package/dist/cli/services/file-history.js +83 -0
- package/dist/cli/services/format-server-response.d.ts +16 -0
- package/dist/cli/services/format-server-response.js +440 -0
- package/dist/cli/services/git-context.d.ts +11 -0
- package/dist/cli/services/git-context.js +66 -0
- package/dist/cli/services/hooks.d.ts +85 -0
- package/dist/cli/services/hooks.js +258 -0
- package/dist/cli/services/interactive-tools.d.ts +125 -0
- package/dist/cli/services/interactive-tools.js +260 -0
- package/dist/cli/services/keybinding-manager.d.ts +52 -0
- package/dist/cli/services/keybinding-manager.js +115 -0
- package/dist/cli/services/local-tools.d.ts +22 -0
- package/dist/cli/services/local-tools.js +697 -0
- package/dist/cli/services/lsp-manager.d.ts +18 -0
- package/dist/cli/services/lsp-manager.js +717 -0
- package/dist/cli/services/mcp-client.d.ts +48 -0
- package/dist/cli/services/mcp-client.js +157 -0
- package/dist/cli/services/memory-manager.d.ts +16 -0
- package/dist/cli/services/memory-manager.js +57 -0
- package/dist/cli/services/model-manager.d.ts +18 -0
- package/dist/cli/services/model-manager.js +71 -0
- package/dist/cli/services/model-router.d.ts +26 -0
- package/dist/cli/services/model-router.js +149 -0
- package/dist/cli/services/permission-modes.d.ts +13 -0
- package/dist/cli/services/permission-modes.js +43 -0
- package/dist/cli/services/rewind.d.ts +84 -0
- package/dist/cli/services/rewind.js +194 -0
- package/dist/cli/services/ripgrep.d.ts +28 -0
- package/dist/cli/services/ripgrep.js +138 -0
- package/dist/cli/services/sandbox.d.ts +29 -0
- package/dist/cli/services/sandbox.js +97 -0
- package/dist/cli/services/server-tools.d.ts +61 -0
- package/dist/cli/services/server-tools.js +543 -0
- package/dist/cli/services/session-persistence.d.ts +23 -0
- package/dist/cli/services/session-persistence.js +99 -0
- package/dist/cli/services/subagent-worker.d.ts +19 -0
- package/dist/cli/services/subagent-worker.js +41 -0
- package/dist/cli/services/subagent.d.ts +47 -0
- package/dist/cli/services/subagent.js +647 -0
- package/dist/cli/services/system-prompt.d.ts +7 -0
- package/dist/cli/services/system-prompt.js +198 -0
- package/dist/cli/services/team-lead.d.ts +73 -0
- package/dist/cli/services/team-lead.js +512 -0
- package/dist/cli/services/team-state.d.ts +77 -0
- package/dist/cli/services/team-state.js +398 -0
- package/dist/cli/services/teammate.d.ts +31 -0
- package/dist/cli/services/teammate.js +689 -0
- package/dist/cli/services/telemetry.d.ts +61 -0
- package/dist/cli/services/telemetry.js +209 -0
- package/dist/cli/services/tools/agent-tools.d.ts +14 -0
- package/dist/cli/services/tools/agent-tools.js +347 -0
- package/dist/cli/services/tools/file-ops.d.ts +15 -0
- package/dist/cli/services/tools/file-ops.js +487 -0
- package/dist/cli/services/tools/search-tools.d.ts +8 -0
- package/dist/cli/services/tools/search-tools.js +186 -0
- package/dist/cli/services/tools/shell-exec.d.ts +10 -0
- package/dist/cli/services/tools/shell-exec.js +168 -0
- package/dist/cli/services/tools/task-manager.d.ts +28 -0
- package/dist/cli/services/tools/task-manager.js +209 -0
- package/dist/cli/services/tools/web-tools.d.ts +11 -0
- package/dist/cli/services/tools/web-tools.js +395 -0
- package/dist/cli/setup/SetupApp.d.ts +9 -0
- package/dist/cli/setup/SetupApp.js +191 -0
- package/dist/cli/shared/MatrixIntro.d.ts +4 -0
- package/dist/cli/shared/MatrixIntro.js +83 -0
- package/dist/cli/shared/Theme.d.ts +74 -0
- package/dist/cli/shared/Theme.js +127 -0
- package/dist/cli/shared/WhaleBanner.d.ts +10 -0
- package/dist/cli/shared/WhaleBanner.js +12 -0
- package/dist/cli/shared/markdown.d.ts +21 -0
- package/dist/cli/shared/markdown.js +756 -0
- package/dist/cli/status/StatusApp.d.ts +4 -0
- package/dist/cli/status/StatusApp.js +105 -0
- package/dist/cli/stores/StoreApp.d.ts +7 -0
- package/dist/cli/stores/StoreApp.js +81 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +538 -0
- package/dist/local-agent/connection.d.ts +48 -0
- package/dist/local-agent/connection.js +332 -0
- package/dist/local-agent/discovery.d.ts +18 -0
- package/dist/local-agent/discovery.js +146 -0
- package/dist/local-agent/executor.d.ts +34 -0
- package/dist/local-agent/executor.js +241 -0
- package/dist/local-agent/index.d.ts +14 -0
- package/dist/local-agent/index.js +198 -0
- package/dist/node/adapters/base.d.ts +35 -0
- package/dist/node/adapters/base.js +10 -0
- package/dist/node/adapters/discord.d.ts +29 -0
- package/dist/node/adapters/discord.js +299 -0
- package/dist/node/adapters/email.d.ts +23 -0
- package/dist/node/adapters/email.js +218 -0
- package/dist/node/adapters/imessage.d.ts +17 -0
- package/dist/node/adapters/imessage.js +118 -0
- package/dist/node/adapters/slack.d.ts +26 -0
- package/dist/node/adapters/slack.js +259 -0
- package/dist/node/adapters/sms.d.ts +23 -0
- package/dist/node/adapters/sms.js +161 -0
- package/dist/node/adapters/telegram.d.ts +17 -0
- package/dist/node/adapters/telegram.js +101 -0
- package/dist/node/adapters/webchat.d.ts +27 -0
- package/dist/node/adapters/webchat.js +160 -0
- package/dist/node/adapters/whatsapp.d.ts +28 -0
- package/dist/node/adapters/whatsapp.js +230 -0
- package/dist/node/cli.d.ts +2 -0
- package/dist/node/cli.js +325 -0
- package/dist/node/config.d.ts +17 -0
- package/dist/node/config.js +31 -0
- package/dist/node/runtime.d.ts +50 -0
- package/dist/node/runtime.js +351 -0
- package/dist/server/handlers/__test-utils__/mock-supabase.d.ts +11 -0
- package/dist/server/handlers/__test-utils__/mock-supabase.js +393 -0
- package/dist/server/handlers/analytics.d.ts +17 -0
- package/dist/server/handlers/analytics.js +266 -0
- package/dist/server/handlers/api-keys.d.ts +6 -0
- package/dist/server/handlers/api-keys.js +221 -0
- package/dist/server/handlers/billing.d.ts +33 -0
- package/dist/server/handlers/billing.js +272 -0
- package/dist/server/handlers/browser.d.ts +10 -0
- package/dist/server/handlers/browser.js +517 -0
- package/dist/server/handlers/catalog.d.ts +99 -0
- package/dist/server/handlers/catalog.js +976 -0
- package/dist/server/handlers/comms.d.ts +254 -0
- package/dist/server/handlers/comms.js +588 -0
- package/dist/server/handlers/creations.d.ts +6 -0
- package/dist/server/handlers/creations.js +479 -0
- package/dist/server/handlers/crm.d.ts +89 -0
- package/dist/server/handlers/crm.js +538 -0
- package/dist/server/handlers/discovery.d.ts +6 -0
- package/dist/server/handlers/discovery.js +288 -0
- package/dist/server/handlers/embeddings.d.ts +92 -0
- package/dist/server/handlers/embeddings.js +197 -0
- package/dist/server/handlers/enrichment.d.ts +8 -0
- package/dist/server/handlers/enrichment.js +768 -0
- package/dist/server/handlers/image-gen.d.ts +6 -0
- package/dist/server/handlers/image-gen.js +409 -0
- package/dist/server/handlers/inventory.d.ts +319 -0
- package/dist/server/handlers/inventory.js +447 -0
- package/dist/server/handlers/kali.d.ts +10 -0
- package/dist/server/handlers/kali.js +210 -0
- package/dist/server/handlers/llm-providers.d.ts +6 -0
- package/dist/server/handlers/llm-providers.js +673 -0
- package/dist/server/handlers/local-agent.d.ts +6 -0
- package/dist/server/handlers/local-agent.js +118 -0
- package/dist/server/handlers/meta-ads.d.ts +111 -0
- package/dist/server/handlers/meta-ads.js +2279 -0
- package/dist/server/handlers/nodes.d.ts +33 -0
- package/dist/server/handlers/nodes.js +699 -0
- package/dist/server/handlers/operations.d.ts +138 -0
- package/dist/server/handlers/operations.js +131 -0
- package/dist/server/handlers/platform.d.ts +23 -0
- package/dist/server/handlers/platform.js +227 -0
- package/dist/server/handlers/supply-chain.d.ts +19 -0
- package/dist/server/handlers/supply-chain.js +327 -0
- package/dist/server/handlers/transcription.d.ts +17 -0
- package/dist/server/handlers/transcription.js +121 -0
- package/dist/server/handlers/video-gen.d.ts +6 -0
- package/dist/server/handlers/video-gen.js +466 -0
- package/dist/server/handlers/voice.d.ts +8 -0
- package/dist/server/handlers/voice.js +1146 -0
- package/dist/server/handlers/workflow-steps.d.ts +86 -0
- package/dist/server/handlers/workflow-steps.js +2349 -0
- package/dist/server/handlers/workflows.d.ts +7 -0
- package/dist/server/handlers/workflows.js +989 -0
- package/dist/server/index.d.ts +1 -0
- package/dist/server/index.js +2427 -0
- package/dist/server/lib/batch-client.d.ts +80 -0
- package/dist/server/lib/batch-client.js +467 -0
- package/dist/server/lib/code-worker-pool.d.ts +31 -0
- package/dist/server/lib/code-worker-pool.js +224 -0
- package/dist/server/lib/code-worker.d.ts +1 -0
- package/dist/server/lib/code-worker.js +188 -0
- package/dist/server/lib/compaction-service.d.ts +32 -0
- package/dist/server/lib/compaction-service.js +162 -0
- package/dist/server/lib/logger.d.ts +19 -0
- package/dist/server/lib/logger.js +46 -0
- package/dist/server/lib/otel.d.ts +38 -0
- package/dist/server/lib/otel.js +126 -0
- package/dist/server/lib/pg-rate-limiter.d.ts +21 -0
- package/dist/server/lib/pg-rate-limiter.js +86 -0
- package/dist/server/lib/prompt-sanitizer.d.ts +37 -0
- package/dist/server/lib/prompt-sanitizer.js +177 -0
- package/dist/server/lib/provider-capabilities.d.ts +85 -0
- package/dist/server/lib/provider-capabilities.js +190 -0
- package/dist/server/lib/provider-failover.d.ts +74 -0
- package/dist/server/lib/provider-failover.js +210 -0
- package/dist/server/lib/rate-limiter.d.ts +39 -0
- package/dist/server/lib/rate-limiter.js +147 -0
- package/dist/server/lib/server-agent-loop.d.ts +107 -0
- package/dist/server/lib/server-agent-loop.js +667 -0
- package/dist/server/lib/server-subagent.d.ts +78 -0
- package/dist/server/lib/server-subagent.js +203 -0
- package/dist/server/lib/session-checkpoint.d.ts +51 -0
- package/dist/server/lib/session-checkpoint.js +145 -0
- package/dist/server/lib/ssrf-guard.d.ts +13 -0
- package/dist/server/lib/ssrf-guard.js +240 -0
- package/dist/server/lib/supabase-client.d.ts +7 -0
- package/dist/server/lib/supabase-client.js +78 -0
- package/dist/server/lib/template-resolver.d.ts +31 -0
- package/dist/server/lib/template-resolver.js +215 -0
- package/dist/server/lib/utils.d.ts +16 -0
- package/dist/server/lib/utils.js +147 -0
- package/dist/server/local-agent-gateway.d.ts +82 -0
- package/dist/server/local-agent-gateway.js +426 -0
- package/dist/server/providers/anthropic.d.ts +20 -0
- package/dist/server/providers/anthropic.js +199 -0
- package/dist/server/providers/bedrock.d.ts +20 -0
- package/dist/server/providers/bedrock.js +194 -0
- package/dist/server/providers/gemini.d.ts +24 -0
- package/dist/server/providers/gemini.js +486 -0
- package/dist/server/providers/openai.d.ts +24 -0
- package/dist/server/providers/openai.js +522 -0
- package/dist/server/providers/registry.d.ts +32 -0
- package/dist/server/providers/registry.js +58 -0
- package/dist/server/providers/shared.d.ts +32 -0
- package/dist/server/providers/shared.js +124 -0
- package/dist/server/providers/types.d.ts +92 -0
- package/dist/server/providers/types.js +12 -0
- package/dist/server/proxy-handlers.d.ts +6 -0
- package/dist/server/proxy-handlers.js +89 -0
- package/dist/server/tool-router.d.ts +149 -0
- package/dist/server/tool-router.js +803 -0
- package/dist/server/validation.d.ts +24 -0
- package/dist/server/validation.js +301 -0
- package/dist/server/worker.d.ts +19 -0
- package/dist/server/worker.js +201 -0
- package/dist/setup.d.ts +8 -0
- package/dist/setup.js +181 -0
- package/dist/shared/agent-core.d.ts +157 -0
- package/dist/shared/agent-core.js +534 -0
- package/dist/shared/anthropic-types.d.ts +105 -0
- package/dist/shared/anthropic-types.js +7 -0
- package/dist/shared/api-client.d.ts +90 -0
- package/dist/shared/api-client.js +379 -0
- package/dist/shared/constants.d.ts +33 -0
- package/dist/shared/constants.js +80 -0
- package/dist/shared/sse-parser.d.ts +26 -0
- package/dist/shared/sse-parser.js +259 -0
- package/dist/shared/tool-dispatch.d.ts +52 -0
- package/dist/shared/tool-dispatch.js +191 -0
- package/dist/shared/types.d.ts +72 -0
- package/dist/shared/types.js +7 -0
- package/dist/updater.d.ts +25 -0
- package/dist/updater.js +140 -0
- package/dist/webchat/widget.d.ts +0 -0
- package/dist/webchat/widget.js +397 -0
- package/package.json +95 -0
- package/src/cli/services/builtin-skills/commit.md +19 -0
- package/src/cli/services/builtin-skills/review-pr.md +21 -0
- package/src/cli/services/builtin-skills/review.md +18 -0
|
@@ -0,0 +1,673 @@
|
|
|
1
|
+
// server/handlers/llm-providers.ts — Multi-provider LLM with automatic failover
|
|
2
|
+
// Providers: Anthropic, OpenAI, Google, AWS Bedrock, Ollama (local)
|
|
3
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
4
|
+
import OpenAI from "openai";
|
|
5
|
+
import { GoogleGenAI } from "@google/genai";
|
|
6
|
+
import { BedrockRuntimeClient, InvokeModelCommand } from "@aws-sdk/client-bedrock-runtime";
|
|
7
|
+
import { MODELS } from "../../shared/constants.js";
|
|
8
|
+
// ============================================================================
// STATIC MODEL LISTS
// ============================================================================
// Catalog entries advertised to clients per provider. `context_window` is the
// model's context size in tokens as configured here (not queried live).

// Anthropic first-party API model IDs.
const ANTHROPIC_MODELS = [
  { model_id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", context_window: 200000 },
  { model_id: "claude-opus-4-6", name: "Claude Opus 4.6", context_window: 200000 },
  { model_id: "claude-sonnet-4-5-20250929", name: "Claude Sonnet 4.5", context_window: 200000 },
  { model_id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4", context_window: 200000 },
  { model_id: "claude-haiku-4-5-20251001", name: "Claude Haiku 4.5", context_window: 200000 },
  { model_id: "claude-3-5-sonnet-20241022", name: "Claude 3.5 Sonnet", context_window: 200000 },
];
// OpenAI chat/completions model IDs. Note: the o-series entries (o1, o1-mini,
// o3-mini) are reasoning models with a different parameter contract than the
// gpt-4o family.
const OPENAI_MODELS = [
  { model_id: "gpt-4o", name: "GPT-4o", context_window: 128000 },
  { model_id: "gpt-4o-mini", name: "GPT-4o Mini", context_window: 128000 },
  { model_id: "gpt-4-turbo", name: "GPT-4 Turbo", context_window: 128000 },
  { model_id: "o1", name: "o1", context_window: 200000 },
  { model_id: "o1-mini", name: "o1-mini", context_window: 128000 },
  { model_id: "o3-mini", name: "o3-mini", context_window: 200000 },
];
// AWS Bedrock model IDs (mostly cross-region "us." inference profiles).
const BEDROCK_MODELS = [
  { model_id: "anthropic.claude-sonnet-4-6", name: "Claude Sonnet 4.6 (Bedrock)", context_window: 200000 },
  { model_id: "us.anthropic.claude-sonnet-4-20250514-v1:0", name: "Claude Sonnet 4 (Bedrock)", context_window: 200000 },
  { model_id: "us.anthropic.claude-sonnet-4-5-20250929-v1:0", name: "Claude Sonnet 4.5 (Bedrock)", context_window: 200000 },
  { model_id: "us.anthropic.claude-haiku-4-5-20251001-v1:0", name: "Claude Haiku 4.5 (Bedrock)", context_window: 200000 },
  { model_id: "us.anthropic.claude-3-5-haiku-20241022-v1:0", name: "Claude 3.5 Haiku (Bedrock)", context_window: 200000 },
  { model_id: "us.meta.llama3-1-70b-instruct-v1:0", name: "Llama 3.1 70B (Bedrock)", context_window: 128000 },
  { model_id: "us.amazon.nova-pro-v1:0", name: "Amazon Nova Pro (Bedrock)", context_window: 300000 },
];
// Google Gemini model IDs (1M-token context windows).
const GOOGLE_MODELS = [
  { model_id: "gemini-3-pro-preview", name: "Gemini 3 Pro", context_window: 1048576 },
  { model_id: "gemini-3-flash-preview", name: "Gemini 3 Flash", context_window: 1048576 },
  { model_id: "gemini-2.5-pro", name: "Gemini 2.5 Pro", context_window: 1048576 },
  { model_id: "gemini-2.5-flash", name: "Gemini 2.5 Flash", context_window: 1048576 },
  { model_id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite", context_window: 1048576 },
  { model_id: "gemini-2.0-flash", name: "Gemini 2.0 Flash", context_window: 1048576 },
  { model_id: "gemini-2.0-flash-lite", name: "Gemini 2.0 Flash Lite", context_window: 1048576 },
];
// Local Ollama model names (only offered when the Ollama daemon is reachable;
// see the OLLAMA_ENABLED probe in _fetchCredentials).
const OLLAMA_MODELS = [
  { model_id: "llama3.2", name: "Llama 3.2 (local)", context_window: 128000 },
  { model_id: "mistral", name: "Mistral (local)", context_window: 32000 },
  { model_id: "codellama", name: "Code Llama (local)", context_window: 16000 },
  { model_id: "phi3", name: "Phi-3 (local)", context_window: 128000 },
];
|
|
51
|
+
// ============================================================================
// CLIENT CACHE — reuse SDK clients to leverage internal connection pooling
// ============================================================================
const clientCache = new Map();
const CLIENT_CACHE_TTL = 300_000; // 5 minutes
const CLIENT_CACHE_MAX = 20;

/**
 * Return a cached SDK client for (provider, apiKey), creating one via
 * `createFn` when absent or older than CLIENT_CACHE_TTL.
 *
 * FIX: the eviction was documented as LRU but a Map preserves insertion
 * order and entries were never refreshed on read, so it was actually FIFO —
 * a hot client could be evicted while idle ones survived. A cache hit now
 * re-inserts the entry so Map iteration order reflects recency.
 *
 * @param {string} provider - cache namespace (e.g. "anthropic").
 * @param {string} apiKey - credential; only its last 8 chars go into the key.
 * @param {() => any} createFn - factory invoked on miss/expiry.
 * @returns {any} the cached or freshly created client.
 */
function getCachedClient(provider, apiKey, createFn) {
  // Key on the key suffix so full secrets aren't held as map keys.
  const key = `${provider}:${apiKey.slice(-8)}`;
  const cached = clientCache.get(key);
  if (cached && Date.now() - cached.createdAt < CLIENT_CACHE_TTL) {
    // Touch on hit: delete + re-set moves the entry to the "newest" end,
    // making the eviction below true LRU.
    clientCache.delete(key);
    clientCache.set(key, cached);
    return cached.client;
  }
  const client = createFn();
  clientCache.set(key, { client, createdAt: Date.now() });
  // Evict the least-recently-used entry once the cache grows too large.
  if (clientCache.size > CLIENT_CACHE_MAX) {
    const oldest = clientCache.keys().next().value;
    if (oldest !== undefined) {
      clientCache.delete(oldest);
    }
  }
  return client;
}
|
|
71
|
+
// ============================================================================
// CREDENTIAL CACHE — avoid repeated DB round-trips for the same store
// ============================================================================
const credentialCache = new Map();
const CREDENTIAL_CACHE_TTL = 60_000; // 1 minute
const CREDENTIAL_CACHE_MAX = 50;

/**
 * Resolve provider credentials for a store, with a short-lived in-memory
 * cache in front of the DB/env lookups.
 *
 * FIX: eviction was documented as LRU but behaved as FIFO (Map entries were
 * never refreshed on read); a cache hit now re-inserts the entry.
 *
 * @param {object} sb - Supabase client exposing `rpc("decrypt_secret", ...)`.
 * @param {string} storeId - store whose secrets are resolved.
 * @returns {Promise<object>} provider-name -> credential object map.
 */
async function getCredentials(sb, storeId) {
  const cacheKey = `creds:${storeId}`;
  const cached = credentialCache.get(cacheKey);
  if (cached && Date.now() < cached.expiresAt) {
    // Touch on hit so the size-based eviction below is true LRU.
    credentialCache.delete(cacheKey);
    credentialCache.set(cacheKey, cached);
    return cached.data;
  }
  const creds = await _fetchCredentials(sb, storeId);
  credentialCache.set(cacheKey, { data: creds, expiresAt: Date.now() + CREDENTIAL_CACHE_TTL });
  // Evict the least-recently-used entry if the cache grows too large.
  if (credentialCache.size > CREDENTIAL_CACHE_MAX) {
    const oldest = credentialCache.keys().next().value;
    if (oldest !== undefined) {
      credentialCache.delete(oldest);
    }
  }
  return creds;
}

/**
 * Fetch credentials from the environment and the store's encrypted secrets.
 * Best-effort per provider: a missing or undecryptable secret simply skips
 * that provider rather than failing the whole resolution.
 *
 * FIX: the OpenAI and Google secrets were awaited sequentially before the
 * parallel AWS batch (two avoidable DB round-trips); all five store-scoped
 * secrets are now fetched with one Promise.allSettled. A rejected AWS_REGION
 * lookup also no longer discards otherwise-valid Bedrock keys — it falls
 * back to "us-east-1" as the region default already did.
 *
 * @param {object} sb - Supabase client.
 * @param {string} storeId
 * @returns {Promise<object>} creds keyed by provider.
 */
async function _fetchCredentials(sb, storeId) {
  const creds = {};
  // Anthropic — server-level env var, no DB round-trip.
  const anthropicKey = process.env.ANTHROPIC_API_KEY;
  if (anthropicKey) {
    creds.anthropic = { apiKey: anthropicKey };
  }
  // Store-scoped encrypted secrets, fetched in parallel.
  const secret = (name) => sb.rpc("decrypt_secret", { p_name: name, p_store_id: storeId });
  const [openaiRes, googleRes, awsKeyRes, awsSecretRes, awsRegionRes] = await Promise.allSettled([
    secret("OPENAI_API_KEY"),
    secret("GOOGLE_AI_API_KEY"),
    secret("AWS_ACCESS_KEY_ID"),
    secret("AWS_SECRET_ACCESS_KEY"),
    secret("AWS_REGION"),
  ]);
  // Unwrap a settled RPC result; undefined on rejection or missing data.
  const val = (res) => (res.status === "fulfilled" ? res.value?.data : undefined);
  const openaiKey = val(openaiRes);
  if (openaiKey) {
    creds.openai = { apiKey: openaiKey };
  }
  const googleKey = val(googleRes);
  if (googleKey) {
    creds.google = { apiKey: googleKey };
  }
  const accessKeyId = val(awsKeyRes);
  const secretAccessKey = val(awsSecretRes);
  // Bedrock requires both key parts; region is optional with a default.
  if (accessKeyId && secretAccessKey) {
    creds.bedrock = {
      accessKeyId,
      secretAccessKey,
      region: val(awsRegionRes) || "us-east-1",
    };
  }
  // Only probe the local Ollama daemon when explicitly enabled — the probe
  // costs up to 2s and is wasted latency in production.
  if (process.env.OLLAMA_ENABLED === "true") {
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), 2000);
      const resp = await fetch("http://localhost:11434/api/version", { signal: controller.signal });
      clearTimeout(timer);
      if (resp.ok) {
        creds.ollama = { available: true };
      }
    }
    catch { /* not available */ }
  }
  return creds;
}
|
|
150
|
+
// ============================================================================
// PROVIDER IMPLEMENTATIONS
// ============================================================================
const PROVIDER_TIMEOUT_MS = 120_000;

/**
 * Run a single-turn completion against the Anthropic Messages API.
 *
 * @param {string} apiKey - Anthropic API key.
 * @param {string} prompt - user message content.
 * @param {string|undefined} model - model id; defaults to MODELS.SONNET.
 * @param {number} maxTokens - max_tokens for the response.
 * @param {number} temperature - sampling temperature.
 * @param {string|undefined} systemPrompt - optional system instruction.
 * @returns {Promise<{provider: string, model: string, text: string, tokens: {input: number, output: number}}>}
 * @throws whatever the SDK throws, including abort errors on timeout.
 */
async function callAnthropic(apiKey, prompt, model, maxTokens, temperature, systemPrompt) {
  const client = getCachedClient("anthropic", apiKey, () => new Anthropic({ apiKey }));
  const selectedModel = model || MODELS.SONNET;
  // Watchdog: abort the request if the provider stalls past the timeout.
  const abort = new AbortController();
  const watchdog = setTimeout(() => abort.abort(), PROVIDER_TIMEOUT_MS);
  try {
    const request = {
      model: selectedModel,
      max_tokens: maxTokens,
      temperature,
      messages: [{ role: "user", content: prompt }],
    };
    if (systemPrompt) {
      request.system = systemPrompt;
    }
    const resp = await client.messages.create(request, { signal: abort.signal });
    // Collect only text blocks; other content-block types are ignored.
    const textParts = [];
    for (const block of resp.content) {
      if (block.type === "text") {
        textParts.push(block.text);
      }
    }
    return {
      provider: "anthropic",
      model: selectedModel,
      text: textParts.join("\n"),
      tokens: {
        input: resp.usage.input_tokens,
        output: resp.usage.output_tokens,
      },
    };
  } finally {
    // Runs on success and on error — replaces the duplicated
    // clearTimeout calls in both branches.
    clearTimeout(watchdog);
  }
}
|
|
187
|
+
/**
 * Run a single-turn completion against the OpenAI Chat Completions API.
 *
 * FIX: the o-series reasoning models (o1, o1-mini, o3-mini — all listed in
 * OPENAI_MODELS above) reject `max_tokens` (they require
 * `max_completion_tokens`) and only accept the default temperature, so the
 * previous unconditional parameters caused a 400 on those models.
 *
 * @param {string} apiKey - OpenAI API key.
 * @param {string} prompt - user message content.
 * @param {string|undefined} model - model id; defaults to "gpt-4o".
 * @param {number} maxTokens - output token budget.
 * @param {number} temperature - sampling temperature (ignored for o-series).
 * @param {string|undefined} systemPrompt - optional system message.
 * @returns {Promise<{provider: string, model: string, text: string, tokens: {input: number, output: number}}>}
 * @throws whatever the SDK throws, including abort errors on timeout.
 */
async function callOpenAI(apiKey, prompt, model, maxTokens, temperature, systemPrompt) {
  const client = getCachedClient("openai", apiKey, () => new OpenAI({ apiKey }));
  const selectedModel = model || "gpt-4o";
  // o-series reasoning models have a different parameter contract.
  const isReasoningModel = /^o\d/.test(selectedModel);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);
  try {
    const messages = [];
    if (systemPrompt) {
      // NOTE(review): some early o-series snapshots also rejected the
      // "system" role — confirm against the deployed model versions.
      messages.push({ role: "system", content: systemPrompt });
    }
    messages.push({ role: "user", content: prompt });
    const request = { model: selectedModel, messages };
    if (isReasoningModel) {
      // Reasoning models: token budget via max_completion_tokens,
      // temperature must be left at its default.
      request.max_completion_tokens = maxTokens;
    } else {
      request.max_tokens = maxTokens;
      request.temperature = temperature;
    }
    const resp = await client.chat.completions.create(request, { signal: controller.signal });
    clearTimeout(timer);
    const text = resp.choices[0]?.message?.content || "";
    return {
      provider: "openai",
      model: selectedModel,
      text,
      tokens: {
        input: resp.usage?.prompt_tokens || 0,
        output: resp.usage?.completion_tokens || 0,
      },
    };
  }
  catch (err) {
    clearTimeout(timer);
    throw err;
  }
}
|
|
221
|
+
/**
 * Run a single-turn completion against Google Gemini via @google/genai.
 *
 * @param {string} apiKey - Google AI API key.
 * @param {string} prompt - user content.
 * @param {string|undefined} model - model id; defaults to "gemini-2.5-flash".
 * @param {number} maxTokens - requested output budget (see thinking note).
 * @param {number} temperature - sampling temperature.
 * @param {string|undefined} systemPrompt - optional system instruction.
 * @returns {Promise<{provider: string, model: string, text: string, tokens: {input: number, output: number}}>}
 * @throws whatever the SDK throws, including abort errors on timeout.
 */
async function callGoogle(apiKey, prompt, model, maxTokens, temperature, systemPrompt) {
  const client = getCachedClient("google", apiKey, () => new GoogleGenAI({ apiKey }));
  const selectedModel = model || "gemini-2.5-flash";
  // Thinking models (Pro/2.5+) spend "thinking" tokens out of the same
  // maxOutputTokens budget; keep at least 2048 so thinking can't starve
  // the visible answer.
  const isThinkingModel = /pro|2\.5|3-/.test(selectedModel);
  const outputBudget = isThinkingModel ? Math.max(maxTokens, 2048) : maxTokens;
  const abort = new AbortController();
  const watchdog = setTimeout(() => abort.abort(), PROVIDER_TIMEOUT_MS);
  try {
    const config = {
      maxOutputTokens: outputBudget,
      temperature,
      abortSignal: abort.signal,
    };
    if (systemPrompt) {
      config.systemInstruction = systemPrompt;
    }
    const response = await client.models.generateContent({
      model: selectedModel,
      contents: prompt,
      config,
    });
    const usage = response.usageMetadata;
    return {
      provider: "google",
      model: selectedModel,
      text: response.text ?? "",
      tokens: {
        input: usage?.promptTokenCount || 0,
        output: usage?.candidatesTokenCount || 0,
      },
    };
  } finally {
    // Single cleanup point for both the success and error paths.
    clearTimeout(watchdog);
  }
}
|
|
259
|
+
/**
 * Run a single-turn completion against AWS Bedrock's InvokeModel API.
 *
 * Bedrock is a multi-vendor gateway: each model family expects a different
 * JSON request body and returns a different response shape, so this function
 * detects the family from the model id and formats/parses accordingly.
 * Unknown families fall back to the Anthropic format with a warning.
 *
 * @param {string} accessKeyId - AWS access key id (also used in the client cache key).
 * @param {string} secretAccessKey - AWS secret access key.
 * @param {string|undefined} region - AWS region; defaults to "us-east-1".
 * @param {string} prompt - user content.
 * @param {string|undefined} model - Bedrock model id; defaults to Claude 3.5 Haiku.
 * @param {number} maxTokens - output token budget (field name varies by family).
 * @param {number} temperature - sampling temperature.
 * @param {string|undefined} systemPrompt - optional system instruction.
 * @returns {Promise<{provider: string, model: string, text: string, tokens: {input: number, output: number}}>}
 * @throws whatever the AWS SDK throws, including abort errors on timeout.
 */
async function callBedrock(accessKeyId, secretAccessKey, region, prompt, model, maxTokens, temperature, systemPrompt) {
  // Cache per region; getCachedClient keys on the access key's suffix.
  const client = getCachedClient(`bedrock:${region || "us-east-1"}`, accessKeyId, () => new BedrockRuntimeClient({
    region: region || "us-east-1",
    credentials: { accessKeyId, secretAccessKey },
  }));
  const selectedModel = model || "us.anthropic.claude-3-5-haiku-20241022-v1:0";
  // P1 FIX: Detect model family and format body accordingly.
  const isAnthropicModel = /anthropic|claude/i.test(selectedModel);
  const isTitanModel = /titan/i.test(selectedModel);
  const isLlama = /llama|meta/i.test(selectedModel);
  const isMistral = /mistral/i.test(selectedModel);
  let body;
  if (isAnthropicModel) {
    // Anthropic-on-Bedrock uses the Messages API schema with a fixed
    // anthropic_version marker.
    body = JSON.stringify({
      anthropic_version: "bedrock-2023-05-31",
      max_tokens: maxTokens,
      temperature,
      ...(systemPrompt ? { system: systemPrompt } : {}),
      messages: [{ role: "user", content: prompt }],
    });
  }
  else if (isTitanModel) {
    // Titan takes a single inputText string; the system prompt is prepended.
    body = JSON.stringify({
      inputText: systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt,
      textGenerationConfig: { maxTokenCount: maxTokens, temperature },
    });
  }
  else if (isLlama || isMistral) {
    // NOTE(review): this wraps the prompt in Mistral's [INST] template for
    // both families; Llama 3.x on Bedrock documents its own chat template —
    // confirm formatting for the us.meta.llama3-1 model listed above.
    body = JSON.stringify({
      prompt: systemPrompt ? `<s>[INST] ${systemPrompt}\n\n${prompt} [/INST]` : `<s>[INST] ${prompt} [/INST]`,
      max_gen_len: maxTokens,
      temperature,
    });
  }
  else {
    // Unknown family (e.g. Nova in the list above): warn and try the
    // Anthropic schema as a best effort.
    console.warn(`[bedrock] Unknown model family for "${selectedModel}", defaulting to Anthropic format`);
    body = JSON.stringify({
      anthropic_version: "bedrock-2023-05-31",
      max_tokens: maxTokens,
      temperature,
      ...(systemPrompt ? { system: systemPrompt } : {}),
      messages: [{ role: "user", content: prompt }],
    });
  }
  const command = new InvokeModelCommand({
    modelId: selectedModel,
    contentType: "application/json",
    accept: "application/json",
    body: new TextEncoder().encode(body),
  });
  // AbortController for timeout
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);
  try {
    const resp = await client.send(command, { abortSignal: controller.signal });
    clearTimeout(timer);
    const respBody = JSON.parse(new TextDecoder().decode(resp.body));
    // P1 FIX: Parse response based on model family (shapes differ per vendor).
    let text;
    let inputTokens;
    let outputTokens;
    if (isAnthropicModel) {
      text = respBody.content?.[0]?.text || "";
      inputTokens = respBody.usage?.input_tokens || 0;
      outputTokens = respBody.usage?.output_tokens || 0;
    }
    else if (isTitanModel) {
      text = respBody.results?.[0]?.outputText || "";
      inputTokens = respBody.inputTextTokenCount || 0;
      outputTokens = respBody.results?.[0]?.tokenCount || 0;
    }
    else if (isLlama || isMistral) {
      text = respBody.generation || "";
      inputTokens = respBody.prompt_token_count || 0;
      outputTokens = respBody.generation_token_count || 0;
    }
    else {
      // Unknown family: probe each known shape in turn.
      text = respBody.content?.[0]?.text || respBody.generation || respBody.results?.[0]?.outputText || "";
      inputTokens = respBody.usage?.input_tokens || 0;
      outputTokens = respBody.usage?.output_tokens || 0;
    }
    return {
      provider: "bedrock",
      model: selectedModel,
      text,
      tokens: { input: inputTokens, output: outputTokens },
    };
  }
  catch (err) {
    clearTimeout(timer);
    throw err;
  }
}
|
352
|
+
/**
 * Send a non-streaming completion request to a local Ollama server.
 *
 * @param {string} prompt - User prompt to complete.
 * @param {string|undefined} model - Ollama model name; defaults to "llama3.2".
 * @param {string|undefined} systemPrompt - Optional system prompt forwarded as "system".
 * @returns {Promise<{provider: string, model: string, text: string, tokens: {input: number, output: number}}>}
 * @throws {Error} On a non-2xx HTTP response or when the request exceeds PROVIDER_TIMEOUT_MS.
 */
async function callOllama(prompt, model, systemPrompt) {
  const selectedModel = model || "llama3.2";
  // Abort the HTTP request if it runs past the provider timeout.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);
  try {
    // Build the request payload; "system" is included only when provided.
    const payload = { model: selectedModel, prompt };
    if (systemPrompt) {
      payload.system = systemPrompt;
    }
    payload.stream = false;
    const resp = await fetch("http://localhost:11434/api/generate", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
      signal: controller.signal,
    });
    clearTimeout(timer);
    if (!resp.ok) {
      const errText = await resp.text();
      throw new Error(`Ollama HTTP ${resp.status}: ${errText.substring(0, 200)}`);
    }
    const data = await resp.json();
    return {
      provider: "ollama",
      model: selectedModel,
      text: data.response || "",
      tokens: {
        input: data.prompt_eval_count || 0,
        output: data.eval_count || 0,
      },
    };
  }
  catch (err) {
    // Ensure the timer never fires after the request has already failed.
    clearTimeout(timer);
    throw err;
  }
}
|
|
389
|
+
// ============================================================================
|
|
390
|
+
// OUTPUT SANITIZATION
|
|
391
|
+
// ============================================================================
|
|
392
|
+
/**
 * Remove a markdown code fence that wraps an ENTIRE LLM response.
 *
 * Models often ignore "no code fences" instructions and return
 * ```lang ... ``` around HTML/JSON payloads, which breaks downstream
 * consumers (e.g. an email HTML body). Inline fences in the middle of
 * a response are deliberately left alone.
 *
 * @param {string} text - Raw model output.
 * @returns {string} The trimmed text, with an all-enclosing fence stripped.
 */
function stripCodeFences(text) {
  const trimmed = text.trim();
  // Opening ```<optional-lang>\n ... closing \n``` anchored to both ends.
  const FULL_FENCE_RE = /^```[\w]*\s*\n([\s\S]*?)\n\s*```\s*$/;
  const fenced = FULL_FENCE_RE.exec(trimmed);
  return fenced === null ? trimmed : fenced[1];
}
|
|
404
|
+
// ============================================================================
|
|
405
|
+
// FAILOVER LOGIC
|
|
406
|
+
// ============================================================================
|
|
407
|
+
const FAILOVER_ORDER = ["anthropic", "openai", "google", "bedrock", "ollama"];
// Model ownership: which provider does a given model belong to?
// Built table-driven from each provider's static catalog.
const MODEL_PROVIDER_MAP = {};
for (const [providerName, catalog] of [
  ["anthropic", ANTHROPIC_MODELS],
  ["openai", OPENAI_MODELS],
  ["google", GOOGLE_MODELS],
  ["bedrock", BEDROCK_MODELS],
  ["ollama", OLLAMA_MODELS],
]) {
  for (const entry of catalog) {
    MODEL_PROVIDER_MAP[entry.model_id] = providerName;
  }
}
|
|
420
|
+
/** Returns true if the error is transient and worth retrying on the same provider */
function isRetryableError(err) {
  if (!(err instanceof Error)) {
    return false;
  }
  const msg = err.message || "";
  // Message-based signals: transient HTTP status codes, then network failures.
  const transientMessagePatterns = [
    /\b(429|503|529)\b/,
    /ECONNRESET|ETIMEDOUT|ENOTFOUND|socket hang up/i,
  ];
  if (transientMessagePatterns.some((re) => re.test(msg))) {
    return true;
  }
  // Abort / timeout raised by our own AbortController.
  if (msg.includes("aborted") || msg.includes("timeout")) {
    return true;
  }
  // Structured status property on the error (e.g. Anthropic SDK errors).
  return Boolean(err.status) && [429, 503, 529].includes(err.status);
}
|
|
440
|
+
/** Promise-based delay: resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
443
|
+
/** Resolve the model to use for a given provider, clearing it if it doesn't belong to that provider */
function resolveModelForProvider(model, targetProvider) {
  if (!model) {
    return undefined;
  }
  // Keep the requested model only when the target provider owns it;
  // otherwise return undefined so the provider falls back to its default.
  return MODEL_PROVIDER_MAP[model] === targetProvider ? model : undefined;
}
|
|
454
|
+
/**
 * Run a completion with automatic provider failover.
 *
 * Tries each provider in FAILOVER_ORDER (or only `provider` when one is
 * pinned). Per provider: up to 2 attempts — the initial call plus one retry
 * after 1.5s for transient errors (see isRetryableError). The first
 * successful result is returned immediately; otherwise an aggregated error
 * listing every provider's failure reason is returned.
 *
 * @param {object} creds - Per-provider credentials (anthropic/openai/google/bedrock/ollama), any subset may be absent.
 * @param {string} prompt - User prompt.
 * @param {string|undefined} model - Preferred model id; dropped for providers that don't own it (see resolveModelForProvider).
 * @param {string|undefined} provider - Pin to a single provider; falsy means the full failover chain.
 * @param {number} maxTokens - Requested completion budget (re-capped below).
 * @param {number} temperature - Sampling temperature, passed through.
 * @param {string|undefined} systemPrompt - Optional system prompt, passed through.
 * @returns {Promise<{success: true, data: object} | {success: false, error: string}>}
 */
async function completeWithFailover(creds, prompt, model, provider, maxTokens, temperature, systemPrompt) {
  // Defense-in-depth: cap max_tokens even if caller didn't
  const MAX_TOKENS_CEILING = 16384;
  const cappedMaxTokens = Math.min(maxTokens, MAX_TOKENS_CEILING);
  const providers = provider ? [provider] : FAILOVER_ORDER;
  // One human-readable entry per provider failure, joined into the final error.
  const errors = [];
  for (const p of providers) {
    // Resolve model: use original if it belongs to this provider, otherwise let provider default
    const effectiveModel = resolveModelForProvider(model, p);
    // Attempt up to 2 tries (initial + 1 retry for transient errors)
    for (let attempt = 0; attempt < 2; attempt++) {
      try {
        // NOTE: a `break` inside a case exits only the switch (no credentials),
        // then the unconditional `break` after the switch skips the retry loop.
        switch (p) {
          case "anthropic": {
            if (!creds.anthropic) {
              errors.push("anthropic: no API key configured");
              break;
            }
            const result = await callAnthropic(creds.anthropic.apiKey, prompt, effectiveModel, cappedMaxTokens, temperature, systemPrompt);
            result.text = stripCodeFences(result.text);
            return { success: true, data: result };
          }
          case "openai": {
            if (!creds.openai) {
              errors.push("openai: no API key configured");
              break;
            }
            const result = await callOpenAI(creds.openai.apiKey, prompt, effectiveModel, cappedMaxTokens, temperature, systemPrompt);
            result.text = stripCodeFences(result.text);
            return { success: true, data: result };
          }
          case "google": {
            if (!creds.google) {
              errors.push("google: no API key configured");
              break;
            }
            const result = await callGoogle(creds.google.apiKey, prompt, effectiveModel, cappedMaxTokens, temperature, systemPrompt);
            result.text = stripCodeFences(result.text);
            return { success: true, data: result };
          }
          case "bedrock": {
            if (!creds.bedrock) {
              errors.push("bedrock: no AWS credentials configured");
              break;
            }
            const result = await callBedrock(creds.bedrock.accessKeyId, creds.bedrock.secretAccessKey, creds.bedrock.region, prompt, effectiveModel, cappedMaxTokens, temperature, systemPrompt);
            result.text = stripCodeFences(result.text);
            return { success: true, data: result };
          }
          case "ollama": {
            if (!creds.ollama?.available) {
              errors.push("ollama: local server not available");
              break;
            }
            // Ollama takes no token/temperature knobs here — only prompt, model, system.
            const result = await callOllama(prompt, effectiveModel, systemPrompt);
            result.text = stripCodeFences(result.text);
            return { success: true, data: result };
          }
          default:
            errors.push(`${p}: unknown provider`);
        }
        // If we hit a break (no credentials), skip retry and move to next provider
        break;
      }
      catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        if (attempt === 0 && isRetryableError(err)) {
          // Retry the same provider once after a short delay
          console.warn(`[llm-failover] ${p} transient error (attempt ${attempt + 1}), retrying in 1.5s:`, msg);
          await sleep(1500);
          continue;
        }
        // Non-retryable, or the retry also failed: record and fail over.
        errors.push(`${p}: ${msg}`);
        console.error(`[llm-failover] ${p} failed (attempt ${attempt + 1}):`, msg);
        // Break out of retry loop, move to next provider
        break;
      }
    }
  }
  return {
    success: false,
    error: `All providers failed. ${errors.join(" | ")}`,
  };
}
|
|
538
|
+
// ============================================================================
|
|
539
|
+
// HANDLER
|
|
540
|
+
// ============================================================================
|
|
541
|
+
/**
 * MCP handler for LLM provider operations, dispatched on `args.action`.
 *
 * Supported actions:
 * - "complete"        — run a completion with provider failover (completeWithFailover).
 * - "list_models"     — list models for every configured provider.
 * - "failover_status" — report which providers are configured and the failover order.
 *
 * @param {object} sb - Storage/backend handle forwarded to getCredentials (presumably a Supabase client — TODO confirm).
 * @param {object} args - Action arguments: action, prompt, model, provider, max_tokens, temperature, system.
 * @param {string|undefined} storeId - Store whose credentials are used; required for all actions.
 * @returns {Promise<{success: boolean, data?: object, error?: string}>}
 */
export async function handleLLM(sb, args, storeId) {
  const action = args.action;
  // Credentials are resolved per store, so every action needs a store id.
  if (!storeId) {
    return { success: false, error: "store_id is required for LLM provider operations" };
  }
  switch (action) {
    // ================================================================
    // COMPLETE — send a completion request with failover
    // ================================================================
    case "complete": {
      const prompt = args.prompt;
      if (!prompt) {
        return { success: false, error: "prompt is required for complete action" };
      }
      // P1 FIX: Cap prompt size to prevent unbounded input token costs
      const MAX_PROMPT_CHARS = 500_000; // ~125K tokens at 4 chars/token
      if (prompt.length > MAX_PROMPT_CHARS) {
        return { success: false, error: `Prompt too large: ${(prompt.length / 1000).toFixed(0)}K chars (max ${MAX_PROMPT_CHARS / 1000}K)` };
      }
      const model = args.model;
      const provider = args.provider;
      // P1 FIX: Cap max_tokens at 16384 to prevent runaway cost
      // (uses || so a 0 request also falls back to the 1024 default)
      const maxTokens = Math.min(args.max_tokens || 1024, 16384);
      const temperature = args.temperature ?? 0.7;
      const systemPrompt = args.system;
      // Reject unknown provider names up front instead of silently failing over.
      if (provider && !FAILOVER_ORDER.includes(provider)) {
        return { success: false, error: `Invalid provider: ${provider}. Must be one of: ${FAILOVER_ORDER.join(", ")}` };
      }
      const creds = await getCredentials(sb, storeId);
      return completeWithFailover(creds, prompt, model, provider, maxTokens, temperature, systemPrompt);
    }
    // ================================================================
    // LIST_MODELS — list available models across configured providers
    // ================================================================
    case "list_models": {
      const creds = await getCredentials(sb, storeId);
      const models = [];
      // Each configured provider contributes its static catalog, tagged with its name.
      if (creds.anthropic) {
        models.push(...ANTHROPIC_MODELS.map((m) => ({ provider: "anthropic", ...m })));
      }
      if (creds.openai) {
        models.push(...OPENAI_MODELS.map((m) => ({ provider: "openai", ...m })));
      }
      if (creds.google) {
        models.push(...GOOGLE_MODELS.map((m) => ({ provider: "google", ...m })));
      }
      if (creds.bedrock) {
        models.push(...BEDROCK_MODELS.map((m) => ({ provider: "bedrock", ...m })));
      }
      if (creds.ollama?.available) {
        // Try to get actual installed models from Ollama
        try {
          const controller = new AbortController();
          const timer = setTimeout(() => controller.abort(), 3000);
          const resp = await fetch("http://localhost:11434/api/tags", { signal: controller.signal });
          clearTimeout(timer);
          // NOTE(review): on a non-ok response no Ollama models are appended at all
          // (the static-catalog fallback only covers the empty-list and error paths) — confirm intended.
          if (resp.ok) {
            const data = await resp.json();
            if (data.models?.length) {
              models.push(...data.models.map((m) => ({
                provider: "ollama",
                model_id: m.name,
                name: `${m.name} (local)`,
                context_window: 0, // Ollama doesn't report this via API
              })));
            }
            else {
              models.push(...OLLAMA_MODELS.map((m) => ({ provider: "ollama", ...m })));
            }
          }
        }
        catch {
          // Local server unreachable or timed out — fall back to the static catalog.
          models.push(...OLLAMA_MODELS.map((m) => ({ provider: "ollama", ...m })));
        }
      }
      return { success: true, data: { models, total: models.length } };
    }
    // ================================================================
    // FAILOVER_STATUS — show provider configuration and health
    // ================================================================
    case "failover_status": {
      const creds = await getCredentials(sb, storeId);
      // Priority mirrors FAILOVER_ORDER (1 = tried first).
      const providers = [
        {
          provider: "anthropic",
          configured: !!creds.anthropic,
          source: creds.anthropic ? "process.env.ANTHROPIC_API_KEY" : null,
          priority: 1,
        },
        {
          provider: "openai",
          configured: !!creds.openai,
          source: creds.openai ? "user_tool_secrets (encrypted)" : null,
          priority: 2,
        },
        {
          provider: "google",
          configured: !!creds.google,
          source: creds.google ? "user_tool_secrets (encrypted)" : null,
          priority: 3,
        },
        {
          provider: "bedrock",
          configured: !!creds.bedrock,
          source: creds.bedrock ? "user_tool_secrets (encrypted)" : null,
          region: creds.bedrock?.region || null,
          priority: 4,
        },
        {
          provider: "ollama",
          configured: !!creds.ollama?.available,
          source: creds.ollama?.available ? "localhost:11434" : null,
          priority: 5,
        },
      ];
      const configuredCount = providers.filter((p) => p.configured).length;
      return {
        success: true,
        data: {
          providers,
          failover_order: FAILOVER_ORDER,
          configured_count: configuredCount,
          total_providers: providers.length,
          note: configuredCount === 0
            ? "No LLM providers configured. Add API keys to user_tool_secrets."
            : `${configuredCount} provider(s) ready. Failover will try in order: ${FAILOVER_ORDER.filter((p) => providers.find((pr) => pr.provider === p)?.configured).join(" → ")}`,
        },
      };
    }
    default:
      return { success: false, error: `Unknown LLM action: ${action}. Valid actions: complete, list_models, failover_status` };
  }
}
|