whale-code 6.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -0
- package/bin/swag-agent.js +9 -0
- package/bin/swagmanager-mcp.js +321 -0
- package/dist/cli/app.d.ts +26 -0
- package/dist/cli/app.js +64 -0
- package/dist/cli/chat/AgentSelector.d.ts +14 -0
- package/dist/cli/chat/AgentSelector.js +14 -0
- package/dist/cli/chat/ChatApp.d.ts +9 -0
- package/dist/cli/chat/ChatApp.js +267 -0
- package/dist/cli/chat/ChatInput.d.ts +39 -0
- package/dist/cli/chat/ChatInput.js +509 -0
- package/dist/cli/chat/MarkdownText.d.ts +10 -0
- package/dist/cli/chat/MarkdownText.js +20 -0
- package/dist/cli/chat/MessageList.d.ts +37 -0
- package/dist/cli/chat/MessageList.js +80 -0
- package/dist/cli/chat/ModelSelector.d.ts +20 -0
- package/dist/cli/chat/ModelSelector.js +73 -0
- package/dist/cli/chat/RewindViewer.d.ts +26 -0
- package/dist/cli/chat/RewindViewer.js +185 -0
- package/dist/cli/chat/StoreSelector.d.ts +14 -0
- package/dist/cli/chat/StoreSelector.js +24 -0
- package/dist/cli/chat/StreamingText.d.ts +12 -0
- package/dist/cli/chat/StreamingText.js +12 -0
- package/dist/cli/chat/SubagentPanel.d.ts +45 -0
- package/dist/cli/chat/SubagentPanel.js +110 -0
- package/dist/cli/chat/TeamPanel.d.ts +21 -0
- package/dist/cli/chat/TeamPanel.js +42 -0
- package/dist/cli/chat/ToolIndicator.d.ts +25 -0
- package/dist/cli/chat/ToolIndicator.js +436 -0
- package/dist/cli/chat/hooks/useAgentLoop.d.ts +39 -0
- package/dist/cli/chat/hooks/useAgentLoop.js +382 -0
- package/dist/cli/chat/hooks/useSlashCommands.d.ts +37 -0
- package/dist/cli/chat/hooks/useSlashCommands.js +387 -0
- package/dist/cli/commands/config-cmd.d.ts +10 -0
- package/dist/cli/commands/config-cmd.js +99 -0
- package/dist/cli/commands/doctor.d.ts +14 -0
- package/dist/cli/commands/doctor.js +172 -0
- package/dist/cli/commands/init.d.ts +16 -0
- package/dist/cli/commands/init.js +278 -0
- package/dist/cli/commands/mcp.d.ts +12 -0
- package/dist/cli/commands/mcp.js +162 -0
- package/dist/cli/login/LoginApp.d.ts +7 -0
- package/dist/cli/login/LoginApp.js +157 -0
- package/dist/cli/print-mode.d.ts +31 -0
- package/dist/cli/print-mode.js +202 -0
- package/dist/cli/serve-mode.d.ts +37 -0
- package/dist/cli/serve-mode.js +636 -0
- package/dist/cli/services/agent-definitions.d.ts +25 -0
- package/dist/cli/services/agent-definitions.js +91 -0
- package/dist/cli/services/agent-events.d.ts +178 -0
- package/dist/cli/services/agent-events.js +175 -0
- package/dist/cli/services/agent-loop.d.ts +90 -0
- package/dist/cli/services/agent-loop.js +762 -0
- package/dist/cli/services/agent-worker-base.d.ts +97 -0
- package/dist/cli/services/agent-worker-base.js +220 -0
- package/dist/cli/services/auth-service.d.ts +30 -0
- package/dist/cli/services/auth-service.js +160 -0
- package/dist/cli/services/background-processes.d.ts +126 -0
- package/dist/cli/services/background-processes.js +318 -0
- package/dist/cli/services/browser-auth.d.ts +24 -0
- package/dist/cli/services/browser-auth.js +180 -0
- package/dist/cli/services/claude-md-loader.d.ts +16 -0
- package/dist/cli/services/claude-md-loader.js +58 -0
- package/dist/cli/services/config-store.d.ts +47 -0
- package/dist/cli/services/config-store.js +79 -0
- package/dist/cli/services/debug-log.d.ts +10 -0
- package/dist/cli/services/debug-log.js +52 -0
- package/dist/cli/services/error-logger.d.ts +58 -0
- package/dist/cli/services/error-logger.js +269 -0
- package/dist/cli/services/file-history.d.ts +21 -0
- package/dist/cli/services/file-history.js +83 -0
- package/dist/cli/services/format-server-response.d.ts +16 -0
- package/dist/cli/services/format-server-response.js +440 -0
- package/dist/cli/services/git-context.d.ts +11 -0
- package/dist/cli/services/git-context.js +66 -0
- package/dist/cli/services/hooks.d.ts +85 -0
- package/dist/cli/services/hooks.js +258 -0
- package/dist/cli/services/interactive-tools.d.ts +125 -0
- package/dist/cli/services/interactive-tools.js +260 -0
- package/dist/cli/services/keybinding-manager.d.ts +52 -0
- package/dist/cli/services/keybinding-manager.js +115 -0
- package/dist/cli/services/local-tools.d.ts +22 -0
- package/dist/cli/services/local-tools.js +697 -0
- package/dist/cli/services/lsp-manager.d.ts +18 -0
- package/dist/cli/services/lsp-manager.js +717 -0
- package/dist/cli/services/mcp-client.d.ts +48 -0
- package/dist/cli/services/mcp-client.js +157 -0
- package/dist/cli/services/memory-manager.d.ts +16 -0
- package/dist/cli/services/memory-manager.js +57 -0
- package/dist/cli/services/model-manager.d.ts +18 -0
- package/dist/cli/services/model-manager.js +71 -0
- package/dist/cli/services/model-router.d.ts +26 -0
- package/dist/cli/services/model-router.js +149 -0
- package/dist/cli/services/permission-modes.d.ts +13 -0
- package/dist/cli/services/permission-modes.js +43 -0
- package/dist/cli/services/rewind.d.ts +84 -0
- package/dist/cli/services/rewind.js +194 -0
- package/dist/cli/services/ripgrep.d.ts +28 -0
- package/dist/cli/services/ripgrep.js +138 -0
- package/dist/cli/services/sandbox.d.ts +29 -0
- package/dist/cli/services/sandbox.js +97 -0
- package/dist/cli/services/server-tools.d.ts +61 -0
- package/dist/cli/services/server-tools.js +543 -0
- package/dist/cli/services/session-persistence.d.ts +23 -0
- package/dist/cli/services/session-persistence.js +99 -0
- package/dist/cli/services/subagent-worker.d.ts +19 -0
- package/dist/cli/services/subagent-worker.js +41 -0
- package/dist/cli/services/subagent.d.ts +47 -0
- package/dist/cli/services/subagent.js +647 -0
- package/dist/cli/services/system-prompt.d.ts +7 -0
- package/dist/cli/services/system-prompt.js +198 -0
- package/dist/cli/services/team-lead.d.ts +73 -0
- package/dist/cli/services/team-lead.js +512 -0
- package/dist/cli/services/team-state.d.ts +77 -0
- package/dist/cli/services/team-state.js +398 -0
- package/dist/cli/services/teammate.d.ts +31 -0
- package/dist/cli/services/teammate.js +689 -0
- package/dist/cli/services/telemetry.d.ts +61 -0
- package/dist/cli/services/telemetry.js +209 -0
- package/dist/cli/services/tools/agent-tools.d.ts +14 -0
- package/dist/cli/services/tools/agent-tools.js +347 -0
- package/dist/cli/services/tools/file-ops.d.ts +15 -0
- package/dist/cli/services/tools/file-ops.js +487 -0
- package/dist/cli/services/tools/search-tools.d.ts +8 -0
- package/dist/cli/services/tools/search-tools.js +186 -0
- package/dist/cli/services/tools/shell-exec.d.ts +10 -0
- package/dist/cli/services/tools/shell-exec.js +168 -0
- package/dist/cli/services/tools/task-manager.d.ts +28 -0
- package/dist/cli/services/tools/task-manager.js +209 -0
- package/dist/cli/services/tools/web-tools.d.ts +11 -0
- package/dist/cli/services/tools/web-tools.js +395 -0
- package/dist/cli/setup/SetupApp.d.ts +9 -0
- package/dist/cli/setup/SetupApp.js +191 -0
- package/dist/cli/shared/MatrixIntro.d.ts +4 -0
- package/dist/cli/shared/MatrixIntro.js +83 -0
- package/dist/cli/shared/Theme.d.ts +74 -0
- package/dist/cli/shared/Theme.js +127 -0
- package/dist/cli/shared/WhaleBanner.d.ts +10 -0
- package/dist/cli/shared/WhaleBanner.js +12 -0
- package/dist/cli/shared/markdown.d.ts +21 -0
- package/dist/cli/shared/markdown.js +756 -0
- package/dist/cli/status/StatusApp.d.ts +4 -0
- package/dist/cli/status/StatusApp.js +105 -0
- package/dist/cli/stores/StoreApp.d.ts +7 -0
- package/dist/cli/stores/StoreApp.js +81 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +538 -0
- package/dist/local-agent/connection.d.ts +48 -0
- package/dist/local-agent/connection.js +332 -0
- package/dist/local-agent/discovery.d.ts +18 -0
- package/dist/local-agent/discovery.js +146 -0
- package/dist/local-agent/executor.d.ts +34 -0
- package/dist/local-agent/executor.js +241 -0
- package/dist/local-agent/index.d.ts +14 -0
- package/dist/local-agent/index.js +198 -0
- package/dist/node/adapters/base.d.ts +35 -0
- package/dist/node/adapters/base.js +10 -0
- package/dist/node/adapters/discord.d.ts +29 -0
- package/dist/node/adapters/discord.js +299 -0
- package/dist/node/adapters/email.d.ts +23 -0
- package/dist/node/adapters/email.js +218 -0
- package/dist/node/adapters/imessage.d.ts +17 -0
- package/dist/node/adapters/imessage.js +118 -0
- package/dist/node/adapters/slack.d.ts +26 -0
- package/dist/node/adapters/slack.js +259 -0
- package/dist/node/adapters/sms.d.ts +23 -0
- package/dist/node/adapters/sms.js +161 -0
- package/dist/node/adapters/telegram.d.ts +17 -0
- package/dist/node/adapters/telegram.js +101 -0
- package/dist/node/adapters/webchat.d.ts +27 -0
- package/dist/node/adapters/webchat.js +160 -0
- package/dist/node/adapters/whatsapp.d.ts +28 -0
- package/dist/node/adapters/whatsapp.js +230 -0
- package/dist/node/cli.d.ts +2 -0
- package/dist/node/cli.js +325 -0
- package/dist/node/config.d.ts +17 -0
- package/dist/node/config.js +31 -0
- package/dist/node/runtime.d.ts +50 -0
- package/dist/node/runtime.js +351 -0
- package/dist/server/handlers/__test-utils__/mock-supabase.d.ts +11 -0
- package/dist/server/handlers/__test-utils__/mock-supabase.js +393 -0
- package/dist/server/handlers/analytics.d.ts +17 -0
- package/dist/server/handlers/analytics.js +266 -0
- package/dist/server/handlers/api-keys.d.ts +6 -0
- package/dist/server/handlers/api-keys.js +221 -0
- package/dist/server/handlers/billing.d.ts +33 -0
- package/dist/server/handlers/billing.js +272 -0
- package/dist/server/handlers/browser.d.ts +10 -0
- package/dist/server/handlers/browser.js +517 -0
- package/dist/server/handlers/catalog.d.ts +99 -0
- package/dist/server/handlers/catalog.js +976 -0
- package/dist/server/handlers/comms.d.ts +254 -0
- package/dist/server/handlers/comms.js +588 -0
- package/dist/server/handlers/creations.d.ts +6 -0
- package/dist/server/handlers/creations.js +479 -0
- package/dist/server/handlers/crm.d.ts +89 -0
- package/dist/server/handlers/crm.js +538 -0
- package/dist/server/handlers/discovery.d.ts +6 -0
- package/dist/server/handlers/discovery.js +288 -0
- package/dist/server/handlers/embeddings.d.ts +92 -0
- package/dist/server/handlers/embeddings.js +197 -0
- package/dist/server/handlers/enrichment.d.ts +8 -0
- package/dist/server/handlers/enrichment.js +768 -0
- package/dist/server/handlers/image-gen.d.ts +6 -0
- package/dist/server/handlers/image-gen.js +409 -0
- package/dist/server/handlers/inventory.d.ts +319 -0
- package/dist/server/handlers/inventory.js +447 -0
- package/dist/server/handlers/kali.d.ts +10 -0
- package/dist/server/handlers/kali.js +210 -0
- package/dist/server/handlers/llm-providers.d.ts +6 -0
- package/dist/server/handlers/llm-providers.js +673 -0
- package/dist/server/handlers/local-agent.d.ts +6 -0
- package/dist/server/handlers/local-agent.js +118 -0
- package/dist/server/handlers/meta-ads.d.ts +111 -0
- package/dist/server/handlers/meta-ads.js +2279 -0
- package/dist/server/handlers/nodes.d.ts +33 -0
- package/dist/server/handlers/nodes.js +699 -0
- package/dist/server/handlers/operations.d.ts +138 -0
- package/dist/server/handlers/operations.js +131 -0
- package/dist/server/handlers/platform.d.ts +23 -0
- package/dist/server/handlers/platform.js +227 -0
- package/dist/server/handlers/supply-chain.d.ts +19 -0
- package/dist/server/handlers/supply-chain.js +327 -0
- package/dist/server/handlers/transcription.d.ts +17 -0
- package/dist/server/handlers/transcription.js +121 -0
- package/dist/server/handlers/video-gen.d.ts +6 -0
- package/dist/server/handlers/video-gen.js +466 -0
- package/dist/server/handlers/voice.d.ts +8 -0
- package/dist/server/handlers/voice.js +1146 -0
- package/dist/server/handlers/workflow-steps.d.ts +86 -0
- package/dist/server/handlers/workflow-steps.js +2349 -0
- package/dist/server/handlers/workflows.d.ts +7 -0
- package/dist/server/handlers/workflows.js +989 -0
- package/dist/server/index.d.ts +1 -0
- package/dist/server/index.js +2427 -0
- package/dist/server/lib/batch-client.d.ts +80 -0
- package/dist/server/lib/batch-client.js +467 -0
- package/dist/server/lib/code-worker-pool.d.ts +31 -0
- package/dist/server/lib/code-worker-pool.js +224 -0
- package/dist/server/lib/code-worker.d.ts +1 -0
- package/dist/server/lib/code-worker.js +188 -0
- package/dist/server/lib/compaction-service.d.ts +32 -0
- package/dist/server/lib/compaction-service.js +162 -0
- package/dist/server/lib/logger.d.ts +19 -0
- package/dist/server/lib/logger.js +46 -0
- package/dist/server/lib/otel.d.ts +38 -0
- package/dist/server/lib/otel.js +126 -0
- package/dist/server/lib/pg-rate-limiter.d.ts +21 -0
- package/dist/server/lib/pg-rate-limiter.js +86 -0
- package/dist/server/lib/prompt-sanitizer.d.ts +37 -0
- package/dist/server/lib/prompt-sanitizer.js +177 -0
- package/dist/server/lib/provider-capabilities.d.ts +85 -0
- package/dist/server/lib/provider-capabilities.js +190 -0
- package/dist/server/lib/provider-failover.d.ts +74 -0
- package/dist/server/lib/provider-failover.js +210 -0
- package/dist/server/lib/rate-limiter.d.ts +39 -0
- package/dist/server/lib/rate-limiter.js +147 -0
- package/dist/server/lib/server-agent-loop.d.ts +107 -0
- package/dist/server/lib/server-agent-loop.js +667 -0
- package/dist/server/lib/server-subagent.d.ts +78 -0
- package/dist/server/lib/server-subagent.js +203 -0
- package/dist/server/lib/session-checkpoint.d.ts +51 -0
- package/dist/server/lib/session-checkpoint.js +145 -0
- package/dist/server/lib/ssrf-guard.d.ts +13 -0
- package/dist/server/lib/ssrf-guard.js +240 -0
- package/dist/server/lib/supabase-client.d.ts +7 -0
- package/dist/server/lib/supabase-client.js +78 -0
- package/dist/server/lib/template-resolver.d.ts +31 -0
- package/dist/server/lib/template-resolver.js +215 -0
- package/dist/server/lib/utils.d.ts +16 -0
- package/dist/server/lib/utils.js +147 -0
- package/dist/server/local-agent-gateway.d.ts +82 -0
- package/dist/server/local-agent-gateway.js +426 -0
- package/dist/server/providers/anthropic.d.ts +20 -0
- package/dist/server/providers/anthropic.js +199 -0
- package/dist/server/providers/bedrock.d.ts +20 -0
- package/dist/server/providers/bedrock.js +194 -0
- package/dist/server/providers/gemini.d.ts +24 -0
- package/dist/server/providers/gemini.js +486 -0
- package/dist/server/providers/openai.d.ts +24 -0
- package/dist/server/providers/openai.js +522 -0
- package/dist/server/providers/registry.d.ts +32 -0
- package/dist/server/providers/registry.js +58 -0
- package/dist/server/providers/shared.d.ts +32 -0
- package/dist/server/providers/shared.js +124 -0
- package/dist/server/providers/types.d.ts +92 -0
- package/dist/server/providers/types.js +12 -0
- package/dist/server/proxy-handlers.d.ts +6 -0
- package/dist/server/proxy-handlers.js +89 -0
- package/dist/server/tool-router.d.ts +149 -0
- package/dist/server/tool-router.js +803 -0
- package/dist/server/validation.d.ts +24 -0
- package/dist/server/validation.js +301 -0
- package/dist/server/worker.d.ts +19 -0
- package/dist/server/worker.js +201 -0
- package/dist/setup.d.ts +8 -0
- package/dist/setup.js +181 -0
- package/dist/shared/agent-core.d.ts +157 -0
- package/dist/shared/agent-core.js +534 -0
- package/dist/shared/anthropic-types.d.ts +105 -0
- package/dist/shared/anthropic-types.js +7 -0
- package/dist/shared/api-client.d.ts +90 -0
- package/dist/shared/api-client.js +379 -0
- package/dist/shared/constants.d.ts +33 -0
- package/dist/shared/constants.js +80 -0
- package/dist/shared/sse-parser.d.ts +26 -0
- package/dist/shared/sse-parser.js +259 -0
- package/dist/shared/tool-dispatch.d.ts +52 -0
- package/dist/shared/tool-dispatch.js +191 -0
- package/dist/shared/types.d.ts +72 -0
- package/dist/shared/types.js +7 -0
- package/dist/updater.d.ts +25 -0
- package/dist/updater.js +140 -0
- package/dist/webchat/widget.d.ts +0 -0
- package/dist/webchat/widget.js +397 -0
- package/package.json +95 -0
- package/src/cli/services/builtin-skills/commit.md +19 -0
- package/src/cli/services/builtin-skills/review-pr.md +21 -0
- package/src/cli/services/builtin-skills/review.md +18 -0
|
@@ -0,0 +1,667 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Server Agent Loop — single source of truth for server-side agentic reasoning.
|
|
3
|
+
*
|
|
4
|
+
* Used by:
|
|
5
|
+
* - handleAgentChat (SSE streaming to clients)
|
|
6
|
+
* - setAgentExecutor (workflow "agent" step type)
|
|
7
|
+
*
|
|
8
|
+
* Consolidates: streaming, prompt caching, context management betas, compaction,
|
|
9
|
+
* loop detection, parallel tool execution, subagent delegation, retry, cost tracking.
|
|
10
|
+
*/
|
|
11
|
+
import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
|
|
12
|
+
import { processStreamWithCallbacks } from "../../shared/sse-parser.js";
|
|
13
|
+
import { MODELS } from "../../shared/constants.js";
|
|
14
|
+
import { dispatchTools, buildAssistantContent } from "../../shared/tool-dispatch.js";
|
|
15
|
+
import { getCachedToolDefs, getFullToolSchemas } from "../tool-router.js";
|
|
16
|
+
import { DELEGATE_TASK_TOOL_DEF, runServerSubagent, } from "./server-subagent.js";
|
|
17
|
+
import { handleTranscribe } from "../handlers/transcription.js";
|
|
18
|
+
import { preCompact } from "./compaction-service.js";
|
|
19
|
+
import { providerFailover } from "./provider-failover.js";
|
|
20
|
+
import { saveCheckpoint } from "./session-checkpoint.js";
|
|
21
|
+
import { createLogger } from "./logger.js";
|
|
22
|
+
const log = createLogger("agent-loop");
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// CONSTANTS
|
|
25
|
+
// ============================================================================
|
|
26
|
+
// Number of retries allowed after the first failed API call (the request loop runs attempts 0..MAX_RETRIES).
const MAX_RETRIES = 3;
|
|
27
|
+
// Base backoff delay in milliseconds; the retry path waits RETRY_BASE_DELAY_MS * 2^attempt before trying again.
const RETRY_BASE_DELAY_MS = 1000;
|
|
28
|
+
// Default for the `maxConcurrentTools` option, which is forwarded to dispatchTools as `maxConcurrent`.
const DEFAULT_MAX_CONCURRENT_TOOLS = 7;
|
|
29
|
+
// ============================================================================
|
|
30
|
+
// TOOL CHOICE MAPPING — convert ToolChoice to provider-specific format
|
|
31
|
+
// ============================================================================
|
|
32
|
+
/**
 * Map a resolved ToolChoice value to the Anthropic API `tool_choice` format.
 *
 * @param {"auto" | "any" | "none" | { type: "tool", name: string } | null | undefined} tc
 *   The tool-choice value to translate.
 * @returns {{ toolChoice?: { type: string, name?: string }, omitTools?: boolean }}
 *   `{ omitTools: true }` when `tc` is "none", signalling that the caller
 *   should leave tools out of the request entirely; otherwise `{ toolChoice }`
 *   carrying the `tool_choice` payload. Missing or unrecognized values
 *   (including `null`) fall back to `{ type: "auto" }`.
 */
function mapToolChoiceForAnthropic(tc) {
    if (tc === "none") {
        // No tool_choice payload is produced; the caller drops `tools` instead.
        return { omitTools: true };
    }
    if (tc === "auto" || tc === "any") {
        return { toolChoice: { type: tc } };
    }
    // typeof null === "object", so rule out null before reading `.type`.
    if (tc !== null && typeof tc === "object" && tc.type === "tool") {
        return { toolChoice: { type: "tool", name: tc.name } };
    }
    return { toolChoice: { type: "auto" } };
}
|
|
48
|
+
// ============================================================================
|
|
49
|
+
// UNIFIED AGENT LOOP
|
|
50
|
+
// ============================================================================
|
|
51
|
+
export async function runServerAgentLoop(opts) {
|
|
52
|
+
const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd = DEFAULT_SESSION_COST_BUDGET_USD, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 5 * 60 * 1000, } = opts;
|
|
53
|
+
// Auto-inject delegate_task for all models (subagents always use Claude Haiku/Sonnet)
|
|
54
|
+
// activeTools is mutable — discover_tools adds to it during the session
|
|
55
|
+
const activeTools = [...inputTools];
|
|
56
|
+
if (enableDelegation) {
|
|
57
|
+
if (!activeTools.some((t) => t.name === "delegate_task")) {
|
|
58
|
+
activeTools.push(DELEGATE_TASK_TOOL_DEF);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
// Per-conversation tracking of discovered tools — accumulates tool names
|
|
62
|
+
// activated via discover_tools so they persist across turns within this session.
|
|
63
|
+
// If extendedTools were provided (from a previous session or pre-load), we
|
|
64
|
+
// don't inject them yet (that defeats lazy loading), but we keep the full
|
|
65
|
+
// list available for the executor to reference.
|
|
66
|
+
const discoveredToolNames = new Set();
|
|
67
|
+
// Alias for backward compat within this function
|
|
68
|
+
const tools = activeTools;
|
|
69
|
+
const loopDetector = new LoopDetector();
|
|
70
|
+
// Model routing: extract first user message for complexity estimation
|
|
71
|
+
const firstUserMsg = messages.find((m) => m.role === "user");
|
|
72
|
+
const firstUserText = firstUserMsg
|
|
73
|
+
? typeof firstUserMsg.content === "string"
|
|
74
|
+
? firstUserMsg.content
|
|
75
|
+
: Array.isArray(firstUserMsg.content)
|
|
76
|
+
? firstUserMsg.content
|
|
77
|
+
.filter((b) => b.type === "text" && b.text)
|
|
78
|
+
.map((b) => b.text)
|
|
79
|
+
.join(" ")
|
|
80
|
+
: ""
|
|
81
|
+
: "";
|
|
82
|
+
// Pre-computed tool defs — reused across turns, only rebuilt when tools array changes
|
|
83
|
+
let cachedToolDefsForSession = getCachedToolDefs(tools);
|
|
84
|
+
let lastToolCount = tools.length;
|
|
85
|
+
// Accumulators
|
|
86
|
+
let turnCount = 0;
|
|
87
|
+
let toolCallCount = 0;
|
|
88
|
+
let totalIn = 0;
|
|
89
|
+
let totalOut = 0;
|
|
90
|
+
let cacheCreationTokens = 0;
|
|
91
|
+
let cacheReadTokens = 0;
|
|
92
|
+
let sessionCostUsd = 0;
|
|
93
|
+
let compactionCount = 0;
|
|
94
|
+
let finalResponse = "";
|
|
95
|
+
const allTextResponses = [];
|
|
96
|
+
const allToolNames = [];
|
|
97
|
+
const allCitations = [];
|
|
98
|
+
const turnMetrics = [];
|
|
99
|
+
while (turnCount < maxTurns) {
|
|
100
|
+
// Abort checks
|
|
101
|
+
if (clientDisconnected.value) {
|
|
102
|
+
log.info("client disconnected, stopping");
|
|
103
|
+
break;
|
|
104
|
+
}
|
|
105
|
+
if (Date.now() - startedAt > maxDurationMs) {
|
|
106
|
+
onText?.("[Request timeout exceeded]");
|
|
107
|
+
break;
|
|
108
|
+
}
|
|
109
|
+
// Cost budget enforcement — prevent runaway spending (skip if unlimited)
|
|
110
|
+
if (isFinite(maxCostUsd) && sessionCostUsd >= maxCostUsd) {
|
|
111
|
+
log.warn({ sessionCostUsd, maxCostUsd }, "cost budget exceeded");
|
|
112
|
+
onText?.(`\n[Session cost budget reached ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). Wrapping up.]`);
|
|
113
|
+
break;
|
|
114
|
+
}
|
|
115
|
+
turnCount++;
|
|
116
|
+
loopDetector.resetTurn();
|
|
117
|
+
// Route model on the FIRST turn only — subsequent turns use the requested model
|
|
118
|
+
// since the conversation may have become complex after tool results.
|
|
119
|
+
const effectiveModel = turnCount === 1
|
|
120
|
+
? routeModel(firstUserText, model)
|
|
121
|
+
: model;
|
|
122
|
+
if (turnCount === 1 && effectiveModel !== model) {
|
|
123
|
+
log.info({ from: model, to: effectiveModel }, "model routed");
|
|
124
|
+
}
|
|
125
|
+
// Per-turn model config — uses effectiveModel so routed models get correct settings
|
|
126
|
+
const maxTokens = opts.maxTokens ?? getMaxOutputTokens(effectiveModel);
|
|
127
|
+
const ctxMgmt = getContextManagement(effectiveModel);
|
|
128
|
+
const thinkingCfg = getThinkingConfig(effectiveModel, true);
|
|
129
|
+
// Prepare tool definitions — use pre-computed cache, only rebuild when tools change
|
|
130
|
+
// (discover_tools can add tools mid-session, detected via length change)
|
|
131
|
+
if (tools.length !== lastToolCount) {
|
|
132
|
+
cachedToolDefsForSession = getCachedToolDefs(tools);
|
|
133
|
+
lastToolCount = tools.length;
|
|
134
|
+
}
|
|
135
|
+
// Pre-compaction: mechanically strip redundant data before LLM call
|
|
136
|
+
// Runs every turn but only modifies messages when there's significant bloat
|
|
137
|
+
const { messages: preCompacted, bytesRemoved } = preCompact(messages);
|
|
138
|
+
if (bytesRemoved > 10000) {
|
|
139
|
+
log.info({ bytesRemoved, bytesRemovedKB: Math.round(bytesRemoved / 1024) }, "pre-compaction applied");
|
|
140
|
+
// Replace messages in-place so the outer array reference stays valid
|
|
141
|
+
messages.length = 0;
|
|
142
|
+
messages.push(...preCompacted);
|
|
143
|
+
}
|
|
144
|
+
// Prompt caching: tools + turn boundary
|
|
145
|
+
let finalToolDefs = cachedToolDefsForSession;
|
|
146
|
+
let finalMessages = messages;
|
|
147
|
+
if (enablePromptCaching) {
|
|
148
|
+
const cached = addPromptCaching(cachedToolDefsForSession, messages);
|
|
149
|
+
finalToolDefs = cached.tools;
|
|
150
|
+
finalMessages = cached.messages;
|
|
151
|
+
}
|
|
152
|
+
// System prompt: cached block + dynamic cost context (skip cost line if unlimited budget)
|
|
153
|
+
const costContext = isFinite(maxCostUsd)
|
|
154
|
+
? `Session cost: $${sessionCostUsd.toFixed(2)} / $${maxCostUsd.toFixed(2)}`
|
|
155
|
+
: `Session cost: $${sessionCostUsd.toFixed(2)}`;
|
|
156
|
+
const system = enablePromptCaching
|
|
157
|
+
? [
|
|
158
|
+
{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } },
|
|
159
|
+
{ type: "text", text: costContext },
|
|
160
|
+
]
|
|
161
|
+
: [
|
|
162
|
+
{ type: "text", text: systemPrompt },
|
|
163
|
+
{ type: "text", text: costContext },
|
|
164
|
+
];
|
|
165
|
+
// Resolve tool_choice for this turn
|
|
166
|
+
const recentToolUses = turnMetrics.slice(-3).flatMap(t => t.toolsUsed);
|
|
167
|
+
const resolvedToolChoice = resolveToolChoice({
|
|
168
|
+
toolChoice: opts.toolChoice,
|
|
169
|
+
turnCount,
|
|
170
|
+
recentToolUses,
|
|
171
|
+
availableToolNames: tools.map(t => t.name),
|
|
172
|
+
userMessage: firstUserText,
|
|
173
|
+
});
|
|
174
|
+
const { toolChoice: anthropicToolChoice, omitTools } = mapToolChoiceForAnthropic(resolvedToolChoice);
|
|
175
|
+
if (omitTools) {
|
|
176
|
+
log.info({ turn: turnCount, resolvedToolChoice }, "tool_choice=none — omitting tools");
|
|
177
|
+
}
|
|
178
|
+
// Provider failover check — route to healthy provider (capability-aware)
|
|
179
|
+
const failoverResult = providerFailover.getActiveProvider(effectiveModel, opts.requiredCapabilities);
|
|
180
|
+
const activeModel = failoverResult.model;
|
|
181
|
+
const activeProvider = failoverResult.provider;
|
|
182
|
+
let turnFailoverInfo;
|
|
183
|
+
if (failoverResult.failedOver) {
|
|
184
|
+
turnFailoverInfo = {
|
|
185
|
+
originalProvider: failoverResult.originalProvider,
|
|
186
|
+
activeProvider: failoverResult.provider,
|
|
187
|
+
model: failoverResult.model,
|
|
188
|
+
};
|
|
189
|
+
// Re-compute model config for the failover model
|
|
190
|
+
// (context management, thinking, max tokens may differ)
|
|
191
|
+
}
|
|
192
|
+
// API call with retry
|
|
193
|
+
if (enableStreaming) {
|
|
194
|
+
// ---- STREAMING PATH ----
|
|
195
|
+
let stream = null;
|
|
196
|
+
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
197
|
+
try {
|
|
198
|
+
// Cast through unknown: context_management.edits is Record<string, unknown>[]
|
|
199
|
+
// from getContextManagement() but SDK expects specific edit union types
|
|
200
|
+
const streamBetas = [...ctxMgmt.betas];
|
|
201
|
+
if (thinkingCfg.beta)
|
|
202
|
+
streamBetas.push(thinkingCfg.beta);
|
|
203
|
+
// Citations API: add beta when source documents are provided
|
|
204
|
+
if (documents?.length && !streamBetas.includes("citations-2025-04-15")) {
|
|
205
|
+
streamBetas.push("citations-2025-04-15");
|
|
206
|
+
}
|
|
207
|
+
stream = await anthropic.beta.messages.create({
|
|
208
|
+
model: activeModel,
|
|
209
|
+
max_tokens: maxTokens,
|
|
210
|
+
temperature: thinkingCfg.thinking.type !== "disabled" ? 1 : temperature,
|
|
211
|
+
system,
|
|
212
|
+
...(omitTools ? {} : { tools: finalToolDefs }),
|
|
213
|
+
...(anthropicToolChoice && !omitTools ? { tool_choice: anthropicToolChoice } : {}),
|
|
214
|
+
messages: finalMessages,
|
|
215
|
+
stream: true,
|
|
216
|
+
thinking: thinkingCfg.thinking,
|
|
217
|
+
betas: streamBetas,
|
|
218
|
+
context_management: ctxMgmt.config,
|
|
219
|
+
...(documents?.length ? { documents } : {}),
|
|
220
|
+
});
|
|
221
|
+
providerFailover.recordSuccess(activeProvider);
|
|
222
|
+
break;
|
|
223
|
+
}
|
|
224
|
+
catch (err) {
|
|
225
|
+
providerFailover.recordFailure(activeProvider);
|
|
226
|
+
if (attempt < MAX_RETRIES && isRetryableError(err)) {
|
|
227
|
+
const delay = RETRY_BASE_DELAY_MS * Math.pow(2, attempt);
|
|
228
|
+
log.warn({ attempt: attempt + 1, maxRetries: MAX_RETRIES, delayMs: delay, err: sanitizeError(err) }, "retrying API call");
|
|
229
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
230
|
+
continue;
|
|
231
|
+
}
|
|
232
|
+
throw err;
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
if (!stream)
|
|
236
|
+
throw new Error("Failed to get response after retries");
|
|
237
|
+
// Process stream events via unified parser
|
|
238
|
+
const streamResult = await processStreamWithCallbacks(stream, { onText, onToolStart, onCitation });
|
|
239
|
+
const currentText = streamResult.text;
|
|
240
|
+
const toolUseBlocks = streamResult.toolUseBlocks;
|
|
241
|
+
const compactionContent = streamResult.compactionContent;
|
|
242
|
+
// Accumulate citations from this turn
|
|
243
|
+
if (streamResult.citations.length > 0) {
|
|
244
|
+
allCitations.push(...streamResult.citations);
|
|
245
|
+
}
|
|
246
|
+
// Accumulate per-turn tokens into session totals
|
|
247
|
+
const turnIn = streamResult.usage.inputTokens;
|
|
248
|
+
const turnOut = streamResult.usage.outputTokens;
|
|
249
|
+
const turnCacheRead = streamResult.usage.cacheReadTokens;
|
|
250
|
+
const turnCacheCreation = streamResult.usage.cacheCreationTokens;
|
|
251
|
+
totalIn += turnIn;
|
|
252
|
+
totalOut += turnOut;
|
|
253
|
+
cacheCreationTokens += turnCacheCreation;
|
|
254
|
+
cacheReadTokens += turnCacheRead;
|
|
255
|
+
// Update cost (include cache tokens for accurate pricing)
|
|
256
|
+
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens);
|
|
257
|
+
// Record per-turn metrics for observability
|
|
258
|
+
const turnToolNames = toolUseBlocks.map(b => b.name);
|
|
259
|
+
turnMetrics.push({
|
|
260
|
+
turn: turnCount,
|
|
261
|
+
inputTokens: turnIn,
|
|
262
|
+
outputTokens: turnOut,
|
|
263
|
+
cacheRead: turnCacheRead,
|
|
264
|
+
cacheCreation: turnCacheCreation,
|
|
265
|
+
toolsUsed: turnToolNames,
|
|
266
|
+
costUsd: estimateCostUsd(turnIn, turnOut, model, 0, turnCacheRead, turnCacheCreation),
|
|
267
|
+
...(turnFailoverInfo ? { failover: turnFailoverInfo } : {}),
|
|
268
|
+
});
|
|
269
|
+
if (currentText)
|
|
270
|
+
allTextResponses.push(currentText);
|
|
271
|
+
// Compaction handling — API paused after generating summary.
|
|
272
|
+
// Preserve last 2 messages (1 user + 1 assistant turn) for continuity,
|
|
273
|
+
// then resume. This is NOT a new turn — just context compression.
|
|
274
|
+
if (streamResult.stopReason === "compaction" && compactionContent) {
|
|
275
|
+
compactionCount++;
|
|
276
|
+
log.info({ compactionCount }, "compaction — preserving last 2 messages, resuming");
|
|
277
|
+
// Budget enforcement: if cumulative compaction cost exceeds budget, force wrap-up
|
|
278
|
+
if (compactionCount * COMPACTION_TRIGGER_TOKENS >= COMPACTION_TOTAL_BUDGET) {
|
|
279
|
+
log.warn({ compactionCount, triggerTokens: COMPACTION_TRIGGER_TOKENS, estimatedTokens: compactionCount * COMPACTION_TRIGGER_TOKENS }, "compaction budget exhausted");
|
|
280
|
+
onText?.("\n[Context budget reached — wrapping up.]");
|
|
281
|
+
// Rebuild messages: compaction summary + wrap-up instruction
|
|
282
|
+
const compactedMessages = [
|
|
283
|
+
{ role: "assistant", content: [{ type: "compaction", content: compactionContent }] },
|
|
284
|
+
{ role: "user", content: [{ type: "text", text: "You have reached the context budget. Please wrap up your current work and provide a final summary of what was accomplished and what remains." }] },
|
|
285
|
+
];
|
|
286
|
+
messages.length = 0;
|
|
287
|
+
messages.push(...compactedMessages);
|
|
288
|
+
continue;
|
|
289
|
+
}
|
|
290
|
+
// Normal compaction: preserve last 2 messages for continuity
|
|
291
|
+
const preserved = messages.slice(-2);
|
|
292
|
+
const compactedMessages = [
|
|
293
|
+
{ role: "assistant", content: [{ type: "compaction", content: compactionContent }] },
|
|
294
|
+
...preserved,
|
|
295
|
+
];
|
|
296
|
+
messages.length = 0;
|
|
297
|
+
messages.push(...compactedMessages);
|
|
298
|
+
turnCount--; // Don't count compaction as a turn
|
|
299
|
+
continue;
|
|
300
|
+
}
|
|
301
|
+
// No tool calls — check if truncated at max_tokens
|
|
302
|
+
if (toolUseBlocks.length === 0) {
|
|
303
|
+
if (streamResult.stopReason === "max_tokens") {
|
|
304
|
+
const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks: [], compactionContent });
|
|
305
|
+
messages.push({ role: "assistant", content: assistantContent });
|
|
306
|
+
messages.push({ role: "user", content: [{ type: "text", text: "[Your response was truncated due to length. Please continue where you left off.]" }] });
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
finalResponse = currentText;
|
|
310
|
+
break;
|
|
311
|
+
}
|
|
312
|
+
// Execute tools and build messages for next turn
|
|
313
|
+
const subagentTokens = { input: 0, output: 0, costUsd: 0 };
|
|
314
|
+
const executor = makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredToolNames);
|
|
315
|
+
const { results: toolResults } = await dispatchTools(toolUseBlocks, executor, {
|
|
316
|
+
loopDetector,
|
|
317
|
+
maxConcurrent: maxConcurrentTools,
|
|
318
|
+
// maxResultChars removed — Anthropic context_management handles limits
|
|
319
|
+
transcribeAudio: opts.storeId
|
|
320
|
+
? async (base64, mediaType) => {
|
|
321
|
+
const result = await handleTranscribe(opts.supabase, opts.storeId, base64, mediaType);
|
|
322
|
+
if (!result.success || !result.transcript) {
|
|
323
|
+
throw new Error(result.error || "Transcription returned no transcript");
|
|
324
|
+
}
|
|
325
|
+
return result.transcript;
|
|
326
|
+
}
|
|
327
|
+
: undefined,
|
|
328
|
+
});
|
|
329
|
+
toolCallCount += toolUseBlocks.length;
|
|
330
|
+
// Aggregate subagent tokens into parent totals
|
|
331
|
+
totalIn += subagentTokens.input;
|
|
332
|
+
totalOut += subagentTokens.output;
|
|
333
|
+
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens) + subagentTokens.costUsd;
|
|
334
|
+
const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks, compactionContent });
|
|
335
|
+
messages.push({ role: "assistant", content: assistantContent });
|
|
336
|
+
messages.push({ role: "user", content: toolResults });
|
|
337
|
+
// Session checkpoint — fire-and-forget, never blocks the loop
|
|
338
|
+
if (opts.conversationId) {
|
|
339
|
+
saveCheckpoint(opts.supabase, opts.conversationId, turnCount, messages, { input: totalIn, output: totalOut, cacheRead: cacheReadTokens, cacheCreation: cacheCreationTokens }, sessionCostUsd, allToolNames).catch(() => { }); // swallow — saveCheckpoint already logs internally
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
else {
|
|
343
|
+
// ---- NON-STREAMING PATH ----
|
|
344
|
+
let response = null;
|
|
345
|
+
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
346
|
+
try {
|
|
347
|
+
// Cast through unknown: context_management.edits is Record<string, unknown>[]
|
|
348
|
+
// from getContextManagement() but SDK expects specific edit union types
|
|
349
|
+
const nsBetas = [...ctxMgmt.betas];
|
|
350
|
+
if (thinkingCfg.beta)
|
|
351
|
+
nsBetas.push(thinkingCfg.beta);
|
|
352
|
+
// Citations API: add beta when source documents are provided
|
|
353
|
+
if (documents?.length && !nsBetas.includes("citations-2025-04-15")) {
|
|
354
|
+
nsBetas.push("citations-2025-04-15");
|
|
355
|
+
}
|
|
356
|
+
response = await anthropic.beta.messages.create({
|
|
357
|
+
model: activeModel,
|
|
358
|
+
max_tokens: maxTokens,
|
|
359
|
+
temperature: thinkingCfg.thinking.type !== "disabled" ? 1 : temperature,
|
|
360
|
+
system,
|
|
361
|
+
...(omitTools ? {} : { tools: finalToolDefs }),
|
|
362
|
+
...(anthropicToolChoice && !omitTools ? { tool_choice: anthropicToolChoice } : {}),
|
|
363
|
+
messages: finalMessages,
|
|
364
|
+
thinking: thinkingCfg.thinking,
|
|
365
|
+
betas: nsBetas,
|
|
366
|
+
context_management: ctxMgmt.config,
|
|
367
|
+
...(documents?.length ? { documents } : {}),
|
|
368
|
+
});
|
|
369
|
+
providerFailover.recordSuccess(activeProvider);
|
|
370
|
+
break;
|
|
371
|
+
}
|
|
372
|
+
catch (err) {
|
|
373
|
+
providerFailover.recordFailure(activeProvider);
|
|
374
|
+
if (attempt < MAX_RETRIES && isRetryableError(err)) {
|
|
375
|
+
const delay = RETRY_BASE_DELAY_MS * Math.pow(2, attempt);
|
|
376
|
+
log.warn({ attempt: attempt + 1, maxRetries: MAX_RETRIES, delayMs: delay, err: sanitizeError(err) }, "retrying API call");
|
|
377
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
378
|
+
continue;
|
|
379
|
+
}
|
|
380
|
+
throw err;
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
if (!response)
|
|
384
|
+
throw new Error("Failed to get response after retries");
|
|
385
|
+
// Track tokens
|
|
386
|
+
const nsTurnIn = response.usage?.input_tokens || 0;
|
|
387
|
+
const nsTurnOut = response.usage?.output_tokens || 0;
|
|
388
|
+
const nsTurnCacheCreation = response.usage?.cache_creation_input_tokens ?? 0;
|
|
389
|
+
const nsTurnCacheRead = response.usage?.cache_read_input_tokens ?? 0;
|
|
390
|
+
totalIn += nsTurnIn;
|
|
391
|
+
totalOut += nsTurnOut;
|
|
392
|
+
cacheCreationTokens += nsTurnCacheCreation;
|
|
393
|
+
cacheReadTokens += nsTurnCacheRead;
|
|
394
|
+
// Extract text, tool_use, cite, and compaction blocks
|
|
395
|
+
let currentText = "";
|
|
396
|
+
let nsCompactionContent = null;
|
|
397
|
+
const toolUseBlocks = [];
|
|
398
|
+
for (const block of response.content) {
|
|
399
|
+
if (block.type === "text") {
|
|
400
|
+
currentText += block.text;
|
|
401
|
+
onText?.(block.text);
|
|
402
|
+
}
|
|
403
|
+
else if (block.type === "tool_use") {
|
|
404
|
+
toolUseBlocks.push({
|
|
405
|
+
id: block.id,
|
|
406
|
+
name: block.name,
|
|
407
|
+
input: block.input,
|
|
408
|
+
});
|
|
409
|
+
onToolStart?.(block.name, block.input);
|
|
410
|
+
}
|
|
411
|
+
else if (block.type === "cite") {
|
|
412
|
+
const citeBlock = block;
|
|
413
|
+
const citation = {
|
|
414
|
+
type: "cite",
|
|
415
|
+
cited_text: citeBlock.cited_text ?? "",
|
|
416
|
+
document_index: citeBlock.document_index ?? 0,
|
|
417
|
+
start_char_index: citeBlock.start_char_index ?? 0,
|
|
418
|
+
end_char_index: citeBlock.end_char_index ?? 0,
|
|
419
|
+
...(citeBlock.document_title ? { document_title: citeBlock.document_title } : {}),
|
|
420
|
+
};
|
|
421
|
+
allCitations.push(citation);
|
|
422
|
+
onCitation?.(citation);
|
|
423
|
+
}
|
|
424
|
+
else if (block.type === "compaction") {
|
|
425
|
+
nsCompactionContent = block.content || "";
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens);
|
|
429
|
+
// Record per-turn metrics (non-streaming)
|
|
430
|
+
const nsTurnToolNames = toolUseBlocks.map(b => b.name);
|
|
431
|
+
turnMetrics.push({
|
|
432
|
+
turn: turnCount,
|
|
433
|
+
inputTokens: nsTurnIn,
|
|
434
|
+
outputTokens: nsTurnOut,
|
|
435
|
+
cacheRead: nsTurnCacheRead,
|
|
436
|
+
cacheCreation: nsTurnCacheCreation,
|
|
437
|
+
toolsUsed: nsTurnToolNames,
|
|
438
|
+
costUsd: estimateCostUsd(nsTurnIn, nsTurnOut, model, 0, nsTurnCacheRead, nsTurnCacheCreation),
|
|
439
|
+
...(turnFailoverInfo ? { failover: turnFailoverInfo } : {}),
|
|
440
|
+
});
|
|
441
|
+
if (currentText)
|
|
442
|
+
allTextResponses.push(currentText);
|
|
443
|
+
// Compaction handling (non-streaming) — same logic as streaming path
|
|
444
|
+
if (response.stop_reason === "compaction" && nsCompactionContent !== null) {
|
|
445
|
+
compactionCount++;
|
|
446
|
+
log.info({ compactionCount, streaming: false }, "compaction — preserving last 2 messages");
|
|
447
|
+
if (compactionCount * COMPACTION_TRIGGER_TOKENS >= COMPACTION_TOTAL_BUDGET) {
|
|
448
|
+
onText?.("\n[Context budget reached — wrapping up.]");
|
|
449
|
+
const compactedMessages = [
|
|
450
|
+
{ role: "assistant", content: [{ type: "compaction", content: nsCompactionContent }] },
|
|
451
|
+
{ role: "user", content: [{ type: "text", text: "You have reached the context budget. Please wrap up your current work and provide a final summary." }] },
|
|
452
|
+
];
|
|
453
|
+
messages.length = 0;
|
|
454
|
+
messages.push(...compactedMessages);
|
|
455
|
+
continue;
|
|
456
|
+
}
|
|
457
|
+
const preserved = messages.slice(-2);
|
|
458
|
+
const compactedMessages = [
|
|
459
|
+
{ role: "assistant", content: [{ type: "compaction", content: nsCompactionContent }] },
|
|
460
|
+
...preserved,
|
|
461
|
+
];
|
|
462
|
+
messages.length = 0;
|
|
463
|
+
messages.push(...compactedMessages);
|
|
464
|
+
turnCount--;
|
|
465
|
+
continue;
|
|
466
|
+
}
|
|
467
|
+
// No tool calls — check if truncated at max_tokens
|
|
468
|
+
if (toolUseBlocks.length === 0) {
|
|
469
|
+
if (response.stop_reason === "max_tokens") {
|
|
470
|
+
const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks: [] });
|
|
471
|
+
messages.push({ role: "assistant", content: assistantContent });
|
|
472
|
+
messages.push({ role: "user", content: [{ type: "text", text: "[Your response was truncated due to length. Please continue where you left off.]" }] });
|
|
473
|
+
continue;
|
|
474
|
+
}
|
|
475
|
+
finalResponse = currentText;
|
|
476
|
+
break;
|
|
477
|
+
}
|
|
478
|
+
// Execute tools
|
|
479
|
+
const nonStreamSubTokens = { input: 0, output: 0, costUsd: 0 };
|
|
480
|
+
const nsExecutor = makeToolExecutor(opts, tools, allToolNames, nonStreamSubTokens, discoveredToolNames);
|
|
481
|
+
const { results: toolResults } = await dispatchTools(toolUseBlocks, nsExecutor, {
|
|
482
|
+
loopDetector,
|
|
483
|
+
maxConcurrent: maxConcurrentTools,
|
|
484
|
+
// maxResultChars removed — Anthropic context_management handles limits
|
|
485
|
+
});
|
|
486
|
+
toolCallCount += toolUseBlocks.length;
|
|
487
|
+
// Aggregate subagent tokens into parent totals
|
|
488
|
+
totalIn += nonStreamSubTokens.input;
|
|
489
|
+
totalOut += nonStreamSubTokens.output;
|
|
490
|
+
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens) + nonStreamSubTokens.costUsd;
|
|
491
|
+
const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks });
|
|
492
|
+
messages.push({ role: "assistant", content: assistantContent });
|
|
493
|
+
messages.push({ role: "user", content: toolResults });
|
|
494
|
+
// Session checkpoint — fire-and-forget, never blocks the loop
|
|
495
|
+
if (opts.conversationId) {
|
|
496
|
+
saveCheckpoint(opts.supabase, opts.conversationId, turnCount, messages, { input: totalIn, output: totalOut, cacheRead: cacheReadTokens, cacheCreation: cacheCreationTokens }, sessionCostUsd, allToolNames).catch(() => { }); // swallow — saveCheckpoint already logs internally
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
const fullText = allTextResponses.join("\n\n") || finalResponse;
|
|
501
|
+
return {
|
|
502
|
+
finalText: fullText,
|
|
503
|
+
allTextResponses,
|
|
504
|
+
turnCount,
|
|
505
|
+
toolCallCount,
|
|
506
|
+
toolsUsed: [...new Set(allToolNames)],
|
|
507
|
+
tokens: {
|
|
508
|
+
input: totalIn,
|
|
509
|
+
output: totalOut,
|
|
510
|
+
cacheCreation: cacheCreationTokens,
|
|
511
|
+
cacheRead: cacheReadTokens,
|
|
512
|
+
},
|
|
513
|
+
costUsd: sessionCostUsd,
|
|
514
|
+
loopDetectorStats: loopDetector.getSessionStats(),
|
|
515
|
+
turns: turnMetrics,
|
|
516
|
+
citations: allCitations,
|
|
517
|
+
};
|
|
518
|
+
}
|
|
519
|
+
// ============================================================================
|
|
520
|
+
// TOOL EXECUTOR FACTORY — creates executor for dispatchTools with delegation
|
|
521
|
+
// ============================================================================
|
|
522
|
+
/**
 * Build the executor callback that dispatchTools invokes for each tool_use block.
 *
 * The returned function handles three cases:
 *   1. "delegate_task"  — runs a server-side subagent, writes an audit_logs row,
 *      and adds the subagent's token/cost usage to `subagentTokens`.
 *   2. "discover_tools" — after the underlying tool succeeds, merges the
 *      discovered tool definitions into the live `tools` array.
 *   3. anything else    — forwards to `opts.executeTool` and serializes the result.
 *
 * @param {object} opts Loop options; the fields read here are destructured below
 *   (clients, identifiers, `executeTool`, progress callbacks, time limits).
 *   `opts.tools` is also read to decide which tools count as "core" on refresh.
 * @param {Array<{name: string}>} tools Active tool definitions. MUTATED in place
 *   when discover_tools activates or refreshes tools.
 * @param {string[]} allToolNames Running log of every tool name invoked; names
 *   used by subagents are appended as well.
 * @param {{input: number, output: number, costUsd: number}} subagentTokens
 *   Accumulator the caller reads after dispatch to fold subagent usage into totals.
 * @param {Set<string>} discoveredToolNames Names activated through discover_tools.
 * @returns {(name: string, input: object) => Promise<{success: boolean, output: string}>}
 */
function makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredToolNames) {
    const { anthropic, supabase, storeId, traceId, userId, userEmail, conversationId, agentId, executeTool, onToolResult, onToolProgress, onSubagentProgress, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 5 * 60 * 1000, } = opts;
    // Hex string built from `byteCount` cryptographically random bytes.
    const randomHex = (byteCount) => {
        const bytes = new Uint8Array(byteCount);
        crypto.getRandomValues(bytes);
        return Array.from(bytes).map(b => b.toString(16).padStart(2, "0")).join("");
    };
    return async (name, input) => {
        allToolNames.push(name);
        // Subagent delegation
        if (name === "delegate_task") {
            const subPrompt = String(input.prompt || "");
            const subModelInput = String(input.model || "haiku");
            // Anything other than "opus"/"sonnet" falls back to "haiku".
            const subModel = (subModelInput === "opus" ? "opus" :
                subModelInput === "sonnet" ? "sonnet" : "haiku");
            // Clamp to 1..12 turns; non-numeric or zero input falls back to 6.
            const subMaxTurns = Math.min(Math.max(1, Number(input.max_turns) || 6), 12);
            // Subagents never receive delegate_task themselves.
            const subTools = tools.filter((t) => t.name !== "delegate_task");
            // Timestamp alone can repeat when several delegations are dispatched
            // concurrently within the same millisecond, so add a random suffix.
            const subId = `sub-${Date.now().toString(36)}-${randomHex(4)}`;
            onSubagentProgress?.({ subagentId: subId, event: "started", model: subModel });
            const subStartTime = Date.now();
            const subResult = await runServerSubagent({
                anthropic, supabase, storeId, prompt: subPrompt, model: subModel,
                maxTurns: subMaxTurns, tools: subTools,
                executeTool: async (toolName, args) => executeTool(toolName, args, "server_subagent"),
                onProgress: onSubagentProgress, clientDisconnected, startedAt, maxDurationMs,
            });
            onSubagentProgress?.({ subagentId: subId, event: "done", output: subResult.output });
            // Audit log
            const subDurationMs = Date.now() - subStartTime;
            const subModelId = subModel === "opus" ? MODELS.OPUS
                : subModel === "sonnet" ? MODELS.SONNET : MODELS.HAIKU;
            try {
                const subEndTime = Date.now();
                const subSpanId = randomHex(8);
                const auditResult = await supabase.from("audit_logs").insert({
                    action: "chat.subagent_complete", severity: "info",
                    store_id: storeId || null, resource_type: "chat_subagent",
                    resource_id: agentId || null, request_id: traceId || null,
                    conversation_id: conversationId || null, source: "server_subagent",
                    user_id: userId || null, user_email: userEmail || null,
                    input_tokens: subResult.tokensUsed.input, output_tokens: subResult.tokensUsed.output,
                    total_cost: subResult.costUsd, model: subModelId, duration_ms: subDurationMs,
                    // OTEL fields
                    trace_id: traceId || null,
                    span_id: subSpanId,
                    span_kind: "INTERNAL",
                    service_name: "agent-server",
                    status_code: subResult.success ? "OK" : "ERROR",
                    start_time: new Date(subEndTime - subDurationMs).toISOString(),
                    end_time: new Date(subEndTime).toISOString(),
                    details: {
                        subagent_model: subModel, turn_count: subResult.turnCount,
                        tool_calls: subResult.toolsUsed.length, tool_names: subResult.toolsUsed,
                        cost_usd: subResult.costUsd, success: subResult.success,
                        prompt_preview: subPrompt.substring(0, 200),
                        // gen_ai fields for SwiftUI cost display
                        "gen_ai.request.model": subModelId,
                        "gen_ai.usage.input_tokens": subResult.tokensUsed.input,
                        "gen_ai.usage.output_tokens": subResult.tokensUsed.output,
                        "gen_ai.usage.cost": subResult.costUsd,
                    },
                });
                // The insert can report failure through a returned `error` field
                // instead of throwing; surface that case too.
                if (auditResult?.error) {
                    log.error({ err: auditResult.error.message }, "failed to log subagent delegation audit");
                }
            }
            catch (err) {
                // Auditing is best-effort: never fail the delegation because of it.
                log.error({ err: err.message }, "failed to log subagent delegation audit");
            }
            subagentTokens.input += subResult.tokensUsed.input;
            subagentTokens.output += subResult.tokensUsed.output;
            subagentTokens.costUsd += subResult.costUsd;
            allToolNames.push(...subResult.toolsUsed);
            return { success: subResult.success, output: subResult.output };
        }
        // Regular tool execution — pass onToolProgress for streaming tools (kali)
        const result = await executeTool(name, input, undefined, onToolProgress);
        onToolResult?.(name, result.success, result.success ? result.data : result.error);
        // discover_tools: dynamically add discovered tools to the active set
        if (name === "discover_tools" && result.success && result.data) {
            const refreshRequested = !!input.refresh;
            const discovered = result.data?.tools;
            // If refresh requested, remove previously-discovered tools so they can be re-loaded
            // with fresh schemas from DB. Core tools and delegate_task are never removed.
            if (refreshRequested) {
                const coreNames = new Set((opts.tools || []).map(t => t.name));
                coreNames.add("delegate_task");
                // Walk backwards so splice() does not shift unvisited indices.
                for (let i = tools.length - 1; i >= 0; i--) {
                    if (discoveredToolNames.has(tools[i].name) && !coreNames.has(tools[i].name)) {
                        tools.splice(i, 1);
                    }
                }
                discoveredToolNames.clear();
                log.info("discover_tools refresh — cleared previously discovered tools");
            }
            if (Array.isArray(discovered)) {
                // Use getFullToolSchemas as authoritative source — supplements handler response
                // with any schemas the handler returned, ensuring we have complete definitions.
                const discoveredNames = discovered.filter((t) => t?.name).map((t) => t.name);
                const fullSchemas = getFullToolSchemas(discoveredNames);
                // Build a map for O(1) lookup: prefer full schemas from cache, fall back to handler data
                const schemaMap = new Map();
                for (const t of discovered) {
                    if (t?.name && t.input_schema) {
                        schemaMap.set(t.name, { name: t.name, description: t.description || t.name, input_schema: t.input_schema });
                    }
                }
                for (const t of fullSchemas) {
                    schemaMap.set(t.name, t); // Full cache schemas override handler data (more complete)
                }
                const activated = [];
                const alreadyActive = [];
                for (const [toolName, toolDef] of schemaMap) {
                    // Track as discovered regardless of whether already active
                    discoveredToolNames.add(toolName);
                    const existingIdx = tools.findIndex(existing => existing.name === toolName);
                    if (existingIdx === -1) {
                        tools.push(toolDef);
                        activated.push(toolName);
                    }
                    else if (refreshRequested) {
                        // Tool survived the refresh sweep (e.g. a core tool): swap in the
                        // fresh schema IN PLACE. Pushing here would leave two definitions
                        // with the same name in the active tool list.
                        tools[existingIdx] = toolDef;
                        activated.push(toolName);
                    }
                    else {
                        alreadyActive.push(toolName);
                    }
                }
                if (activated.length > 0) {
                    log.info({ activatedTools: activated, refreshed: refreshRequested }, "discover_tools activated");
                }
                // Return a cleaner message to the model
                return {
                    success: true,
                    output: JSON.stringify({
                        activated,
                        already_active: alreadyActive,
                        refreshed: refreshRequested,
                        message: activated.length > 0
                            ? `Activated ${activated.length} tool(s): ${activated.join(", ")}. You can now use them.`
                            : "Requested tools were already active.",
                        total_discovered: discoveredToolNames.size,
                    }),
                };
            }
        }
        return {
            success: result.success,
            output: JSON.stringify(result.success ? result.data : { error: result.error }),
        };
    };
}
|