clodds 1.6.13 → 1.6.15

This diff shows the changes between two publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -16410,12 +16410,21 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16410
16410
  }
16411
16411
  return createMessageStreamed(params);
16412
16412
  };
16413
+ // Detect tool/skill hints early — reused for skill budget + tool preloading
16414
+ const hints = processedMessage.text ? (0, tool_registry_js_1.detectToolHints)(processedMessage.text) : { platforms: [], categories: [], hasIntent: false };
16413
16415
  // Build final system prompt (Clawdbot-style)
16414
16416
  // Priority: routed agent prompt > default system prompt
16415
- const skillContext = skills.getSkillContextForMessage(processedMessage.text || '');
16416
- const baseSystemPrompt = SYSTEM_PROMPT.replace('{{SKILLS}}', skillContext ? `\n## Skills Reference\n${skillContext}` : '');
16417
- let finalSystemPrompt = session.context.routedAgentPrompt || baseSystemPrompt;
16418
- // Add memory context if available
16417
+ const skillContext = skills.getSkillContextForMessage(processedMessage.text || '', hints, messages.length);
16418
+ // Split system prompt into cacheable blocks for prompt caching.
16419
+ // Block 1 (cached): Base system prompt without skills — stable across messages.
16420
+ // Block 2 (uncached): Skills + memory changes per query.
16421
+ const coreSystemPrompt = session.context.routedAgentPrompt
16422
+ || SYSTEM_PROMPT.replace('{{SKILLS}}', '');
16423
+ // Build dynamic context (skills + memory) — changes every query, not cached
16424
+ let dynamicContext = '';
16425
+ if (skillContext) {
16426
+ dynamicContext += `\n## Skills Reference\n${skillContext}`;
16427
+ }
16419
16428
  if (memory) {
16420
16429
  const memoryAuto = config.memory?.auto || {};
16421
16430
  const channelKey = processedMessage.chatId || processedMessage.platform;
@@ -16423,7 +16432,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16423
16432
  if (memoryAuto.includeMemoryContext !== false) {
16424
16433
  const memoryContext = memory.buildContextString(session.userId, scope);
16425
16434
  if (memoryContext) {
16426
- finalSystemPrompt += `\n\n## User Memory\n${memoryContext}`;
16435
+ dynamicContext += `\n\n## User Memory\n${memoryContext}`;
16427
16436
  }
16428
16437
  }
16429
16438
  const semanticTopK = memoryAuto.semanticSearchTopK ?? (process.env.CLODDS_MEMORY_SEARCH === '1'
@@ -16434,7 +16443,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16434
16443
  const results = await memory.semanticSearch(session.userId, scope, processedMessage.text, semanticTopK);
16435
16444
  if (results.length > 0) {
16436
16445
  const lines = results.map((r) => `- ${r.entry.key}: ${r.entry.value} (score ${r.score.toFixed(2)})`);
16437
- finalSystemPrompt += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16446
+ dynamicContext += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16438
16447
  }
16439
16448
  }
16440
16449
  catch (error) {
@@ -16442,6 +16451,8 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16442
16451
  }
16443
16452
  }
16444
16453
  }
16454
+ // Backward-compatible string for hooks
16455
+ let finalSystemPrompt = coreSystemPrompt + dynamicContext;
16445
16456
  // =========================================================================
16446
16457
  // HOOKS: agent:before_start - Can modify system prompt
16447
16458
  // =========================================================================
@@ -16461,6 +16472,22 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16461
16472
  if (agentStartResult?.prependContext) {
16462
16473
  finalSystemPrompt = `${agentStartResult.prependContext}\n\n${finalSystemPrompt}`;
16463
16474
  }
16475
+ const hookModified = !!(agentStartResult?.systemPrompt || agentStartResult?.prependContext);
16476
+ let systemBlocks;
16477
+ if (hookModified) {
16478
+ // Hooks changed the prompt — use as single block, still cache it
16479
+ systemBlocks = [{ type: 'text', text: finalSystemPrompt, cache_control: { type: 'ephemeral' } }];
16480
+ }
16481
+ else {
16482
+ systemBlocks = [
16483
+ // Block 1: Core system prompt (stable — cached)
16484
+ { type: 'text', text: coreSystemPrompt, cache_control: { type: 'ephemeral' } },
16485
+ ];
16486
+ // Block 2: Dynamic context (changes per query — not cached)
16487
+ if (dynamicContext) {
16488
+ systemBlocks.push({ type: 'text', text: dynamicContext });
16489
+ }
16490
+ }
16464
16491
  // =========================================================================
16465
16492
  // CONTEXT MANAGEMENT - Check token usage and compact if needed
16466
16493
  // =========================================================================
@@ -16501,12 +16528,12 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16501
16528
  let lastKnownInputTokens = 0;
16502
16529
  // Dynamic tool loading: tools discovered via tool_search during this request
16503
16530
  const discoveredTools = [];
16531
+ const MAX_DISCOVERED_TOOLS = 50; // Hard cap on all discovered tools (preload + tool_search)
16504
16532
  // Preload platform/category tools based on user message keywords.
16505
16533
  // Uses intersection mode when both platform AND intent are detected
16506
16534
  // to avoid loading all tools from multiple platforms (~150+).
16507
16535
  // Also checks conversation context for platform hints in multi-turn chats.
16508
16536
  if (TOOL_SEARCH_ENABLED && processedMessage.text) {
16509
- const hints = (0, tool_registry_js_1.detectToolHints)(processedMessage.text);
16510
16537
  // CONVERSATION CONTEXT: If no platform in current message, borrow from recent history.
16511
16538
  // "buy YES at 40 cents" after discussing polymarket → still loads polymarket tools.
16512
16539
  if (hints.platforms.length === 0 && messages.length > 1) {
@@ -16666,10 +16693,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16666
16693
  let response;
16667
16694
  try {
16668
16695
  const activeTools = getActiveTools();
16696
+ // Add cache_control to last tool for tool definition caching
16697
+ if (activeTools.length > 0) {
16698
+ activeTools[activeTools.length - 1].cache_control = { type: 'ephemeral' };
16699
+ }
16669
16700
  response = await createMessage({
16670
16701
  model: modelId,
16671
16702
  max_tokens: 1024,
16672
- system: finalSystemPrompt,
16703
+ system: systemBlocks,
16673
16704
  tools: activeTools,
16674
16705
  messages,
16675
16706
  });
@@ -16686,10 +16717,26 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16686
16717
  // Use actual API token count for accurate context tracking
16687
16718
  if (response.usage) {
16688
16719
  lastKnownInputTokens = response.usage.input_tokens;
16689
- logger_1.logger.info({ inputTokens: lastKnownInputTokens, max: modelContextWindow }, 'Actual API token usage');
16690
- }
16691
- // Tool use loop
16692
- while (response.stop_reason === 'tool_use') {
16720
+ // Track prompt cache performance
16721
+ const usage = response.usage;
16722
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
16723
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
16724
+ const cacheHitRate = cacheRead > 0
16725
+ ? (cacheRead / (cacheRead + lastKnownInputTokens)) * 100
16726
+ : 0;
16727
+ logger_1.logger.info({
16728
+ inputTokens: lastKnownInputTokens,
16729
+ max: modelContextWindow,
16730
+ cacheCreation,
16731
+ cacheRead,
16732
+ cacheHitRate: `${cacheHitRate.toFixed(1)}%`,
16733
+ }, 'API token usage (with cache stats)');
16734
+ }
16735
+ // Tool use loop — capped to prevent runaway token costs
16736
+ const MAX_TOOL_TURNS = 10;
16737
+ let toolTurnCount = 0;
16738
+ while (response.stop_reason === 'tool_use' && toolTurnCount < MAX_TOOL_TURNS) {
16739
+ toolTurnCount++;
16693
16740
  const assistantContent = response.content;
16694
16741
  messages.push({ role: 'assistant', content: assistantContent });
16695
16742
  const toolResults = [];
@@ -16766,11 +16813,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16766
16813
  }
16767
16814
  // Take top 25 results
16768
16815
  const topResults = searchResults.slice(0, 25);
16769
- // Store discovered tools for next API call (dedupe)
16816
+ // Store discovered tools for next API call (dedupe, respect global cap)
16770
16817
  const alreadyDiscovered = new Set(discoveredTools.map(t => t.name));
16771
16818
  for (const t of topResults) {
16819
+ if (discoveredTools.length >= MAX_DISCOVERED_TOOLS)
16820
+ break;
16772
16821
  if (!alreadyDiscovered.has(t.name)) {
16773
16822
  discoveredTools.push(t);
16823
+ alreadyDiscovered.add(t.name);
16774
16824
  }
16775
16825
  }
16776
16826
  result = JSON.stringify({
@@ -16811,10 +16861,18 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16811
16861
  toolResult: result,
16812
16862
  },
16813
16863
  });
16864
+ // Truncate oversized tool results to prevent token bloat
16865
+ // 16K chars ≈ 4K tokens — enough for useful data, prevents runaway costs
16866
+ const MAX_TOOL_RESULT_CHARS = 16384;
16867
+ let truncatedResult = result;
16868
+ if (typeof truncatedResult === 'string' && truncatedResult.length > MAX_TOOL_RESULT_CHARS) {
16869
+ truncatedResult = truncatedResult.slice(0, MAX_TOOL_RESULT_CHARS) + '\n...[truncated, result too large]';
16870
+ logger_1.logger.info({ tool: block.name, originalLen: result.length, truncatedTo: MAX_TOOL_RESULT_CHARS }, 'Truncated large tool result');
16871
+ }
16814
16872
  toolResults.push({
16815
16873
  type: 'tool_result',
16816
16874
  tool_use_id: block.id,
16817
- content: result,
16875
+ content: truncatedResult,
16818
16876
  });
16819
16877
  }
16820
16878
  }
@@ -16848,10 +16906,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16848
16906
  }
16849
16907
  try {
16850
16908
  const activeTools = getActiveTools();
16909
+ // Add cache_control to last tool for tool definition caching
16910
+ if (activeTools.length > 0) {
16911
+ activeTools[activeTools.length - 1].cache_control = { type: 'ephemeral' };
16912
+ }
16851
16913
  response = await createMessage({
16852
16914
  model: modelId,
16853
16915
  max_tokens: 1024,
16854
- system: finalSystemPrompt,
16916
+ system: systemBlocks,
16855
16917
  tools: activeTools,
16856
16918
  messages,
16857
16919
  });
@@ -16867,6 +16929,12 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16867
16929
  // Update actual token usage after each API call
16868
16930
  if (response.usage) {
16869
16931
  lastKnownInputTokens = response.usage.input_tokens;
16932
+ const loopUsage = response.usage;
16933
+ logger_1.logger.debug({
16934
+ inputTokens: lastKnownInputTokens,
16935
+ cacheRead: loopUsage.cache_read_input_tokens ?? 0,
16936
+ turn: toolTurnCount,
16937
+ }, 'Tool loop token usage');
16870
16938
  // If actual usage is approaching limit, force compaction next iteration
16871
16939
  if (lastKnownInputTokens > modelContextWindow * 0.85) {
16872
16940
  logger_1.logger.info({ inputTokens: lastKnownInputTokens }, 'API reports high token usage, will compact');
@@ -16884,6 +16952,9 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16884
16952
  }
16885
16953
  }
16886
16954
  }
16955
+ if (toolTurnCount >= MAX_TOOL_TURNS) {
16956
+ logger_1.logger.warn({ toolTurnCount }, 'Tool loop hit max turns cap');
16957
+ }
16887
16958
  // Extract text response
16888
16959
  const responseText = extractResponseText(response);
16889
16960
  // Save assistant response to history