clodds 1.6.14 → 1.6.16

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -16410,12 +16410,21 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16410
16410
  }
16411
16411
  return createMessageStreamed(params);
16412
16412
  };
16413
+ // Detect tool/skill hints early — reused for skill budget + tool preloading
16414
+ const hints = processedMessage.text ? (0, tool_registry_js_1.detectToolHints)(processedMessage.text) : { platforms: [], categories: [], hasIntent: false };
16413
16415
  // Build final system prompt (Clawdbot-style)
16414
16416
  // Priority: routed agent prompt > default system prompt
16415
- const skillContext = skills.getSkillContextForMessage(processedMessage.text || '');
16416
- const baseSystemPrompt = SYSTEM_PROMPT.replace('{{SKILLS}}', skillContext ? `\n## Skills Reference\n${skillContext}` : '');
16417
- let finalSystemPrompt = session.context.routedAgentPrompt || baseSystemPrompt;
16418
- // Add memory context if available
16417
+ const skillContext = skills.getSkillContextForMessage(processedMessage.text || '', hints, messages.length);
16418
+ // Split system prompt into cacheable blocks for prompt caching.
16419
+ // Block 1 (cached): Base system prompt without skills — stable across messages.
16420
+ // Block 2 (uncached): Skills + memory changes per query.
16421
+ const coreSystemPrompt = session.context.routedAgentPrompt
16422
+ || SYSTEM_PROMPT.replace('{{SKILLS}}', '');
16423
+ // Build dynamic context (skills + memory) — changes every query, not cached
16424
+ let dynamicContext = '';
16425
+ if (skillContext) {
16426
+ dynamicContext += `\n## Skills Reference\n${skillContext}`;
16427
+ }
16419
16428
  if (memory) {
16420
16429
  const memoryAuto = config.memory?.auto || {};
16421
16430
  const channelKey = processedMessage.chatId || processedMessage.platform;
@@ -16423,7 +16432,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16423
16432
  if (memoryAuto.includeMemoryContext !== false) {
16424
16433
  const memoryContext = memory.buildContextString(session.userId, scope);
16425
16434
  if (memoryContext) {
16426
- finalSystemPrompt += `\n\n## User Memory\n${memoryContext}`;
16435
+ dynamicContext += `\n\n## User Memory\n${memoryContext}`;
16427
16436
  }
16428
16437
  }
16429
16438
  const semanticTopK = memoryAuto.semanticSearchTopK ?? (process.env.CLODDS_MEMORY_SEARCH === '1'
@@ -16434,7 +16443,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16434
16443
  const results = await memory.semanticSearch(session.userId, scope, processedMessage.text, semanticTopK);
16435
16444
  if (results.length > 0) {
16436
16445
  const lines = results.map((r) => `- ${r.entry.key}: ${r.entry.value} (score ${r.score.toFixed(2)})`);
16437
- finalSystemPrompt += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16446
+ dynamicContext += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16438
16447
  }
16439
16448
  }
16440
16449
  catch (error) {
@@ -16442,6 +16451,8 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16442
16451
  }
16443
16452
  }
16444
16453
  }
16454
+ // Backward-compatible string for hooks
16455
+ let finalSystemPrompt = coreSystemPrompt + dynamicContext;
16445
16456
  // =========================================================================
16446
16457
  // HOOKS: agent:before_start - Can modify system prompt
16447
16458
  // =========================================================================
@@ -16461,6 +16472,22 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16461
16472
  if (agentStartResult?.prependContext) {
16462
16473
  finalSystemPrompt = `${agentStartResult.prependContext}\n\n${finalSystemPrompt}`;
16463
16474
  }
16475
+ const hookModified = !!(agentStartResult?.systemPrompt || agentStartResult?.prependContext);
16476
+ let systemBlocks;
16477
+ if (hookModified) {
16478
+ // Hooks changed the prompt — use as single block, still cache it
16479
+ systemBlocks = [{ type: 'text', text: finalSystemPrompt, cache_control: { type: 'ephemeral' } }];
16480
+ }
16481
+ else {
16482
+ systemBlocks = [
16483
+ // Block 1: Core system prompt (stable — cached)
16484
+ { type: 'text', text: coreSystemPrompt, cache_control: { type: 'ephemeral' } },
16485
+ ];
16486
+ // Block 2: Dynamic context (changes per query — not cached)
16487
+ if (dynamicContext) {
16488
+ systemBlocks.push({ type: 'text', text: dynamicContext });
16489
+ }
16490
+ }
16464
16491
  // =========================================================================
16465
16492
  // CONTEXT MANAGEMENT - Check token usage and compact if needed
16466
16493
  // =========================================================================
@@ -16507,7 +16534,6 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16507
16534
  // to avoid loading all tools from multiple platforms (~150+).
16508
16535
  // Also checks conversation context for platform hints in multi-turn chats.
16509
16536
  if (TOOL_SEARCH_ENABLED && processedMessage.text) {
16510
- const hints = (0, tool_registry_js_1.detectToolHints)(processedMessage.text);
16511
16537
  // CONVERSATION CONTEXT: If no platform in current message, borrow from recent history.
16512
16538
  // "buy YES at 40 cents" after discussing polymarket → still loads polymarket tools.
16513
16539
  if (hints.platforms.length === 0 && messages.length > 1) {
@@ -16664,14 +16690,20 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16664
16690
  const extra = discoveredTools.filter(t => !seen.has(t.name));
16665
16691
  return [...tools, ...extra];
16666
16692
  };
16693
+ // Strip internal metadata before sending to API — Anthropic rejects extra fields
16694
+ const toApiTools = (defs) => defs.map(({ metadata: _, ...rest }) => rest);
16667
16695
  let response;
16668
16696
  try {
16669
- const activeTools = getActiveTools();
16697
+ const apiTools = toApiTools(getActiveTools());
16698
+ // Add cache_control to last tool for tool definition caching
16699
+ if (apiTools.length > 0) {
16700
+ apiTools[apiTools.length - 1].cache_control = { type: 'ephemeral' };
16701
+ }
16670
16702
  response = await createMessage({
16671
16703
  model: modelId,
16672
16704
  max_tokens: 1024,
16673
- system: finalSystemPrompt,
16674
- tools: activeTools,
16705
+ system: systemBlocks,
16706
+ tools: apiTools,
16675
16707
  messages,
16676
16708
  });
16677
16709
  }
@@ -16687,7 +16719,20 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16687
16719
  // Use actual API token count for accurate context tracking
16688
16720
  if (response.usage) {
16689
16721
  lastKnownInputTokens = response.usage.input_tokens;
16690
- logger_1.logger.info({ inputTokens: lastKnownInputTokens, max: modelContextWindow }, 'Actual API token usage');
16722
+ // Track prompt cache performance
16723
+ const usage = response.usage;
16724
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
16725
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
16726
+ const cacheHitRate = cacheRead > 0
16727
+ ? (cacheRead / (cacheRead + lastKnownInputTokens)) * 100
16728
+ : 0;
16729
+ logger_1.logger.info({
16730
+ inputTokens: lastKnownInputTokens,
16731
+ max: modelContextWindow,
16732
+ cacheCreation,
16733
+ cacheRead,
16734
+ cacheHitRate: `${cacheHitRate.toFixed(1)}%`,
16735
+ }, 'API token usage (with cache stats)');
16691
16736
  }
16692
16737
  // Tool use loop — capped to prevent runaway token costs
16693
16738
  const MAX_TOOL_TURNS = 10;
@@ -16862,12 +16907,16 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16862
16907
  }
16863
16908
  }
16864
16909
  try {
16865
- const activeTools = getActiveTools();
16910
+ const apiTools = toApiTools(getActiveTools());
16911
+ // Add cache_control to last tool for tool definition caching
16912
+ if (apiTools.length > 0) {
16913
+ apiTools[apiTools.length - 1].cache_control = { type: 'ephemeral' };
16914
+ }
16866
16915
  response = await createMessage({
16867
16916
  model: modelId,
16868
16917
  max_tokens: 1024,
16869
- system: finalSystemPrompt,
16870
- tools: activeTools,
16918
+ system: systemBlocks,
16919
+ tools: apiTools,
16871
16920
  messages,
16872
16921
  });
16873
16922
  }
@@ -16882,6 +16931,12 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16882
16931
  // Update actual token usage after each API call
16883
16932
  if (response.usage) {
16884
16933
  lastKnownInputTokens = response.usage.input_tokens;
16934
+ const loopUsage = response.usage;
16935
+ logger_1.logger.debug({
16936
+ inputTokens: lastKnownInputTokens,
16937
+ cacheRead: loopUsage.cache_read_input_tokens ?? 0,
16938
+ turn: toolTurnCount,
16939
+ }, 'Tool loop token usage');
16885
16940
  // If actual usage is approaching limit, force compaction next iteration
16886
16941
  if (lastKnownInputTokens > modelContextWindow * 0.85) {
16887
16942
  logger_1.logger.info({ inputTokens: lastKnownInputTokens }, 'API reports high token usage, will compact');