npm - clodds - Versions diffs - 1.6.1 → 1.6.3 - Mend

clodds 1.6.1 → 1.6.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/agents/index.js CHANGED Viewed

@@ -16336,9 +16336,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
                 'claude-3-opus-20240229': 200000,
             };
             const modelContextWindow = MODEL_CONTEXT_WINDOWS[modelId] || 200000;
+            // Estimate fixed overhead: tool definitions + system prompt (these don't change during conversation)
+            const toolsTokenEstimate = (0, context_1.estimateTokens)(JSON.stringify(tools), modelId);
+            const systemTokenEstimate = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
+            // Reserve enough for tools + system prompt + response buffer
+            const reserveForFixed = toolsTokenEstimate + systemTokenEstimate + 4096;
             const contextConfig = {
                 maxTokens: modelContextWindow,
-                reserveTokens: 4096,
+                reserveTokens: reserveForFixed,
                 compactThreshold: 0.85,
                 minMessagesAfterCompact: 6,
                 summarizer,
@@ -16349,14 +16354,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
                 similarity: memory?.cosineSimilarity,
             };
             const contextManager = (0, context_1.createContextManager)(contextConfig, memory);
-            const effectiveMaxTokens = (contextConfig.maxTokens ?? 128000) - (contextConfig.reserveTokens ?? 4096);
+            const effectiveMaxTokens = (contextConfig.maxTokens ?? 128000) - reserveForFixed;
             const estimateSubmitTokens = () => {
                 const system = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
                 const msgs = messages.reduce((sum, m) => {
                     const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
                     return sum + (0, context_1.estimateTokens)(content, modelId) + 4;
                 }, 0);
-                return system + msgs;
+                return system + msgs + toolsTokenEstimate;
             };
             // Add all messages to context manager for tracking
             for (const msg of messages) {
@@ -16366,10 +16371,9 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
                     content,
                 });
             }
-            // Add system prompt tokens
-            const systemTokens = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
             // Check if we need to compact before first API call
-            const guard = contextManager.checkGuard(systemTokens);
+            // (tools + system prompt are already accounted for in reserveTokens)
+            const guard = contextManager.checkGuard();
             if (guard.shouldCompact) {
                 logger_1.logger.info({ percentUsed: guard.percentUsed }, 'Context approaching limit, compacting');
                 // Trigger compaction:before hook
@@ -16411,6 +16415,11 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
             }
             const initialEstimate = estimateSubmitTokens();
             logger_1.logger.info({ tokens: initialEstimate, max: effectiveMaxTokens }, 'Token estimate before submit');
+            // Safety: if still over limit after compaction, return a friendly error
+            if (initialEstimate > effectiveMaxTokens * 1.1) {
+                logger_1.logger.warn({ tokens: initialEstimate, max: effectiveMaxTokens }, 'Context exceeds limit even after compaction');
+                return 'This conversation has gotten too long for me to process. Please start a new conversation and I\'ll be happy to help!';
+            }
             let response = await createMessage({
                 model: modelId,
                 max_tokens: 1024,
@@ -16515,7 +16524,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
                         content,
                     });
                 }
-                const loopGuard = contextManager.checkGuard(0);
+                const loopGuard = contextManager.checkGuard();
                 if (loopGuard.shouldCompact) {
                     logger_1.logger.info({ percentUsed: loopGuard.percentUsed }, 'Compacting context during tool loop');
                     const loopCompactResult = await contextManager.compact();
@@ -16533,6 +16542,11 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
                 }
                 const loopEstimate = estimateSubmitTokens();
                 logger_1.logger.info({ tokens: loopEstimate, max: effectiveMaxTokens }, 'Token estimate before submit (tool loop)');
+                // Safety: bail if over limit during tool loop
+                if (loopEstimate > effectiveMaxTokens * 1.1) {
+                    logger_1.logger.warn({ tokens: loopEstimate, max: effectiveMaxTokens }, 'Context exceeds limit during tool loop');
+                    break;
+                }
                 response = await createMessage({
                     model: modelId,
                     max_tokens: 1024,