npm - clementine-agent - Versions diffs - 1.18.36 → 1.18.38 - Mend

clementine-agent 1.18.36 → 1.18.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/agent/assistant.js +96 -25
package/package.json +1 -1

package/dist/agent/assistant.js CHANGED

@@ -417,7 +417,17 @@ const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
 const MAX_SESSION_EXCHANGES = 40;
 const SESSION_EXPIRY_MS = 24 * 60 * 60 * 1000;
 const AUTO_MEMORY_MIN_LENGTH = 80;
-const AUTO_MEMORY_MODEL = MODELS.sonnet;
+// Model used by the post-exchange memory extractor + the conversation
+// summarizer. Both are routine "read this exchange, extract facts, call
+// memory_write with structured JSON" tasks — Haiku handles them fine and
+// they fire on EVERY substantive exchange, so the multiplier matters.
+// Override with CLEMENTINE_AUTO_MEMORY_MODEL=sonnet if you observe
+// extraction quality drop.
+const AUTO_MEMORY_MODEL = process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('sonnet')
+    ? MODELS.sonnet
+    : process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('opus')
+        ? MODELS.opus
+        : MODELS.haiku;
 const OWNER = OWNER_NAME || 'the user';
 const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
 const TOOLS_SERVER = `${ASSISTANT_NAME.toLowerCase()}-tools`;
@@ -1446,8 +1456,17 @@ Large tool outputs blow the context window and rotate your session mid-task —
             if (agentsEntry)
                 parts.push(agentsEntry.content);
         }
+        // ── Per-session-volatile content goes to volatileParts (post-cache-boundary) ──
+        // Anthropic's prompt-caching guidance is explicit: cache is a prefix
+        // hash, so anything that changes between turns must sit AFTER the
+        // breakpoint. The blocks below — retrieved context, working memory,
+        // MEMORY.md, today's notes, yesterday's summary, recent conversations —
+        // all change within a single 5-minute cache TTL window during an
+        // active session. Putting them in the stable prefix caused ~80 KB of
+        // cache_creation per session-content change. After this refactor the
+        // stable prefix stays byte-identical across calls.
         if (retrievalContext) {
-            parts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
+            volatileParts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
                 `*When retrieved context contains information from previous conversations relevant to the current topic, naturally reference it. ` +
                 `If the user mentions a person and memory shows their last known status or project, weave that in conversationally. ` +
                 `Only reference if genuinely relevant — do not force callbacks to old context.*`);
@@ -1460,7 +1479,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
                     const wmContent = fs.readFileSync(_wmFileFallback, 'utf-8').trim();
                     if (wmContent) {
                         const truncated = isAutonomous ? wmContent.slice(0, 1500) : wmContent;
-                        parts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
+                        volatileParts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
                     }
                 }
                 catch { /* non-critical */ }
@@ -1470,10 +1489,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
                 // Autonomous runs get truncated memory — just enough for context
                 if (isAutonomous) {
                     const truncated = memoryEntry.content.slice(0, 2000);
-                    parts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
+                    volatileParts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
                 }
                 else {
-                    parts.push(`## Current Memory\n\n${memoryEntry.content}`);
+                    volatileParts.push(`## Current Memory\n\n${memoryEntry.content}`);
                 }
             }
         }
@@ -1484,12 +1503,12 @@ Large tool outputs blow the context window and rotate your session mid-task —
             this.promptCache.watch(agentMemPath);
             const agentMemEntry = this.promptCache.get(agentMemPath);
             if (agentMemEntry) {
-                parts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
+                volatileParts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
             }
         }
         const todayEntry = !skipAmbientContext ? this.promptCache.get(todayPath) : null;
         if (todayEntry) {
-            parts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
+            volatileParts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
         }
         // Skip yesterday's notes and recent conversation summaries for autonomous runs
         if (!isAutonomous && !skipAmbientContext) {
@@ -1501,7 +1520,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
                     const yEntry = this.promptCache.get(yPath);
                     if (yEntry && yEntry.content.includes('## Summary')) {
                         const summary = yEntry.content.slice(yEntry.content.indexOf('## Summary'));
-                        parts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
+                        volatileParts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
                     }
                 }
             }
@@ -1513,7 +1532,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
                             const ts = (s.createdAt ?? 'unknown').slice(0, 16);
                             return `### ${ts}\n${s.summary}`;
                         });
-                        parts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
+                        volatileParts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
                     }
                 }
                 catch {
@@ -1522,8 +1541,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
             }
         }
         if (isAutonomous) {
-            // Minimal vault reference for heartbeats/cron — they know their tools
-            parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, ${todayISO()}.md (today), TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
+            // Minimal vault reference for heartbeats/cron — they know their tools.
+            // No date reference here: today's date string in the stable prefix
+            // would invalidate the prompt cache once per day.
+            parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, today's daily note, TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
             // Deviation rules — tiered autonomy for handling unexpected work during cron/heartbeat
             parts.push(`## Deviation Rules (Tiered Autonomy)
@@ -1554,7 +1575,7 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
 **File tools:** Read, Write, Edit, Glob, Grep for direct access.
 **Folders:** 00-System (SOUL/MEMORY/AGENTS.md), 01-Daily-Notes (YYYY-MM-DD.md), 02-People, 03-Projects, 04-Topics, 05-Tasks/TASKS.md, 06-Templates, 07-Inbox.
-**Key files:** MEMORY.md (long-term), ${todayISO()}.md (today), TASKS.md (tasks).
+**Key files:** MEMORY.md (long-term), today's daily note, TASKS.md (tasks).
 **Task IDs:** \`{T-001}\`, subtasks \`{T-001.1}\`. Recurring tasks auto-create next copy on completion.
@@ -1629,21 +1650,19 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
                 parts.push(`Linked projects:\n${projectDetails.join('\n')}`);
             }
         }
-        // Inject hot corrections (explicit behavioral corrections from recent sessions)
+        // Recent Corrections + feedback signals — both refresh as the user
+        // gives feedback during a session. Putting them in volatile keeps the
+        // stable prefix cache-stable across feedback turns. Same per-message
+        // anti-pattern that OpenClaw issue #20894 documented as a 100x cost
+        // amplifier.
         if (this.hotCorrections.length > 0 && !lightweightTurn) {
             const recentCutoff = Date.now() - 24 * 60 * 60 * 1000; // last 24 hours
             const recent = this.hotCorrections.filter(c => new Date(c.timestamp).getTime() > recentCutoff);
             if (recent.length > 0) {
                 const lines = recent.map(c => `- [${c.category}] ${c.correction}`);
-                parts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
+                volatileParts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
             }
         }
-        // Inject recent feedback signals (closes the feedback → behavior loop).
-        // Without this block, user thumbs-down + comments live in the feedback
-        // table and never reach the agent's awareness — only the skill-suppress
-        // filter consumed them. We surface aggregates + the last few commented
-        // negatives so the agent can self-adjust on the next turn. Skipped when
-        // there's nothing to report (no noise).
         if (this.memoryStore?.getRecentFeedbackSignals && !lightweightTurn) {
             try {
                 const sig = this.memoryStore.getRecentFeedbackSignals({ days: 14, limit: 3 });
@@ -1659,7 +1678,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
                             lines.push(`- (${n.channel}) ${comment}`);
                         }
                     }
-                    parts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
+                    volatileParts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
                 }
             }
             catch { /* non-fatal */ }
@@ -1708,7 +1727,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
                             skillBlock += `\n\n**Reference files:**\n${attParts.join('\n\n')}`;
                         }
                     }
-                    parts.push(skillBlock);
+                    // Skill matches depend on the user's last message + the live
+                    // suppression list; both refresh per turn. Volatile.
+                    volatileParts.push(skillBlock);
                 }
             }
             catch { /* non-fatal — skills dir may not exist */ }
@@ -1730,7 +1751,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
                     parts.push(`## Agent-Specific Preferences (${profile.slug})\n\n${agentPrefs.data.preferences}`);
                 }
             }
-            // User Theory of Mind — structured user model
+            // User Theory of Mind — structured user model. The model file
+            // updates as the user's preferences/priorities are learned, so
+            // its content is volatile within a session.
             const userModelFile = path.join(VAULT_DIR, '00-System', 'USER_MODEL.md');
             this.promptCache.watch(userModelFile);
             const userModel = this.promptCache.get(userModelFile);
@@ -1740,7 +1763,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
                 const comm = userModel.data.communication ? `Communication: ${Object.entries(userModel.data.communication).map(([k, v]) => `${k}=${v}`).join(', ')}` : '';
                 const modelParts = [expertise, priorities, comm].filter(Boolean);
                 if (modelParts.length > 0) {
-                    parts.push(`## User Context\n\n${modelParts.join('\n')}`);
+                    volatileParts.push(`## User Context\n\n${modelParts.join('\n')}`);
                 }
             }
             // Proactive feedback capture
@@ -2302,6 +2325,26 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         const volatileSuffix = volatilePromptPart && volatilePromptPart.trim().length > 0
             ? volatilePromptPart
             : '';
+        // Debug-mode: log a short hash of the stable prefix + volatile suffix
+        // per query. When CLEMENTINE_DEBUG_CACHE=1, mismatched stable hashes
+        // across consecutive turns of the same session indicate a regression
+        // where volatile content silently leaked back into the cached prefix.
+        // No-op (no allocation) in normal mode.
+        if (process.env.CLEMENTINE_DEBUG_CACHE === '1') {
+            const { createHash } = await import('node:crypto');
+            const stableHash = createHash('sha1').update(stablePrefixParts.join('\n\n---\n\n')).digest('hex').slice(0, 8);
+            const volatileHash = volatileSuffix
+                ? createHash('sha1').update(volatileSuffix).digest('hex').slice(0, 8)
+                : 'empty';
+            logger.info({
+                sessionKey,
+                stable_prefix_hash: stableHash,
+                volatile_suffix_hash: volatileHash,
+                stable_chars: stablePrefixParts.reduce((n, s) => n + s.length, 0),
+                volatile_chars: volatileSuffix.length,
+                allowed_tool_count: allowedTools.length,
+            }, 'cache_debug: prompt structure for this query');
+        }
         // If there is no volatile content, a plain string keeps the call simple
         // and behaves identically for the cache. Only use the array form when
         // we actually have dynamic content to split off.
@@ -2574,15 +2617,25 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             && !isPlanStep
             && (toolRoute.inheritFullClaudeEnv || toolRoute.fullSurface);
         const isolateClaudeConfig = !toolRoute.fullSurface;
+        // Sort tool surface for deterministic cache key. The Anthropic prompt
+        // cache hashes the entire tools/system prefix; insertion-order
+        // serialization is fragile if routing logic ever pushes in a
+        // different order between calls — silent cache miss. Sorting also
+        // lets multiple jobs that arrived at the same tool set (via
+        // different routing paths) share a cache entry.
+        if (!toolsDisabledForCall) {
+            allowedTools.sort();
+        }
         const mcpServerNames = toolsDisabledForCall
             ? []
-            : [TOOLS_SERVER, ...Object.keys(externalMcpServers), ...Object.keys(composioMcpServers)];
+            : [TOOLS_SERVER, ...Object.keys(externalMcpServers).sort(), ...Object.keys(composioMcpServers).sort()];
         const clementineToolPrefix = `mcp__${TOOLS_SERVER}__`;
         const clementineToolAllowlist = toolRoute.fullSurface
             ? '*'
             : allowedTools
                 .filter(t => t.startsWith(clementineToolPrefix))
                 .map(t => t.slice(clementineToolPrefix.length))
+                .sort()
                 .join(',');
         const clementineToolAllowlistCount = clementineToolAllowlist === '*'
             ? CLEMENTINE_ALL_TOOL_NAMES.length
@@ -4580,6 +4633,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     const blocks = getContentBlocks(message);
                     summaryText += extractText(blocks);
                 }
+                else if (message.type === 'result') {
+                    // Make session-summarization cost visible in usage_log. Without
+                    // this, every session rotation spawned a Sonnet summarize call
+                    // that didn't appear in any metric.
+                    this.logQueryResult(message, 'summarize', `summarize:${sessionKey}`);
+                }
             }
             if (summaryText.trim()) {
                 if (this.memoryStore) {
@@ -4950,6 +5009,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             });
             const collectedText = [];
             for await (const message of stream) {
+                if (message.type === 'result') {
+                    // Auto-memory extraction fires after every substantive
+                    // exchange. Before this log call, its cost was invisible in
+                    // usage_log — a per-user-message Sonnet pass running silently.
+                    this.logQueryResult(message, 'auto_memory', `auto-memory:${sessionKey ?? 'unknown'}`, undefined, profile?.slug);
+                    continue;
+                }
                 if (message.type === 'assistant') {
                     const blocks = getContentBlocks(message);
                     for (const block of blocks) {
@@ -5608,6 +5674,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     const blocks = getContentBlocks(message);
                     responseText += extractText(blocks);
                 }
+                else if (message.type === 'result') {
+                    // Cron reflection (post-task quality check) fires after every
+                    // cron run. Cheap (Haiku, 1 turn, ~1KB) but should be visible.
+                    this.logQueryResult(message, 'cron_reflection', `reflection:${jobName}`, jobName);
+                }
             }
             if (responseText.trim()) {
                 const reflection = JSON.parse(responseText.trim());

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.36",
+  "version": "1.18.38",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",