clementine-agent 1.18.36 → 1.18.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1446,8 +1446,17 @@ Large tool outputs blow the context window and rotate your session mid-task —
1446
1446
  if (agentsEntry)
1447
1447
  parts.push(agentsEntry.content);
1448
1448
  }
1449
+ // ── Per-session-volatile content goes to volatileParts (post-cache-boundary) ──
1450
+ // Anthropic's prompt-caching guidance is explicit: the cache key is a prefix
1451
+ // hash, so anything that changes between turns must sit AFTER the
1452
+ // breakpoint. The blocks below — retrieved context, working memory,
1453
+ // MEMORY.md, today's notes, yesterday's summary, recent conversations —
1454
+ // all change within a single 5-minute cache TTL window during an
1455
+ // active session. Putting them in the stable prefix caused ~80 KB of
1456
+ // cache_creation per session-content change. After this refactor the
1457
+ // stable prefix stays byte-identical across calls.
1449
1458
  if (retrievalContext) {
1450
- parts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
1459
+ volatileParts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
1451
1460
  `*When retrieved context contains information from previous conversations relevant to the current topic, naturally reference it. ` +
1452
1461
  `If the user mentions a person and memory shows their last known status or project, weave that in conversationally. ` +
1453
1462
  `Only reference if genuinely relevant — do not force callbacks to old context.*`);
@@ -1460,7 +1469,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
1460
1469
  const wmContent = fs.readFileSync(_wmFileFallback, 'utf-8').trim();
1461
1470
  if (wmContent) {
1462
1471
  const truncated = isAutonomous ? wmContent.slice(0, 1500) : wmContent;
1463
- parts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
1472
+ volatileParts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
1464
1473
  }
1465
1474
  }
1466
1475
  catch { /* non-critical */ }
@@ -1470,10 +1479,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
1470
1479
  // Autonomous runs get truncated memory — just enough for context
1471
1480
  if (isAutonomous) {
1472
1481
  const truncated = memoryEntry.content.slice(0, 2000);
1473
- parts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
1482
+ volatileParts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
1474
1483
  }
1475
1484
  else {
1476
- parts.push(`## Current Memory\n\n${memoryEntry.content}`);
1485
+ volatileParts.push(`## Current Memory\n\n${memoryEntry.content}`);
1477
1486
  }
1478
1487
  }
1479
1488
  }
@@ -1484,12 +1493,12 @@ Large tool outputs blow the context window and rotate your session mid-task —
1484
1493
  this.promptCache.watch(agentMemPath);
1485
1494
  const agentMemEntry = this.promptCache.get(agentMemPath);
1486
1495
  if (agentMemEntry) {
1487
- parts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
1496
+ volatileParts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
1488
1497
  }
1489
1498
  }
1490
1499
  const todayEntry = !skipAmbientContext ? this.promptCache.get(todayPath) : null;
1491
1500
  if (todayEntry) {
1492
- parts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
1501
+ volatileParts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
1493
1502
  }
1494
1503
  // Skip yesterday's notes and recent conversation summaries for autonomous runs
1495
1504
  if (!isAutonomous && !skipAmbientContext) {
@@ -1501,7 +1510,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
1501
1510
  const yEntry = this.promptCache.get(yPath);
1502
1511
  if (yEntry && yEntry.content.includes('## Summary')) {
1503
1512
  const summary = yEntry.content.slice(yEntry.content.indexOf('## Summary'));
1504
- parts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
1513
+ volatileParts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
1505
1514
  }
1506
1515
  }
1507
1516
  }
@@ -1513,7 +1522,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
1513
1522
  const ts = (s.createdAt ?? 'unknown').slice(0, 16);
1514
1523
  return `### ${ts}\n${s.summary}`;
1515
1524
  });
1516
- parts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
1525
+ volatileParts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
1517
1526
  }
1518
1527
  }
1519
1528
  catch {
@@ -1522,8 +1531,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
1522
1531
  }
1523
1532
  }
1524
1533
  if (isAutonomous) {
1525
- // Minimal vault reference for heartbeats/cron — they know their tools
1526
- parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, ${todayISO()}.md (today), TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
1534
+ // Minimal vault reference for heartbeats/cron — they know their tools.
1535
+ // No date reference here: today's date string in the stable prefix
1536
+ // would invalidate the prompt cache once per day.
1537
+ parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, today's daily note, TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
1527
1538
  // Deviation rules — tiered autonomy for handling unexpected work during cron/heartbeat
1528
1539
  parts.push(`## Deviation Rules (Tiered Autonomy)
1529
1540
 
@@ -1554,7 +1565,7 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
1554
1565
  **File tools:** Read, Write, Edit, Glob, Grep for direct access.
1555
1566
 
1556
1567
  **Folders:** 00-System (SOUL/MEMORY/AGENTS.md), 01-Daily-Notes (YYYY-MM-DD.md), 02-People, 03-Projects, 04-Topics, 05-Tasks/TASKS.md, 06-Templates, 07-Inbox.
1557
- **Key files:** MEMORY.md (long-term), ${todayISO()}.md (today), TASKS.md (tasks).
1568
+ **Key files:** MEMORY.md (long-term), today's daily note, TASKS.md (tasks).
1558
1569
 
1559
1570
  **Task IDs:** \`{T-001}\`, subtasks \`{T-001.1}\`. Recurring tasks auto-create next copy on completion.
1560
1571
 
@@ -1629,21 +1640,19 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1629
1640
  parts.push(`Linked projects:\n${projectDetails.join('\n')}`);
1630
1641
  }
1631
1642
  }
1632
- // Inject hot corrections (explicit behavioral corrections from recent sessions)
1643
+ // Recent Corrections + feedback signals both refresh as the user
1644
+ // gives feedback during a session. Putting them in volatile keeps the
1645
+ // stable prefix cache-stable across feedback turns. This is the same per-message
1646
+ // anti-pattern that OpenClaw issue #20894 documented as a 100x cost
1647
+ // amplifier.
1633
1648
  if (this.hotCorrections.length > 0 && !lightweightTurn) {
1634
1649
  const recentCutoff = Date.now() - 24 * 60 * 60 * 1000; // last 24 hours
1635
1650
  const recent = this.hotCorrections.filter(c => new Date(c.timestamp).getTime() > recentCutoff);
1636
1651
  if (recent.length > 0) {
1637
1652
  const lines = recent.map(c => `- [${c.category}] ${c.correction}`);
1638
- parts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
1653
+ volatileParts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
1639
1654
  }
1640
1655
  }
1641
- // Inject recent feedback signals (closes the feedback → behavior loop).
1642
- // Without this block, user thumbs-down + comments live in the feedback
1643
- // table and never reach the agent's awareness — only the skill-suppress
1644
- // filter consumed them. We surface aggregates + the last few commented
1645
- // negatives so the agent can self-adjust on the next turn. Skipped when
1646
- // there's nothing to report (no noise).
1647
1656
  if (this.memoryStore?.getRecentFeedbackSignals && !lightweightTurn) {
1648
1657
  try {
1649
1658
  const sig = this.memoryStore.getRecentFeedbackSignals({ days: 14, limit: 3 });
@@ -1659,7 +1668,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1659
1668
  lines.push(`- (${n.channel}) ${comment}`);
1660
1669
  }
1661
1670
  }
1662
- parts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
1671
+ volatileParts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
1663
1672
  }
1664
1673
  }
1665
1674
  catch { /* non-fatal */ }
@@ -1708,7 +1717,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1708
1717
  skillBlock += `\n\n**Reference files:**\n${attParts.join('\n\n')}`;
1709
1718
  }
1710
1719
  }
1711
- parts.push(skillBlock);
1720
+ // Skill matches depend on the user's last message + the live
1721
+ // suppression list; both refresh per turn. Volatile.
1722
+ volatileParts.push(skillBlock);
1712
1723
  }
1713
1724
  }
1714
1725
  catch { /* non-fatal — skills dir may not exist */ }
@@ -1730,7 +1741,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1730
1741
  parts.push(`## Agent-Specific Preferences (${profile.slug})\n\n${agentPrefs.data.preferences}`);
1731
1742
  }
1732
1743
  }
1733
- // User Theory of Mind — structured user model
1744
+ // User Theory of Mind — structured user model. The model file
1745
+ // updates as the user's preferences/priorities are learned, so
1746
+ // its content is volatile within a session.
1734
1747
  const userModelFile = path.join(VAULT_DIR, '00-System', 'USER_MODEL.md');
1735
1748
  this.promptCache.watch(userModelFile);
1736
1749
  const userModel = this.promptCache.get(userModelFile);
@@ -1740,7 +1753,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1740
1753
  const comm = userModel.data.communication ? `Communication: ${Object.entries(userModel.data.communication).map(([k, v]) => `${k}=${v}`).join(', ')}` : '';
1741
1754
  const modelParts = [expertise, priorities, comm].filter(Boolean);
1742
1755
  if (modelParts.length > 0) {
1743
- parts.push(`## User Context\n\n${modelParts.join('\n')}`);
1756
+ volatileParts.push(`## User Context\n\n${modelParts.join('\n')}`);
1744
1757
  }
1745
1758
  }
1746
1759
  // Proactive feedback capture
@@ -2302,6 +2315,26 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2302
2315
  const volatileSuffix = volatilePromptPart && volatilePromptPart.trim().length > 0
2303
2316
  ? volatilePromptPart
2304
2317
  : '';
2318
+ // Debug-mode: log a short hash of the stable prefix + volatile suffix
2319
+ // per query. When CLEMENTINE_DEBUG_CACHE=1, mismatched stable hashes
2320
+ // across consecutive turns of the same session indicate a regression
2321
+ // where volatile content silently leaked back into the cached prefix.
2322
+ // No-op (no allocation) in normal mode.
2323
+ if (process.env.CLEMENTINE_DEBUG_CACHE === '1') {
2324
+ const { createHash } = await import('node:crypto');
2325
+ const stableHash = createHash('sha1').update(stablePrefixParts.join('\n\n---\n\n')).digest('hex').slice(0, 8);
2326
+ const volatileHash = volatileSuffix
2327
+ ? createHash('sha1').update(volatileSuffix).digest('hex').slice(0, 8)
2328
+ : 'empty';
2329
+ logger.info({
2330
+ sessionKey,
2331
+ stable_prefix_hash: stableHash,
2332
+ volatile_suffix_hash: volatileHash,
2333
+ stable_chars: stablePrefixParts.reduce((n, s) => n + s.length, 0),
2334
+ volatile_chars: volatileSuffix.length,
2335
+ allowed_tool_count: allowedTools.length,
2336
+ }, 'cache_debug: prompt structure for this query');
2337
+ }
2305
2338
  // If there is no volatile content, a plain string keeps the call simple
2306
2339
  // and behaves identically for the cache. Only use the array form when
2307
2340
  // we actually have dynamic content to split off.
@@ -2574,15 +2607,25 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2574
2607
  && !isPlanStep
2575
2608
  && (toolRoute.inheritFullClaudeEnv || toolRoute.fullSurface);
2576
2609
  const isolateClaudeConfig = !toolRoute.fullSurface;
2610
+ // Sort tool surface for deterministic cache key. The Anthropic prompt
2611
+ // cache hashes the entire tools/system prefix; insertion-order
2612
+ // serialization is fragile if routing logic ever pushes in a
2613
+ // different order between calls — silent cache miss. Sorting also
2614
+ // lets multiple jobs that arrived at the same tool set (via
2615
+ // different routing paths) share a cache entry.
2616
+ if (!toolsDisabledForCall) {
2617
+ allowedTools.sort();
2618
+ }
2577
2619
  const mcpServerNames = toolsDisabledForCall
2578
2620
  ? []
2579
- : [TOOLS_SERVER, ...Object.keys(externalMcpServers), ...Object.keys(composioMcpServers)];
2621
+ : [TOOLS_SERVER, ...Object.keys(externalMcpServers).sort(), ...Object.keys(composioMcpServers).sort()];
2580
2622
  const clementineToolPrefix = `mcp__${TOOLS_SERVER}__`;
2581
2623
  const clementineToolAllowlist = toolRoute.fullSurface
2582
2624
  ? '*'
2583
2625
  : allowedTools
2584
2626
  .filter(t => t.startsWith(clementineToolPrefix))
2585
2627
  .map(t => t.slice(clementineToolPrefix.length))
2628
+ .sort()
2586
2629
  .join(',');
2587
2630
  const clementineToolAllowlistCount = clementineToolAllowlist === '*'
2588
2631
  ? CLEMENTINE_ALL_TOOL_NAMES.length
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.36",
3
+ "version": "1.18.37",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",