clementine-agent 1.18.36 → 1.18.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -417,7 +417,17 @@ const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
417
417
  const MAX_SESSION_EXCHANGES = 40;
418
418
  const SESSION_EXPIRY_MS = 24 * 60 * 60 * 1000;
419
419
  const AUTO_MEMORY_MIN_LENGTH = 80;
420
- const AUTO_MEMORY_MODEL = MODELS.sonnet;
420
+ // Model used by the post-exchange memory extractor + the conversation
421
+ // summarizer. Both are routine "read this exchange, extract facts, call
422
+ // memory_write with structured JSON" tasks — Haiku handles them fine and
423
+ // they fire on EVERY substantive exchange, so the multiplier matters.
424
+ // Override with CLEMENTINE_AUTO_MEMORY_MODEL=sonnet (or =opus) if you
425
+ // observe a drop in extraction quality.
426
+ const AUTO_MEMORY_MODEL = process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('sonnet')
427
+ ? MODELS.sonnet
428
+ : process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('opus')
429
+ ? MODELS.opus
430
+ : MODELS.haiku;
421
431
  const OWNER = OWNER_NAME || 'the user';
422
432
  const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
423
433
  const TOOLS_SERVER = `${ASSISTANT_NAME.toLowerCase()}-tools`;
@@ -1446,8 +1456,17 @@ Large tool outputs blow the context window and rotate your session mid-task —
1446
1456
  if (agentsEntry)
1447
1457
  parts.push(agentsEntry.content);
1448
1458
  }
1459
+ // ── Per-session-volatile content goes to volatileParts (post-cache-boundary) ──
1460
+ // Anthropic's prompt-caching guidance is explicit: the cache is keyed on
1461
+ // the prompt prefix, so anything that changes between turns must sit
1462
+ // AFTER the breakpoint. The blocks below — retrieved context, working
1463
+ // memory, MEMORY.md, agent memory, today's notes, yesterday's summary,
1464
+ // recent conversations — can all change within a 5-minute cache TTL window during an
1465
+ // active session. Putting them in the stable prefix caused ~80 KB of
1466
+ // cache_creation per session-content change. After this refactor the
1467
+ // stable prefix stays byte-identical across calls.
1449
1468
  if (retrievalContext) {
1450
- parts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
1469
+ volatileParts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
1451
1470
  `*When retrieved context contains information from previous conversations relevant to the current topic, naturally reference it. ` +
1452
1471
  `If the user mentions a person and memory shows their last known status or project, weave that in conversationally. ` +
1453
1472
  `Only reference if genuinely relevant — do not force callbacks to old context.*`);
@@ -1460,7 +1479,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
1460
1479
  const wmContent = fs.readFileSync(_wmFileFallback, 'utf-8').trim();
1461
1480
  if (wmContent) {
1462
1481
  const truncated = isAutonomous ? wmContent.slice(0, 1500) : wmContent;
1463
- parts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
1482
+ volatileParts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
1464
1483
  }
1465
1484
  }
1466
1485
  catch { /* non-critical */ }
@@ -1470,10 +1489,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
1470
1489
  // Autonomous runs get truncated memory — just enough for context
1471
1490
  if (isAutonomous) {
1472
1491
  const truncated = memoryEntry.content.slice(0, 2000);
1473
- parts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
1492
+ volatileParts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
1474
1493
  }
1475
1494
  else {
1476
- parts.push(`## Current Memory\n\n${memoryEntry.content}`);
1495
+ volatileParts.push(`## Current Memory\n\n${memoryEntry.content}`);
1477
1496
  }
1478
1497
  }
1479
1498
  }
@@ -1484,12 +1503,12 @@ Large tool outputs blow the context window and rotate your session mid-task —
1484
1503
  this.promptCache.watch(agentMemPath);
1485
1504
  const agentMemEntry = this.promptCache.get(agentMemPath);
1486
1505
  if (agentMemEntry) {
1487
- parts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
1506
+ volatileParts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
1488
1507
  }
1489
1508
  }
1490
1509
  const todayEntry = !skipAmbientContext ? this.promptCache.get(todayPath) : null;
1491
1510
  if (todayEntry) {
1492
- parts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
1511
+ volatileParts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
1493
1512
  }
1494
1513
  // Skip yesterday's notes and recent conversation summaries for autonomous runs
1495
1514
  if (!isAutonomous && !skipAmbientContext) {
@@ -1501,7 +1520,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
1501
1520
  const yEntry = this.promptCache.get(yPath);
1502
1521
  if (yEntry && yEntry.content.includes('## Summary')) {
1503
1522
  const summary = yEntry.content.slice(yEntry.content.indexOf('## Summary'));
1504
- parts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
1523
+ volatileParts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
1505
1524
  }
1506
1525
  }
1507
1526
  }
@@ -1513,7 +1532,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
1513
1532
  const ts = (s.createdAt ?? 'unknown').slice(0, 16);
1514
1533
  return `### ${ts}\n${s.summary}`;
1515
1534
  });
1516
- parts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
1535
+ volatileParts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
1517
1536
  }
1518
1537
  }
1519
1538
  catch {
@@ -1522,8 +1541,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
1522
1541
  }
1523
1542
  }
1524
1543
  if (isAutonomous) {
1525
- // Minimal vault reference for heartbeats/cron — they know their tools
1526
- parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, ${todayISO()}.md (today), TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
1544
+ // Minimal vault reference for heartbeats/cron — they know their tools.
1545
+ // No date reference here: today's date string in the stable prefix
1546
+ // would invalidate the prompt cache once per day.
1547
+ parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, today's daily note, TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
1527
1548
  // Deviation rules — tiered autonomy for handling unexpected work during cron/heartbeat
1528
1549
  parts.push(`## Deviation Rules (Tiered Autonomy)
1529
1550
 
@@ -1554,7 +1575,7 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
1554
1575
  **File tools:** Read, Write, Edit, Glob, Grep for direct access.
1555
1576
 
1556
1577
  **Folders:** 00-System (SOUL/MEMORY/AGENTS.md), 01-Daily-Notes (YYYY-MM-DD.md), 02-People, 03-Projects, 04-Topics, 05-Tasks/TASKS.md, 06-Templates, 07-Inbox.
1557
- **Key files:** MEMORY.md (long-term), ${todayISO()}.md (today), TASKS.md (tasks).
1578
+ **Key files:** MEMORY.md (long-term), today's daily note, TASKS.md (tasks).
1558
1579
 
1559
1580
  **Task IDs:** \`{T-001}\`, subtasks \`{T-001.1}\`. Recurring tasks auto-create next copy on completion.
1560
1581
 
@@ -1629,21 +1650,19 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1629
1650
  parts.push(`Linked projects:\n${projectDetails.join('\n')}`);
1630
1651
  }
1631
1652
  }
1632
- // Inject hot corrections (explicit behavioral corrections from recent sessions)
1653
+ // Recent Corrections + feedback signals both refresh as the user
1654
+ // gives feedback during a session. Putting them in volatile keeps the
1655
+ // stable prefix cache-stable across feedback turns. Leaving them in the
1656
+ // prefix is the same per-message anti-pattern that OpenClaw issue #20894
1657
+ // documented as a 100x cost amplifier.
1633
1658
  if (this.hotCorrections.length > 0 && !lightweightTurn) {
1634
1659
  const recentCutoff = Date.now() - 24 * 60 * 60 * 1000; // last 24 hours
1635
1660
  const recent = this.hotCorrections.filter(c => new Date(c.timestamp).getTime() > recentCutoff);
1636
1661
  if (recent.length > 0) {
1637
1662
  const lines = recent.map(c => `- [${c.category}] ${c.correction}`);
1638
- parts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
1663
+ volatileParts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
1639
1664
  }
1640
1665
  }
1641
- // Inject recent feedback signals (closes the feedback → behavior loop).
1642
- // Without this block, user thumbs-down + comments live in the feedback
1643
- // table and never reach the agent's awareness — only the skill-suppress
1644
- // filter consumed them. We surface aggregates + the last few commented
1645
- // negatives so the agent can self-adjust on the next turn. Skipped when
1646
- // there's nothing to report (no noise).
1647
1666
  if (this.memoryStore?.getRecentFeedbackSignals && !lightweightTurn) {
1648
1667
  try {
1649
1668
  const sig = this.memoryStore.getRecentFeedbackSignals({ days: 14, limit: 3 });
@@ -1659,7 +1678,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1659
1678
  lines.push(`- (${n.channel}) ${comment}`);
1660
1679
  }
1661
1680
  }
1662
- parts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
1681
+ volatileParts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
1663
1682
  }
1664
1683
  }
1665
1684
  catch { /* non-fatal */ }
@@ -1708,7 +1727,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1708
1727
  skillBlock += `\n\n**Reference files:**\n${attParts.join('\n\n')}`;
1709
1728
  }
1710
1729
  }
1711
- parts.push(skillBlock);
1730
+ // Skill matches depend on the user's last message + the live
1731
+ // suppression list; both refresh per turn. Volatile.
1732
+ volatileParts.push(skillBlock);
1712
1733
  }
1713
1734
  }
1714
1735
  catch { /* non-fatal — skills dir may not exist */ }
@@ -1730,7 +1751,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1730
1751
  parts.push(`## Agent-Specific Preferences (${profile.slug})\n\n${agentPrefs.data.preferences}`);
1731
1752
  }
1732
1753
  }
1733
- // User Theory of Mind — structured user model
1754
+ // User Theory of Mind — structured user model. The model file
1755
+ // updates as the user's preferences/priorities are learned, so
1756
+ // its content is volatile within a session.
1734
1757
  const userModelFile = path.join(VAULT_DIR, '00-System', 'USER_MODEL.md');
1735
1758
  this.promptCache.watch(userModelFile);
1736
1759
  const userModel = this.promptCache.get(userModelFile);
@@ -1740,7 +1763,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1740
1763
  const comm = userModel.data.communication ? `Communication: ${Object.entries(userModel.data.communication).map(([k, v]) => `${k}=${v}`).join(', ')}` : '';
1741
1764
  const modelParts = [expertise, priorities, comm].filter(Boolean);
1742
1765
  if (modelParts.length > 0) {
1743
- parts.push(`## User Context\n\n${modelParts.join('\n')}`);
1766
+ volatileParts.push(`## User Context\n\n${modelParts.join('\n')}`);
1744
1767
  }
1745
1768
  }
1746
1769
  // Proactive feedback capture
@@ -2302,6 +2325,26 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2302
2325
  const volatileSuffix = volatilePromptPart && volatilePromptPart.trim().length > 0
2303
2326
  ? volatilePromptPart
2304
2327
  : '';
2328
+ // Debug-mode: log a short hash of the stable prefix + volatile suffix
2329
+ // per query. When CLEMENTINE_DEBUG_CACHE=1, mismatched stable hashes
2330
+ // across consecutive turns of the same session indicate a regression
2331
+ // where volatile content silently leaked back into the cached prefix.
2332
+ // No-op (no allocation) in normal mode.
2333
+ if (process.env.CLEMENTINE_DEBUG_CACHE === '1') {
2334
+ const { createHash } = await import('node:crypto');
2335
+ const stableHash = createHash('sha1').update(stablePrefixParts.join('\n\n---\n\n')).digest('hex').slice(0, 8);
2336
+ const volatileHash = volatileSuffix
2337
+ ? createHash('sha1').update(volatileSuffix).digest('hex').slice(0, 8)
2338
+ : 'empty';
2339
+ logger.info({
2340
+ sessionKey,
2341
+ stable_prefix_hash: stableHash,
2342
+ volatile_suffix_hash: volatileHash,
2343
+ stable_chars: stablePrefixParts.reduce((n, s) => n + s.length, 0),
2344
+ volatile_chars: volatileSuffix.length,
2345
+ allowed_tool_count: allowedTools.length,
2346
+ }, 'cache_debug: prompt structure for this query');
2347
+ }
2305
2348
  // If there is no volatile content, a plain string keeps the call simple
2306
2349
  // and behaves identically for the cache. Only use the array form when
2307
2350
  // we actually have dynamic content to split off.
@@ -2574,15 +2617,25 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2574
2617
  && !isPlanStep
2575
2618
  && (toolRoute.inheritFullClaudeEnv || toolRoute.fullSurface);
2576
2619
  const isolateClaudeConfig = !toolRoute.fullSurface;
2620
+ // Sort tool surface for deterministic cache key. The Anthropic prompt
2621
+ // cache hashes the entire tools/system prefix; insertion-order
2622
+ // serialization is fragile if routing logic ever pushes in a
2623
+ // different order between calls — silent cache miss. Sorting also
2624
+ // lets multiple jobs that arrived at the same tool set (via
2625
+ // different routing paths) share a cache entry.
2626
+ if (!toolsDisabledForCall) {
2627
+ allowedTools.sort();
2628
+ }
2577
2629
  const mcpServerNames = toolsDisabledForCall
2578
2630
  ? []
2579
- : [TOOLS_SERVER, ...Object.keys(externalMcpServers), ...Object.keys(composioMcpServers)];
2631
+ : [TOOLS_SERVER, ...Object.keys(externalMcpServers).sort(), ...Object.keys(composioMcpServers).sort()];
2580
2632
  const clementineToolPrefix = `mcp__${TOOLS_SERVER}__`;
2581
2633
  const clementineToolAllowlist = toolRoute.fullSurface
2582
2634
  ? '*'
2583
2635
  : allowedTools
2584
2636
  .filter(t => t.startsWith(clementineToolPrefix))
2585
2637
  .map(t => t.slice(clementineToolPrefix.length))
2638
+ .sort()
2586
2639
  .join(',');
2587
2640
  const clementineToolAllowlistCount = clementineToolAllowlist === '*'
2588
2641
  ? CLEMENTINE_ALL_TOOL_NAMES.length
@@ -4580,6 +4633,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
4580
4633
  const blocks = getContentBlocks(message);
4581
4634
  summaryText += extractText(blocks);
4582
4635
  }
4636
+ else if (message.type === 'result') {
4637
+ // Make session-summarization cost visible in usage_log. Without
4638
+ // this, every session rotation spawned a Sonnet summarize call
4639
+ // that didn't appear in any metric.
4640
+ this.logQueryResult(message, 'summarize', `summarize:${sessionKey}`);
4641
+ }
4583
4642
  }
4584
4643
  if (summaryText.trim()) {
4585
4644
  if (this.memoryStore) {
@@ -4950,6 +5009,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
4950
5009
  });
4951
5010
  const collectedText = [];
4952
5011
  for await (const message of stream) {
5012
+ if (message.type === 'result') {
5013
+ // Auto-memory extraction fires after every substantive
5014
+ // exchange. Before this log call, its cost was invisible in
5015
+ // usage_log — a per-user-message Sonnet pass running silently.
5016
+ this.logQueryResult(message, 'auto_memory', `auto-memory:${sessionKey ?? 'unknown'}`, undefined, profile?.slug);
5017
+ continue;
5018
+ }
4953
5019
  if (message.type === 'assistant') {
4954
5020
  const blocks = getContentBlocks(message);
4955
5021
  for (const block of blocks) {
@@ -5608,6 +5674,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
5608
5674
  const blocks = getContentBlocks(message);
5609
5675
  responseText += extractText(blocks);
5610
5676
  }
5677
+ else if (message.type === 'result') {
5678
+ // Cron reflection (post-task quality check) fires after every
5679
+ // cron run. Cheap (Haiku, 1 turn, ~1KB) but should be visible.
5680
+ this.logQueryResult(message, 'cron_reflection', `reflection:${jobName}`, jobName);
5681
+ }
5611
5682
  }
5612
5683
  if (responseText.trim()) {
5613
5684
  const reflection = JSON.parse(responseText.trim());
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.36",
3
+ "version": "1.18.38",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",