clementine-agent 1.18.36 → 1.18.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +96 -25
- package/package.json +1 -1
package/dist/agent/assistant.js
CHANGED
|
@@ -417,7 +417,17 @@ const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
|
|
|
417
417
|
const MAX_SESSION_EXCHANGES = 40;
|
|
418
418
|
const SESSION_EXPIRY_MS = 24 * 60 * 60 * 1000;
|
|
419
419
|
const AUTO_MEMORY_MIN_LENGTH = 80;
|
|
420
|
-
|
|
420
|
+
// Model used by the post-exchange memory extractor + the conversation
|
|
421
|
+
// summarizer. Both are routine "read this exchange, extract facts, call
|
|
422
|
+
// memory_write with structured JSON" tasks — Haiku handles them fine and
|
|
423
|
+
// they fire on EVERY substantive exchange, so the multiplier matters.
|
|
424
|
+
// Override with CLEMENTINE_AUTO_MEMORY_MODEL=sonnet (or opus) if you observe
|
|
425
|
+
// a drop in extraction quality.
|
|
426
|
+
const AUTO_MEMORY_MODEL = process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('sonnet')
|
|
427
|
+
? MODELS.sonnet
|
|
428
|
+
: process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('opus')
|
|
429
|
+
? MODELS.opus
|
|
430
|
+
: MODELS.haiku;
|
|
421
431
|
const OWNER = OWNER_NAME || 'the user';
|
|
422
432
|
const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
|
|
423
433
|
const TOOLS_SERVER = `${ASSISTANT_NAME.toLowerCase()}-tools`;
|
|
@@ -1446,8 +1456,17 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1446
1456
|
if (agentsEntry)
|
|
1447
1457
|
parts.push(agentsEntry.content);
|
|
1448
1458
|
}
|
|
1459
|
+
// ── Per-session-volatile content goes to volatileParts (post-cache-boundary) ──
|
|
1460
|
+
// Anthropic's prompt-caching guidance is explicit: cache is a prefix
|
|
1461
|
+
// hash, so anything that changes between turns must sit AFTER the
|
|
1462
|
+
// breakpoint. The blocks below — retrieved context, working memory,
|
|
1463
|
+
// MEMORY.md, today's notes, yesterday's summary, recent conversations —
|
|
1464
|
+
// all change within a single 5-minute cache TTL window during an
|
|
1465
|
+
// active session. Putting them in the stable prefix caused ~80 KB of
|
|
1466
|
+
// cache_creation per session-content change. After this refactor the
|
|
1467
|
+
// stable prefix stays byte-identical across calls.
|
|
1449
1468
|
if (retrievalContext) {
|
|
1450
|
-
|
|
1469
|
+
volatileParts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
|
|
1451
1470
|
`*When retrieved context contains information from previous conversations relevant to the current topic, naturally reference it. ` +
|
|
1452
1471
|
`If the user mentions a person and memory shows their last known status or project, weave that in conversationally. ` +
|
|
1453
1472
|
`Only reference if genuinely relevant — do not force callbacks to old context.*`);
|
|
@@ -1460,7 +1479,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1460
1479
|
const wmContent = fs.readFileSync(_wmFileFallback, 'utf-8').trim();
|
|
1461
1480
|
if (wmContent) {
|
|
1462
1481
|
const truncated = isAutonomous ? wmContent.slice(0, 1500) : wmContent;
|
|
1463
|
-
|
|
1482
|
+
volatileParts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
|
|
1464
1483
|
}
|
|
1465
1484
|
}
|
|
1466
1485
|
catch { /* non-critical */ }
|
|
@@ -1470,10 +1489,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1470
1489
|
// Autonomous runs get truncated memory — just enough for context
|
|
1471
1490
|
if (isAutonomous) {
|
|
1472
1491
|
const truncated = memoryEntry.content.slice(0, 2000);
|
|
1473
|
-
|
|
1492
|
+
volatileParts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
|
|
1474
1493
|
}
|
|
1475
1494
|
else {
|
|
1476
|
-
|
|
1495
|
+
volatileParts.push(`## Current Memory\n\n${memoryEntry.content}`);
|
|
1477
1496
|
}
|
|
1478
1497
|
}
|
|
1479
1498
|
}
|
|
@@ -1484,12 +1503,12 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1484
1503
|
this.promptCache.watch(agentMemPath);
|
|
1485
1504
|
const agentMemEntry = this.promptCache.get(agentMemPath);
|
|
1486
1505
|
if (agentMemEntry) {
|
|
1487
|
-
|
|
1506
|
+
volatileParts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
|
|
1488
1507
|
}
|
|
1489
1508
|
}
|
|
1490
1509
|
const todayEntry = !skipAmbientContext ? this.promptCache.get(todayPath) : null;
|
|
1491
1510
|
if (todayEntry) {
|
|
1492
|
-
|
|
1511
|
+
volatileParts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
|
|
1493
1512
|
}
|
|
1494
1513
|
// Skip yesterday's notes and recent conversation summaries for autonomous runs
|
|
1495
1514
|
if (!isAutonomous && !skipAmbientContext) {
|
|
@@ -1501,7 +1520,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1501
1520
|
const yEntry = this.promptCache.get(yPath);
|
|
1502
1521
|
if (yEntry && yEntry.content.includes('## Summary')) {
|
|
1503
1522
|
const summary = yEntry.content.slice(yEntry.content.indexOf('## Summary'));
|
|
1504
|
-
|
|
1523
|
+
volatileParts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
|
|
1505
1524
|
}
|
|
1506
1525
|
}
|
|
1507
1526
|
}
|
|
@@ -1513,7 +1532,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1513
1532
|
const ts = (s.createdAt ?? 'unknown').slice(0, 16);
|
|
1514
1533
|
return `### ${ts}\n${s.summary}`;
|
|
1515
1534
|
});
|
|
1516
|
-
|
|
1535
|
+
volatileParts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
|
|
1517
1536
|
}
|
|
1518
1537
|
}
|
|
1519
1538
|
catch {
|
|
@@ -1522,8 +1541,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1522
1541
|
}
|
|
1523
1542
|
}
|
|
1524
1543
|
if (isAutonomous) {
|
|
1525
|
-
// Minimal vault reference for heartbeats/cron — they know their tools
|
|
1526
|
-
|
|
1544
|
+
// Minimal vault reference for heartbeats/cron — they know their tools.
|
|
1545
|
+
// No date reference here: today's date string in the stable prefix
|
|
1546
|
+
// would invalidate the prompt cache once per day.
|
|
1547
|
+
parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, today's daily note, TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
|
|
1527
1548
|
// Deviation rules — tiered autonomy for handling unexpected work during cron/heartbeat
|
|
1528
1549
|
parts.push(`## Deviation Rules (Tiered Autonomy)
|
|
1529
1550
|
|
|
@@ -1554,7 +1575,7 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
|
|
|
1554
1575
|
**File tools:** Read, Write, Edit, Glob, Grep for direct access.
|
|
1555
1576
|
|
|
1556
1577
|
**Folders:** 00-System (SOUL/MEMORY/AGENTS.md), 01-Daily-Notes (YYYY-MM-DD.md), 02-People, 03-Projects, 04-Topics, 05-Tasks/TASKS.md, 06-Templates, 07-Inbox.
|
|
1557
|
-
**Key files:** MEMORY.md (long-term),
|
|
1578
|
+
**Key files:** MEMORY.md (long-term), today's daily note, TASKS.md (tasks).
|
|
1558
1579
|
|
|
1559
1580
|
**Task IDs:** \`{T-001}\`, subtasks \`{T-001.1}\`. Recurring tasks auto-create next copy on completion.
|
|
1560
1581
|
|
|
@@ -1629,21 +1650,19 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1629
1650
|
parts.push(`Linked projects:\n${projectDetails.join('\n')}`);
|
|
1630
1651
|
}
|
|
1631
1652
|
}
|
|
1632
|
-
//
|
|
1653
|
+
// Recent Corrections + feedback signals — both refresh as the user
|
|
1654
|
+
// gives feedback during a session. Putting them in volatile keeps the
|
|
1655
|
+
// stable prefix cache-stable across feedback turns. Same per-message
|
|
1656
|
+
// anti-pattern that OpenClaw issue #20894 documented as a 100x cost
|
|
1657
|
+
// amplifier.
|
|
1633
1658
|
if (this.hotCorrections.length > 0 && !lightweightTurn) {
|
|
1634
1659
|
const recentCutoff = Date.now() - 24 * 60 * 60 * 1000; // last 24 hours
|
|
1635
1660
|
const recent = this.hotCorrections.filter(c => new Date(c.timestamp).getTime() > recentCutoff);
|
|
1636
1661
|
if (recent.length > 0) {
|
|
1637
1662
|
const lines = recent.map(c => `- [${c.category}] ${c.correction}`);
|
|
1638
|
-
|
|
1663
|
+
volatileParts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
|
|
1639
1664
|
}
|
|
1640
1665
|
}
|
|
1641
|
-
// Inject recent feedback signals (closes the feedback → behavior loop).
|
|
1642
|
-
// Without this block, user thumbs-down + comments live in the feedback
|
|
1643
|
-
// table and never reach the agent's awareness — only the skill-suppress
|
|
1644
|
-
// filter consumed them. We surface aggregates + the last few commented
|
|
1645
|
-
// negatives so the agent can self-adjust on the next turn. Skipped when
|
|
1646
|
-
// there's nothing to report (no noise).
|
|
1647
1666
|
if (this.memoryStore?.getRecentFeedbackSignals && !lightweightTurn) {
|
|
1648
1667
|
try {
|
|
1649
1668
|
const sig = this.memoryStore.getRecentFeedbackSignals({ days: 14, limit: 3 });
|
|
@@ -1659,7 +1678,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1659
1678
|
lines.push(`- (${n.channel}) ${comment}`);
|
|
1660
1679
|
}
|
|
1661
1680
|
}
|
|
1662
|
-
|
|
1681
|
+
volatileParts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
|
|
1663
1682
|
}
|
|
1664
1683
|
}
|
|
1665
1684
|
catch { /* non-fatal */ }
|
|
@@ -1708,7 +1727,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1708
1727
|
skillBlock += `\n\n**Reference files:**\n${attParts.join('\n\n')}`;
|
|
1709
1728
|
}
|
|
1710
1729
|
}
|
|
1711
|
-
|
|
1730
|
+
// Skill matches depend on the user's last message + the live
|
|
1731
|
+
// suppression list; both refresh per turn. Volatile.
|
|
1732
|
+
volatileParts.push(skillBlock);
|
|
1712
1733
|
}
|
|
1713
1734
|
}
|
|
1714
1735
|
catch { /* non-fatal — skills dir may not exist */ }
|
|
@@ -1730,7 +1751,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1730
1751
|
parts.push(`## Agent-Specific Preferences (${profile.slug})\n\n${agentPrefs.data.preferences}`);
|
|
1731
1752
|
}
|
|
1732
1753
|
}
|
|
1733
|
-
// User Theory of Mind — structured user model
|
|
1754
|
+
// User Theory of Mind — structured user model. The model file
|
|
1755
|
+
// updates as the user's preferences/priorities are learned, so
|
|
1756
|
+
// its content is volatile within a session.
|
|
1734
1757
|
const userModelFile = path.join(VAULT_DIR, '00-System', 'USER_MODEL.md');
|
|
1735
1758
|
this.promptCache.watch(userModelFile);
|
|
1736
1759
|
const userModel = this.promptCache.get(userModelFile);
|
|
@@ -1740,7 +1763,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1740
1763
|
const comm = userModel.data.communication ? `Communication: ${Object.entries(userModel.data.communication).map(([k, v]) => `${k}=${v}`).join(', ')}` : '';
|
|
1741
1764
|
const modelParts = [expertise, priorities, comm].filter(Boolean);
|
|
1742
1765
|
if (modelParts.length > 0) {
|
|
1743
|
-
|
|
1766
|
+
volatileParts.push(`## User Context\n\n${modelParts.join('\n')}`);
|
|
1744
1767
|
}
|
|
1745
1768
|
}
|
|
1746
1769
|
// Proactive feedback capture
|
|
@@ -2302,6 +2325,26 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
2302
2325
|
const volatileSuffix = volatilePromptPart && volatilePromptPart.trim().length > 0
|
|
2303
2326
|
? volatilePromptPart
|
|
2304
2327
|
: '';
|
|
2328
|
+
// Debug-mode: log a short hash of the stable prefix + volatile suffix
|
|
2329
|
+
// per query. When CLEMENTINE_DEBUG_CACHE=1, mismatched stable hashes
|
|
2330
|
+
// across consecutive turns of the same session indicate a regression
|
|
2331
|
+
// where volatile content silently leaked back into the cached prefix.
|
|
2332
|
+
// No-op (no allocation) in normal mode.
|
|
2333
|
+
if (process.env.CLEMENTINE_DEBUG_CACHE === '1') {
|
|
2334
|
+
const { createHash } = await import('node:crypto');
|
|
2335
|
+
const stableHash = createHash('sha1').update(stablePrefixParts.join('\n\n---\n\n')).digest('hex').slice(0, 8);
|
|
2336
|
+
const volatileHash = volatileSuffix
|
|
2337
|
+
? createHash('sha1').update(volatileSuffix).digest('hex').slice(0, 8)
|
|
2338
|
+
: 'empty';
|
|
2339
|
+
logger.info({
|
|
2340
|
+
sessionKey,
|
|
2341
|
+
stable_prefix_hash: stableHash,
|
|
2342
|
+
volatile_suffix_hash: volatileHash,
|
|
2343
|
+
stable_chars: stablePrefixParts.reduce((n, s) => n + s.length, 0),
|
|
2344
|
+
volatile_chars: volatileSuffix.length,
|
|
2345
|
+
allowed_tool_count: allowedTools.length,
|
|
2346
|
+
}, 'cache_debug: prompt structure for this query');
|
|
2347
|
+
}
|
|
2305
2348
|
// If there is no volatile content, a plain string keeps the call simple
|
|
2306
2349
|
// and behaves identically for the cache. Only use the array form when
|
|
2307
2350
|
// we actually have dynamic content to split off.
|
|
@@ -2574,15 +2617,25 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
2574
2617
|
&& !isPlanStep
|
|
2575
2618
|
&& (toolRoute.inheritFullClaudeEnv || toolRoute.fullSurface);
|
|
2576
2619
|
const isolateClaudeConfig = !toolRoute.fullSurface;
|
|
2620
|
+
// Sort tool surface for deterministic cache key. The Anthropic prompt
|
|
2621
|
+
// cache hashes the entire tools/system prefix; insertion-order
|
|
2622
|
+
// serialization is fragile if routing logic ever pushes in a
|
|
2623
|
+
// different order between calls — silent cache miss. Sorting also
|
|
2624
|
+
// lets multiple jobs that arrived at the same tool set (via
|
|
2625
|
+
// different routing paths) share a cache entry.
|
|
2626
|
+
if (!toolsDisabledForCall) {
|
|
2627
|
+
allowedTools.sort();
|
|
2628
|
+
}
|
|
2577
2629
|
const mcpServerNames = toolsDisabledForCall
|
|
2578
2630
|
? []
|
|
2579
|
-
: [TOOLS_SERVER, ...Object.keys(externalMcpServers), ...Object.keys(composioMcpServers)];
|
|
2631
|
+
: [TOOLS_SERVER, ...Object.keys(externalMcpServers).sort(), ...Object.keys(composioMcpServers).sort()];
|
|
2580
2632
|
const clementineToolPrefix = `mcp__${TOOLS_SERVER}__`;
|
|
2581
2633
|
const clementineToolAllowlist = toolRoute.fullSurface
|
|
2582
2634
|
? '*'
|
|
2583
2635
|
: allowedTools
|
|
2584
2636
|
.filter(t => t.startsWith(clementineToolPrefix))
|
|
2585
2637
|
.map(t => t.slice(clementineToolPrefix.length))
|
|
2638
|
+
.sort()
|
|
2586
2639
|
.join(',');
|
|
2587
2640
|
const clementineToolAllowlistCount = clementineToolAllowlist === '*'
|
|
2588
2641
|
? CLEMENTINE_ALL_TOOL_NAMES.length
|
|
@@ -4580,6 +4633,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
4580
4633
|
const blocks = getContentBlocks(message);
|
|
4581
4634
|
summaryText += extractText(blocks);
|
|
4582
4635
|
}
|
|
4636
|
+
else if (message.type === 'result') {
|
|
4637
|
+
// Make session-summarization cost visible in usage_log. Without
|
|
4638
|
+
// this, every session rotation spawned a Sonnet summarize call
|
|
4639
|
+
// that didn't appear in any metric.
|
|
4640
|
+
this.logQueryResult(message, 'summarize', `summarize:${sessionKey}`);
|
|
4641
|
+
}
|
|
4583
4642
|
}
|
|
4584
4643
|
if (summaryText.trim()) {
|
|
4585
4644
|
if (this.memoryStore) {
|
|
@@ -4950,6 +5009,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
4950
5009
|
});
|
|
4951
5010
|
const collectedText = [];
|
|
4952
5011
|
for await (const message of stream) {
|
|
5012
|
+
if (message.type === 'result') {
|
|
5013
|
+
// Auto-memory extraction fires after every substantive
|
|
5014
|
+
// exchange. Before this log call, its cost was invisible in
|
|
5015
|
+
// usage_log — a per-user-message Sonnet pass running silently.
|
|
5016
|
+
this.logQueryResult(message, 'auto_memory', `auto-memory:${sessionKey ?? 'unknown'}`, undefined, profile?.slug);
|
|
5017
|
+
continue;
|
|
5018
|
+
}
|
|
4953
5019
|
if (message.type === 'assistant') {
|
|
4954
5020
|
const blocks = getContentBlocks(message);
|
|
4955
5021
|
for (const block of blocks) {
|
|
@@ -5608,6 +5674,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
5608
5674
|
const blocks = getContentBlocks(message);
|
|
5609
5675
|
responseText += extractText(blocks);
|
|
5610
5676
|
}
|
|
5677
|
+
else if (message.type === 'result') {
|
|
5678
|
+
// Cron reflection (post-task quality check) fires after every
|
|
5679
|
+
// cron run. Cheap (Haiku, 1 turn, ~1KB) but should be visible.
|
|
5680
|
+
this.logQueryResult(message, 'cron_reflection', `reflection:${jobName}`, jobName);
|
|
5681
|
+
}
|
|
5611
5682
|
}
|
|
5612
5683
|
if (responseText.trim()) {
|
|
5613
5684
|
const reflection = JSON.parse(responseText.trim());
|