clementine-agent 1.18.35 → 1.18.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +71 -26
- package/dist/agent/fanout-policy.js +2 -0
- package/dist/agent/toolsets.d.ts +7 -1
- package/dist/agent/toolsets.js +15 -1
- package/dist/cli/dashboard.js +9 -0
- package/dist/gateway/agent-heartbeat-scheduler.js +17 -2
- package/dist/gateway/cron-scheduler.js +29 -1
- package/dist/gateway/long-task-preflight.d.ts +9 -0
- package/dist/gateway/long-task-preflight.js +82 -0
- package/package.json +1 -1
package/dist/agent/assistant.js
CHANGED
|
@@ -33,7 +33,7 @@ import { searchSkills as searchSkillsSync } from './skill-extractor.js';
|
|
|
33
33
|
import { classifyIntent, getStrategyGuidance } from './intent-classifier.js';
|
|
34
34
|
import { getEventLog } from './session-event-log.js';
|
|
35
35
|
import { applyServiceDedup, routeToolSurface, TOOL_SURFACE_HARD_LIMIT, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
|
|
36
|
-
import { isRestrictedToolset, toolsetAllowsLocalWrites } from './toolsets.js';
|
|
36
|
+
import { isRestrictedToolset, toolsetAllowsLocalWrites, toolsetDisablesAllTools } from './toolsets.js';
|
|
37
37
|
import { looksLikeApprovalPrompt } from './local-turn.js';
|
|
38
38
|
import { decideTurn } from './turn-policy.js';
|
|
39
39
|
import { loadClementineJson } from '../config/clementine-json.js';
|
|
@@ -1446,8 +1446,17 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1446
1446
|
if (agentsEntry)
|
|
1447
1447
|
parts.push(agentsEntry.content);
|
|
1448
1448
|
}
|
|
1449
|
+
// ── Per-session-volatile content goes to volatileParts (post-cache-boundary) ──
|
|
1450
|
+
// Anthropic's prompt-caching guidance is explicit: cache is a prefix
|
|
1451
|
+
// hash, so anything that changes between turns must sit AFTER the
|
|
1452
|
+
// breakpoint. The blocks below — retrieved context, working memory,
|
|
1453
|
+
// MEMORY.md, today's notes, yesterday's summary, recent conversations —
|
|
1454
|
+
// all change within a single 5-minute cache TTL window during an
|
|
1455
|
+
// active session. Putting them in the stable prefix caused ~80 KB of
|
|
1456
|
+
// cache_creation per session-content change. After this refactor the
|
|
1457
|
+
// stable prefix stays byte-identical across calls.
|
|
1449
1458
|
if (retrievalContext) {
|
|
1450
|
-
|
|
1459
|
+
volatileParts.push(`## Relevant Context (retrieved)\n\n${retrievalContext}\n\n` +
|
|
1451
1460
|
`*When retrieved context contains information from previous conversations relevant to the current topic, naturally reference it. ` +
|
|
1452
1461
|
`If the user mentions a person and memory shows their last known status or project, weave that in conversationally. ` +
|
|
1453
1462
|
`Only reference if genuinely relevant — do not force callbacks to old context.*`);
|
|
@@ -1460,7 +1469,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1460
1469
|
const wmContent = fs.readFileSync(_wmFileFallback, 'utf-8').trim();
|
|
1461
1470
|
if (wmContent) {
|
|
1462
1471
|
const truncated = isAutonomous ? wmContent.slice(0, 1500) : wmContent;
|
|
1463
|
-
|
|
1472
|
+
volatileParts.push(`## Working Memory (scratchpad)\n\n${truncated}`);
|
|
1464
1473
|
}
|
|
1465
1474
|
}
|
|
1466
1475
|
catch { /* non-critical */ }
|
|
@@ -1470,10 +1479,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1470
1479
|
// Autonomous runs get truncated memory — just enough for context
|
|
1471
1480
|
if (isAutonomous) {
|
|
1472
1481
|
const truncated = memoryEntry.content.slice(0, 2000);
|
|
1473
|
-
|
|
1482
|
+
volatileParts.push(`## Current Memory\n\n${truncated}${memoryEntry.content.length > 2000 ? '\n...(truncated)' : ''}`);
|
|
1474
1483
|
}
|
|
1475
1484
|
else {
|
|
1476
|
-
|
|
1485
|
+
volatileParts.push(`## Current Memory\n\n${memoryEntry.content}`);
|
|
1477
1486
|
}
|
|
1478
1487
|
}
|
|
1479
1488
|
}
|
|
@@ -1484,12 +1493,12 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1484
1493
|
this.promptCache.watch(agentMemPath);
|
|
1485
1494
|
const agentMemEntry = this.promptCache.get(agentMemPath);
|
|
1486
1495
|
if (agentMemEntry) {
|
|
1487
|
-
|
|
1496
|
+
volatileParts.push(`## Agent Memory (${profile.slug})\n\n${agentMemEntry.content}`);
|
|
1488
1497
|
}
|
|
1489
1498
|
}
|
|
1490
1499
|
const todayEntry = !skipAmbientContext ? this.promptCache.get(todayPath) : null;
|
|
1491
1500
|
if (todayEntry) {
|
|
1492
|
-
|
|
1501
|
+
volatileParts.push(`## Today's Notes (${todayISO()})\n\n${todayEntry.content}`);
|
|
1493
1502
|
}
|
|
1494
1503
|
// Skip yesterday's notes and recent conversation summaries for autonomous runs
|
|
1495
1504
|
if (!isAutonomous && !skipAmbientContext) {
|
|
@@ -1501,7 +1510,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1501
1510
|
const yEntry = this.promptCache.get(yPath);
|
|
1502
1511
|
if (yEntry && yEntry.content.includes('## Summary')) {
|
|
1503
1512
|
const summary = yEntry.content.slice(yEntry.content.indexOf('## Summary'));
|
|
1504
|
-
|
|
1513
|
+
volatileParts.push(`## Yesterday's Summary (${yesterdayISO()})\n\n${summary}`);
|
|
1505
1514
|
}
|
|
1506
1515
|
}
|
|
1507
1516
|
}
|
|
@@ -1513,7 +1522,7 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1513
1522
|
const ts = (s.createdAt ?? 'unknown').slice(0, 16);
|
|
1514
1523
|
return `### ${ts}\n${s.summary}`;
|
|
1515
1524
|
});
|
|
1516
|
-
|
|
1525
|
+
volatileParts.push('## Recent Conversations\n\n' + lines.join('\n\n'));
|
|
1517
1526
|
}
|
|
1518
1527
|
}
|
|
1519
1528
|
catch {
|
|
@@ -1522,8 +1531,10 @@ Large tool outputs blow the context window and rotate your session mid-task —
|
|
|
1522
1531
|
}
|
|
1523
1532
|
}
|
|
1524
1533
|
if (isAutonomous) {
|
|
1525
|
-
// Minimal vault reference for heartbeats/cron — they know their tools
|
|
1526
|
-
|
|
1534
|
+
// Minimal vault reference for heartbeats/cron — they know their tools.
|
|
1535
|
+
// No date reference here: today's date string in the stable prefix
|
|
1536
|
+
// would invalidate the prompt cache once per day.
|
|
1537
|
+
parts.push(`Vault: \`${vault}\`. Key files: MEMORY.md, today's daily note, TASKS.md. Use MCP tools (memory_read/write, task_list/add/update, note_take).`);
|
|
1527
1538
|
// Deviation rules — tiered autonomy for handling unexpected work during cron/heartbeat
|
|
1528
1539
|
parts.push(`## Deviation Rules (Tiered Autonomy)
|
|
1529
1540
|
|
|
@@ -1554,7 +1565,7 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
|
|
|
1554
1565
|
**File tools:** Read, Write, Edit, Glob, Grep for direct access.
|
|
1555
1566
|
|
|
1556
1567
|
**Folders:** 00-System (SOUL/MEMORY/AGENTS.md), 01-Daily-Notes (YYYY-MM-DD.md), 02-People, 03-Projects, 04-Topics, 05-Tasks/TASKS.md, 06-Templates, 07-Inbox.
|
|
1557
|
-
**Key files:** MEMORY.md (long-term),
|
|
1568
|
+
**Key files:** MEMORY.md (long-term), today's daily note, TASKS.md (tasks).
|
|
1558
1569
|
|
|
1559
1570
|
**Task IDs:** \`{T-001}\`, subtasks \`{T-001.1}\`. Recurring tasks auto-create next copy on completion.
|
|
1560
1571
|
|
|
@@ -1629,21 +1640,19 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1629
1640
|
parts.push(`Linked projects:\n${projectDetails.join('\n')}`);
|
|
1630
1641
|
}
|
|
1631
1642
|
}
|
|
1632
|
-
//
|
|
1643
|
+
// Recent Corrections + feedback signals — both refresh as the user
|
|
1644
|
+
// gives feedback during a session. Putting them in volatile keeps the
|
|
1645
|
+
// stable prefix cache-stable across feedback turns. Same per-message
|
|
1646
|
+
// anti-pattern that OpenClaw issue #20894 documented as a 100x cost
|
|
1647
|
+
// amplifier.
|
|
1633
1648
|
if (this.hotCorrections.length > 0 && !lightweightTurn) {
|
|
1634
1649
|
const recentCutoff = Date.now() - 24 * 60 * 60 * 1000; // last 24 hours
|
|
1635
1650
|
const recent = this.hotCorrections.filter(c => new Date(c.timestamp).getTime() > recentCutoff);
|
|
1636
1651
|
if (recent.length > 0) {
|
|
1637
1652
|
const lines = recent.map(c => `- [${c.category}] ${c.correction}`);
|
|
1638
|
-
|
|
1653
|
+
volatileParts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
|
|
1639
1654
|
}
|
|
1640
1655
|
}
|
|
1641
|
-
// Inject recent feedback signals (closes the feedback → behavior loop).
|
|
1642
|
-
// Without this block, user thumbs-down + comments live in the feedback
|
|
1643
|
-
// table and never reach the agent's awareness — only the skill-suppress
|
|
1644
|
-
// filter consumed them. We surface aggregates + the last few commented
|
|
1645
|
-
// negatives so the agent can self-adjust on the next turn. Skipped when
|
|
1646
|
-
// there's nothing to report (no noise).
|
|
1647
1656
|
if (this.memoryStore?.getRecentFeedbackSignals && !lightweightTurn) {
|
|
1648
1657
|
try {
|
|
1649
1658
|
const sig = this.memoryStore.getRecentFeedbackSignals({ days: 14, limit: 3 });
|
|
@@ -1659,7 +1668,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1659
1668
|
lines.push(`- (${n.channel}) ${comment}`);
|
|
1660
1669
|
}
|
|
1661
1670
|
}
|
|
1662
|
-
|
|
1671
|
+
volatileParts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
|
|
1663
1672
|
}
|
|
1664
1673
|
}
|
|
1665
1674
|
catch { /* non-fatal */ }
|
|
@@ -1708,7 +1717,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1708
1717
|
skillBlock += `\n\n**Reference files:**\n${attParts.join('\n\n')}`;
|
|
1709
1718
|
}
|
|
1710
1719
|
}
|
|
1711
|
-
|
|
1720
|
+
// Skill matches depend on the user's last message + the live
|
|
1721
|
+
// suppression list; both refresh per turn. Volatile.
|
|
1722
|
+
volatileParts.push(skillBlock);
|
|
1712
1723
|
}
|
|
1713
1724
|
}
|
|
1714
1725
|
catch { /* non-fatal — skills dir may not exist */ }
|
|
@@ -1730,7 +1741,9 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1730
1741
|
parts.push(`## Agent-Specific Preferences (${profile.slug})\n\n${agentPrefs.data.preferences}`);
|
|
1731
1742
|
}
|
|
1732
1743
|
}
|
|
1733
|
-
// User Theory of Mind — structured user model
|
|
1744
|
+
// User Theory of Mind — structured user model. The model file
|
|
1745
|
+
// updates as the user's preferences/priorities are learned, so
|
|
1746
|
+
// its content is volatile within a session.
|
|
1734
1747
|
const userModelFile = path.join(VAULT_DIR, '00-System', 'USER_MODEL.md');
|
|
1735
1748
|
this.promptCache.watch(userModelFile);
|
|
1736
1749
|
const userModel = this.promptCache.get(userModelFile);
|
|
@@ -1740,7 +1753,7 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1740
1753
|
const comm = userModel.data.communication ? `Communication: ${Object.entries(userModel.data.communication).map(([k, v]) => `${k}=${v}`).join(', ')}` : '';
|
|
1741
1754
|
const modelParts = [expertise, priorities, comm].filter(Boolean);
|
|
1742
1755
|
if (modelParts.length > 0) {
|
|
1743
|
-
|
|
1756
|
+
volatileParts.push(`## User Context\n\n${modelParts.join('\n')}`);
|
|
1744
1757
|
}
|
|
1745
1758
|
}
|
|
1746
1759
|
// Proactive feedback capture
|
|
@@ -2011,7 +2024,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
2011
2024
|
async buildOptions(opts = {}) {
|
|
2012
2025
|
const { isHeartbeat = false, cronTier = null, maxTurns = null, model = null, enableTeams = true, retrievalContext = '', profile = null, sessionKey = null, streaming = false, isPlanStep = false, isUnleashed = false, sourceOverride, disableAllTools = false, verboseLevel, abortController, effort, maxBudgetUsd, toolScopeText, thinking, outputFormat, stallGuard, intentClassification, turnPolicy, contextRoutingText, toolset = 'auto', } = opts;
|
|
2013
2026
|
const isCron = cronTier !== null;
|
|
2014
|
-
const toolsDisabledForCall = disableAllTools
|
|
2027
|
+
const toolsDisabledForCall = disableAllTools
|
|
2028
|
+
|| (isHeartbeat && !isCron)
|
|
2029
|
+
|| toolsetDisablesAllTools(toolset);
|
|
2015
2030
|
const promptScopeText = toolScopeText ?? '';
|
|
2016
2031
|
const profileScopeText = [profile?.description, profile?.systemPromptBody]
|
|
2017
2032
|
.filter(Boolean)
|
|
@@ -2300,6 +2315,26 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
2300
2315
|
const volatileSuffix = volatilePromptPart && volatilePromptPart.trim().length > 0
|
|
2301
2316
|
? volatilePromptPart
|
|
2302
2317
|
: '';
|
|
2318
|
+
// Debug-mode: log a short hash of the stable prefix + volatile suffix
|
|
2319
|
+
// per query. When CLEMENTINE_DEBUG_CACHE=1, mismatched stable hashes
|
|
2320
|
+
// across consecutive turns of the same session indicate a regression
|
|
2321
|
+
// where volatile content silently leaked back into the cached prefix.
|
|
2322
|
+
// No-op (no allocation) in normal mode.
|
|
2323
|
+
if (process.env.CLEMENTINE_DEBUG_CACHE === '1') {
|
|
2324
|
+
const { createHash } = await import('node:crypto');
|
|
2325
|
+
const stableHash = createHash('sha1').update(stablePrefixParts.join('\n\n---\n\n')).digest('hex').slice(0, 8);
|
|
2326
|
+
const volatileHash = volatileSuffix
|
|
2327
|
+
? createHash('sha1').update(volatileSuffix).digest('hex').slice(0, 8)
|
|
2328
|
+
: 'empty';
|
|
2329
|
+
logger.info({
|
|
2330
|
+
sessionKey,
|
|
2331
|
+
stable_prefix_hash: stableHash,
|
|
2332
|
+
volatile_suffix_hash: volatileHash,
|
|
2333
|
+
stable_chars: stablePrefixParts.reduce((n, s) => n + s.length, 0),
|
|
2334
|
+
volatile_chars: volatileSuffix.length,
|
|
2335
|
+
allowed_tool_count: allowedTools.length,
|
|
2336
|
+
}, 'cache_debug: prompt structure for this query');
|
|
2337
|
+
}
|
|
2303
2338
|
// If there is no volatile content, a plain string keeps the call simple
|
|
2304
2339
|
// and behaves identically for the cache. Only use the array form when
|
|
2305
2340
|
// we actually have dynamic content to split off.
|
|
@@ -2572,15 +2607,25 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
2572
2607
|
&& !isPlanStep
|
|
2573
2608
|
&& (toolRoute.inheritFullClaudeEnv || toolRoute.fullSurface);
|
|
2574
2609
|
const isolateClaudeConfig = !toolRoute.fullSurface;
|
|
2610
|
+
// Sort tool surface for deterministic cache key. The Anthropic prompt
|
|
2611
|
+
// cache hashes the entire tools/system prefix; insertion-order
|
|
2612
|
+
// serialization is fragile if routing logic ever pushes in a
|
|
2613
|
+
// different order between calls — silent cache miss. Sorting also
|
|
2614
|
+
// lets multiple jobs that arrived at the same tool set (via
|
|
2615
|
+
// different routing paths) share a cache entry.
|
|
2616
|
+
if (!toolsDisabledForCall) {
|
|
2617
|
+
allowedTools.sort();
|
|
2618
|
+
}
|
|
2575
2619
|
const mcpServerNames = toolsDisabledForCall
|
|
2576
2620
|
? []
|
|
2577
|
-
: [TOOLS_SERVER, ...Object.keys(externalMcpServers), ...Object.keys(composioMcpServers)];
|
|
2621
|
+
: [TOOLS_SERVER, ...Object.keys(externalMcpServers).sort(), ...Object.keys(composioMcpServers).sort()];
|
|
2578
2622
|
const clementineToolPrefix = `mcp__${TOOLS_SERVER}__`;
|
|
2579
2623
|
const clementineToolAllowlist = toolRoute.fullSurface
|
|
2580
2624
|
? '*'
|
|
2581
2625
|
: allowedTools
|
|
2582
2626
|
.filter(t => t.startsWith(clementineToolPrefix))
|
|
2583
2627
|
.map(t => t.slice(clementineToolPrefix.length))
|
|
2628
|
+
.sort()
|
|
2584
2629
|
.join(',');
|
|
2585
2630
|
const clementineToolAllowlistCount = clementineToolAllowlist === '*'
|
|
2586
2631
|
? CLEMENTINE_ALL_TOOL_NAMES.length
|
|
package/dist/agent/fanout-policy.js
CHANGED
|
@@ -85,6 +85,7 @@ export function buildAlwaysOnParallelizationHint() {
|
|
|
85
85
|
return [
|
|
86
86
|
'## Sub-agent fan-out',
|
|
87
87
|
'When you process multiple items, spawn ONE Agent sub-agent per batch of 3–5 items. Sub-agents return ONE-LINE summaries (no raw tool output). Do not iterate sequentially in this conversation — that fills your context and aborts the run.',
|
|
88
|
+
'Cost: pass `model: "haiku"` to Agent for routine extraction, summarization, or per-item lookups. Use Sonnet only when the sub-agent must reason across many sources or write something durable.',
|
|
88
89
|
].join('\n');
|
|
89
90
|
}
|
|
90
91
|
/**
|
|
@@ -109,6 +110,7 @@ export function buildFanoutDirective(signals) {
|
|
|
109
110
|
'Use the `Agent` tool to spawn parallel sub-agents. Each sub-agent runs in its own isolated context, so big tool responses live and die there — your context only sees the summary.',
|
|
110
111
|
'',
|
|
111
112
|
'- **Batch size**: 3–5 items per sub-agent (or one slice of work per sub-agent for research tasks)',
|
|
113
|
+
'- **Sub-agent model**: pass `model: "haiku"` to the Agent tool by default — sub-agents that just extract fields, summarize a single email, or pull a single record do not need Sonnet. Reserve Sonnet for sub-agents that must reason across multiple sources or write something durable.',
|
|
112
114
|
'- **Sub-agent prompt MUST include**: the narrow task, the exact return format (e.g. `Return ONE LINE: <id> | <status> | <next-action>`), and an explicit "do not include raw tool output" directive',
|
|
113
115
|
'- **Parent context keeps**: only the sub-agent return strings, not their tool transcripts',
|
|
114
116
|
'',
|
package/dist/agent/toolsets.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type ToolsetName = 'auto' | 'safe' | 'diagnostic' | 'communications' | 'memory' | 'full';
|
|
1
|
+
export type ToolsetName = 'auto' | 'safe' | 'diagnostic' | 'communications' | 'memory' | 'none' | 'full';
|
|
2
2
|
export interface ToolsetPreset {
|
|
3
3
|
name: ToolsetName;
|
|
4
4
|
label: string;
|
|
@@ -11,4 +11,10 @@ export declare function getToolsetPreset(name: ToolsetName): ToolsetPreset;
|
|
|
11
11
|
export declare function formatToolsetChoices(): string;
|
|
12
12
|
export declare function isRestrictedToolset(name: ToolsetName): boolean;
|
|
13
13
|
export declare function toolsetAllowsLocalWrites(name: ToolsetName): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* "none" toolset: not just restricted, but actively disables ALL tools
|
|
16
|
+
* including the core Clementine MCP server. Used by builder/JSON-gen
|
|
17
|
+
* chats where tool schemas in the system prompt are pure cost overhead.
|
|
18
|
+
*/
|
|
19
|
+
export declare function toolsetDisablesAllTools(name: ToolsetName): boolean;
|
|
14
20
|
//# sourceMappingURL=toolsets.d.ts.map
|
package/dist/agent/toolsets.js
CHANGED
|
@@ -29,6 +29,12 @@ export const TOOLSET_PRESETS = [
|
|
|
29
29
|
description: 'Memory, transcript, and relationship tools only unless explicitly changed.',
|
|
30
30
|
directive: 'Toolset memory: use memory_read, memory_search, memory_recall, transcript_search, working_memory, and user_model. Avoid external integrations and local shell/file writes.',
|
|
31
31
|
},
|
|
32
|
+
{
|
|
33
|
+
name: 'none',
|
|
34
|
+
label: 'None',
|
|
35
|
+
description: 'No tools at all — pure-LLM conversation. Used by builders and JSON-generating chats where tool schemas are dead weight in the system prompt.',
|
|
36
|
+
directive: 'Toolset none: do not call any tools. Respond from the prompt context only. Generate JSON, summaries, or text directly.',
|
|
37
|
+
},
|
|
32
38
|
{
|
|
33
39
|
name: 'full',
|
|
34
40
|
label: 'Full',
|
|
@@ -60,9 +66,17 @@ export function formatToolsetChoices() {
|
|
|
60
66
|
.join('\n');
|
|
61
67
|
}
|
|
62
68
|
export function isRestrictedToolset(name) {
|
|
63
|
-
return name === 'safe' || name === 'diagnostic' || name === 'memory';
|
|
69
|
+
return name === 'safe' || name === 'diagnostic' || name === 'memory' || name === 'none';
|
|
64
70
|
}
|
|
65
71
|
export function toolsetAllowsLocalWrites(name) {
|
|
66
72
|
return name === 'auto' || name === 'full';
|
|
67
73
|
}
|
|
74
|
+
/**
|
|
75
|
+
* "none" toolset: not just restricted, but actively disables ALL tools
|
|
76
|
+
* including the core Clementine MCP server. Used by builder/JSON-gen
|
|
77
|
+
* chats where tool schemas in the system prompt are pure cost overhead.
|
|
78
|
+
*/
|
|
79
|
+
export function toolsetDisablesAllTools(name) {
|
|
80
|
+
return name === 'none';
|
|
81
|
+
}
|
|
68
82
|
//# sourceMappingURL=toolsets.js.map
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -8053,6 +8053,12 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
8053
8053
|
}
|
|
8054
8054
|
try {
|
|
8055
8055
|
const gateway = await getGateway();
|
|
8056
|
+
// Builder generates JSON artifacts — no tool calls. Pin the session
|
|
8057
|
+
// toolset to 'none' so buildOptions strips all MCP servers and tool
|
|
8058
|
+
// schemas from the system prompt. Without this, every tiny builder
|
|
8059
|
+
// turn writes 60–280 KB of cache_creation for tool schemas the
|
|
8060
|
+
// model never uses.
|
|
8061
|
+
gateway.setSessionToolset(sessionKey, 'none');
|
|
8056
8062
|
const response = await gateway.handleMessage(sessionKey, enrichedMessage);
|
|
8057
8063
|
// Parse any json-artifact blocks from the response
|
|
8058
8064
|
let artifact = null;
|
|
@@ -8180,6 +8186,9 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
8180
8186
|
try {
|
|
8181
8187
|
writeEvent('progress', { status: 'thinking…' });
|
|
8182
8188
|
const gateway = await getGateway();
|
|
8189
|
+
// Builder generates JSON artifacts — no tool calls. Pin to 'none'
|
|
8190
|
+
// toolset so the SDK system prompt drops the tool inventory.
|
|
8191
|
+
gateway.setSessionToolset(sessionKey, 'none');
|
|
8183
8192
|
let lastText = '';
|
|
8184
8193
|
const response = await gateway.handleMessage(sessionKey, enrichedMessage, async (text) => {
|
|
8185
8194
|
lastText = text ?? '';
|
|
package/dist/gateway/agent-heartbeat-scheduler.js
CHANGED
|
@@ -171,7 +171,15 @@ export class AgentHeartbeatScheduler {
|
|
|
171
171
|
catch {
|
|
172
172
|
signals.latestGoalUpdate = '';
|
|
173
173
|
}
|
|
174
|
-
// 3. Latest cron run for any of this agent's crons (file mtime is enough)
|
|
174
|
+
// 3. Latest cron run for any of this agent's crons (file mtime is enough).
|
|
175
|
+
//
|
|
176
|
+
// INFO ONLY — kept in `signals` for the LLM prompt context, but NOT
|
|
177
|
+
// included in the fingerprint hash below. A cron firing is expected
|
|
178
|
+
// background activity, not a "wake up Sonnet" signal. Including it
|
|
179
|
+
// caused every cron run to bump the fingerprint → fire a $1+ Sonnet
|
|
180
|
+
// pass that just confirmed "yep, the cron ran, nothing else to do."
|
|
181
|
+
// Actionable wake-ups are pendingTasks growing and goal-state changes;
|
|
182
|
+
// those still trip the fingerprint below.
|
|
175
183
|
try {
|
|
176
184
|
const runsDir = path.join(this.baseDir, 'cron', 'runs');
|
|
177
185
|
let latestMs = 0;
|
|
@@ -195,8 +203,15 @@ export class AgentHeartbeatScheduler {
|
|
|
195
203
|
catch {
|
|
196
204
|
signals.latestCronRunMs = 0;
|
|
197
205
|
}
|
|
206
|
+
// Fingerprint only includes ACTIONABLE signals. latestCronRunMs is
|
|
207
|
+
// info-only and explicitly excluded.
|
|
208
|
+
const fingerprintSource = {
|
|
209
|
+
slug: signals.slug,
|
|
210
|
+
pendingTasks: signals.pendingTasks,
|
|
211
|
+
latestGoalUpdate: signals.latestGoalUpdate,
|
|
212
|
+
};
|
|
198
213
|
const fingerprint = createHash('sha1')
|
|
199
|
-
.update(JSON.stringify(signals))
|
|
214
|
+
.update(JSON.stringify(fingerprintSource))
|
|
200
215
|
.digest('hex')
|
|
201
216
|
.slice(0, 16);
|
|
202
217
|
return { fingerprint, signals };
|
|
package/dist/gateway/cron-scheduler.js
CHANGED
|
@@ -23,7 +23,7 @@ import { listBackgroundTasks, markDone as markBgTaskDone, markFailed as markBgTa
|
|
|
23
23
|
import { outcomeStatusFromGoalDisposition, recentDecisions, recordDecisionOutcome, } from '../agent/proactive-ledger.js';
|
|
24
24
|
import { formatCreditBlock, getBackgroundCreditBlock, isCreditBalanceError, markBackgroundCreditBlocked, } from './credit-guard.js';
|
|
25
25
|
import { isRunHealthFailure } from './job-health.js';
|
|
26
|
-
import { analyzeLongTaskPreflight, compactLongTaskPreflight, formatLongTaskPromptPrefix, } from './long-task-preflight.js';
|
|
26
|
+
import { analyzeLongTaskPreflight, compactLongTaskPreflight, shouldDowngradeUnleashed, formatLongTaskPromptPrefix, } from './long-task-preflight.js';
|
|
27
27
|
const logger = pino({ name: 'clementine.cron' });
|
|
28
28
|
/** Default timeout for standard cron jobs (10 minutes). */
|
|
29
29
|
const CRON_STANDARD_TIMEOUT_MS = 10 * 60 * 1000;
|
|
@@ -1061,6 +1061,34 @@ export class CronScheduler {
|
|
|
1061
1061
|
// Sonnet runs every job by default. Opus 1M is opt-in: set
|
|
1062
1062
|
// `model: claude-opus-4-7[1m]` in CRON.md per-job, or flip
|
|
1063
1063
|
// CLEMENTINE_1M_CONTEXT_MODE=on for global enable.
|
|
1064
|
+
// ── Auto-downgrade unleashed → standard ────────────────────────
|
|
1065
|
+
// CRON.md `mode: unleashed` is a CEILING, not a floor. If the
|
|
1066
|
+
// job's history shows it's a quiet probe that completes in 1
|
|
1067
|
+
// phase with __NOTHING__ or short output, the multi-phase
|
|
1068
|
+
// wrapper is wasteful overhead — each phase is a fresh SDK
|
|
1069
|
+
// query with full system prompt + tool schemas in cache_creation.
|
|
1070
|
+
// For a "did anything new come in?" cron firing every 2 hours,
|
|
1071
|
+
// that's 12+ unleashed runs/day at ~$1/each instead of standard
|
|
1072
|
+
// mode at ~$0.05/each.
|
|
1073
|
+
if (job.mode === 'unleashed') {
|
|
1074
|
+
const downgrade = shouldDowngradeUnleashed(this.runLog.readRecent(job.name, 5));
|
|
1075
|
+
if (downgrade.downgrade) {
|
|
1076
|
+
job = { ...job, mode: 'standard' };
|
|
1077
|
+
logger.info({
|
|
1078
|
+
job: job.name,
|
|
1079
|
+
reason: downgrade.reason,
|
|
1080
|
+
quietRatio: downgrade.quietRatio,
|
|
1081
|
+
avgDurationMs: downgrade.avgDurationMs,
|
|
1082
|
+
}, 'Cron mode downgraded unleashed → standard based on run history');
|
|
1083
|
+
this.logAutonomy('mode_downgrade', job, {
|
|
1084
|
+
from: 'unleashed',
|
|
1085
|
+
to: 'standard',
|
|
1086
|
+
reason: downgrade.reason,
|
|
1087
|
+
quietRatio: downgrade.quietRatio,
|
|
1088
|
+
avgDurationMs: downgrade.avgDurationMs,
|
|
1089
|
+
});
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1064
1092
|
let longTaskPreflight;
|
|
1065
1093
|
const preflight = analyzeLongTaskPreflight(job, jobPrompt, this.runLog.readRecent(job.name, 5));
|
|
1066
1094
|
if (preflight.risk !== 'normal') {
|
|
package/dist/gateway/long-task-preflight.d.ts
CHANGED
|
@@ -15,6 +15,15 @@ export interface LongTaskPreflightOptions {
|
|
|
15
15
|
opusModel?: string;
|
|
16
16
|
sonnetModel?: string;
|
|
17
17
|
}
|
|
18
|
+
export interface UnleashedDowngradeDecision {
|
|
19
|
+
downgrade: boolean;
|
|
20
|
+
reason: string;
|
|
21
|
+
/** Quiet ratio observed in the sample (for telemetry). */
|
|
22
|
+
quietRatio?: number;
|
|
23
|
+
/** Average duration of recent runs in ms (for telemetry). */
|
|
24
|
+
avgDurationMs?: number;
|
|
25
|
+
}
|
|
26
|
+
export declare function shouldDowngradeUnleashed(recentRuns: CronRunEntry[], now?: number): UnleashedDowngradeDecision;
|
|
18
27
|
export declare function analyzeLongTaskPreflight(job: CronJobDefinition, prompt: string, recentRuns?: CronRunEntry[], opts?: LongTaskPreflightOptions): LongTaskPreflightDecision;
|
|
19
28
|
export declare function formatLongTaskPromptPrefix(decision: LongTaskPreflightDecision): string;
|
|
20
29
|
export declare function compactLongTaskPreflight(decision: LongTaskPreflightDecision): LongTaskPreflightSnapshot;
|
|
package/dist/gateway/long-task-preflight.js
CHANGED
|
@@ -48,6 +48,88 @@ function recentContextFailures(recentRuns, now = Date.now()) {
|
|
|
48
48
|
}
|
|
49
49
|
return [...new Set(reasons)];
|
|
50
50
|
}
|
|
51
|
+
// ── Auto-downgrade unleashed → standard for quiet/probe jobs ──────────
|
|
52
|
+
//
|
|
53
|
+
// `mode: unleashed` wraps a job in multi-phase machinery: each phase is a
|
|
54
|
+
// fresh SDK query with the full system prompt + tool schemas, and the
|
|
55
|
+
// orchestrator chains phases until TASK_COMPLETE or max-phases. That
|
|
56
|
+
// machinery is essential for genuinely-long tasks (sasha briefs, market
|
|
57
|
+
// outreach), but it's pure overhead on quiet probe jobs that finish in
|
|
58
|
+
// 1 phase with `__NOTHING__` or a short output.
|
|
59
|
+
//
|
|
60
|
+
// Detect that pattern from history and downgrade the next run to
|
|
61
|
+
// standard mode. Single SDK call, single cache write, fraction of the
|
|
62
|
+
// cost. The user's CRON.md `mode: unleashed` becomes a "ceiling" rather
|
|
63
|
+
// than a forced floor — actual mode chosen dynamically per-run.
|
|
64
|
+
//
|
|
65
|
+
// Conservative by design: requires 3+ prior runs of evidence, refuses
|
|
66
|
+
// to downgrade if any recent run hit context overflow (the unleashed
|
|
67
|
+
// wrapper might be actively saving us), and only triggers on jobs that
|
|
68
|
+
// historically complete fast with short or empty output.
|
|
69
|
+
const UNLEASHED_DOWNGRADE_SAMPLE_SIZE = 5;
|
|
70
|
+
const UNLEASHED_DOWNGRADE_MIN_HISTORY = 3;
|
|
71
|
+
const UNLEASHED_DOWNGRADE_QUIET_RATIO = 0.6;
|
|
72
|
+
const UNLEASHED_DOWNGRADE_MAX_DURATION_MS = 90_000;
|
|
73
|
+
const UNLEASHED_DOWNGRADE_AVG_DURATION_MS = 60_000;
|
|
74
|
+
const UNLEASHED_DOWNGRADE_QUIET_PREVIEW_CHARS = 200;
|
|
75
|
+
export function shouldDowngradeUnleashed(recentRuns, now = Date.now()) {
|
|
76
|
+
const sample = recentRuns
|
|
77
|
+
.slice(0, UNLEASHED_DOWNGRADE_SAMPLE_SIZE)
|
|
78
|
+
.filter(r => r.status === 'ok' || r.status === 'error');
|
|
79
|
+
if (sample.length < UNLEASHED_DOWNGRADE_MIN_HISTORY) {
|
|
80
|
+
return { downgrade: false, reason: 'insufficient_history' };
|
|
81
|
+
}
|
|
82
|
+
// Refuse to downgrade if any recent run hit a context-window failure —
|
|
83
|
+
// the unleashed multi-phase wrapper might be the only thing keeping
|
|
84
|
+
// this job from thrashing on a single huge SDK query. Pair this guard
|
|
85
|
+
// with the existing fanout-policy directive (1.18.35) so by the next
|
|
86
|
+
// few runs the agent has learned to fan out and the wrapper can be
|
|
87
|
+
// shed safely.
|
|
88
|
+
const cutoff = now - RECENT_CONTEXT_FAILURE_WINDOW_MS;
|
|
89
|
+
const hadOverflow = sample.some(r => {
|
|
90
|
+
const startedMs = Date.parse(r.startedAt);
|
|
91
|
+
if (!Number.isFinite(startedMs) || startedMs < cutoff)
|
|
92
|
+
return false;
|
|
93
|
+
return r.terminalReason === 'rapid_refill_breaker'
|
|
94
|
+
|| r.terminalReason === 'prompt_too_long';
|
|
95
|
+
});
|
|
96
|
+
if (hadOverflow) {
|
|
97
|
+
return { downgrade: false, reason: 'recent_context_overflow_protect_unleashed' };
|
|
98
|
+
}
|
|
99
|
+
// Quiet pattern: most recent runs returned __NOTHING__ or a short
|
|
100
|
+
// output. These jobs don't need multi-phase orchestration.
|
|
101
|
+
const quietCount = sample.filter(r => {
|
|
102
|
+
const preview = (r.outputPreview ?? '').trim();
|
|
103
|
+
if (!preview)
|
|
104
|
+
return false;
|
|
105
|
+
if (/__nothing__/i.test(preview))
|
|
106
|
+
return true;
|
|
107
|
+
return preview.length < UNLEASHED_DOWNGRADE_QUIET_PREVIEW_CHARS;
|
|
108
|
+
}).length;
|
|
109
|
+
const quietRatio = quietCount / sample.length;
|
|
110
|
+
if (quietRatio >= UNLEASHED_DOWNGRADE_QUIET_RATIO) {
|
|
111
|
+
return {
|
|
112
|
+
downgrade: true,
|
|
113
|
+
reason: `quiet_pattern_${Math.round(quietRatio * 100)}pct`,
|
|
114
|
+
quietRatio,
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
// Fast-completion pattern: every run finishes well under the standard
|
|
118
|
+
// cron timeout, average is short. Multi-phase wrapper is overhead.
|
|
119
|
+
const durations = sample.map(r => r.durationMs || 0).filter(d => d > 0);
|
|
120
|
+
if (durations.length === sample.length) {
|
|
121
|
+
const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
|
|
122
|
+
const allFast = durations.every(d => d < UNLEASHED_DOWNGRADE_MAX_DURATION_MS);
|
|
123
|
+
if (allFast && avgDuration < UNLEASHED_DOWNGRADE_AVG_DURATION_MS) {
|
|
124
|
+
return {
|
|
125
|
+
downgrade: true,
|
|
126
|
+
reason: `fast_completion_avg_${Math.round(avgDuration / 1000)}s`,
|
|
127
|
+
avgDurationMs: Math.round(avgDuration),
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
return { downgrade: false, reason: 'workload_warrants_unleashed' };
|
|
132
|
+
}
|
|
51
133
|
function classifyRisk(args) {
|
|
52
134
|
const { inputTokens, projectedTokens, signalCount, recentContextIssue, job, oneMillionAvailable } = args;
|
|
53
135
|
if (inputTokens >= 185_000)
|