clementine-agent 1.18.34 → 1.18.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +38 -7
- package/dist/agent/fanout-policy.d.ts +59 -0
- package/dist/agent/fanout-policy.js +133 -0
- package/dist/agent/toolsets.d.ts +7 -1
- package/dist/agent/toolsets.js +15 -1
- package/dist/cli/dashboard.js +9 -0
- package/dist/gateway/agent-heartbeat-scheduler.js +17 -2
- package/dist/gateway/cron-scheduler.js +29 -1
- package/dist/gateway/long-task-preflight.d.ts +9 -0
- package/dist/gateway/long-task-preflight.js +82 -0
- package/package.json +1 -1
package/dist/agent/assistant.js
CHANGED
|
@@ -33,7 +33,7 @@ import { searchSkills as searchSkillsSync } from './skill-extractor.js';
|
|
|
33
33
|
import { classifyIntent, getStrategyGuidance } from './intent-classifier.js';
|
|
34
34
|
import { getEventLog } from './session-event-log.js';
|
|
35
35
|
import { applyServiceDedup, routeToolSurface, TOOL_SURFACE_HARD_LIMIT, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
|
|
36
|
-
import { isRestrictedToolset, toolsetAllowsLocalWrites } from './toolsets.js';
|
|
36
|
+
import { isRestrictedToolset, toolsetAllowsLocalWrites, toolsetDisablesAllTools } from './toolsets.js';
|
|
37
37
|
import { looksLikeApprovalPrompt } from './local-turn.js';
|
|
38
38
|
import { decideTurn } from './turn-policy.js';
|
|
39
39
|
import { loadClementineJson } from '../config/clementine-json.js';
|
|
@@ -2011,7 +2011,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
2011
2011
|
async buildOptions(opts = {}) {
|
|
2012
2012
|
const { isHeartbeat = false, cronTier = null, maxTurns = null, model = null, enableTeams = true, retrievalContext = '', profile = null, sessionKey = null, streaming = false, isPlanStep = false, isUnleashed = false, sourceOverride, disableAllTools = false, verboseLevel, abortController, effort, maxBudgetUsd, toolScopeText, thinking, outputFormat, stallGuard, intentClassification, turnPolicy, contextRoutingText, toolset = 'auto', } = opts;
|
|
2013
2013
|
const isCron = cronTier !== null;
|
|
2014
|
-
const toolsDisabledForCall = disableAllTools
|
|
2014
|
+
const toolsDisabledForCall = disableAllTools
|
|
2015
|
+
|| (isHeartbeat && !isCron)
|
|
2016
|
+
|| toolsetDisablesAllTools(toolset);
|
|
2015
2017
|
const promptScopeText = toolScopeText ?? '';
|
|
2016
2018
|
const profileScopeText = [profile?.description, profile?.systemPromptBody]
|
|
2017
2019
|
.filter(Boolean)
|
|
@@ -5390,7 +5392,24 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
5390
5392
|
}
|
|
5391
5393
|
}
|
|
5392
5394
|
catch { /* non-fatal — run without skills */ }
|
|
5395
|
+
// ── Sub-agent fan-out directive (Vision 2) ──────────────────────
|
|
5396
|
+
// Detect multi-item / broad-scope signals in the job spec and
|
|
5397
|
+
// prepend a hard-line fan-out mandate when found. This is what
|
|
5398
|
+
// keeps the parent context clean on long jobs: each slice of work
|
|
5399
|
+
// runs in an Agent sub-agent (its own context window, big tool
|
|
5400
|
+
// responses contained), and the parent only sees compact summaries.
|
|
5401
|
+
const { buildAlwaysOnParallelizationHint, buildFanoutDirectiveForText } = await import('./fanout-policy.js');
|
|
5402
|
+
const fanoutScope = `${jobName}\n${jobPrompt}\n${cronProfile?.description ?? ''}\n${cronProfile?.systemPromptBody ?? ''}`;
|
|
5403
|
+
const { directive: fanoutDirective, report: fanoutReport } = buildFanoutDirectiveForText(fanoutScope);
|
|
5404
|
+
if (fanoutReport.needsFanout) {
|
|
5405
|
+
logger.info({
|
|
5406
|
+
job: jobName,
|
|
5407
|
+
signals: fanoutReport.signals.map(s => s.pattern),
|
|
5408
|
+
}, 'Fanout policy: directive injected for cron job');
|
|
5409
|
+
}
|
|
5393
5410
|
const prompt = `[Scheduled task: ${jobName}]\n\n` +
|
|
5411
|
+
(fanoutDirective ? fanoutDirective + '\n\n' : '') +
|
|
5412
|
+
buildAlwaysOnParallelizationHint() + '\n\n' +
|
|
5394
5413
|
progressContext +
|
|
5395
5414
|
goalContext +
|
|
5396
5415
|
skillContext +
|
|
@@ -5782,22 +5801,30 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
5782
5801
|
}
|
|
5783
5802
|
let prompt;
|
|
5784
5803
|
if (phase === 1) {
|
|
5804
|
+
const { buildAlwaysOnParallelizationHint, buildFanoutDirectiveForText } = await import('./fanout-policy.js');
|
|
5805
|
+
const unleashedFanoutScope = `${jobName}\n${jobPrompt}\n${unleashedProfile?.description ?? ''}\n${unleashedProfile?.systemPromptBody ?? ''}`;
|
|
5806
|
+
const { directive: unleashedFanoutDirective, report: unleashedFanoutReport } = buildFanoutDirectiveForText(unleashedFanoutScope);
|
|
5807
|
+
if (unleashedFanoutReport.needsFanout) {
|
|
5808
|
+
logger.info({
|
|
5809
|
+
job: jobName,
|
|
5810
|
+
phase,
|
|
5811
|
+
signals: unleashedFanoutReport.signals.map(s => s.pattern),
|
|
5812
|
+
}, 'Fanout policy: directive injected for unleashed phase 1');
|
|
5813
|
+
}
|
|
5785
5814
|
prompt =
|
|
5786
5815
|
`[UNLEASHED TASK: ${jobName} — Phase ${phase} — ${timestamp}]\n\n` +
|
|
5787
5816
|
`You are running in unleashed mode — a long-running autonomous task.\n` +
|
|
5788
5817
|
`Time remaining: ${remainingHours} hours. You have ${turnsPerPhase} turns per phase.\n` +
|
|
5789
5818
|
`After each phase completes, your session will be resumed with fresh context.\n\n` +
|
|
5819
|
+
(unleashedFanoutDirective ? unleashedFanoutDirective + '\n\n' : '') +
|
|
5820
|
+
buildAlwaysOnParallelizationHint() + '\n\n' +
|
|
5790
5821
|
`TASK:\n${jobPrompt}\n\n` +
|
|
5791
5822
|
unleashedSkillContext +
|
|
5792
5823
|
`${unleashedContextSafety}\n\n` +
|
|
5793
5824
|
`IMPORTANT:\n` +
|
|
5794
5825
|
`- Work methodically through the task in phases\n` +
|
|
5795
5826
|
`- At the end of this phase, output a STATUS SUMMARY of what you accomplished and what remains\n` +
|
|
5796
|
-
`- Save important intermediate results to files so they persist across phases
|
|
5797
|
-
`PARALLELIZATION: When processing multiple items (prospects, accounts, emails, analyses), ` +
|
|
5798
|
-
`use the Agent tool to spawn sub-agents that work in parallel. For example, if you need to ` +
|
|
5799
|
-
`research 10 prospects, spawn 3-5 sub-agents that each handle a batch — don't process them ` +
|
|
5800
|
-
`one at a time. Each sub-agent should receive specific items and return structured results.`;
|
|
5827
|
+
`- Save important intermediate results to files so they persist across phases`;
|
|
5801
5828
|
}
|
|
5802
5829
|
else {
|
|
5803
5830
|
// Phase 2+ — inject structured checkpoint from previous phase if available
|
|
@@ -5821,6 +5848,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
5821
5848
|
}
|
|
5822
5849
|
}
|
|
5823
5850
|
catch { /* fall back to no checkpoint */ }
|
|
5851
|
+
const { buildAlwaysOnParallelizationHint: hintFn } = await import('./fanout-policy.js');
|
|
5852
|
+
const phaseParallelHint = hintFn();
|
|
5824
5853
|
if (sessionId) {
|
|
5825
5854
|
// Resuming existing session — agent has conversation history + structured checkpoint
|
|
5826
5855
|
prompt =
|
|
@@ -5829,6 +5858,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
5829
5858
|
`Time remaining: ${remainingHours} hours. You have ${turnsPerPhase} turns this phase.\n` +
|
|
5830
5859
|
checkpointContext +
|
|
5831
5860
|
`\n${unleashedContextSafety}\n` +
|
|
5861
|
+
`\n${phaseParallelHint}\n` +
|
|
5832
5862
|
`\nContinue working on the task. Pick up where you left off.\n` +
|
|
5833
5863
|
`If the task is COMPLETE, output "TASK_COMPLETE:" followed by a final summary.\n\n` +
|
|
5834
5864
|
`IMPORTANT: Output a STATUS SUMMARY at the end of this phase.`;
|
|
@@ -5843,6 +5873,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
5843
5873
|
`TASK:\n${jobPrompt}\n` +
|
|
5844
5874
|
checkpointContext +
|
|
5845
5875
|
`\n${unleashedContextSafety}\n` +
|
|
5876
|
+
`\n${phaseParallelHint}\n` +
|
|
5846
5877
|
`\nCheck any files or progress from prior phases, then continue the work.\n` +
|
|
5847
5878
|
`If the task is COMPLETE, output "TASK_COMPLETE:" followed by a final summary.\n\n` +
|
|
5848
5879
|
`IMPORTANT: Output a STATUS SUMMARY at the end of this phase.`;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sub-agent fan-out policy for autonomous tasks.
|
|
3
|
+
*
|
|
4
|
+
* Why: even with a small tool surface, a single agent context can fill
|
|
5
|
+
* within a few turns when tool responses are large (Outlook list dumps,
|
|
6
|
+
* web search results, file reads, multi-prospect research). The SDK's
|
|
7
|
+
* autocompact then has nothing to compact and aborts with
|
|
8
|
+
* `rapid_refill_breaker`. The fix matching how Claude Code is designed:
|
|
9
|
+
* spawn sub-agents that each handle a slice of work in their own
|
|
10
|
+
* isolated context and return only a compact summary back to the parent.
|
|
11
|
+
*
|
|
12
|
+
* The Agent tool already exists in the SDK. The problem is timing —
|
|
13
|
+
* agents tend to discover the need for fan-out only after thrashing.
|
|
14
|
+
* This module front-loads the directive: scan the task description for
|
|
15
|
+
* signals that fan-out will be needed, and inject a strong, explicit
|
|
16
|
+
* mandate at the top of the prompt.
|
|
17
|
+
*
|
|
18
|
+
* Two outputs:
|
|
19
|
+
* - buildAlwaysOnParallelizationHint()
|
|
20
|
+
* Short reminder injected into every autonomous prompt. Cheap.
|
|
21
|
+
* - buildFanoutDirective(detectFanoutSignals(text).signals)
|
|
22
|
+
* Stronger, explicit fan-out contract. Only injected when signals
|
|
23
|
+
* indicate the task is genuinely multi-item or broad-scope.
|
|
24
|
+
*/
|
|
25
|
+
export interface FanoutSignal {
|
|
26
|
+
/** Why fan-out matters for this task. Surfaced in the directive. */
|
|
27
|
+
reason: string;
|
|
28
|
+
/** The pattern that matched. Used for telemetry. */
|
|
29
|
+
pattern: string;
|
|
30
|
+
}
|
|
31
|
+
export interface FanoutSignalReport {
|
|
32
|
+
needsFanout: boolean;
|
|
33
|
+
signals: FanoutSignal[];
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Detect patterns that strongly predict fan-out is needed. Conservative
|
|
37
|
+
* by design — false positives waste a few hundred tokens per turn; false
|
|
38
|
+
* negatives let the agent thrash. When in doubt, prefer a false positive.
|
|
39
|
+
*/
|
|
40
|
+
export declare function detectFanoutSignals(text: string): FanoutSignalReport;
|
|
41
|
+
/**
|
|
42
|
+
* Always-on parallelization reminder. Short, designed to ride along in
|
|
43
|
+
* every autonomous prompt without inflating token cost.
|
|
44
|
+
*/
|
|
45
|
+
export declare function buildAlwaysOnParallelizationHint(): string;
|
|
46
|
+
/**
|
|
47
|
+
* Strong fan-out contract injected when detector matches. Designed to be
|
|
48
|
+
* unambiguous: failing to fan out on these patterns *will* crash the run.
|
|
49
|
+
*/
|
|
50
|
+
export declare function buildFanoutDirective(signals: FanoutSignal[]): string;
|
|
51
|
+
/**
|
|
52
|
+
* Convenience: detect signals and return the directive string in one
|
|
53
|
+
* call. The returned `directive` is an empty string when no fan-out is indicated.
|
|
54
|
+
*/
|
|
55
|
+
export declare function buildFanoutDirectiveForText(text: string): {
|
|
56
|
+
directive: string;
|
|
57
|
+
report: FanoutSignalReport;
|
|
58
|
+
};
|
|
59
|
+
//# sourceMappingURL=fanout-policy.d.ts.map
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sub-agent fan-out policy for autonomous tasks.
|
|
3
|
+
*
|
|
4
|
+
* Why: even with a small tool surface, a single agent context can fill
|
|
5
|
+
* within a few turns when tool responses are large (Outlook list dumps,
|
|
6
|
+
* web search results, file reads, multi-prospect research). The SDK's
|
|
7
|
+
* autocompact then has nothing to compact and aborts with
|
|
8
|
+
* `rapid_refill_breaker`. The fix matching how Claude Code is designed:
|
|
9
|
+
* spawn sub-agents that each handle a slice of work in their own
|
|
10
|
+
* isolated context and return only a compact summary back to the parent.
|
|
11
|
+
*
|
|
12
|
+
* The Agent tool already exists in the SDK. The problem is timing —
|
|
13
|
+
* agents tend to discover the need for fan-out only after thrashing.
|
|
14
|
+
* This module front-loads the directive: scan the task description for
|
|
15
|
+
* signals that fan-out will be needed, and inject a strong, explicit
|
|
16
|
+
* mandate at the top of the prompt.
|
|
17
|
+
*
|
|
18
|
+
* Two outputs:
|
|
19
|
+
* - buildAlwaysOnParallelizationHint()
|
|
20
|
+
* Short reminder injected into every autonomous prompt. Cheap.
|
|
21
|
+
* - buildFanoutDirective(detectFanoutSignals(text).signals)
|
|
22
|
+
* Stronger, explicit fan-out contract. Only injected when signals
|
|
23
|
+
* indicate the task is genuinely multi-item or broad-scope.
|
|
24
|
+
*/
|
|
25
|
+
/**
 * Detection table consulted by detectFanoutSignals. Each entry pairs a
 * telemetry label (`pattern`) with the regex that triggers it and the
 * `reason` text surfaced in the directive. The regexes are matched against
 * lower-cased text and carry no `g`/`y` flag, so `.test()` keeps no state
 * between calls and the table can safely be built once and shared.
 */
const FANOUT_SIGNAL_CHECKS = Object.freeze([
  {
    pattern: 'multi_item_iteration',
    re: /\b(for each|for every|process each|iterate over|loop through|across all|across each)\b/,
    reason: 'task explicitly iterates over multiple items — process them in parallel sub-agents, not one at a time in this conversation',
  },
  {
    pattern: 'collective_with_quantifier',
    re: /\b(all|every|each)\s+(prospects?|accounts?|leads?|contacts?|customers?|deals?|emails?|messages?|threads?|files?|records?|rows?|tasks?|items?|results?|pages?|articles?|posts?|repos?|repositories|projects?)\b/,
    reason: 'task spans every item in a collection — fan out by batching items across sub-agents',
  },
  {
    pattern: 'numeric_collection',
    // Two or more digits, i.e. the "10+" mentioned in the reason text.
    re: /\b\d{2,}\s+(prospects?|accounts?|leads?|contacts?|customers?|deals?|emails?|messages?|threads?|files?|records?|rows?|items?|results?|pages?|articles?)\b/,
    reason: 'task names a numeric count of items (10+) — split into batches of 3-5 per sub-agent',
  },
  {
    pattern: 'comprehensive_research',
    // `deep[- ]dive` already covers the space-separated spelling.
    // NOTE(review): `brief` is matched as a whole word only, so "briefs" and
    // "briefing" do not trigger while the adjective "brief" does — confirm
    // whether that is intended.
    re: /\b(comprehensive|exhaustive|deep[- ]dive|full audit|competitive intel|market map|content intel|brief|landscape|panorama)\b/,
    reason: 'broad-scope research task — each step (news, search, brand, competitor, social) should run in its own sub-agent so the parent context stays clean',
  },
  {
    pattern: 'broad_scan_or_crawl',
    // `s` flag: the verb and the scope word may sit on different lines.
    re: /\b(scan all|crawl|backfill|inventory|migrate|refactor)\b.{0,80}\b(all|entire|every|full)\b/s,
    reason: 'broad scan/crawl — partition by directory, date range, or ID range and fan out per partition',
  },
  {
    pattern: 'long_history_pull',
    re: /\b(last|past)\s+\d+\s+(days|weeks|months)|\bsince\s+(yesterday|last week|last month)\b/,
    reason: 'pulling a history range that is likely to return many items — sub-agents per day/week chunk',
  },
  {
    pattern: 'multiple_steps',
    re: /\b(steps?|phases?|stages?)\s*[:0-9]/,
    reason: 'task has explicit multi-step structure — each step in its own sub-agent, parent only sees the step summaries',
  },
]);

/**
 * Detect patterns that strongly predict fan-out is needed.
 *
 * False positives waste a few hundred tokens per turn; false negatives let
 * the agent thrash. The patterns therefore deliberately lean toward
 * over-detection: when in doubt, prefer a false positive.
 *
 * @param {string} text - Task description to scan. Matching is
 *   case-insensitive. `null`/`undefined` is treated as empty text instead of
 *   throwing; any other non-string value is stringified first.
 * @returns {{ needsFanout: boolean, signals: Array<{ pattern: string, reason: string }> }}
 *   One signal per matching check, in table order; `needsFanout` is true
 *   when at least one check matched.
 */
export function detectFanoutSignals(text) {
  const haystack = text == null ? '' : String(text).toLowerCase();
  const signals = [];
  for (const { pattern, re, reason } of FANOUT_SIGNAL_CHECKS) {
    if (re.test(haystack)) {
      signals.push({ pattern, reason });
    }
  }
  return {
    needsFanout: signals.length > 0,
    signals,
  };
}
|
|
80
|
+
/**
|
|
81
|
+
* Always-on parallelization reminder. Short, designed to ride along in
|
|
82
|
+
* every autonomous prompt without inflating token cost.
|
|
83
|
+
*/
|
|
84
|
+
export function buildAlwaysOnParallelizationHint() {
  // Three newline-separated lines: a markdown heading, the batching rule,
  // and the cost/model guidance.
  const heading = '## Sub-agent fan-out';
  const batchingRule = 'When you process multiple items, spawn ONE Agent sub-agent per batch of 3–5 items. Sub-agents return ONE-LINE summaries (no raw tool output). Do not iterate sequentially in this conversation — that fills your context and aborts the run.';
  const costGuidance = 'Cost: pass `model: "haiku"` to Agent for routine extraction, summarization, or per-item lookups. Use Sonnet only when the sub-agent must reason across many sources or write something durable.';
  return `${heading}\n${batchingRule}\n${costGuidance}`;
}
|
|
91
|
+
/**
|
|
92
|
+
* Strong fan-out contract injected when detector matches. Designed to be
|
|
93
|
+
* unambiguous: failing to fan out on these patterns *will* crash the run.
|
|
94
|
+
*/
|
|
95
|
+
/**
 * Render the mandatory fan-out directive for a set of detected signals.
 *
 * @param {Array<{ reason: string, pattern: string }>} signals - Detected
 *   signals; each `reason` becomes one numbered line in the directive.
 * @returns {string} Markdown directive text, or '' when there is nothing to
 *   report. A missing or non-array `signals` is treated like an empty list
 *   rather than throwing.
 */
export function buildFanoutDirective(signals) {
  if (!Array.isArray(signals) || signals.length === 0) {
    return '';
  }
  // Numbered list (1-based) of why fan-out was triggered.
  const reasonLines = signals
    .map((signal, index) => `${index + 1}. ${signal.reason}`)
    .join('\n');
  return [
    '## Sub-agent fan-out is MANDATORY for this task',
    '',
    'Preflight detected patterns that will fill the context window if you run them sequentially in this conversation:',
    '',
    reasonLines,
    '',
    '### Required pattern',
    '',
    'Use the `Agent` tool to spawn parallel sub-agents. Each sub-agent runs in its own isolated context, so big tool responses live and die there — your context only sees the summary.',
    '',
    '- **Batch size**: 3–5 items per sub-agent (or one slice of work per sub-agent for research tasks)',
    '- **Sub-agent model**: pass `model: "haiku"` to the Agent tool by default — sub-agents that just extract fields, summarize a single email, or pull a single record do not need Sonnet. Reserve Sonnet for sub-agents that must reason across multiple sources or write something durable.',
    '- **Sub-agent prompt MUST include**: the narrow task, the exact return format (e.g. `Return ONE LINE: <id> | <status> | <next-action>`), and an explicit "do not include raw tool output" directive',
    '- **Parent context keeps**: only the sub-agent return strings, not their tool transcripts',
    '',
    'If you anticipate a single tool call returning more than ~5 KB of text (full email lists, web search result pages, large database queries, file dumps), wrap THAT call in an Agent invocation too. The sub-agent runs the tool, extracts only the fields you need, and returns those.',
    '',
    'Failing to fan out on this task will cause the SDK to abort with `rapid_refill_breaker` and the run will be lost.',
  ].join('\n');
}
|
|
122
|
+
/**
|
|
123
|
+
* Convenience: detect signals and return the directive string in one
|
|
124
|
+
* call. The returned `directive` is an empty string when no fan-out is indicated.
|
|
125
|
+
*/
|
|
126
|
+
export function buildFanoutDirectiveForText(text) {
  // Run detection first; the directive is rendered from the signals it found.
  const detection = detectFanoutSignals(text);
  const directive = buildFanoutDirective(detection.signals);
  // Callers get both the prompt text and the raw report.
  return { directive, report: detection };
}
|
|
133
|
+
//# sourceMappingURL=fanout-policy.js.map
|
package/dist/agent/toolsets.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type ToolsetName = 'auto' | 'safe' | 'diagnostic' | 'communications' | 'memory' | 'full';
|
|
1
|
+
export type ToolsetName = 'auto' | 'safe' | 'diagnostic' | 'communications' | 'memory' | 'none' | 'full';
|
|
2
2
|
export interface ToolsetPreset {
|
|
3
3
|
name: ToolsetName;
|
|
4
4
|
label: string;
|
|
@@ -11,4 +11,10 @@ export declare function getToolsetPreset(name: ToolsetName): ToolsetPreset;
|
|
|
11
11
|
export declare function formatToolsetChoices(): string;
|
|
12
12
|
export declare function isRestrictedToolset(name: ToolsetName): boolean;
|
|
13
13
|
export declare function toolsetAllowsLocalWrites(name: ToolsetName): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* "none" toolset: not just restricted, but actively disables ALL tools
|
|
16
|
+
* including the core Clementine MCP server. Used by builder/JSON-gen
|
|
17
|
+
* chats where tool schemas in the system prompt are pure cost overhead.
|
|
18
|
+
*/
|
|
19
|
+
export declare function toolsetDisablesAllTools(name: ToolsetName): boolean;
|
|
14
20
|
//# sourceMappingURL=toolsets.d.ts.map
|
package/dist/agent/toolsets.js
CHANGED
|
@@ -29,6 +29,12 @@ export const TOOLSET_PRESETS = [
|
|
|
29
29
|
description: 'Memory, transcript, and relationship tools only unless explicitly changed.',
|
|
30
30
|
directive: 'Toolset memory: use memory_read, memory_search, memory_recall, transcript_search, working_memory, and user_model. Avoid external integrations and local shell/file writes.',
|
|
31
31
|
},
|
|
32
|
+
{
|
|
33
|
+
name: 'none',
|
|
34
|
+
label: 'None',
|
|
35
|
+
description: 'No tools at all — pure-LLM conversation. Used by builders and JSON-generating chats where tool schemas are dead weight in the system prompt.',
|
|
36
|
+
directive: 'Toolset none: do not call any tools. Respond from the prompt context only. Generate JSON, summaries, or text directly.',
|
|
37
|
+
},
|
|
32
38
|
{
|
|
33
39
|
name: 'full',
|
|
34
40
|
label: 'Full',
|
|
@@ -60,9 +66,17 @@ export function formatToolsetChoices() {
|
|
|
60
66
|
.join('\n');
|
|
61
67
|
}
|
|
62
68
|
export function isRestrictedToolset(name) {
|
|
63
|
-
return name === 'safe' || name === 'diagnostic' || name === 'memory';
|
|
69
|
+
return name === 'safe' || name === 'diagnostic' || name === 'memory' || name === 'none';
|
|
64
70
|
}
|
|
65
71
|
export function toolsetAllowsLocalWrites(name) {
  // Only the 'auto' and 'full' presets report true; every other value
  // (including unknown names) reports false.
  switch (name) {
    case 'auto':
    case 'full':
      return true;
    default:
      return false;
  }
}
|
|
74
|
+
/**
|
|
75
|
+
* "none" toolset: not just restricted, but actively disables ALL tools
|
|
76
|
+
* including the core Clementine MCP server. Used by builder/JSON-gen
|
|
77
|
+
* chats where tool schemas in the system prompt are pure cost overhead.
|
|
78
|
+
*/
|
|
79
|
+
export function toolsetDisablesAllTools(name) {
  // Exact, case-sensitive match on the single preset that turns every tool off.
  const allToolsOffPreset = 'none';
  return name === allToolsOffPreset;
}
|
|
68
82
|
//# sourceMappingURL=toolsets.js.map
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -8053,6 +8053,12 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
8053
8053
|
}
|
|
8054
8054
|
try {
|
|
8055
8055
|
const gateway = await getGateway();
|
|
8056
|
+
// Builder generates JSON artifacts — no tool calls. Pin the session
|
|
8057
|
+
// toolset to 'none' so buildOptions strips all MCP servers and tool
|
|
8058
|
+
// schemas from the system prompt. Without this, every tiny builder
|
|
8059
|
+
// turn writes 60–280 KB of cache_creation for tool schemas the
|
|
8060
|
+
// model never uses.
|
|
8061
|
+
gateway.setSessionToolset(sessionKey, 'none');
|
|
8056
8062
|
const response = await gateway.handleMessage(sessionKey, enrichedMessage);
|
|
8057
8063
|
// Parse any json-artifact blocks from the response
|
|
8058
8064
|
let artifact = null;
|
|
@@ -8180,6 +8186,9 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
8180
8186
|
try {
|
|
8181
8187
|
writeEvent('progress', { status: 'thinking…' });
|
|
8182
8188
|
const gateway = await getGateway();
|
|
8189
|
+
// Builder generates JSON artifacts — no tool calls. Pin to 'none'
|
|
8190
|
+
// toolset so the SDK system prompt drops the tool inventory.
|
|
8191
|
+
gateway.setSessionToolset(sessionKey, 'none');
|
|
8183
8192
|
let lastText = '';
|
|
8184
8193
|
const response = await gateway.handleMessage(sessionKey, enrichedMessage, async (text) => {
|
|
8185
8194
|
lastText = text ?? '';
|
|
@@ -171,7 +171,15 @@ export class AgentHeartbeatScheduler {
|
|
|
171
171
|
catch {
|
|
172
172
|
signals.latestGoalUpdate = '';
|
|
173
173
|
}
|
|
174
|
-
// 3. Latest cron run for any of this agent's crons (file mtime is enough)
|
|
174
|
+
// 3. Latest cron run for any of this agent's crons (file mtime is enough).
|
|
175
|
+
//
|
|
176
|
+
// INFO ONLY — kept in `signals` for the LLM prompt context, but NOT
|
|
177
|
+
// included in the fingerprint hash below. A cron firing is expected
|
|
178
|
+
// background activity, not a "wake up Sonnet" signal. Including it
|
|
179
|
+
// caused every cron run to bump the fingerprint → fire a $1+ Sonnet
|
|
180
|
+
// pass that just confirmed "yep, the cron ran, nothing else to do."
|
|
181
|
+
// Actionable wake-ups are pendingTasks growing and goal-state changes;
|
|
182
|
+
// those still trip the fingerprint below.
|
|
175
183
|
try {
|
|
176
184
|
const runsDir = path.join(this.baseDir, 'cron', 'runs');
|
|
177
185
|
let latestMs = 0;
|
|
@@ -195,8 +203,15 @@ export class AgentHeartbeatScheduler {
|
|
|
195
203
|
catch {
|
|
196
204
|
signals.latestCronRunMs = 0;
|
|
197
205
|
}
|
|
206
|
+
// Fingerprint only includes ACTIONABLE signals. latestCronRunMs is
|
|
207
|
+
// info-only and explicitly excluded.
|
|
208
|
+
const fingerprintSource = {
|
|
209
|
+
slug: signals.slug,
|
|
210
|
+
pendingTasks: signals.pendingTasks,
|
|
211
|
+
latestGoalUpdate: signals.latestGoalUpdate,
|
|
212
|
+
};
|
|
198
213
|
const fingerprint = createHash('sha1')
|
|
199
|
-
.update(JSON.stringify(
|
|
214
|
+
.update(JSON.stringify(fingerprintSource))
|
|
200
215
|
.digest('hex')
|
|
201
216
|
.slice(0, 16);
|
|
202
217
|
return { fingerprint, signals };
|
|
@@ -23,7 +23,7 @@ import { listBackgroundTasks, markDone as markBgTaskDone, markFailed as markBgTa
|
|
|
23
23
|
import { outcomeStatusFromGoalDisposition, recentDecisions, recordDecisionOutcome, } from '../agent/proactive-ledger.js';
|
|
24
24
|
import { formatCreditBlock, getBackgroundCreditBlock, isCreditBalanceError, markBackgroundCreditBlocked, } from './credit-guard.js';
|
|
25
25
|
import { isRunHealthFailure } from './job-health.js';
|
|
26
|
-
import { analyzeLongTaskPreflight, compactLongTaskPreflight, formatLongTaskPromptPrefix, } from './long-task-preflight.js';
|
|
26
|
+
import { analyzeLongTaskPreflight, compactLongTaskPreflight, shouldDowngradeUnleashed, formatLongTaskPromptPrefix, } from './long-task-preflight.js';
|
|
27
27
|
const logger = pino({ name: 'clementine.cron' });
|
|
28
28
|
/** Default timeout for standard cron jobs (10 minutes). */
|
|
29
29
|
const CRON_STANDARD_TIMEOUT_MS = 10 * 60 * 1000;
|
|
@@ -1061,6 +1061,34 @@ export class CronScheduler {
|
|
|
1061
1061
|
// Sonnet runs every job by default. Opus 1M is opt-in: set
|
|
1062
1062
|
// `model: claude-opus-4-7[1m]` in CRON.md per-job, or flip
|
|
1063
1063
|
// CLEMENTINE_1M_CONTEXT_MODE=on for global enable.
|
|
1064
|
+
// ── Auto-downgrade unleashed → standard ────────────────────────
|
|
1065
|
+
// CRON.md `mode: unleashed` is a CEILING, not a floor. If the
|
|
1066
|
+
// job's history shows it's a quiet probe that completes in 1
|
|
1067
|
+
// phase with __NOTHING__ or short output, the multi-phase
|
|
1068
|
+
// wrapper is wasteful overhead — each phase is a fresh SDK
|
|
1069
|
+
// query with full system prompt + tool schemas in cache_creation.
|
|
1070
|
+
// For a "did anything new come in?" cron firing every 2 hours,
|
|
1071
|
+
// that's 12+ unleashed runs/day at ~$1/each instead of standard
|
|
1072
|
+
// mode at ~$0.05/each.
|
|
1073
|
+
if (job.mode === 'unleashed') {
|
|
1074
|
+
const downgrade = shouldDowngradeUnleashed(this.runLog.readRecent(job.name, 5));
|
|
1075
|
+
if (downgrade.downgrade) {
|
|
1076
|
+
job = { ...job, mode: 'standard' };
|
|
1077
|
+
logger.info({
|
|
1078
|
+
job: job.name,
|
|
1079
|
+
reason: downgrade.reason,
|
|
1080
|
+
quietRatio: downgrade.quietRatio,
|
|
1081
|
+
avgDurationMs: downgrade.avgDurationMs,
|
|
1082
|
+
}, 'Cron mode downgraded unleashed → standard based on run history');
|
|
1083
|
+
this.logAutonomy('mode_downgrade', job, {
|
|
1084
|
+
from: 'unleashed',
|
|
1085
|
+
to: 'standard',
|
|
1086
|
+
reason: downgrade.reason,
|
|
1087
|
+
quietRatio: downgrade.quietRatio,
|
|
1088
|
+
avgDurationMs: downgrade.avgDurationMs,
|
|
1089
|
+
});
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1064
1092
|
let longTaskPreflight;
|
|
1065
1093
|
const preflight = analyzeLongTaskPreflight(job, jobPrompt, this.runLog.readRecent(job.name, 5));
|
|
1066
1094
|
if (preflight.risk !== 'normal') {
|
|
@@ -15,6 +15,15 @@ export interface LongTaskPreflightOptions {
|
|
|
15
15
|
opusModel?: string;
|
|
16
16
|
sonnetModel?: string;
|
|
17
17
|
}
|
|
18
|
+
export interface UnleashedDowngradeDecision {
|
|
19
|
+
downgrade: boolean;
|
|
20
|
+
reason: string;
|
|
21
|
+
/** Quiet ratio observed in the sample (for telemetry). */
|
|
22
|
+
quietRatio?: number;
|
|
23
|
+
/** Average duration of recent runs in ms (for telemetry). */
|
|
24
|
+
avgDurationMs?: number;
|
|
25
|
+
}
|
|
26
|
+
export declare function shouldDowngradeUnleashed(recentRuns: CronRunEntry[], now?: number): UnleashedDowngradeDecision;
|
|
18
27
|
export declare function analyzeLongTaskPreflight(job: CronJobDefinition, prompt: string, recentRuns?: CronRunEntry[], opts?: LongTaskPreflightOptions): LongTaskPreflightDecision;
|
|
19
28
|
export declare function formatLongTaskPromptPrefix(decision: LongTaskPreflightDecision): string;
|
|
20
29
|
export declare function compactLongTaskPreflight(decision: LongTaskPreflightDecision): LongTaskPreflightSnapshot;
|
|
@@ -48,6 +48,88 @@ function recentContextFailures(recentRuns, now = Date.now()) {
|
|
|
48
48
|
}
|
|
49
49
|
return [...new Set(reasons)];
|
|
50
50
|
}
|
|
51
|
+
// ── Auto-downgrade unleashed → standard for quiet/probe jobs ──────────
|
|
52
|
+
//
|
|
53
|
+
// `mode: unleashed` wraps a job in multi-phase machinery: each phase is a
|
|
54
|
+
// fresh SDK query with the full system prompt + tool schemas, and the
|
|
55
|
+
// orchestrator chains phases until TASK_COMPLETE or max-phases. That
|
|
56
|
+
// machinery is essential for genuinely-long tasks (sasha briefs, market
|
|
57
|
+
// outreach), but it's pure overhead on quiet probe jobs that finish in
|
|
58
|
+
// 1 phase with `__NOTHING__` or a short output.
|
|
59
|
+
//
|
|
60
|
+
// Detect that pattern from history and downgrade the next run to
|
|
61
|
+
// standard mode. Single SDK call, single cache write, fraction of the
|
|
62
|
+
// cost. The user's CRON.md `mode: unleashed` becomes a "ceiling" rather
|
|
63
|
+
// than a forced floor — actual mode chosen dynamically per-run.
|
|
64
|
+
//
|
|
65
|
+
// Conservative by design: requires 3+ prior runs of evidence, refuses
|
|
66
|
+
// to downgrade if any recent run hit context overflow (the unleashed
|
|
67
|
+
// wrapper might be actively saving us), and only triggers on jobs that
|
|
68
|
+
// either mostly return `__NOTHING__`/short output or always complete fast.
|
|
69
|
+
// Tuning knobs for shouldDowngradeUnleashed.
/** Only the first N entries of the supplied run history are examined. */
const UNLEASHED_DOWNGRADE_SAMPLE_SIZE = 5;
/** Minimum number of ok/error runs in the sample before any decision is made. */
const UNLEASHED_DOWNGRADE_MIN_HISTORY = 3;
/** Share of sampled runs that must be "quiet" to trigger a downgrade. */
const UNLEASHED_DOWNGRADE_QUIET_RATIO = 0.6;
/** Fast-completion path: every sampled run must be shorter than this. */
const UNLEASHED_DOWNGRADE_MAX_DURATION_MS = 90_000;
/** Fast-completion path: the mean duration must be shorter than this. */
const UNLEASHED_DOWNGRADE_AVG_DURATION_MS = 60_000;
/** An output preview shorter than this many characters counts as quiet. */
const UNLEASHED_DOWNGRADE_QUIET_PREVIEW_CHARS = 200;

/**
 * Decide from run history whether an `unleashed` job should run in standard
 * mode this time.
 *
 * Checks, in order:
 *  1. fewer than UNLEASHED_DOWNGRADE_MIN_HISTORY ok/error runs in the sample
 *     → no downgrade (`insufficient_history`);
 *  2. any sampled run inside the recent-failure window ended with
 *     `rapid_refill_breaker` or `prompt_too_long` → no downgrade;
 *  3. at least UNLEASHED_DOWNGRADE_QUIET_RATIO of the sample is quiet
 *     (`__NOTHING__` or a short non-empty preview) → downgrade;
 *  4. every sampled run has a positive duration, all are under the max and
 *     the mean is under the average threshold → downgrade;
 *  5. otherwise → no downgrade (`workload_warrants_unleashed`).
 *
 * @param {Array<object>} recentRuns - Run-log entries; reads `status`,
 *   `startedAt`, `terminalReason`, `outputPreview` and `durationMs`.
 *   A missing or non-array value is treated as empty history instead of
 *   throwing. Presumably ordered newest-first, since only the leading
 *   entries are sampled — verify against the run-log reader.
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds,
 *   used for the recent-failure window.
 * @returns {{ downgrade: boolean, reason: string, quietRatio?: number, avgDurationMs?: number }}
 */
export function shouldDowngradeUnleashed(recentRuns, now = Date.now()) {
  const history = Array.isArray(recentRuns) ? recentRuns : [];
  const sample = history
    .slice(0, UNLEASHED_DOWNGRADE_SAMPLE_SIZE)
    .filter((run) => run.status === 'ok' || run.status === 'error');
  if (sample.length < UNLEASHED_DOWNGRADE_MIN_HISTORY) {
    return { downgrade: false, reason: 'insufficient_history' };
  }

  // Refuse to downgrade if any recent run hit a context-window failure —
  // the multi-phase wrapper might be the only thing keeping the job from
  // thrashing on a single huge SDK query.
  // NOTE(review): runs whose status is neither 'ok' nor 'error' were already
  // filtered out above, so an overflow recorded under another status would
  // not be seen here — confirm which statuses the run log can emit.
  const cutoff = now - RECENT_CONTEXT_FAILURE_WINDOW_MS;
  const hadOverflow = sample.some((run) => {
    const startedMs = Date.parse(run.startedAt);
    if (!Number.isFinite(startedMs) || startedMs < cutoff) {
      return false;
    }
    return run.terminalReason === 'rapid_refill_breaker'
      || run.terminalReason === 'prompt_too_long';
  });
  if (hadOverflow) {
    return { downgrade: false, reason: 'recent_context_overflow_protect_unleashed' };
  }

  // Quiet pattern: most sampled runs returned __NOTHING__ or a short
  // output. An empty/missing preview is NOT counted as quiet.
  // NOTE(review): 'error' runs with a short preview count as quiet too —
  // confirm that failed runs should be evidence for downgrading.
  const quietCount = sample.filter((run) => {
    const preview = (run.outputPreview ?? '').trim();
    if (!preview) {
      return false;
    }
    if (/__nothing__/i.test(preview)) {
      return true;
    }
    return preview.length < UNLEASHED_DOWNGRADE_QUIET_PREVIEW_CHARS;
  }).length;
  const quietRatio = quietCount / sample.length;
  if (quietRatio >= UNLEASHED_DOWNGRADE_QUIET_RATIO) {
    return {
      downgrade: true,
      reason: `quiet_pattern_${Math.round(quietRatio * 100)}pct`,
      quietRatio,
    };
  }

  // Fast-completion pattern: only evaluated when every sampled run has a
  // positive recorded duration; all must be short and the mean shorter still.
  const durations = sample.map((run) => run.durationMs || 0).filter((ms) => ms > 0);
  if (durations.length === sample.length) {
    const avgDuration = durations.reduce((sum, ms) => sum + ms, 0) / durations.length;
    const allFast = durations.every((ms) => ms < UNLEASHED_DOWNGRADE_MAX_DURATION_MS);
    if (allFast && avgDuration < UNLEASHED_DOWNGRADE_AVG_DURATION_MS) {
      return {
        downgrade: true,
        reason: `fast_completion_avg_${Math.round(avgDuration / 1000)}s`,
        avgDurationMs: Math.round(avgDuration),
      };
    }
  }
  return { downgrade: false, reason: 'workload_warrants_unleashed' };
}
|
|
51
133
|
function classifyRisk(args) {
|
|
52
134
|
const { inputTokens, projectedTokens, signalCount, recentContextIssue, job, oneMillionAvailable } = args;
|
|
53
135
|
if (inputTokens >= 185_000)
|