clementine-agent 1.18.172 → 1.18.173
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/run-agent.js +52 -7
- package/dist/agent/run-skill.d.ts +7 -0
- package/dist/agent/run-skill.js +225 -19
- package/dist/agent/tool-call-dedup.d.ts +93 -0
- package/dist/agent/tool-call-dedup.js +168 -0
- package/dist/cli/dashboard.js +25 -1
- package/package.json +1 -1
package/dist/agent/run-agent.js
CHANGED
|
@@ -86,6 +86,7 @@ export function invalidateMcpStatusEntry(name) {
|
|
|
86
86
|
}
|
|
87
87
|
import { BASE_DIR, PKG_DIR, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY as CONFIG_ANTHROPIC_API_KEY, normalizeClaudeSdkOptionsForOneMillionContext, TOOL_OUTPUT_GUARD, } from '../config.js';
|
|
88
88
|
import { buildGuardHooks } from './tool-output-guard.js';
|
|
89
|
+
import { buildDedupHook } from './tool-call-dedup.js';
|
|
89
90
|
import { buildAgentMap } from './agent-definitions.js';
|
|
90
91
|
import { buildExecutionToolPolicy, } from './execution-policy.js';
|
|
91
92
|
const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
|
|
@@ -196,13 +197,20 @@ export async function runAgent(prompt, opts) {
|
|
|
196
197
|
? requestedBudget
|
|
197
198
|
: undefined;
|
|
198
199
|
const startedAt = Date.now();
|
|
199
|
-
// Build the AgentDefinition map.
|
|
200
|
-
//
|
|
201
|
-
|
|
200
|
+
// Build the AgentDefinition map.
|
|
201
|
+
// - Default: planner/researcher/cron-fixer + hired-agent profiles.
|
|
202
|
+
// - Caller-supplied agents (opts.agents) MERGE over the defaults rather
|
|
203
|
+
// than REPLACE them (1.18.173). `runSkill`'s auto-delegation path
|
|
204
|
+
// needs to inject a per-run `skill-worker` definition while keeping
|
|
205
|
+
// the planner/researcher/etc. available for deeper delegation.
|
|
206
|
+
// Tests that want a fully isolated map pass an explicit override
|
|
207
|
+
// via the `replaceAgents` option below.
|
|
208
|
+
const defaultAgents = buildAgentMap({
|
|
202
209
|
profileManager: opts.agentManager ?? undefined,
|
|
203
210
|
isAutonomous: source === 'cron' || source === 'heartbeat',
|
|
204
211
|
activeAgentSlug: opts.profile?.slug,
|
|
205
212
|
});
|
|
213
|
+
const agents = opts.agents ? { ...defaultAgents, ...opts.agents } : defaultAgents;
|
|
206
214
|
// Wrap prompt to direct Claude to a specific subagent when caller asks.
|
|
207
215
|
// Per SDK docs: explicit invocation = "Use the X agent to..."
|
|
208
216
|
const effectivePrompt = opts.forceSubagent && agents[opts.forceSubagent]
|
|
@@ -341,6 +349,34 @@ export async function runAgent(prompt, opts) {
|
|
|
341
349
|
},
|
|
342
350
|
})
|
|
343
351
|
: { hooks: {}, stats: { inspected: 0, compressed: 0, ceilingHits: 0, bytesShed: 0, compactions: 0 } };
|
|
352
|
+
// ── Tool-call dedup hook (1.18.173) ─────────────────────────────────
|
|
353
|
+
// Breaks the "re-fetch after compaction" loop that crashed the
|
|
354
|
+
// imessage-triage cron on 2026-05-11 (4× identical tool calls →
|
|
355
|
+
// SDK autocompact-thrashing abort). PreToolUse hook detects same
|
|
356
|
+
// (toolName, inputHash) within 60s: 2nd call gets a soft hint, 3rd+
|
|
357
|
+
// is denied so the model can't burn turns re-calling the same data.
|
|
358
|
+
// Defense-in-depth — the cleaner fix (delegating to a subagent so the
|
|
359
|
+
// parent never re-fetches in the first place) lives in run-skill.ts.
|
|
360
|
+
const dedup = buildDedupHook({
|
|
361
|
+
runId,
|
|
362
|
+
onDecision: (info) => {
|
|
363
|
+
if (info.decision === 'allow')
|
|
364
|
+
return;
|
|
365
|
+
writeEvent({
|
|
366
|
+
kind: 'error',
|
|
367
|
+
ts: new Date().toISOString(),
|
|
368
|
+
sessionId,
|
|
369
|
+
toolError: `_clementine_dedup:${info.decision} ${info.toolName} call#${info.callCount} @${info.sinceFirstMs}ms`,
|
|
370
|
+
});
|
|
371
|
+
},
|
|
372
|
+
});
|
|
373
|
+
// Merge hook maps from the two modules. SDK accepts arrays of
|
|
374
|
+
// HookCallbackMatcher per event; we concatenate.
|
|
375
|
+
const mergedHooks = { ...guard.hooks };
|
|
376
|
+
for (const [evt, matchers] of Object.entries(dedup.hooks)) {
|
|
377
|
+
const existing = mergedHooks[evt] ?? [];
|
|
378
|
+
mergedHooks[evt] = [...existing, ...matchers];
|
|
379
|
+
}
|
|
344
380
|
// Apply 1M-context env normalization (existing infra)
|
|
345
381
|
const sdkOptionsRaw = {
|
|
346
382
|
systemPrompt: profileAppend
|
|
@@ -380,10 +416,11 @@ export async function runAgent(prompt, opts) {
|
|
|
380
416
|
...(opts.additionalDirectories && opts.additionalDirectories.length > 0
|
|
381
417
|
? { additionalDirectories: opts.additionalDirectories }
|
|
382
418
|
: {}),
|
|
383
|
-
// 1.18.169 — install the tool-output guard hooks.
|
|
384
|
-
//
|
|
385
|
-
//
|
|
386
|
-
|
|
419
|
+
// 1.18.169 — install the tool-output guard hooks.
|
|
420
|
+
// 1.18.173 — merged with the tool-call dedup hooks (PreToolUse).
|
|
421
|
+
// SDK types accept `hooks` keyed by HookEvent; the empty object is
|
|
422
|
+
// a no-op when both guards are disabled.
|
|
423
|
+
...(Object.keys(mergedHooks).length > 0 ? { hooks: mergedHooks } : {}),
|
|
387
424
|
};
|
|
388
425
|
const sdkOptions = normalizeClaudeSdkOptionsForOneMillionContext(sdkOptionsRaw);
|
|
389
426
|
logger.info({
|
|
@@ -640,6 +677,14 @@ export async function runAgent(prompt, opts) {
|
|
|
640
677
|
compactions: guard.stats.compactions,
|
|
641
678
|
ceilingHits: guard.stats.ceilingHits,
|
|
642
679
|
} : undefined,
|
|
680
|
+
// 1.18.173 — tool-call dedup summary. Non-zero warned/blocked means
|
|
681
|
+
// the model tried to re-fetch identical data (typically a
|
|
682
|
+
// post-compaction refetch loop).
|
|
683
|
+
dedup: dedup.stats.inspected > 0 ? {
|
|
684
|
+
inspected: dedup.stats.inspected,
|
|
685
|
+
warned: dedup.stats.warned,
|
|
686
|
+
blocked: dedup.stats.blocked,
|
|
687
|
+
} : undefined,
|
|
643
688
|
}, 'runAgent: query complete');
|
|
644
689
|
// PRD §6 Phase 4e: subagent transcript backfill (Path C). The SDK persists
|
|
645
690
|
// every subagent's full message stream to ~/.claude/projects/<encoded-cwd>/
|
|
@@ -145,6 +145,13 @@ export declare function buildSkillPrompt(skill: Skill, inputs: Record<string, st
|
|
|
145
145
|
* After the SDK returns, `clementine.success.schema` (when set) is
|
|
146
146
|
* ajv-validated against the response.
|
|
147
147
|
*
|
|
148
|
+
* **Autonomous runs (1.18.173)**: When `source` is one of
|
|
149
|
+
* AUTONOMOUS_SOURCES, the skill runs through the auto-delegating
|
|
150
|
+
* wrapper: a thin parent dispatches to a `skill-worker` subagent which
|
|
151
|
+
* does all the work in its own context. Closes the
|
|
152
|
+
* "refetch-after-compaction" loop class permanently. Skills can opt out
|
|
153
|
+
* via frontmatter `clementine.execution.inline: true`.
|
|
154
|
+
*
|
|
148
155
|
* This function never throws — failures (skill not found, SDK error,
|
|
149
156
|
* timeout) are returned as `{ ok: false, error }`. The caller (chat,
|
|
150
157
|
* cron, sub-agent, MCP tool) decides how to surface that.
|
package/dist/agent/run-skill.js
CHANGED
|
@@ -28,6 +28,7 @@ import path from 'node:path';
|
|
|
28
28
|
import pino from 'pino';
|
|
29
29
|
import { getSkill } from './skill-store.js';
|
|
30
30
|
import { runAgent } from './run-agent.js';
|
|
31
|
+
import { MODELS } from '../config.js';
|
|
31
32
|
const logger = pino({ name: 'clementine.run-skill' });
|
|
32
33
|
// ── Mustache substitution ─────────────────────────────────────────────
|
|
33
34
|
/** Matches `{{var_name}}` with optional whitespace. var_name is
|
|
@@ -183,6 +184,133 @@ async function validateSkillOutput(output, schema) {
|
|
|
183
184
|
return { tried: true, pass: false, errors: [`schema compile error: ${err}`] };
|
|
184
185
|
}
|
|
185
186
|
}
|
|
187
|
+
// ── Autonomous delegation (1.18.173) ──────────────────────────────────
|
|
188
|
+
/**
 * Run sources that default to the auto-delegating wrapper.
 *
 * For these sources the parent agent hands the whole skill body to a
 * `skill-worker` subagent through the Agent tool. The intent is that
 * tool results accumulate in the worker's context rather than the
 * parent's, so the parent never needs mid-run compaction and cannot
 * fall into a post-compaction refetch loop.
 *
 * Interactive sources ('chat', and 'skill' invoked directly by a chat
 * user) are deliberately absent: a user is waiting on the output, and
 * the extra dispatch latency was judged a worse tradeoff than the small
 * compaction risk of a single conversational turn.
 */
const AUTONOMOUS_SOURCES = new Set(['cron', 'scheduled-skill', 'heartbeat', 'team-task']);
/**
 * Report whether a runSkill call should go through the auto-delegating
 * (subagent) wrapper.
 *
 * @param skill  Loaded skill; only `frontmatter.clementine.execution.inline`
 *               is read, and only when `source` is autonomous.
 * @param source Run source string (e.g. 'cron', 'chat').
 * @returns `true` when `source` is in AUTONOMOUS_SOURCES and the skill has
 *          not opted out with frontmatter `clementine.execution.inline: true`
 *          (strictly boolean `true`; any other value does not opt out).
 */
function shouldAutoDelegate(skill, source) {
    // The source check comes first so the skill object is never touched
    // for interactive runs.
    return AUTONOMOUS_SOURCES.has(source)
        && skill.frontmatter?.clementine?.execution?.inline !== true;
}
|
|
221
|
+
/**
 * Pick the model string for an autonomous (auto-delegated) run.
 *
 * Precedence:
 *   1. the caller's explicit model,
 *   2. the model declared by the skill,
 *   3. the configured Sonnet model with a `[1m]` (1M-context) suffix.
 *
 * The larger window is requested so the worker subagent has far more
 * headroom than the standard 200K window, making compaction (and the
 * "refetch-after-compact" loop) rare.
 *
 * NOTE(review): per the original author's notes, actual 1M routing is
 * gated by the user's plan and the suffix is stripped downstream by
 * normalizeClaudeSdkOptionsForOneMillionContext when unsupported. That
 * is not visible from this function; confirm in config.
 *
 * @param explicitModel Model passed by the caller, if any.
 * @param skillModel    Model declared in the skill frontmatter, if any.
 * @returns The chosen model string, or `undefined` when neither argument
 *          is set and no Sonnet model is configured.
 */
function resolveAutonomousModel(explicitModel, skillModel) {
    // Caller override first, then the skill's own declaration.
    const requested = explicitModel || skillModel;
    if (requested) {
        return requested;
    }
    const sonnet = MODELS.sonnet;
    if (!sonnet) {
        return undefined;
    }
    // Avoid doubling the suffix when the configured ID already carries it.
    return /\[1m\]/i.test(sonnet) ? sonnet : `${sonnet}[1m]`;
}
|
|
252
|
+
/**
 * Assemble the agent definition for the `skill-worker` subagent that
 * runs a skill in its own context window.
 *
 * The worker's prompt is a fixed preamble followed by the skill body
 * under a "## Procedure" heading. `description` is the routing blurb
 * shown to the parent; since the parent is force-routed to this worker
 * it mostly serves as transcript context.
 *
 * NOTE(review): `skill.body` is embedded verbatim; no `{{var}}` input
 * substitution happens in this function. Confirm autonomous callers do
 * not depend on skill inputs being substituted.
 *
 * @param skill          Loaded skill; reads `frontmatter.name` and `body`.
 * @param effectiveTools Tool allowlist handed to the worker as-is.
 * @param model          Optional model string; the `model` key is omitted
 *                       entirely when this is falsy.
 * @param workerMaxTurns Turn cap for the worker.
 * @returns Object with `description`, `prompt`, `tools`, optional `model`,
 *          `effort` (always 'medium') and `maxTurns`.
 */
function buildSkillWorkerAgent(skill, effectiveTools, model, workerMaxTurns) {
    const skillName = skill.frontmatter.name;
    const description = [
        `Executes the "${skillName}" scheduled skill end-to-end in an isolated context window. `,
        `Reads any data the skill needs, processes it, performs the skill's described delivery action `,
        `(e.g., sends a Discord/Slack notification), and returns a concise summary to the orchestrator.`,
    ].join('');
    const prompt = [
        `You are the worker subagent for the "${skillName}" scheduled skill.\n\n`,
        `Your job is to execute the procedure below from start to finish in a single subagent run. `,
        `You have your own isolated context window — do NOT save state for a parent agent; if the `,
        `procedure calls for sending a notification, YOU send it (you have the relevant tools).\n\n`,
        `Return a single concise final response describing what happened (e.g., "Sent Discord DM about `,
        `2 actionable items, ignored 8 spam"). Do not return raw tool output; do not narrate every step. `,
        `If nothing actionable was found and the procedure says exit silently, return "No action needed."\n\n`,
        `## Procedure\n\n${skill.body}`,
    ].join('');
    // A tier alias or a full model ID (optionally with a [1m] suffix) may
    // be passed through. NOTE(review): whether an unsupported [1m] suffix
    // is stripped for subagent definitions is not visible here; verify.
    const modelField = model ? { model } : {};
    return {
        description,
        prompt,
        tools: effectiveTools,
        ...modelField,
        effort: 'medium',
        maxTurns: workerMaxTurns,
    };
}
|
|
286
|
+
/**
 * Compose the prompt for the thin parent orchestrator.
 *
 * The parent's only job is to dispatch to `skill-worker` through the
 * Agent tool and relay what it returns. The prompt is kept short on
 * purpose so the parent's context stays far below any compaction
 * threshold. Caller context is appended untruncated, so the final size
 * is not bounded here.
 *
 * @param skill         Loaded skill; only `frontmatter.name` is read.
 * @param callerContext Optional free-form text. When it is non-empty
 *                      after trimming, it is appended (trimmed) under a
 *                      "## Caller context" heading.
 * @returns The prompt as a single newline-joined string.
 */
function buildOrchestratorPrompt(skill, callerContext) {
    const skillName = skill.frontmatter.name;
    const instructions = [
        `## Scheduled Skill Execution`,
        ``,
        `Dispatch the "${skillName}" skill to the \`skill-worker\` subagent via the Agent tool.`,
        `The worker has the skill body as its system prompt and the tools required to perform the procedure end-to-end (including any notification delivery).`,
        ``,
        `## Your job`,
        ``,
        `1. Call the Agent tool ONCE, dispatching to "skill-worker" with this brief: "Execute the ${skillName} procedure now."`,
        `2. Wait for its return.`,
        `3. Relay its summary as your final response — do not add commentary, do not re-do its work.`,
        ``,
        `Do NOT call any other tools directly. The worker handles all data access and delivery.`,
    ];
    // Whitespace-only context counts as absent.
    const trimmedContext = callerContext ? callerContext.trim() : '';
    const contextSection = trimmedContext
        ? ['', '## Caller context (forward this to the worker if relevant)', '', trimmedContext]
        : [];
    return [...instructions, ...contextSection].join('\n');
}
|
|
186
314
|
// ── The primitive ─────────────────────────────────────────────────────
|
|
187
315
|
/**
|
|
188
316
|
* Run a skill as a hard-allowlisted sub-call. Returns a structured result.
|
|
@@ -194,6 +322,13 @@ async function validateSkillOutput(output, schema) {
|
|
|
194
322
|
* After the SDK returns, `clementine.success.schema` (when set) is
|
|
195
323
|
* ajv-validated against the response.
|
|
196
324
|
*
|
|
325
|
+
* **Autonomous runs (1.18.173)**: When `source` is one of
|
|
326
|
+
* AUTONOMOUS_SOURCES, the skill runs through the auto-delegating
|
|
327
|
+
* wrapper: a thin parent dispatches to a `skill-worker` subagent which
|
|
328
|
+
* does all the work in its own context. Closes the
|
|
329
|
+
* "refetch-after-compaction" loop class permanently. Skills can opt out
|
|
330
|
+
* via frontmatter `clementine.execution.inline: true`.
|
|
331
|
+
*
|
|
197
332
|
* This function never throws — failures (skill not found, SDK error,
|
|
198
333
|
* timeout) are returned as `{ ok: false, error }`. The caller (chat,
|
|
199
334
|
* cron, sub-agent, MCP tool) decides how to surface that.
|
|
@@ -212,7 +347,17 @@ export async function runSkill(name, options = {}) {
|
|
|
212
347
|
}
|
|
213
348
|
const effectiveTools = computeSkillAllowlist(skill);
|
|
214
349
|
const hasExplicitToolScope = skillHasExplicitToolScope(skill);
|
|
215
|
-
const
|
|
350
|
+
const source = options.source ?? 'skill';
|
|
351
|
+
// 1.18.173: autonomous runs (cron, scheduled-skill, heartbeat,
|
|
352
|
+
// team-task) wrap the skill in a thin orchestrator that dispatches
|
|
353
|
+
// the entire procedure to a `skill-worker` subagent. The parent's
|
|
354
|
+
// context never grows past ~3KB regardless of how much data the
|
|
355
|
+
// skill reads, so post-compaction refetch loops are structurally
|
|
356
|
+
// impossible. See shouldAutoDelegate / buildSkillWorkerAgent above.
|
|
357
|
+
const autoDelegate = shouldAutoDelegate(skill, source);
|
|
358
|
+
const prompt = autoDelegate
|
|
359
|
+
? buildOrchestratorPrompt(skill, options.context)
|
|
360
|
+
: buildSkillPrompt(skill, options.inputs, options.context);
|
|
216
361
|
const limits = skill.frontmatter?.clementine?.limits;
|
|
217
362
|
const maxTurns = options.maxTurns ?? limits?.maxTurns;
|
|
218
363
|
const maxBudgetUsd = options.maxBudgetUsd ?? limits?.maxBudgetUsd;
|
|
@@ -225,6 +370,14 @@ export async function runSkill(name, options = {}) {
|
|
|
225
370
|
...(skill.layout === 'folder' ? [path.dirname(skill.filePath)] : []),
|
|
226
371
|
];
|
|
227
372
|
const mutatingSkill = effectiveTools.some((t) => t === 'Write' || t === 'Edit' || t === 'Bash' || /__(write|edit|update|create|delete|send|post|patch|set)/i.test(t));
|
|
373
|
+
// 1.18.173: resolve the effective model. Autonomous runs default to
|
|
374
|
+
// Sonnet [1m] (1M context window) so the worker subagent has 5× the
|
|
375
|
+
// room of a standard 200K-window model. resolveAutonomousModel honors
|
|
376
|
+
// explicit overrides + skill-declared limits.model first.
|
|
377
|
+
const skillModel = skill.frontmatter?.clementine?.limits?.model;
|
|
378
|
+
const effectiveModel = autoDelegate
|
|
379
|
+
? resolveAutonomousModel(options.model, skillModel)
|
|
380
|
+
: (options.model ?? skillModel);
|
|
228
381
|
logger.info({
|
|
229
382
|
skill: name,
|
|
230
383
|
tools: effectiveTools,
|
|
@@ -232,6 +385,9 @@ export async function runSkill(name, options = {}) {
|
|
|
232
385
|
maxBudgetUsd,
|
|
233
386
|
inputKeys: Object.keys(options.inputs ?? {}),
|
|
234
387
|
hasContext: !!options.context,
|
|
388
|
+
autoDelegate,
|
|
389
|
+
model: effectiveModel,
|
|
390
|
+
source,
|
|
235
391
|
}, 'runSkill: invoking');
|
|
236
392
|
let runResult;
|
|
237
393
|
try {
|
|
@@ -245,24 +401,74 @@ export async function runSkill(name, options = {}) {
|
|
|
245
401
|
].filter(Boolean).join('\n\n'),
|
|
246
402
|
profile: options.profile,
|
|
247
403
|
});
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
404
|
+
// ── Autonomous-delegation branch (1.18.173) ──────────────────────
|
|
405
|
+
// Parent: minimal allowedTools (Agent only) + forceSubagent to
|
|
406
|
+
// skill-worker. Worker: full tool surface + skill body as system
|
|
407
|
+
// prompt. Worker is the SDK AgentDefinition; the SDK wires its
|
|
408
|
+
// tools/model/prompt at query time.
|
|
409
|
+
let sdkOpts;
|
|
410
|
+
if (autoDelegate) {
|
|
411
|
+
// Worker gets enough turns to complete bulk work (skill author's
|
|
412
|
+
// maxTurns cap, or 30 as a safe default for triage-class work).
|
|
413
|
+
const workerMaxTurns = (typeof maxTurns === 'number' && maxTurns > 0) ? maxTurns : 30;
|
|
414
|
+
const workerDef = buildSkillWorkerAgent(skill, effectiveTools, effectiveModel, workerMaxTurns);
|
|
415
|
+
sdkOpts = {
|
|
416
|
+
sessionKey,
|
|
417
|
+
source,
|
|
418
|
+
// Parent's allowedTools: ONLY Agent (delegate-or-fail). Keeps
|
|
419
|
+
// the parent's context shape predictable and prevents it from
|
|
420
|
+
// doing data-heavy work itself even if the LLM disagrees.
|
|
421
|
+
allowedTools: ['Agent'],
|
|
422
|
+
// Force-routing: runAgent wraps the prompt with "Use the skill-worker
|
|
423
|
+
// agent to handle this request" so dispatch is the natural
|
|
424
|
+
// first action.
|
|
425
|
+
forceSubagent: 'skill-worker',
|
|
426
|
+
// Inject the skill-worker into the agents map. runAgent merges
|
|
427
|
+
// its `buildAgentMap()` defaults with whatever's passed via
|
|
428
|
+
// opts.agents — see run-agent.ts:362.
|
|
429
|
+
agents: { 'skill-worker': workerDef },
|
|
430
|
+
profile: options.profile,
|
|
431
|
+
agentManager: options.agentManager,
|
|
432
|
+
memoryStore: options.memoryStore,
|
|
433
|
+
cwd: options.projectWorkDir,
|
|
434
|
+
extraMcpServers: mcp.servers,
|
|
435
|
+
enableFileCheckpointing: mutatingSkill || Boolean(options.projectWorkDir),
|
|
436
|
+
// Parent uses the same model family so MCP server reuse is clean
|
|
437
|
+
// (the SDK keys some cache state by model). Parent turns are
|
|
438
|
+
// tightly capped: it should dispatch and relay in ≤3 turns.
|
|
439
|
+
...(effectiveModel ? { model: effectiveModel } : {}),
|
|
440
|
+
maxTurns: 5,
|
|
441
|
+
...(typeof maxBudgetUsd === 'number' ? { maxBudgetUsd } : {}),
|
|
442
|
+
...(additionalDirectories.length > 0 ? { additionalDirectories } : {}),
|
|
443
|
+
...(options.onText ? { onText: options.onText } : {}),
|
|
444
|
+
...(options.abortSignal ? { abortSignal: options.abortSignal } : {}),
|
|
445
|
+
};
|
|
446
|
+
}
|
|
447
|
+
else {
|
|
448
|
+
// ── Inline branch (interactive / opt-out skills) ────────────────
|
|
449
|
+
// Original 1.18.162 behavior — the SDK call runs the skill body
|
|
450
|
+
// directly as the main-agent prompt. Used for chat-invoked skills
|
|
451
|
+
// where the latency of a subagent dispatch is worse UX than the
|
|
452
|
+
// small compaction risk.
|
|
453
|
+
const allowedToolsForRun = hasExplicitToolScope ? effectiveTools : undefined;
|
|
454
|
+
sdkOpts = {
|
|
455
|
+
sessionKey,
|
|
456
|
+
source,
|
|
457
|
+
...(allowedToolsForRun ? { allowedTools: allowedToolsForRun } : {}),
|
|
458
|
+
profile: options.profile,
|
|
459
|
+
agentManager: options.agentManager,
|
|
460
|
+
memoryStore: options.memoryStore,
|
|
461
|
+
cwd: options.projectWorkDir,
|
|
462
|
+
extraMcpServers: mcp.servers,
|
|
463
|
+
enableFileCheckpointing: mutatingSkill || Boolean(options.projectWorkDir),
|
|
464
|
+
...(effectiveModel ? { model: effectiveModel } : {}),
|
|
465
|
+
...(typeof maxTurns === 'number' ? { maxTurns } : {}),
|
|
466
|
+
...(typeof maxBudgetUsd === 'number' ? { maxBudgetUsd } : {}),
|
|
467
|
+
...(additionalDirectories.length > 0 ? { additionalDirectories } : {}),
|
|
468
|
+
...(options.onText ? { onText: options.onText } : {}),
|
|
469
|
+
...(options.abortSignal ? { abortSignal: options.abortSignal } : {}),
|
|
470
|
+
};
|
|
471
|
+
}
|
|
266
472
|
runResult = await runAgent(prompt, sdkOpts);
|
|
267
473
|
}
|
|
268
474
|
catch (err) {
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tool-call-dedup — PreToolUse hook that detects same-call loops and
|
|
3
|
+
* nudges the model to stop re-fetching identical data.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.173)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* The Anthropic SDK's auto-compactor summarizes prior turns when context
|
|
8
|
+
* approaches the model's window. If the working data lived in those
|
|
9
|
+
* earlier turns, compaction loses it — and the model often responds by
|
|
10
|
+
* RE-CALLING the same tool with the same arguments to "re-load" the
|
|
11
|
+
* data. That refill triggers the next compaction, which loses the
|
|
12
|
+
* re-loaded data, which triggers another re-call, … and the SDK's
|
|
13
|
+
* thrashing detector aborts the run after 3 consecutive cycles.
|
|
14
|
+
*
|
|
15
|
+
* Real-world example (2026-05-11 imessage-triage 08:00 UTC, run
|
|
16
|
+
* 839a7d1a-…): four IDENTICAL calls to `get_unread_imessages({limit:20})`
|
|
17
|
+
* in 115 seconds, one after each compaction. The tool-output-guard from
|
|
18
|
+
* 1.18.169 didn't fire because each individual response was under the
|
|
19
|
+
* 30KB cap; the loop was structural, not size-based.
|
|
20
|
+
*
|
|
21
|
+
* What this hook does
|
|
22
|
+
* ───────────────────
|
|
23
|
+
* On every PreToolUse, hash `(toolName, JSON.stringify(input))` and look
|
|
24
|
+
* it up in a per-run cache (60s TTL by default).
|
|
25
|
+
* • count = 1 (first call): let it through, record.
|
|
26
|
+
* • count = 2 (second call within TTL): inject an `additionalContext`
|
|
27
|
+
* hint into the next turn saying "you already called this; the
|
|
28
|
+
* result hasn't changed; reuse it or change the inputs." Tool still
|
|
29
|
+
* executes (the model might have legitimate reasons to re-poll).
|
|
30
|
+
* • count = 3+ (third+ identical call): `permissionDecision: 'deny'`
|
|
31
|
+
* with a reason that directs the model to either change inputs or
|
|
32
|
+
* stop the loop. The model receives a denial result instead of new
|
|
33
|
+
* tool data — breaks the refetch-after-compact cycle.
|
|
34
|
+
*
|
|
35
|
+
* Aligned with Anthropic SDK best practices: PreToolUse + permission
|
|
36
|
+
* decisions are the documented mechanism for controlling tool execution
|
|
37
|
+
* mid-run. `sdk.d.ts:2002-2008` — `PreToolUseHookSpecificOutput` carries
|
|
38
|
+
* `permissionDecision` ('allow'/'deny'/'ask'/'defer') + reason +
|
|
39
|
+
* additionalContext for exactly this case.
|
|
40
|
+
*
|
|
41
|
+
* Failure mode
|
|
42
|
+
* ────────────
|
|
43
|
+
* Never throws. Hash errors, cache errors, anything — degrades to
|
|
44
|
+
* letting the call through. Telemetry must never block execution.
|
|
45
|
+
*/
|
|
46
|
+
import type { HookCallbackMatcher, HookEvent } from '@anthropic-ai/claude-agent-sdk';
|
|
47
|
+
export interface DedupHookOptions {
|
|
48
|
+
/** Stable run identifier — used to scope the cache per run. */
|
|
49
|
+
runId: string;
|
|
50
|
+
/** How long an identical call is considered "the same" (ms). */
|
|
51
|
+
ttlMs?: number;
|
|
52
|
+
/** Override the soft-warn threshold (default 2nd call). */
|
|
53
|
+
softWarnAt?: number;
|
|
54
|
+
/** Override the hard-block threshold (default 3rd call). */
|
|
55
|
+
hardBlockAt?: number;
|
|
56
|
+
/** Optional callback fired on every dedup decision. */
|
|
57
|
+
onDecision?: (info: {
|
|
58
|
+
toolName: string;
|
|
59
|
+
inputHash: string;
|
|
60
|
+
callCount: number;
|
|
61
|
+
decision: 'allow' | 'warn' | 'block';
|
|
62
|
+
sinceFirstMs: number;
|
|
63
|
+
}) => void;
|
|
64
|
+
}
|
|
65
|
+
export interface DedupRunStats {
|
|
66
|
+
/** Total PreToolUse invocations inspected. */
|
|
67
|
+
inspected: number;
|
|
68
|
+
/** Calls that were warned (let through with hint). */
|
|
69
|
+
warned: number;
|
|
70
|
+
/** Calls that were blocked outright. */
|
|
71
|
+
blocked: number;
|
|
72
|
+
}
|
|
73
|
+
export interface DedupHookHandles {
|
|
74
|
+
/** Hook map suitable for SDK `query({ options: { hooks } })`. */
|
|
75
|
+
hooks: Partial<Record<HookEvent, HookCallbackMatcher[]>>;
|
|
76
|
+
/** Aggregated telemetry — read after the run completes. */
|
|
77
|
+
stats: DedupRunStats;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Compute a stable hash of a tool call's input shape. JSON.stringify
|
|
81
|
+
* with a sorted-keys replacer so `{a:1,b:2}` and `{b:2,a:1}` collide
|
|
82
|
+
* (same semantic call), so differences in object key order alone don't
|
|
83
|
+
* spuriously evade the dedup.
|
|
84
|
+
*/
|
|
85
|
+
export declare function hashToolInput(input: unknown): string;
|
|
86
|
+
/**
|
|
87
|
+
* Build a PreToolUse dedup hook for a single runAgent invocation.
|
|
88
|
+
* Per-run cache (no cross-run state) — short-lived agentic runs don't
|
|
89
|
+
* need persistence and we don't want stale cache to deny legitimate
|
|
90
|
+
* post-restart re-polls.
|
|
91
|
+
*/
|
|
92
|
+
export declare function buildDedupHook(opts: DedupHookOptions): DedupHookHandles;
|
|
93
|
+
//# sourceMappingURL=tool-call-dedup.d.ts.map
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tool-call-dedup — PreToolUse hook that detects same-call loops and
|
|
3
|
+
* nudges the model to stop re-fetching identical data.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.173)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* The Anthropic SDK's auto-compactor summarizes prior turns when context
|
|
8
|
+
* approaches the model's window. If the working data lived in those
|
|
9
|
+
* earlier turns, compaction loses it — and the model often responds by
|
|
10
|
+
* RE-CALLING the same tool with the same arguments to "re-load" the
|
|
11
|
+
* data. That refill triggers the next compaction, which loses the
|
|
12
|
+
* re-loaded data, which triggers another re-call, … and the SDK's
|
|
13
|
+
* thrashing detector aborts the run after 3 consecutive cycles.
|
|
14
|
+
*
|
|
15
|
+
* Real-world example (2026-05-11 imessage-triage 08:00 UTC, run
|
|
16
|
+
* 839a7d1a-…): four IDENTICAL calls to `get_unread_imessages({limit:20})`
|
|
17
|
+
* in 115 seconds, one after each compaction. The tool-output-guard from
|
|
18
|
+
* 1.18.169 didn't fire because each individual response was under the
|
|
19
|
+
* 30KB cap; the loop was structural, not size-based.
|
|
20
|
+
*
|
|
21
|
+
* What this hook does
|
|
22
|
+
* ───────────────────
|
|
23
|
+
* On every PreToolUse, hash `(toolName, JSON.stringify(input))` and look
|
|
24
|
+
* it up in a per-run cache (60s TTL by default).
|
|
25
|
+
* • count = 1 (first call): let it through, record.
|
|
26
|
+
* • count = 2 (second call within TTL): inject an `additionalContext`
|
|
27
|
+
* hint into the next turn saying "you already called this; the
|
|
28
|
+
* result hasn't changed; reuse it or change the inputs." Tool still
|
|
29
|
+
* executes (the model might have legitimate reasons to re-poll).
|
|
30
|
+
* • count = 3+ (third+ identical call): `permissionDecision: 'deny'`
|
|
31
|
+
* with a reason that directs the model to either change inputs or
|
|
32
|
+
* stop the loop. The model receives a denial result instead of new
|
|
33
|
+
* tool data — breaks the refetch-after-compact cycle.
|
|
34
|
+
*
|
|
35
|
+
* Aligned with Anthropic SDK best practices: PreToolUse + permission
|
|
36
|
+
* decisions are the documented mechanism for controlling tool execution
|
|
37
|
+
* mid-run. `sdk.d.ts:2002-2008` — `PreToolUseHookSpecificOutput` carries
|
|
38
|
+
* `permissionDecision` ('allow'/'deny'/'ask'/'defer') + reason +
|
|
39
|
+
* additionalContext for exactly this case.
|
|
40
|
+
*
|
|
41
|
+
* Failure mode
|
|
42
|
+
* ────────────
|
|
43
|
+
* Never throws. Hash errors, cache errors, anything — degrades to
|
|
44
|
+
* letting the call through. Telemetry must never block execution.
|
|
45
|
+
*/
|
|
46
|
+
import { createHash } from 'node:crypto';
|
|
47
|
+
import pino from 'pino';
|
|
48
|
+
const logger = pino({ name: 'clementine.tool-call-dedup' });
|
|
49
|
+
// ── Tunables ──────────────────────────────────────────────────────────
|
|
50
|
+
/** Within this window (ms), identical calls are considered "the same". */
|
|
51
|
+
const DEFAULT_TTL_MS = 60_000;
|
|
52
|
+
/** Second identical call within TTL → soft warn (let it through with a hint). */
|
|
53
|
+
const SOFT_WARN_AT = 2;
|
|
54
|
+
/** Third+ identical call within TTL → hard block (deny). */
|
|
55
|
+
const HARD_BLOCK_AT = 3;
|
|
56
|
+
// ── Hashing ───────────────────────────────────────────────────────────
|
|
57
|
+
/**
 * Compute a short, stable fingerprint of a tool call's input.
 *
 * The input is serialized with `JSON.stringify` using the
 * `replaceForStableHash` replacer, so objects that differ only in key
 * order (`{a:1,b:2}` vs `{b:2,a:1}`) produce the same fingerprint and
 * cannot evade the dedup by reordering keys.
 *
 * Never throws: any serialization or hashing error yields the literal
 * string `'unhashable'`.
 *
 * @param {unknown} input - The tool input to fingerprint.
 * @returns {string} The first 16 hex characters of the SHA-256 digest of
 *   the serialized input, or `'unhashable'` if it could not be serialized.
 */
export function hashToolInput(input) {
    try {
        // JSON.stringify returns `undefined` (not a string) when the top-level
        // value is undefined, a function, or a symbol. Feeding that to
        // update() would throw and mislabel a perfectly well-defined input as
        // 'unhashable', so substitute a sentinel. No real JSON text can equal
        // the bare word `undefined`, so the sentinel cannot collide.
        const stable = JSON.stringify(input, replaceForStableHash) ?? 'undefined';
        return createHash('sha256').update(stable).digest('hex').slice(0, 16);
    }
    catch {
        // Deliberate best-effort: cycles, BigInt, throwing toJSON, etc.
        return 'unhashable';
    }
}
|
|
72
|
+
/**
 * `JSON.stringify` replacer that re-emits every non-array object with its
 * own enumerable keys in sorted order, so serialization does not depend on
 * key insertion order. Arrays, primitives and `null` pass through unchanged.
 *
 * @param {string} _key - Property name being serialized (unused).
 * @param {unknown} value - The value about to be serialized.
 * @returns {unknown} A key-sorted shallow copy for objects, otherwise `value`.
 */
function replaceForStableHash(_key, value) {
    if (value && typeof value === 'object' && !Array.isArray(value)) {
        // Object.fromEntries defines own data properties. A plain
        // `sorted[k] = value[k]` with k === '__proto__' would instead set the
        // copy's prototype and silently drop the key, making inputs that
        // differ only under "__proto__" (JSON.parse does create such own
        // keys) serialize identically.
        return Object.fromEntries(Object.keys(value).sort().map((k) => [k, value[k]]));
    }
    return value;
}
|
|
82
|
+
// ── Hook builder ──────────────────────────────────────────────────────
|
|
83
|
+
/**
 * Build a PreToolUse dedup hook for a single runAgent invocation.
 *
 * The returned hook keeps a cache private to this call of `buildDedupHook`
 * (no cross-run state), keyed by `toolName:inputHash`. For each key it
 * counts calls whose gap since the previous identical call is within the TTL:
 *   - first call: allowed, returns `{}`;
 *   - count >= softWarnAt (and below hardBlockAt): allowed, but an
 *     `additionalContext` hint is returned;
 *   - count >= hardBlockAt: returns `permissionDecision: 'deny'` with a reason.
 * An entry whose last sighting is older than the TTL is dropped and the
 * count restarts.
 *
 * Fail-open behavior: inputs that `hashToolInput` reports as `'unhashable'`
 * are never tracked (they would otherwise all share one cache key), and an
 * exception thrown by `opts.onDecision` is logged and ignored rather than
 * propagated out of the hook.
 *
 * @param {object} [opts] - Optional settings.
 * @param {number} [opts.ttlMs] - Repeat window in ms (default `DEFAULT_TTL_MS`).
 * @param {number} [opts.softWarnAt] - Count that triggers the hint (default `SOFT_WARN_AT`).
 * @param {number} [opts.hardBlockAt] - Count that triggers a deny (default `HARD_BLOCK_AT`).
 * @param {string} [opts.runId] - Included in log records.
 * @param {(d: {toolName: string, inputHash: string, callCount: number,
 *   decision: 'allow'|'warn'|'block', sinceFirstMs: number}) => void} [opts.onDecision]
 *   Observer invoked once per inspected PreToolUse event.
 * @returns {{hooks: {PreToolUse: Array<{hooks: Function[]}>},
 *   stats: {inspected: number, warned: number, blocked: number}}}
 *   The hook registration object and a live counters object that the hook mutates.
 */
export function buildDedupHook(opts = {}) {
    const cache = new Map();
    const ttl = opts.ttlMs ?? DEFAULT_TTL_MS;
    const softAt = opts.softWarnAt ?? SOFT_WARN_AT;
    const hardAt = opts.hardBlockAt ?? HARD_BLOCK_AT;
    const stats = { inspected: 0, warned: 0, blocked: 0 };
    // Observer callbacks are caller code; a bug there must not abort the hook.
    const notify = (decision) => {
        try {
            opts.onDecision?.(decision);
        }
        catch (err) {
            logger.warn({ err, runId: opts.runId }, 'tool-call-dedup: onDecision callback threw');
        }
    };
    const preToolUse = async (input) => {
        if (input?.hook_event_name !== 'PreToolUse')
            return {};
        const evt = input;
        const toolName = String(evt.tool_name ?? 'unknown');
        const inputHash = hashToolInput(evt.tool_input);
        const key = `${toolName}:${inputHash}`;
        const now = Date.now();
        stats.inspected += 1;
        // Every input that failed to hash would land on the same
        // `${toolName}:unhashable` key, so unrelated calls would be counted as
        // repeats and eventually denied. Let them through untracked instead.
        if (inputHash === 'unhashable') {
            notify({ toolName, inputHash, callCount: 1, decision: 'allow', sinceFirstMs: 0 });
            return {};
        }
        let entry = cache.get(key);
        // Treat expired entries as fresh — drop and restart the count.
        if (entry && now - entry.lastSeen > ttl) {
            cache.delete(key);
            entry = undefined;
        }
        if (!entry) {
            cache.set(key, { count: 1, firstSeen: now, lastSeen: now });
            notify({ toolName, inputHash, callCount: 1, decision: 'allow', sinceFirstMs: 0 });
            return {};
        }
        entry.count += 1;
        // Denied and warned attempts also refresh lastSeen, so the window
        // slides: it only expires after a full TTL with no identical call.
        entry.lastSeen = now;
        const sinceFirstMs = now - entry.firstSeen;
        if (entry.count >= hardAt) {
            stats.blocked += 1;
            logger.warn({
                toolName,
                inputHash,
                callCount: entry.count,
                sinceFirstMs,
                runId: opts.runId,
            }, 'tool-call-dedup: hard-blocking identical call');
            notify({ toolName, inputHash, callCount: entry.count, decision: 'block', sinceFirstMs });
            return {
                hookSpecificOutput: {
                    hookEventName: 'PreToolUse',
                    permissionDecision: 'deny',
                    permissionDecisionReason: `Tool \`${toolName}\` was already called with these exact arguments ${entry.count - 1} time(s) in the last ${Math.floor(sinceFirstMs / 1000)}s. ` +
                        `The result has not changed. STOP re-calling — use the result from your earlier context, ` +
                        `change the arguments to fetch different data, or finish the task with what you already know. ` +
                        `If you genuinely need fresh data, wait at least ${Math.ceil(ttl / 1000)}s and try again.`,
                },
            };
        }
        if (entry.count >= softAt) {
            stats.warned += 1;
            logger.info({
                toolName,
                inputHash,
                callCount: entry.count,
                sinceFirstMs,
                runId: opts.runId,
            }, 'tool-call-dedup: warning on repeat call');
            notify({ toolName, inputHash, callCount: entry.count, decision: 'warn', sinceFirstMs });
            // No permissionDecision here: the tool still executes, the model
            // just receives the hint as additional context.
            return {
                hookSpecificOutput: {
                    hookEventName: 'PreToolUse',
                    additionalContext: `Note: you've already called \`${toolName}\` with these exact arguments ${entry.count - 1} time(s) in the last ${Math.floor(sinceFirstMs / 1000)}s. ` +
                        `The result will be identical. Consider re-using the prior result rather than letting this call burn turns/budget. ` +
                        `One more identical re-call will be blocked.`,
                },
            };
        }
        // Reachable only when softWarnAt is configured above the current count.
        notify({ toolName, inputHash, callCount: entry.count, decision: 'allow', sinceFirstMs });
        return {};
    };
    return {
        hooks: {
            PreToolUse: [{ hooks: [preToolUse] }],
        },
        stats,
    };
}
|
|
168
|
+
//# sourceMappingURL=tool-call-dedup.js.map
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -23916,7 +23916,7 @@ function openCommandK() {
|
|
|
23916
23916
|
{ kw: 'home activity', page: 'home', tab: 'activity', label: 'Home · Activity' },
|
|
23917
23917
|
{ kw: 'build workflows workflow builder', page: 'build', tab: 'workflows', label: 'Build · Workflow Builder' },
|
|
23918
23918
|
{ kw: 'build crons schedules scheduled tasks operations automation', page: 'build', tab: 'crons', label: 'Build · Schedules' },
|
|
23919
|
-
{ kw: 'build skills',
|
|
23919
|
+
{ kw: 'build skills skill studio create skill', page: 'skills', tab: '', label: 'Skills · Skill Studio' },
|
|
23920
23920
|
{ kw: 'build templates', page: 'build', tab: 'templates', label: 'Build · Templates' },
|
|
23921
23921
|
{ kw: 'team roster', page: 'team', tab: 'roster', label: 'Team · Roster' },
|
|
23922
23922
|
{ kw: 'team activity', page: 'team', tab: 'activity', label: 'Team · Activity' },
|
|
@@ -29970,6 +29970,30 @@ async function sbRunSkillTest() {
|
|
|
29970
29970
|
}
|
|
29971
29971
|
}
|
|
29972
29972
|
|
|
29973
|
+
// Pre-fill the chat box with a request to draft the skill's frontmatter
// description, using whatever is currently typed into the skill modal.
// The message is loaded with autoSend disabled, so the user still has to
// press send.
function askSkillCreatorForDescription() {
  // Trimmed value of a modal field; '' when the element is missing or empty.
  function fieldText(elementId) {
    var el = document.getElementById(elementId);
    return (el?.value || '').trim();
  }
  var skillName = fieldText('skill-modal-name');
  var skillTitle = fieldText('skill-modal-title');
  var currentDesc = fieldText('skill-modal-desc');
  var procedure = fieldText('skill-modal-body');

  var lines = [];
  lines.push('Use skill-creator principles to help write the frontmatter description for this Clementine skill.');
  lines.push('');
  lines.push('Skill name: ' + (skillName || '(not set yet)'));
  lines.push('Title: ' + (skillTitle || '(not set yet)'));
  lines.push('Current description: ' + (currentDesc || '(empty)'));
  lines.push('Procedure preview:');
  // Only the first 1600 characters of the procedure are included.
  lines.push(procedure ? procedure.slice(0, 1600) : '(empty)');
  lines.push('');
  lines.push('Return one concise description under 1024 characters. It must say what the skill does, when to use it, and trigger phrases. Do not rewrite the whole skill unless I ask.');
  // NOTE(review): the doubled backslash is kept exactly as written; presumably
  // this script is emitted from inside an outer string/template where it
  // collapses to a newline escape — confirm before changing.
  var prompt = lines.join('\\n');

  // The chat helper may not be defined yet; bail out with a toast if so.
  if (typeof askClementineWith !== 'function') {
    toast('Chat is not ready yet. Try again after the dashboard finishes loading.', 'error');
    return;
  }
  askClementineWith(prompt, { autoSend: false });
  toast('Description prompt loaded in chat. Press send when ready.', 'info');
}
|
|
29996
|
+
|
|
29973
29997
|
async function _openSkillModal(opts) {
|
|
29974
29998
|
opts = opts || {};
|
|
29975
29999
|
var prefill = opts.mode === 'create' && opts.prefill ? opts.prefill : {};
|