npm - clementine-agent - Versions diffs - 1.18.172 → 1.18.173 - Mend

clementine-agent 1.18.172 → 1.18.173

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/agent/run-agent.js +52 -7
package/dist/agent/run-skill.d.ts +7 -0
package/dist/agent/run-skill.js +225 -19
package/dist/agent/tool-call-dedup.d.ts +93 -0
package/dist/agent/tool-call-dedup.js +168 -0
package/dist/cli/dashboard.js +25 -1
package/package.json +1 -1

package/dist/agent/run-agent.js CHANGED Viewed

@@ -86,6 +86,7 @@ export function invalidateMcpStatusEntry(name) {
 }
 import { BASE_DIR, PKG_DIR, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY as CONFIG_ANTHROPIC_API_KEY, normalizeClaudeSdkOptionsForOneMillionContext, TOOL_OUTPUT_GUARD, } from '../config.js';
 import { buildGuardHooks } from './tool-output-guard.js';
+import { buildDedupHook } from './tool-call-dedup.js';
 import { buildAgentMap } from './agent-definitions.js';
 import { buildExecutionToolPolicy, } from './execution-policy.js';
 const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
@@ -196,13 +197,20 @@ export async function runAgent(prompt, opts) {
         ? requestedBudget
         : undefined;
     const startedAt = Date.now();
-    // Build the AgentDefinition map. Caller can override; otherwise we
-    // use the standard system subagents + hired-agent profiles.
-    const agents = opts.agents ?? buildAgentMap({
+    // Build the AgentDefinition map.
+    // - Default: planner/researcher/cron-fixer + hired-agent profiles.
+    // - Caller-supplied agents (opts.agents) MERGE over the defaults rather
+    //   than REPLACE them (1.18.173); on a name clash the caller's entry
+    //   wins. `runSkill`'s auto-delegation path needs this to inject a
+    //   per-run `skill-worker` while keeping planner/researcher/etc.
+    //   NOTE(review): a `replaceAgents` opt-out for fully isolated maps was
+    //   mentioned here, but no such option is read in this hunk — confirm.
+    const defaultAgents = buildAgentMap({
         profileManager: opts.agentManager ?? undefined,
         isAutonomous: source === 'cron' || source === 'heartbeat',
         activeAgentSlug: opts.profile?.slug,
     });
+    const agents = opts.agents ? { ...defaultAgents, ...opts.agents } : defaultAgents;
     // Wrap prompt to direct Claude to a specific subagent when caller asks.
     // Per SDK docs: explicit invocation = "Use the X agent to..."
     const effectivePrompt = opts.forceSubagent && agents[opts.forceSubagent]
@@ -341,6 +349,34 @@ export async function runAgent(prompt, opts) {
             },
         })
         : { hooks: {}, stats: { inspected: 0, compressed: 0, ceilingHits: 0, bytesShed: 0, compactions: 0 } };
+    // ── Tool-call dedup hook (1.18.173) ─────────────────────────────────
+    // Breaks the "re-fetch after compaction" loop that crashed the
+    // imessage-triage cron on 2026-05-11 (4× identical tool calls →
+    // SDK autocompact-thrashing abort). PreToolUse hook detects same
+    // (toolName, inputHash) within 60s: 2nd call gets a soft hint, 3rd+
+    // is denied so the model can't burn turns re-calling the same data.
+    // Defense-in-depth — the cleaner fix (delegating to a subagent so the
+    // parent never re-fetches in the first place) lives in run-skill.ts.
+    const dedup = buildDedupHook({
+        runId,
+        onDecision: (info) => {
+            if (info.decision === 'allow')
+                return;
+            writeEvent({
+                kind: 'error',
+                ts: new Date().toISOString(),
+                sessionId,
+                toolError: `_clementine_dedup:${info.decision} ${info.toolName} call#${info.callCount} @${info.sinceFirstMs}ms`,
+            });
+        },
+    });
+    // Merge hook maps from the two modules. SDK accepts arrays of
+    // HookCallbackMatcher per event; we concatenate.
+    const mergedHooks = { ...guard.hooks };
+    for (const [evt, matchers] of Object.entries(dedup.hooks)) {
+        const existing = mergedHooks[evt] ?? [];
+        mergedHooks[evt] = [...existing, ...matchers];
+    }
     // Apply 1M-context env normalization (existing infra)
     const sdkOptionsRaw = {
         systemPrompt: profileAppend
@@ -380,10 +416,11 @@ export async function runAgent(prompt, opts) {
         ...(opts.additionalDirectories && opts.additionalDirectories.length > 0
             ? { additionalDirectories: opts.additionalDirectories }
             : {}),
-        // 1.18.169 — install the tool-output guard hooks. SDK types accept
-        // `hooks` keyed by HookEvent; the empty object is a no-op when the
-        // guard is disabled.
-        ...(Object.keys(guard.hooks).length > 0 ? { hooks: guard.hooks } : {}),
+        // 1.18.169 — install the tool-output guard hooks.
+        // 1.18.173 — merged with the tool-call dedup hooks (PreToolUse).
+        // SDK types accept `hooks` keyed by HookEvent. The dedup hook is
+        // always built, so mergedHooks is never empty in practice.
+        ...(Object.keys(mergedHooks).length > 0 ? { hooks: mergedHooks } : {}),
     };
     const sdkOptions = normalizeClaudeSdkOptionsForOneMillionContext(sdkOptionsRaw);
     logger.info({
@@ -640,6 +677,14 @@ export async function runAgent(prompt, opts) {
             compactions: guard.stats.compactions,
             ceilingHits: guard.stats.ceilingHits,
         } : undefined,
+        // 1.18.173 — tool-call dedup summary. Non-zero warned/blocked means
+        // the model tried to re-fetch identical data (typically a
+        // post-compaction refetch loop).
+        dedup: dedup.stats.inspected > 0 ? {
+            inspected: dedup.stats.inspected,
+            warned: dedup.stats.warned,
+            blocked: dedup.stats.blocked,
+        } : undefined,
     }, 'runAgent: query complete');
     // PRD §6 Phase 4e: subagent transcript backfill (Path C). The SDK persists
     // every subagent's full message stream to ~/.claude/projects/<encoded-cwd>/

package/dist/agent/run-skill.d.ts CHANGED Viewed

@@ -145,6 +145,13 @@ export declare function buildSkillPrompt(skill: Skill, inputs: Record<string, st
  * After the SDK returns, `clementine.success.schema` (when set) is
  * ajv-validated against the response.
  *
+ * **Autonomous runs (1.18.173)**: When `source` is one of
+ * AUTONOMOUS_SOURCES, the skill runs through the auto-delegating
+ * wrapper: a thin parent dispatches to a `skill-worker` subagent which
+ * does all the work in its own context. Closes the
+ * "refetch-after-compaction" loop class permanently. Skills can opt out
+ * via frontmatter `clementine.execution.inline: true`.
+ *
  * This function never throws — failures (skill not found, SDK error,
  * timeout) are returned as `{ ok: false, error }`. The caller (chat,
  * cron, sub-agent, MCP tool) decides how to surface that.

package/dist/agent/run-skill.js CHANGED Viewed

@@ -28,6 +28,7 @@ import path from 'node:path';
 import pino from 'pino';
 import { getSkill } from './skill-store.js';
 import { runAgent } from './run-agent.js';
+import { MODELS } from '../config.js';
 const logger = pino({ name: 'clementine.run-skill' });
 // ── Mustache substitution ─────────────────────────────────────────────
 /** Matches `{{var_name}}` with optional whitespace. var_name is
@@ -183,6 +184,133 @@ async function validateSkillOutput(output, schema) {
         return { tried: true, pass: false, errors: [`schema compile error: ${err}`] };
     }
 }
+// ── Autonomous delegation (1.18.173) ──────────────────────────────────
+/**
+ * Run sources treated as autonomous. A runSkill call from one of these
+ * sources defaults to the auto-delegating wrapper: the parent agent
+ * hands the whole skill body to a `skill-worker` subagent through the
+ * Agent tool, so tool results land in the worker's context instead of
+ * the parent's. The intent is that the parent stays small enough that
+ * the SDK never compacts it mid-run, removing the opening for a
+ * post-compaction "refetch loop".
+ *
+ * Sources not listed here (e.g. 'chat', or 'skill' when a chat user
+ * invokes a skill directly) stay on the inline path: someone is waiting
+ * on the output, and the extra subagent dispatch latency is judged a
+ * worse tradeoff than the small compaction risk of one conversational
+ * turn.
+ */
+const AUTONOMOUS_SOURCES = new Set([
+    'cron', 'scheduled-skill', 'heartbeat', 'team-task',
+]);
+/**
+ * Report whether a runSkill call should go through the auto-delegating
+ * (subagent) wrapper.
+ *
+ * @param skill - Loaded skill; only the optional frontmatter flag
+ *   `clementine.execution.inline` is read.
+ * @param source - Run source label, matched against AUTONOMOUS_SOURCES.
+ * @returns true only for an autonomous source whose skill has not opted
+ *   out with `clementine.execution.inline: true` (an author's statement
+ *   that the procedure fits in one context, e.g. a 2-line script call).
+ */
+function shouldAutoDelegate(skill, source) {
+    const isAutonomous = AUTONOMOUS_SOURCES.has(source);
+    return isAutonomous && skill.frontmatter?.clementine?.execution?.inline !== true;
+}
+/**
+ * Pick the model string for an autonomous (auto-delegated) run.
+ *
+ * Precedence: the caller's explicit model, then the skill-declared
+ * model, then the configured Sonnet model with a `[1m]` suffix (added
+ * only if not already present). The suffix requests the 1M-context
+ * variant, giving the worker subagent 5× the room of the standard 200K
+ * window so that compaction — and the "refetch-after-compact" loop seen
+ * in the 2026-05-11 imessage-triage failures — is rare.
+ *
+ * Actual 1M routing is gated by the user's plan (see
+ * config.ts:usesOneMillionContext) and the model family: Haiku doesn't
+ * support 1M, and Sonnet 1M needs the `[1m]` suffix. Downstream,
+ * normalizeClaudeSdkOptionsForOneMillionContext is expected to strip the
+ * suffix when the plan doesn't support it, falling back to standard
+ * Sonnet with less headroom.
+ *
+ * @param explicitModel - Caller-supplied model; returned as-is if truthy.
+ * @param skillModel - Skill-declared model; used when no explicit model.
+ * @returns The chosen model string, or undefined if nothing is configured.
+ */
+function resolveAutonomousModel(explicitModel, skillModel) {
+    // Overrides are returned untouched — no `[1m]` suffix is added to them.
+    const override = explicitModel || skillModel;
+    if (override)
+        return override;
+    const sonnet = MODELS.sonnet;
+    if (!sonnet)
+        return undefined;
+    return /\[1m\]/i.test(sonnet) ? sonnet : `${sonnet}[1m]`;
+}
+/**
+ * Build the AgentDefinition for the `skill-worker` subagent that runs
+ * this skill in its own context. The worker is where the real data
+ * flows; the parent orchestrator only dispatches and relays.
+ * @param skill - Skill whose `frontmatter.name` and `body` are embedded.
+ * @param effectiveTools - The skill's computed tool allowlist.
+ * @param model - Optional model string; the key is omitted when falsy.
+ * @param workerMaxTurns - Turn cap for the worker.
+ * @returns Definition with description, prompt, tools, optional model,
+ *   `effort: 'medium'` and maxTurns. The description mostly serves as
+ *   transcript context, since the parent is force-routed to this worker.
+ */
+function buildSkillWorkerAgent(skill, effectiveTools, model, workerMaxTurns) {
+    const { name } = skill.frontmatter;
+    const description = [
+        `Executes the "${name}" scheduled skill end-to-end in an isolated context window.`,
+        `Reads any data the skill needs, processes it, performs the skill's described delivery action`,
+        `(e.g., sends a Discord/Slack notification), and returns a concise summary to the orchestrator.`,
+    ].join(' ');
+    const instructions = [
+        `You are the worker subagent for the "${name}" scheduled skill.`,
+        `Your job is to execute the procedure below from start to finish in a single subagent run. ` +
+            `You have your own isolated context window — do NOT save state for a parent agent; if the ` +
+            `procedure calls for sending a notification, YOU send it (you have the relevant tools).`,
+        `Return a single concise final response describing what happened (e.g., "Sent Discord DM about ` +
+            `2 actionable items, ignored 8 spam"). Do not return raw tool output; do not narrate every step. ` +
+            `If nothing actionable was found and the procedure says exit silently, return "No action needed."`,
+        `## Procedure\n\n${skill.body}`,
+    ].join('\n\n');
+    // NOTE(review): `model` may carry a `[1m]` suffix; nothing in this
+    // function strips it — confirm the SDK/normalizer handles subagent models.
+    const modelField = model ? { model } : {};
+    return { description, prompt: instructions, tools: effectiveTools, ...modelField, effort: 'medium', maxTurns: workerMaxTurns };
+}
+/**
+ * Build the prompt for the parent orchestrator, whose only job is to
+ * dispatch to `skill-worker` via the Agent tool and relay the result.
+ * The text is deliberately short (the author's target is ~600 bytes):
+ * the parent's context is meant to hold little more than this prompt
+ * plus the worker's final summary, far below any compaction threshold.
+ *
+ * @param skill - Skill whose `frontmatter.name` is quoted in the prompt.
+ * @param callerContext - Optional text; when it is non-blank after
+ *   trimming, it is appended under a "Caller context" heading.
+ * @returns The prompt as a single newline-joined string.
+ */
+function buildOrchestratorPrompt(skill, callerContext) {
+    const { name } = skill.frontmatter;
+    const instructions = [
+        '## Scheduled Skill Execution', '',
+        `Dispatch the "${name}" skill to the \`skill-worker\` subagent via the Agent tool.`,
+        'The worker has the skill body as its system prompt and the tools required to perform the procedure end-to-end (including any notification delivery).',
+        '', '## Your job', '',
+        `1. Call the Agent tool ONCE, dispatching to "skill-worker" with this brief: "Execute the ${name} procedure now."`,
+        '2. Wait for its return.',
+        '3. Relay its summary as your final response — do not add commentary, do not re-do its work.',
+        '', 'Do NOT call any other tools directly. The worker handles all data access and delivery.',
+    ];
+    const forwarded = callerContext && callerContext.trim();
+    const contextSection = forwarded ? ['', '## Caller context (forward this to the worker if relevant)', '', forwarded] : [];
+    return [...instructions, ...contextSection].join('\n');
+}
 // ── The primitive ─────────────────────────────────────────────────────
 /**
  * Run a skill as a hard-allowlisted sub-call. Returns a structured result.
@@ -194,6 +322,13 @@ async function validateSkillOutput(output, schema) {
  * After the SDK returns, `clementine.success.schema` (when set) is
  * ajv-validated against the response.
  *
+ * **Autonomous runs (1.18.173)**: When `source` is one of
+ * AUTONOMOUS_SOURCES, the skill runs through the auto-delegating
+ * wrapper: a thin parent dispatches to a `skill-worker` subagent which
+ * does all the work in its own context. Closes the
+ * "refetch-after-compaction" loop class permanently. Skills can opt out
+ * via frontmatter `clementine.execution.inline: true`.
+ *
  * This function never throws — failures (skill not found, SDK error,
  * timeout) are returned as `{ ok: false, error }`. The caller (chat,
  * cron, sub-agent, MCP tool) decides how to surface that.
@@ -212,7 +347,17 @@ export async function runSkill(name, options = {}) {
     }
     const effectiveTools = computeSkillAllowlist(skill);
     const hasExplicitToolScope = skillHasExplicitToolScope(skill);
-    const prompt = buildSkillPrompt(skill, options.inputs, options.context);
+    const source = options.source ?? 'skill';
+    // 1.18.173: autonomous runs (cron, scheduled-skill, heartbeat,
+    // team-task) wrap the skill in a thin orchestrator that dispatches
+    // the entire procedure to a `skill-worker` subagent. The parent's
+    // context stays ~3KB, so post-compaction refetch loops cannot start
+    // (see shouldAutoDelegate / buildSkillWorkerAgent above).
+    // NOTE(review): options.inputs is unused here and the worker gets raw skill.body — verify {{var}}s.
+    const autoDelegate = shouldAutoDelegate(skill, source);
+    const prompt = autoDelegate
+        ? buildOrchestratorPrompt(skill, options.context)
+        : buildSkillPrompt(skill, options.inputs, options.context);
     const limits = skill.frontmatter?.clementine?.limits;
     const maxTurns = options.maxTurns ?? limits?.maxTurns;
     const maxBudgetUsd = options.maxBudgetUsd ?? limits?.maxBudgetUsd;
@@ -225,6 +370,14 @@ export async function runSkill(name, options = {}) {
         ...(skill.layout === 'folder' ? [path.dirname(skill.filePath)] : []),
     ];
     const mutatingSkill = effectiveTools.some((t) => t === 'Write' || t === 'Edit' || t === 'Bash' || /__(write|edit|update|create|delete|send|post|patch|set)/i.test(t));
+    // 1.18.173: resolve the effective model. Autonomous runs default to
+    // Sonnet [1m] (1M context window) so the worker subagent has 5× the
+    // room of a standard 200K-window model. resolveAutonomousModel honors
+    // explicit overrides + skill-declared limits.model first.
+    const skillModel = skill.frontmatter?.clementine?.limits?.model;
+    const effectiveModel = autoDelegate
+        ? resolveAutonomousModel(options.model, skillModel)
+        : (options.model ?? skillModel);
     logger.info({
         skill: name,
         tools: effectiveTools,
@@ -232,6 +385,9 @@ export async function runSkill(name, options = {}) {
         maxBudgetUsd,
         inputKeys: Object.keys(options.inputs ?? {}),
         hasContext: !!options.context,
+        autoDelegate,
+        model: effectiveModel,
+        source,
     }, 'runSkill: invoking');
     let runResult;
     try {
@@ -245,24 +401,74 @@ export async function runSkill(name, options = {}) {
             ].filter(Boolean).join('\n\n'),
             profile: options.profile,
         });
-        const allowedToolsForRun = hasExplicitToolScope ? effectiveTools : undefined;
-        const sdkOpts = {
-            sessionKey,
-            source: options.source ?? 'skill',
-            ...(allowedToolsForRun ? { allowedTools: allowedToolsForRun } : {}),
-            profile: options.profile,
-            agentManager: options.agentManager,
-            memoryStore: options.memoryStore,
-            cwd: options.projectWorkDir,
-            extraMcpServers: mcp.servers,
-            enableFileCheckpointing: mutatingSkill || Boolean(options.projectWorkDir),
-            ...(options.model ? { model: options.model } : {}),
-            ...(typeof maxTurns === 'number' ? { maxTurns } : {}),
-            ...(typeof maxBudgetUsd === 'number' ? { maxBudgetUsd } : {}),
-            ...(additionalDirectories.length > 0 ? { additionalDirectories } : {}),
-            ...(options.onText ? { onText: options.onText } : {}),
-            ...(options.abortSignal ? { abortSignal: options.abortSignal } : {}),
-        };
+        // ── Autonomous-delegation branch (1.18.173) ──────────────────────
+        // Parent: minimal allowedTools (Agent only) + forceSubagent to
+        // skill-worker. Worker: full tool surface + skill body as system
+        // prompt. Worker is the SDK AgentDefinition; the SDK wires its
+        // tools/model/prompt at query time.
+        let sdkOpts;
+        if (autoDelegate) {
+            // Worker gets enough turns to complete bulk work (skill author's
+            // maxTurns cap, or 30 as a safe default for triage-class work).
+            const workerMaxTurns = (typeof maxTurns === 'number' && maxTurns > 0) ? maxTurns : 30;
+            const workerDef = buildSkillWorkerAgent(skill, effectiveTools, effectiveModel, workerMaxTurns);
+            sdkOpts = {
+                sessionKey,
+                source,
+                // Parent's allowedTools: ONLY Agent (delegate-or-fail). Keeps
+                // the parent's context shape predictable and prevents it from
+                // doing data-heavy work itself even if the LLM disagrees.
+                allowedTools: ['Agent'],
+                // Force-routing: SDK wraps the prompt with "Use the skill-worker
+                // agent to handle this request" so dispatch is the natural
+                // first action.
+                forceSubagent: 'skill-worker',
+                // Inject the skill-worker into the agents map. runAgent merges
+                // its `buildAgentMap()` defaults with whatever's passed via
+                // opts.agents — see run-agent.ts:362.
+                agents: { 'skill-worker': workerDef },
+                profile: options.profile,
+                agentManager: options.agentManager,
+                memoryStore: options.memoryStore,
+                cwd: options.projectWorkDir,
+                extraMcpServers: mcp.servers,
+                enableFileCheckpointing: mutatingSkill || Boolean(options.projectWorkDir),
+                // Parent uses the same model family so MCP server reuse is clean
+                // (the SDK keys some cache state by model). Parent turns are
+                // tightly capped: it should dispatch and relay in ≤3 turns.
+                ...(effectiveModel ? { model: effectiveModel } : {}),
+                maxTurns: 5,
+                ...(typeof maxBudgetUsd === 'number' ? { maxBudgetUsd } : {}),
+                ...(additionalDirectories.length > 0 ? { additionalDirectories } : {}),
+                ...(options.onText ? { onText: options.onText } : {}),
+                ...(options.abortSignal ? { abortSignal: options.abortSignal } : {}),
+            };
+        }
+        else {
+            // ── Inline branch (interactive / opt-out skills) ────────────────
+            // Original 1.18.162 behavior — the SDK call runs the skill body
+            // directly as the main-agent prompt. Used for chat-invoked skills
+            // where the latency of a subagent dispatch is worse UX than the
+            // small compaction risk.
+            const allowedToolsForRun = hasExplicitToolScope ? effectiveTools : undefined;
+            sdkOpts = {
+                sessionKey,
+                source,
+                ...(allowedToolsForRun ? { allowedTools: allowedToolsForRun } : {}),
+                profile: options.profile,
+                agentManager: options.agentManager,
+                memoryStore: options.memoryStore,
+                cwd: options.projectWorkDir,
+                extraMcpServers: mcp.servers,
+                enableFileCheckpointing: mutatingSkill || Boolean(options.projectWorkDir),
+                ...(effectiveModel ? { model: effectiveModel } : {}),
+                ...(typeof maxTurns === 'number' ? { maxTurns } : {}),
+                ...(typeof maxBudgetUsd === 'number' ? { maxBudgetUsd } : {}),
+                ...(additionalDirectories.length > 0 ? { additionalDirectories } : {}),
+                ...(options.onText ? { onText: options.onText } : {}),
+                ...(options.abortSignal ? { abortSignal: options.abortSignal } : {}),
+            };
+        }
         runResult = await runAgent(prompt, sdkOpts);
     }
     catch (err) {

package/dist/agent/tool-call-dedup.d.ts ADDED Viewed

@@ -0,0 +1,93 @@
+/**
+ * tool-call-dedup — PreToolUse hook that detects same-call loops and
+ * nudges the model to stop re-fetching identical data.
+ *
+ * Why this exists (1.18.173)
+ * ──────────────────────────
+ * The Anthropic SDK's auto-compactor summarizes prior turns when context
+ * approaches the model's window. If the working data lived in those
+ * earlier turns, compaction loses it — and the model often responds by
+ * RE-CALLING the same tool with the same arguments to "re-load" the
+ * data. That refill triggers the next compaction, which loses the
+ * re-loaded data, which triggers another re-call, … and the SDK's
+ * thrashing detector aborts the run after 3 consecutive cycles.
+ *
+ * Real-world example (2026-05-11 imessage-triage 08:00 UTC, run
+ * 839a7d1a-…): four IDENTICAL calls to `get_unread_imessages({limit:20})`
+ * in 115 seconds, one after each compaction. The tool-output-guard from
+ * 1.18.169 didn't fire because each individual response was under the
+ * 30KB cap; the loop was structural, not size-based.
+ *
+ * What this hook does
+ * ───────────────────
+ * On every PreToolUse, hash `(toolName, JSON.stringify(input))` and look
+ * it up in a per-run cache (60s TTL by default).
+ *   • count = 1 (first call): let it through, record.
+ *   • count = 2 (second call within TTL): inject an `additionalContext`
+ *     hint into the next turn saying "you already called this; the
+ *     result hasn't changed; reuse it or change the inputs." Tool still
+ *     executes (the model might have legitimate reasons to re-poll).
+ *   • count = 3+ (third+ identical call): `permissionDecision: 'deny'`
+ *     with a reason that directs the model to either change inputs or
+ *     stop the loop. The model receives a denial result instead of new
+ *     tool data — breaks the refetch-after-compact cycle.
+ *
+ * Aligned with Anthropic SDK best practices: PreToolUse + permission
+ * decisions are the documented mechanism for controlling tool execution
+ * mid-run. `sdk.d.ts:2002-2008` — `PreToolUseHookSpecificOutput` carries
+ * `permissionDecision` ('allow'/'deny'/'ask'/'defer') + reason +
+ * additionalContext for exactly this case.
+ *
+ * Failure mode
+ * ────────────
+ * Never throws. Hash errors, cache errors, anything — degrades to
+ * letting the call through. Telemetry must never block execution.
+ */
+import type { HookCallbackMatcher, HookEvent } from '@anthropic-ai/claude-agent-sdk';
+export interface DedupHookOptions {
+    /** Stable run identifier. NOTE(review): the .js only tags log entries with it; the cache is scoped by the hook instance. */
+    runId: string;
+    /** Window (ms) after the previous identical call in which a repeat still counts; default 60000. */
+    ttlMs?: number;
+    /** Call count at which a repeat is let through with a hint (default 2). */
+    softWarnAt?: number;
+    /** Call count from which a repeat is denied (default 3). */
+    hardBlockAt?: number;
+    /** Optional callback fired on every dedup decision, including plain 'allow'. */
+    onDecision?: (info: {
+        toolName: string;
+        inputHash: string;
+        callCount: number;
+        decision: 'allow' | 'warn' | 'block';
+        sinceFirstMs: number;
+    }) => void;
+}
+export interface DedupRunStats {
+    /** Total PreToolUse invocations inspected, including first-time calls. */
+    inspected: number;
+    /** Repeat calls let through with an `additionalContext` hint. */
+    warned: number;
+    /** Repeat calls denied with `permissionDecision: 'deny'`. */
+    blocked: number;
+}
+export interface DedupHookHandles {
+    /** Hook map suitable for SDK `query({ options: { hooks } })`; holds a single PreToolUse matcher. */
+    hooks: Partial<Record<HookEvent, HookCallbackMatcher[]>>;
+    /** Counters updated by the hook as it runs — read after the run completes. */
+    stats: DedupRunStats;
+}
+/**
+ * Compute a stable hash of a tool call's input. The input is serialized
+ * with JSON.stringify using a sorted-keys replacer, so `{a:1,b:2}` and
+ * `{b:2,a:1}` hash identically and key order alone cannot evade the
+ * dedup. Returns the literal 'unhashable' when serialization fails.
+ */
+export declare function hashToolInput(input: unknown): string;
+/**
+ * Build a PreToolUse dedup hook for a single runAgent invocation.
+ * The cache is created per call (no cross-run state): short-lived
+ * agentic runs don't need persistence, and a stale cache must not deny
+ * legitimate post-restart re-polls.
+ */
+export declare function buildDedupHook(opts: DedupHookOptions): DedupHookHandles;
+//# sourceMappingURL=tool-call-dedup.d.ts.map

package/dist/agent/tool-call-dedup.js ADDED Viewed

@@ -0,0 +1,168 @@
+/**
+ * tool-call-dedup — PreToolUse hook that detects same-call loops and
+ * nudges the model to stop re-fetching identical data.
+ *
+ * Why this exists (1.18.173)
+ * ──────────────────────────
+ * The Anthropic SDK's auto-compactor summarizes prior turns when context
+ * approaches the model's window. If the working data lived in those
+ * earlier turns, compaction loses it — and the model often responds by
+ * RE-CALLING the same tool with the same arguments to "re-load" the
+ * data. That refill triggers the next compaction, which loses the
+ * re-loaded data, which triggers another re-call, … and the SDK's
+ * thrashing detector aborts the run after 3 consecutive cycles.
+ *
+ * Real-world example (2026-05-11 imessage-triage 08:00 UTC, run
+ * 839a7d1a-…): four IDENTICAL calls to `get_unread_imessages({limit:20})`
+ * in 115 seconds, one after each compaction. The tool-output-guard from
+ * 1.18.169 didn't fire because each individual response was under the
+ * 30KB cap; the loop was structural, not size-based.
+ *
+ * What this hook does
+ * ───────────────────
+ * On every PreToolUse, hash `(toolName, JSON.stringify(input))` and look
+ * it up in a per-run cache (60s TTL by default).
+ *   • count = 1 (first call): let it through, record.
+ *   • count = 2 (second call within TTL): inject an `additionalContext`
+ *     hint into the next turn saying "you already called this; the
+ *     result hasn't changed; reuse it or change the inputs." Tool still
+ *     executes (the model might have legitimate reasons to re-poll).
+ *   • count = 3+ (third+ identical call): `permissionDecision: 'deny'`
+ *     with a reason that directs the model to either change inputs or
+ *     stop the loop. The model receives a denial result instead of new
+ *     tool data — breaks the refetch-after-compact cycle.
+ *
+ * Aligned with Anthropic SDK best practices: PreToolUse + permission
+ * decisions are the documented mechanism for controlling tool execution
+ * mid-run. `sdk.d.ts:2002-2008` — `PreToolUseHookSpecificOutput` carries
+ * `permissionDecision` ('allow'/'deny'/'ask'/'defer') + reason +
+ * additionalContext for exactly this case.
+ *
+ * Failure mode
+ * ────────────
+ * Never throws. Hash errors, cache errors, anything — degrades to
+ * letting the call through. Telemetry must never block execution.
+ */
+import { createHash } from 'node:crypto';
+import pino from 'pino';
+const logger = pino({ name: 'clementine.tool-call-dedup' });
+// ── Tunables ──────────────────────────────────────────────────────────
+/** Sliding window (ms): a repeat counts only if it arrives this soon after the previous identical call. */
+const DEFAULT_TTL_MS = 60 * 1000;
+/** Call count at which a repeat within the window gets a soft warning (allowed, with a hint). */
+const SOFT_WARN_AT = 2;
+/** Call count from which a repeat within the window is hard-blocked (denied). */
+const HARD_BLOCK_AT = SOFT_WARN_AT + 1;
+// ── Hashing ───────────────────────────────────────────────────────────
+/**
+ * Compute a stable 16-hex-char hash of a tool call's input. The input is
+ * serialized with JSON.stringify through a key-sorting replacer, so
+ * `{a:1,b:2}` and `{b:2,a:1}` hash identically.
+ * @param input - Tool input of any shape.
+ * @returns First 16 hex chars of the SHA-256 of the stable JSON, or
+ *   'unhashable' when serialization throws or yields no string.
+ */
+export function hashToolInput(input) {
+    try {
+        const stable = JSON.stringify(input, replaceForStableHash);
+        return createHash('sha256').update(stable).digest('hex').slice(0, 16);
+    }
+    catch {
+        return 'unhashable';
+    }
+}
+/** JSON.stringify replacer: re-emits each plain object with its keys sorted. */
+function replaceForStableHash(_key, value) {
+    if (!value || typeof value !== 'object' || Array.isArray(value))
+        return value;
+    // fromEntries defines own properties, so a literal "__proto__" key stays
+    // in the serialized output instead of silently becoming the prototype.
+    return Object.fromEntries(Object.keys(value).sort().map((k) => [k, value[k]]));
+}
+// ── Hook builder ──────────────────────────────────────────────────────
+/**
+ * Build a PreToolUse dedup hook for a single runAgent invocation. The cache
+ * lives in this closure (no cross-run state), so a restart never inherits
+ * stale entries. Never throws: a failing onDecision callback is logged and
+ * ignored, and any other internal error degrades to allowing the call.
+ * @param opts - runId plus optional ttlMs / softWarnAt / hardBlockAt / onDecision.
+ * @returns `{ hooks, stats }`; `stats` is updated in place as calls are inspected.
+ */
+export function buildDedupHook(opts) {
+    const cache = new Map();
+    const ttl = opts.ttlMs ?? DEFAULT_TTL_MS;
+    const softAt = opts.softWarnAt ?? SOFT_WARN_AT;
+    const hardAt = opts.hardBlockAt ?? HARD_BLOCK_AT;
+    const stats = { inspected: 0, warned: 0, blocked: 0 };
+    const notify = (info) => { // telemetry must never alter or abort a decision
+        try {
+            opts.onDecision?.(info);
+        }
+        catch (err) {
+            logger.warn({ err, runId: opts.runId }, 'tool-call-dedup: onDecision callback threw');
+        }
+    };
+    const decide = (evt) => {
+        const toolName = String(evt.tool_name ?? 'unknown');
+        const inputHash = hashToolInput(evt.tool_input);
+        const key = `${toolName}:${inputHash}`;
+        const now = Date.now();
+        stats.inspected += 1;
+        let entry = cache.get(key);
+        // Treat expired entries as fresh — drop and restart the count.
+        if (entry && now - entry.lastSeen > ttl) {
+            cache.delete(key);
+            entry = undefined;
+        }
+        if (!entry) {
+            cache.set(key, { count: 1, firstSeen: now, lastSeen: now });
+            notify({ toolName, inputHash, callCount: 1, decision: 'allow', sinceFirstMs: 0 });
+            return {};
+        }
+        entry.count += 1;
+        entry.lastSeen = now;
+        const sinceFirstMs = now - entry.firstSeen;
+        const logFields = { toolName, inputHash, callCount: entry.count, sinceFirstMs, runId: opts.runId };
+        if (entry.count >= hardAt) {
+            stats.blocked += 1;
+            logger.warn(logFields, 'tool-call-dedup: hard-blocking identical call');
+            notify({ toolName, inputHash, callCount: entry.count, decision: 'block', sinceFirstMs });
+            return {
+                hookSpecificOutput: {
+                    hookEventName: 'PreToolUse',
+                    permissionDecision: 'deny',
+                    permissionDecisionReason: `Tool \`${toolName}\` was already called with these exact arguments ${entry.count - 1} time(s) in the last ${Math.floor(sinceFirstMs / 1000)}s. ` +
+                        `The result has not changed. STOP re-calling — use the result from your earlier context, ` +
+                        `change the arguments to fetch different data, or finish the task with what you already know. ` +
+                        `If you genuinely need fresh data, wait at least ${Math.ceil(ttl / 1000)}s and try again.`,
+                },
+            };
+        }
+        if (entry.count >= softAt) {
+            stats.warned += 1;
+            logger.info(logFields, 'tool-call-dedup: warning on repeat call');
+            notify({ toolName, inputHash, callCount: entry.count, decision: 'warn', sinceFirstMs });
+            return {
+                hookSpecificOutput: {
+                    hookEventName: 'PreToolUse',
+                    additionalContext: `Note: you've already called \`${toolName}\` with these exact arguments ${entry.count - 1} time(s) in the last ${Math.floor(sinceFirstMs / 1000)}s. ` +
+                        `The result will be identical. Consider re-using the prior result rather than letting this call burn turns/budget. ` +
+                        `One more identical re-call will be blocked.`,
+                },
+            };
+        }
+        notify({ toolName, inputHash, callCount: entry.count, decision: 'allow', sinceFirstMs });
+        return {};
+    };
+    const preToolUse = async (input) => {
+        try {
+            return input?.hook_event_name === 'PreToolUse' ? decide(input) : {};
+        }
+        catch (err) { // documented failure mode: any internal error lets the call through
+            logger.warn({ err, runId: opts.runId }, 'tool-call-dedup: hook failed; allowing call');
+            return {};
+        }
+    };
+    return { hooks: { PreToolUse: [{ hooks: [preToolUse] }] }, stats };
+}
+//# sourceMappingURL=tool-call-dedup.js.map

package/dist/cli/dashboard.js CHANGED Viewed

@@ -23916,7 +23916,7 @@ function openCommandK() {
     { kw: 'home activity',      page: 'home',     tab: 'activity',     label: 'Home · Activity' },
     { kw: 'build workflows workflow builder', page: 'build', tab: 'workflows', label: 'Build · Workflow Builder' },
     { kw: 'build crons schedules scheduled tasks operations automation', page: 'build', tab: 'crons', label: 'Build · Schedules' },
-    { kw: 'build skills',       page: 'build',    tab: 'skills',       label: 'Build · Skills' },
+    { kw: 'build skills skill studio create skill', page: 'skills', tab: '', label: 'Skills · Skill Studio' },
     { kw: 'build templates',    page: 'build',    tab: 'templates',    label: 'Build · Templates' },
     { kw: 'team roster',        page: 'team',     tab: 'roster',       label: 'Team · Roster' },
     { kw: 'team activity',      page: 'team',     tab: 'activity',     label: 'Team · Activity' },
@@ -29970,6 +29970,30 @@ async function sbRunSkillTest() {
   }
 }
+function askSkillCreatorForDescription() { // Gathers the skill-modal form fields into a chat prompt asking for a frontmatter description, then passes it to askClementineWith.
+  var name = (document.getElementById('skill-modal-name')?.value || '').trim(); // A missing element or empty value yields ''.
+  var title = (document.getElementById('skill-modal-title')?.value || '').trim();
+  var desc = (document.getElementById('skill-modal-desc')?.value || '').trim();
+  var body = (document.getElementById('skill-modal-body')?.value || '').trim();
+  var prompt = [ // One array entry per prompt line; empty fields are replaced by a visible placeholder.
+    'Use skill-creator principles to help write the frontmatter description for this Clementine skill.',
+    '',
+    'Skill name: ' + (name || '(not set yet)'),
+    'Title: ' + (title || '(not set yet)'),
+    'Current description: ' + (desc || '(empty)'),
+    'Procedure preview:',
+    body ? body.slice(0, 1600) : '(empty)', // Only the first 1600 characters of the body are included.
+    '',
+    'Return one concise description under 1024 characters. It must say what the skill does, when to use it, and trigger phrases. Do not rewrite the whole skill unless I ask.'
+  ].join('\\n'); // NOTE(review): the doubled backslash looks deliberate - presumably this script is embedded in an outer string/template literal that unescapes it to a newline; confirm before changing.
+  if (typeof askClementineWith !== 'function') { // Guard: the chat helper may not be defined yet; report via toast and bail out.
+    toast('Chat is not ready yet. Try again after the dashboard finishes loading.', 'error');
+    return;
+  }
+  askClementineWith(prompt, { autoSend: false }); // Presumably preloads the chat input without sending (the toast below asks the user to press send) - verify against askClementineWith.
+  toast('Description prompt loaded in chat. Press send when ready.', 'info');
+}
 async function _openSkillModal(opts) {
   opts = opts || {};
   var prefill = opts.mode === 'create' && opts.prefill ? opts.prefill : {};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.172",
+  "version": "1.18.173",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",