npm - clementine-agent - Versions diffs - 1.18.41 → 1.18.43 - Mend

clementine-agent 1.18.41 → 1.18.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/agent/agent-definitions.d.ts +43 -0
package/dist/agent/agent-definitions.js +149 -0
package/dist/agent/run-agent.d.ts +96 -0
package/dist/agent/run-agent.js +211 -0
package/dist/cli/dashboard.js +53 -0
package/dist/tools/admin-tools.js +64 -0
package/package.json +1 -1

package/dist/agent/agent-definitions.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * Clementine TypeScript — AgentDefinition factory.
+ *
+ * The canonical Claude Agent SDK pattern is to pass `agents: { ... }`
+ * to `query()`, where each entry is an `AgentDefinition`. Claude routes
+ * subwork to subagents based on each definition's `description` field.
+ *
+ * Today's Clementine has multiple parallel orchestration paths
+ * (PlanOrchestrator, runUnleashedTask phases, fanout-policy directive,
+ * pre-LLM plan routing). This file is the start of consolidating all
+ * of that into the SDK-native subagent pattern.
+ *
+ * Usage:
+ *   const agents = buildAgentMap({ profileManager, isAutonomous: false });
+ *   query({ prompt, options: { agents, ... } })
+ *
+ * Phase 1 (1.18.43): this file is created but not wired into production
+ * yet. The dashboard's /api/runagent/test endpoint exercises it for
+ * verification before any real migration.
+ */
+import type { AgentDefinition } from '@anthropic-ai/claude-agent-sdk';
+import type { AgentManager } from './agent-manager.js';
+/** Options accepted by `buildAgentMap`. Every field is optional. */
+export interface BuildAgentMapOptions {
+    /** Source of hired-agent profiles. When undefined, only the system subagents are returned. */
+    profileManager?: AgentManager;
+    /** Intended to restrict the surface to safe-for-cron subagents (no chat-only ones).
+     *  NOTE(review): the `buildAgentMap` implementation shipped alongside this declaration
+     *  never reads this flag, so it currently has no effect on the returned map. */
+    isAutonomous?: boolean;
+    /** Active agent slug — when set, the profile with this slug is left out of the map
+     *  (an agent is not offered as its own subagent); every other hired agent still gets
+     *  a definition. The active agent's profile-as-system-prompt is handled by the caller. */
+    activeAgentSlug?: string;
+}
+/**
+ * Build the AgentDefinition map for a runAgent call. Mix of system
+ * subagents (planner, researcher, cron-fixer) and hired-agent profiles.
+ *
+ * The system subagents are intentionally minimal — they exist so Claude
+ * can route specific kinds of work cleanly. Add new ones (per the
+ * migration plan) as we collapse other orchestration paths.
+ *
+ * @param opts Optional settings; omitting it yields only the three system subagents.
+ * @returns A map keyed by subagent slug. The profile with slug `clementine` and the
+ *          profile matching `opts.activeAgentSlug` are never included.
+ */
+export declare function buildAgentMap(opts?: BuildAgentMapOptions): Record<string, AgentDefinition>;
+/** Membership helper for callers: true when `slug` is an OWN key of `map`
+ *  (inherited keys such as `toString` do not count). Despite the historical
+ *  "type guard" label this is not a TypeScript type predicate — it returns a plain boolean. */
+export declare function hasAgent(map: Record<string, AgentDefinition>, slug: string): boolean;
+//# sourceMappingURL=agent-definitions.d.ts.map

package/dist/agent/agent-definitions.js ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Clementine TypeScript — AgentDefinition factory.
+ *
+ * The canonical Claude Agent SDK pattern is to pass `agents: { ... }`
+ * to `query()`, where each entry is an `AgentDefinition`. Claude routes
+ * subwork to subagents based on each definition's `description` field.
+ *
+ * Today's Clementine has multiple parallel orchestration paths
+ * (PlanOrchestrator, runUnleashedTask phases, fanout-policy directive,
+ * pre-LLM plan routing). This file is the start of consolidating all
+ * of that into the SDK-native subagent pattern.
+ *
+ * Usage:
+ *   const agents = buildAgentMap({ profileManager, isAutonomous: false });
+ *   query({ prompt, options: { agents, ... } })
+ *
+ * Phase 1 (1.18.43): this file is created but not wired into production
+ * yet. The dashboard's /api/runagent/test endpoint exercises it for
+ * verification before any real migration.
+ */
+// System prompt for the `planner` subagent. Built as an array of lines joined
+// with '\n'; it instructs the model to answer with a bare JSON plan object.
+const PLANNER_PROMPT = [
+    'You are a task planner for Clementine. You receive a multi-step request from the parent agent.',
+    '',
+    'Your job: decompose the request into ATOMIC, parallel-safe steps, then return a JSON plan.',
+    '',
+    'Output ONLY a JSON object (no markdown fences, no prose):',
+    '{',
+    '  "steps": [',
+    '    { "id": "step-1", "description": "...", "subagent": "researcher|cron-fixer|...|null", "prompt": "...", "model": "haiku|sonnet", "dependsOn": [] }',
+    '  ],',
+    '  "synthesisHint": "How the parent should combine step outputs"',
+    '}',
+    '',
+    'Rules:',
+    '- 2-8 steps. Atomic = completes in 5-30 tool calls.',
+    '- MAXIMIZE parallelism: independent steps have empty dependsOn.',
+    '- Pick the right subagent per step:',
+    '  - `researcher` for per-item lookups (1 lead, 1 account, 1 file): model=haiku',
+    '  - `cron-fixer` for diagnose-and-apply on broken cron jobs: model=sonnet',
+    '  - null (parent runs the step) for synthesis or when no specialist fits',
+    '- Each step prompt is SELF-CONTAINED — the sub agent sees no parent history.',
+    '- End each step prompt with "Deliver: <one-line return shape>".',
+].join('\n');
+// System prompt for the `researcher` subagent: one item in, one paragraph out.
+const RESEARCHER_PROMPT = [
+    'You are a per-item research specialist. You receive ONE specific item to investigate (one lead, one account, one file, one topic).',
+    '',
+    'Use your bounded tools to gather the requested information. Return a ONE-PARAGRAPH summary in the format the parent specified.',
+    '',
+    'NEVER return raw tool output, full lists, or unbounded data. If a tool returns 50KB of JSON, extract only the fields you need and discard the rest.',
+    '',
+    'If you cannot find the requested data, say so in one line. Do not speculate.',
+].join('\n');
+// System prompt for the `cron-fixer` subagent. The tool names it mentions
+// (`list_broken_jobs`, `apply_broken_job_fix`) correspond to the
+// `mcp__clementine-tools__*` entries in that subagent's tool allowlist.
+const CRON_FIXER_PROMPT = [
+    'You are the cron-fix specialist. You diagnose and apply fixes to broken cron jobs.',
+    '',
+    'Workflow:',
+    '1. Call `list_broken_jobs` to see what is currently broken with their cached diagnoses.',
+    '2. For each job the user/parent asked about, check the proposed fix:',
+    '   - confidence=high + risk=low + autoApply=true → call `apply_broken_job_fix`.',
+    '   - Otherwise → describe the diagnosis and ask the parent for explicit approval.',
+    '3. After applying a fix, the verification system auto-rolls-back if the next 3 runs do not improve. You do NOT need to monitor manually.',
+    '',
+    'Return: a one-paragraph summary of what you applied (or what is blocking apply), per job.',
+].join('\n');
+/** Map a hired-agent profile to an AgentDefinition.
+ *  Used when Clementine wants to delegate to Ross/Sasha/Nora etc. */
+function profileToAgentDefinition(p) {
+    return {
+        description: p.description ?? `${p.name} (hired agent: ${p.slug})`,
+        prompt: p.systemPromptBody ?? `You are ${p.name}.`,
+        // Honor explicit allowlist when present; otherwise inherit from parent.
+        ...(p.team?.allowedTools?.length ? { tools: p.team.allowedTools } : {}),
+        // Hired agents keep their configured model (Sonnet by default).
+        ...(p.model ? { model: p.model } : { model: 'sonnet' }),
+        // Effort: hired agents do real work, default medium. Caller can override.
+        effort: 'medium',
+    };
+}
+/**
+ * Build the AgentDefinition map for a runAgent call. Mix of system
+ * subagents (planner, researcher, cron-fixer) and hired-agent profiles.
+ *
+ * The system subagents are intentionally minimal — they exist so Claude
+ * can route specific kinds of work cleanly. Add new ones (per the
+ * migration plan) as we collapse other orchestration paths.
+ */
+export function buildAgentMap(opts = {}) {
+    const map = {};
+    // ── System subagents ────────────────────────────────────────────
+    // Planner: opus, no tools, single turn. Used when the parent agent
+    // sees a multi-step request and wants a decomposition.
+    map['planner'] = {
+        description: 'Decompose a multi-step user request into atomic, parallel-safe steps. Use for "research these N items", "build a comprehensive X", "for each Y do Z", or any request that obviously involves multiple distinct sub-tasks. Returns a JSON plan; the parent then executes the steps (often by spawning more subagents per step).',
+        prompt: PLANNER_PROMPT,
+        model: 'opus',
+        tools: [], // pure reasoning, no tools
+        effort: 'high',
+        maxTurns: 1,
+    };
+    // Researcher: haiku, per-item investigation. Cheap fan-out target.
+    map['researcher'] = {
+        description: 'Investigate ONE specific item (one lead, one account, one file, one topic) and return a one-paragraph summary. Use for per-item parallel work spawned by the planner. Cheap and fast.',
+        prompt: RESEARCHER_PROMPT,
+        model: 'haiku',
+        tools: ['Read', 'Grep', 'Glob', 'Bash', 'WebSearch', 'WebFetch'],
+        effort: 'low',
+        maxTurns: 15,
+    };
+    // Cron-fixer: sonnet, owns the broken-job diagnose+apply path.
+    // Tools restricted to the canonical fix path (no parallel mechanisms).
+    map['cron-fixer'] = {
+        description: 'Diagnose and apply fixes to broken cron jobs. Use when the user says "fix X" referring to a job, asks "what jobs are failing", or asks to re-run/repair a cron. Owns the canonical diagnosis-to-apply flow.',
+        prompt: CRON_FIXER_PROMPT,
+        model: 'sonnet',
+        tools: [
+            'mcp__clementine-tools__list_broken_jobs',
+            'mcp__clementine-tools__apply_broken_job_fix',
+            'mcp__clementine-tools__cron_list',
+            'mcp__clementine-tools__cron_run_history',
+            'Read',
+            'Grep',
+        ],
+        effort: 'medium',
+        maxTurns: 10,
+    };
+    // ── Hired-agent profiles ────────────────────────────────────────
+    // Each becomes a subagent the main agent can delegate to.
+    // The "main" agent for a DM-to-bot session is set by the caller
+    // (still uses the profile's identity); these definitions cover the
+    // case where Clementine wants to invoke them mid-conversation.
+    if (opts.profileManager) {
+        const profiles = opts.profileManager.listAll();
+        for (const profile of profiles) {
+            // Skip clementine herself (she's the main agent, not a subagent)
+            if (profile.slug === 'clementine')
+                continue;
+            // Skip the active agent (don't make them their own subagent)
+            if (opts.activeAgentSlug && profile.slug === opts.activeAgentSlug)
+                continue;
+            map[profile.slug] = profileToAgentDefinition(profile);
+        }
+    }
+    return map;
+}
+/** Type guard helper for callers. */
+export function hasAgent(map, slug) {
+    return Object.prototype.hasOwnProperty.call(map, slug);
+}
+//# sourceMappingURL=agent-definitions.js.map

package/dist/agent/run-agent.d.ts ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * Clementine TypeScript — runAgent: canonical Claude Agent SDK wrapper.
+ *
+ * Phase 1 of the SDK-canonical migration (see
+ * /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md).
+ *
+ * This is the new code path that will eventually replace runCronJob /
+ * runUnleashedTask / runHeartbeat / runTeamTask / chat. For now it
+ * runs in PARALLEL with those — only the dashboard's
+ * /api/runagent/test endpoint exercises it. Production traffic still
+ * uses legacy paths until Phase 2.
+ *
+ * Design principles (from the SDK docs):
+ * 1. ONE query() call — no nested phase wrappers.
+ * 2. Subagents via the `agents` param — not via prompt-injected
+ *    fanout directives.
+ * 3. SDK handles: agent loop, compaction, tool execution, parallel
+ *    sub-spawning, prompt caching, session resume.
+ * 4. App handles: prompt + options assembly, transcript mirroring,
+ *    cost logging, channel delivery.
+ * 5. NO context-thrash recovery, NO manual session rotation, NO
+ *    long-task preflight, NO mode=unleashed wrapper.
+ */
+import { type AgentDefinition } from '@anthropic-ai/claude-agent-sdk';
+import type { AgentProfile } from '../types.js';
+import type { AgentManager } from './agent-manager.js';
+import type { MemoryStore } from '../memory/store.js';
+export interface RunAgentOptions {
+    /** Stable session key for this conversation/run. In this release runAgent uses it to
+     *  correlate log entries and as part of the usage-log key (`<source>:<sessionKey>`);
+     *  SDK resume is driven by `resumeSessionId`, not by this key. */
+    sessionKey: string;
+    /** Source classification for telemetry: 'chat' | 'cron' | 'heartbeat' | 'team-task' | 'test'.
+     *  Also selects the default effort and budget; a nullish value is treated as 'chat'. */
+    source: string;
+    /** Optional hired-agent profile. When set, this profile becomes the MAIN
+     *  agent (its system prompt is appended). When unset, Clementine is the main agent. */
+    profile?: AgentProfile | null;
+    /** Optional subagent slug to invoke explicitly (bypasses Claude's automatic routing).
+     *  When set AND present in the agent map, the prompt is wrapped to direct Claude to use
+     *  this subagent first; an unknown slug leaves the prompt untouched. */
+    forceSubagent?: string | null;
+    /** Hired-agent registry — used to construct the AgentDefinition map for delegation.
+     *  Not consulted when `agents` is supplied. */
+    agentManager?: AgentManager | null;
+    /** Memory store used for cost logging: `logUsage` is called once the SDK result
+     *  message arrives, provided it carries per-model usage. */
+    memoryStore?: MemoryStore | null;
+    /** Optional model override, forwarded to the SDK only when set.
+     *  NOTE(review): runAgent does not read a model from `profile`; without this
+     *  override the SDK's own default applies. */
+    model?: string;
+    /** Reasoning effort. Defaults vary by source: chat='medium', cron='medium', heartbeat='low'. */
+    effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max';
+    /** Hard budget cap (USD). Default varies by source. SDK aborts the run when hit. */
+    maxBudgetUsd?: number;
+    /** Hard turn cap. Default: no cap (SDK runs until done). A value of 0 is treated as unset. */
+    maxTurns?: number;
+    /** Optional resume — when set, the SDK continues from the prior session. */
+    resumeSessionId?: string;
+    /** Streaming callback for partial assistant text. Best-effort: it is awaited per
+     *  text block and any error it throws is swallowed. */
+    onText?: (chunk: string) => void | Promise<void>;
+    /** Streaming callback when a tool is invoked (name + input). Best-effort:
+     *  errors it throws are swallowed. */
+    onToolActivity?: (info: {
+        tool: string;
+        input: Record<string, unknown>;
+    }) => void | Promise<void>;
+    /** Abort signal — when triggered, the SDK stream is cancelled. */
+    abortSignal?: AbortSignal;
+    /** Optional override of the AgentDefinition map. Mostly for tests. */
+    agents?: Record<string, AgentDefinition>;
+    /** Optional explicit allowedTools list. When unset, falls back to the built-in default:
+     *  Agent (so subagents can be spawned) plus the core SDK tools (Read, Write, Edit, Glob,
+     *  Grep, Bash, WebSearch, WebFetch, TodoWrite).
+     *  NOTE(review): that default list contains no Clementine MCP tool names. */
+    allowedTools?: string[];
+    /** Optional CLAUDE.md / project setting source. Defaults to ['project']. */
+    settingSources?: ('project' | 'user' | 'local')[];
+}
+export interface RunAgentResult {
+    /** Final text response from the agent. On a `success` result this is the SDK's
+     *  `result` string when non-empty; otherwise it is the concatenation of every
+     *  assistant text block seen during the run. */
+    text: string;
+    /** Total cost in USD as reported by the SDK (0 when the SDK reports none). */
+    totalCostUsd: number;
+    /** Number of agentic turns the loop took (0 when the SDK reports none). */
+    numTurns: number;
+    /** SDK session ID — capture for resume. Empty string when the stream never supplied one. */
+    sessionId: string;
+    /** Final stop reason from the SDK (success, error_max_turns, error_max_budget_usd, etc).
+     *  'unknown' when no result message was received. */
+    subtype: string;
+    /** Token usage breakdown (input, output, cache). Omitted when the SDK result has no usage. */
+    usage?: {
+        input_tokens?: number;
+        output_tokens?: number;
+        cache_read_input_tokens?: number;
+        cache_creation_input_tokens?: number;
+    };
+}
+/**
+ * Run a single agent invocation via the canonical SDK pattern.
+ *
+ * Returns when the SDK loop completes (final assistant message with no
+ * tool calls, OR maxTurns/maxBudget hit, OR error).
+ *
+ * @param prompt User/task prompt; wrapped with a "Use the <slug> agent…" preamble
+ *               when `opts.forceSubagent` names a known subagent.
+ * @param opts   Run configuration, see `RunAgentOptions`.
+ * @returns Text, cost, turn count, session id and stop reason of the run.
+ */
+export declare function runAgent(prompt: string, opts: RunAgentOptions): Promise<RunAgentResult>;
+//# sourceMappingURL=run-agent.d.ts.map

package/dist/agent/run-agent.js ADDED Viewed

@@ -0,0 +1,211 @@
+/**
+ * Clementine TypeScript — runAgent: canonical Claude Agent SDK wrapper.
+ *
+ * Phase 1 of the SDK-canonical migration (see
+ * /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md).
+ *
+ * This is the new code path that will eventually replace runCronJob /
+ * runUnleashedTask / runHeartbeat / runTeamTask / chat. For now it
+ * runs in PARALLEL with those — only the dashboard's
+ * /api/runagent/test endpoint exercises it. Production traffic still
+ * uses legacy paths until Phase 2.
+ *
+ * Design principles (from the SDK docs):
+ * 1. ONE query() call — no nested phase wrappers.
+ * 2. Subagents via the `agents` param — not via prompt-injected
+ *    fanout directives.
+ * 3. SDK handles: agent loop, compaction, tool execution, parallel
+ *    sub-spawning, prompt caching, session resume.
+ * 4. App handles: prompt + options assembly, transcript mirroring,
+ *    cost logging, channel delivery.
+ * 5. NO context-thrash recovery, NO manual session rotation, NO
+ *    long-task preflight, NO mode=unleashed wrapper.
+ */
+import { query } from '@anthropic-ai/claude-agent-sdk';
+import pino from 'pino';
+import { BASE_DIR, normalizeClaudeSdkOptionsForOneMillionContext } from '../config.js';
+import { buildAgentMap } from './agent-definitions.js';
+const logger = pino({ name: 'clementine.run-agent' });
+// Default budget cap in USD per `source`, used when the caller passes no
+// maxBudgetUsd. Sources not listed here fall back to 0.50 inside runAgent.
+const DEFAULT_BUDGETS = {
+    chat: 0.50,
+    cron: 1.00,
+    heartbeat: 0.25,
+    'team-task': 1.00,
+    test: 2.00,
+};
+// Default reasoning effort per `source`, used when the caller passes no
+// effort. Sources not listed here fall back to 'medium' inside runAgent.
+const DEFAULT_EFFORTS = {
+    chat: 'medium',
+    cron: 'medium',
+    heartbeat: 'low',
+    'team-task': 'medium',
+    test: 'medium',
+};
+// allowedTools fallback for the parent agent when the caller supplies none.
+// Note that it lists only built-in tool names — no `mcp__*` entries.
+const CORE_TOOLS_FOR_AGENT_PARENT = [
+    'Agent', // REQUIRED — without this, subagents can't be invoked
+    'Read',
+    'Write',
+    'Edit',
+    'Glob',
+    'Grep',
+    'Bash',
+    'WebSearch',
+    'WebFetch',
+    'TodoWrite',
+];
+/**
+ * Run a single agent invocation via the canonical SDK pattern.
+ *
+ * Returns when the SDK loop completes (final assistant message with no
+ * tool calls, OR maxTurns/maxBudget hit, OR error).
+ */
+export async function runAgent(prompt, opts) {
+    const source = opts.source ?? 'chat';
+    const effort = opts.effort ?? DEFAULT_EFFORTS[source] ?? 'medium';
+    const maxBudgetUsd = opts.maxBudgetUsd ?? DEFAULT_BUDGETS[source] ?? 0.50;
+    const startedAt = Date.now();
+    // Build the AgentDefinition map. Caller can override; otherwise we
+    // use the standard system subagents + hired-agent profiles.
+    const agents = opts.agents ?? buildAgentMap({
+        profileManager: opts.agentManager ?? undefined,
+        isAutonomous: source === 'cron' || source === 'heartbeat',
+        activeAgentSlug: opts.profile?.slug,
+    });
+    // Wrap prompt to direct Claude to a specific subagent when caller asks.
+    // Per SDK docs: explicit invocation = "Use the X agent to..."
+    const effectivePrompt = opts.forceSubagent && agents[opts.forceSubagent]
+        ? `Use the ${opts.forceSubagent} agent to handle this request:\n\n${prompt}`
+        : prompt;
+    // Compose system prompt. When a hired-agent profile is active, that
+    // becomes the main agent's identity — append to the claude_code preset.
+    const profileAppend = opts.profile?.systemPromptBody
+        ? opts.profile.systemPromptBody
+        : undefined;
+    // Allowed tools. Default to core + Clementine MCP. Per-subagent tool
+    // restrictions live on each AgentDefinition.tools field.
+    const allowedTools = opts.allowedTools ?? CORE_TOOLS_FOR_AGENT_PARENT;
+    // Apply 1M-context env normalization (existing infra)
+    const sdkOptionsRaw = {
+        systemPrompt: profileAppend
+            ? { type: 'preset', preset: 'claude_code', append: profileAppend }
+            : { type: 'preset', preset: 'claude_code' },
+        settingSources: opts.settingSources ?? ['project'],
+        agents,
+        allowedTools,
+        permissionMode: 'bypassPermissions',
+        cwd: BASE_DIR,
+        maxBudgetUsd,
+        effort,
+        ...(opts.maxTurns ? { maxTurns: opts.maxTurns } : {}),
+        ...(opts.model ? { model: opts.model } : {}),
+        ...(opts.resumeSessionId ? { resume: opts.resumeSessionId } : {}),
+        ...(opts.abortSignal ? { abortController: { signal: opts.abortSignal } } : {}),
+    };
+    const sdkOptions = normalizeClaudeSdkOptionsForOneMillionContext(sdkOptionsRaw);
+    logger.info({
+        sessionKey: opts.sessionKey,
+        source,
+        profile: opts.profile?.slug,
+        forceSubagent: opts.forceSubagent,
+        effort,
+        maxBudgetUsd,
+        agentCount: Object.keys(agents).length,
+        allowedToolCount: allowedTools.length,
+    }, 'runAgent: starting query');
+    let finalText = '';
+    let sessionId = '';
+    let totalCostUsd = 0;
+    let numTurns = 0;
+    let subtype = 'unknown';
+    let usage;
+    const stream = query({ prompt: effectivePrompt, options: sdkOptions });
+    for await (const message of stream) {
+        if (message.type === 'system' && message.subtype === 'init') {
+            sessionId = message.session_id ?? '';
+            logger.debug({ sessionKey: opts.sessionKey, sdkSessionId: sessionId }, 'runAgent: SDK session initialized');
+            continue;
+        }
+        if (message.type === 'assistant') {
+            const am = message;
+            const blocks = (am.message?.content ?? []);
+            for (const block of blocks) {
+                if (block.type === 'text' && typeof block.text === 'string') {
+                    finalText += block.text;
+                    if (opts.onText) {
+                        try {
+                            await opts.onText(block.text);
+                        }
+                        catch { /* streaming is best-effort */ }
+                    }
+                }
+                else if (block.type === 'tool_use' && typeof block.name === 'string') {
+                    if (opts.onToolActivity) {
+                        try {
+                            await opts.onToolActivity({ tool: block.name, input: block.input ?? {} });
+                        }
+                        catch { /* best-effort */ }
+                    }
+                }
+            }
+            continue;
+        }
+        if (message.type === 'result') {
+            const result = message;
+            sessionId = sessionId || (result.session_id ?? '');
+            subtype = result.subtype ?? 'unknown';
+            numTurns = result.num_turns ?? numTurns;
+            totalCostUsd = result.total_cost_usd ?? 0;
+            const u = result.usage;
+            if (u)
+                usage = u;
+            if (subtype === 'success') {
+                // success carries `result` field with the final text.
+                const r = result.result;
+                if (r)
+                    finalText = r;
+            }
+            // Mirror cost to usage_log. Same shape as the existing
+            // logQueryResult, but standalone so we don't depend on
+            // PersonalAssistant's instance state.
+            const modelUsage = result.modelUsage;
+            if (opts.memoryStore && modelUsage) {
+                try {
+                    opts.memoryStore.logUsage({
+                        sessionKey: `${source}:${opts.sessionKey}`,
+                        source: `runagent.${source}`,
+                        modelUsage,
+                        numTurns,
+                        durationMs: Date.now() - startedAt,
+                        agentSlug: opts.profile?.slug,
+                        totalCostUsd: totalCostUsd,
+                    });
+                }
+                catch (err) {
+                    logger.debug({ err }, 'runAgent: usage logging failed (non-fatal)');
+                }
+            }
+            continue;
+        }
+        // Other message types (UserMessage with tool_result, StreamEvent,
+        // SDKCompactBoundaryMessage) — observed but not acted on. The SDK
+        // handles compaction internally; we just let it run.
+    }
+    logger.info({
+        sessionKey: opts.sessionKey,
+        source,
+        sdkSessionId: sessionId,
+        subtype,
+        numTurns,
+        totalCostUsd: Number(totalCostUsd.toFixed(4)),
+        durationMs: Date.now() - startedAt,
+        finalTextChars: finalText.length,
+    }, 'runAgent: query complete');
+    return {
+        text: finalText,
+        totalCostUsd,
+        numTurns,
+        sessionId,
+        subtype,
+        ...(usage ? { usage } : {}),
+    };
+}
+//# sourceMappingURL=run-agent.js.map

package/dist/cli/dashboard.js CHANGED Viewed

@@ -5428,6 +5428,59 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
             res.status(500).json({ error: String(err) });
         }
     });
+    // ── runAgent test endpoint (Phase 1 of SDK-canonical migration) ──────
+    //
+    // POST /api/runagent/test
+    //   body: { prompt, agentSlug?, forceSubagent?, model?, effort?, maxBudgetUsd?, source? }
+    //
+    // Lightweight endpoint to verify the new canonical SDK call path
+    // without rerouting any production traffic. Owner-only.
+    // Migration plan: /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md
+    app.post('/api/runagent/test', async (req, res) => {
+        const { prompt, agentSlug, forceSubagent, model, effort, maxBudgetUsd, source } = req.body ?? {};
+        if (!prompt || typeof prompt !== 'string') {
+            res.status(400).json({ error: 'prompt is required' });
+            return;
+        }
+        try {
+            const gw = await getGateway();
+            const agentMgr = gw.getAgentManager();
+            const profile = agentSlug ? agentMgr.get(agentSlug) ?? null : null;
+            const memoryStore = gw.assistant.getMemoryStore?.();
+            const { runAgent } = await import('../agent/run-agent.js');
+            const startedAt = Date.now();
+            const toolActivity = [];
+            const result = await runAgent(prompt, {
+                sessionKey: `dashboard:runagent-test:${Date.now()}`,
+                source: typeof source === 'string' ? source : 'test',
+                profile,
+                forceSubagent: typeof forceSubagent === 'string' ? forceSubagent : null,
+                agentManager: agentMgr,
+                memoryStore: memoryStore,
+                model: typeof model === 'string' ? model : undefined,
+                effort: typeof effort === 'string' ? effort : undefined,
+                maxBudgetUsd: typeof maxBudgetUsd === 'number' ? maxBudgetUsd : undefined,
+                onToolActivity: ({ tool, input }) => {
+                    toolActivity.push({ tool, inputPreview: JSON.stringify(input).slice(0, 200) });
+                },
+            });
+            res.json({
+                ok: true,
+                text: result.text,
+                sessionId: result.sessionId,
+                subtype: result.subtype,
+                numTurns: result.numTurns,
+                totalCostUsd: Number(result.totalCostUsd.toFixed(4)),
+                durationMs: Date.now() - startedAt,
+                toolCallCount: toolActivity.length,
+                toolActivity: toolActivity.slice(0, 50), // cap for sanity
+                usage: result.usage,
+            });
+        }
+        catch (err) {
+            res.status(500).json({ error: String(err) });
+        }
+    });
     /** Dismiss a diagnosis without applying — clears the cached result. */
     app.post('/api/cron/broken-jobs/:jobName/dismiss-diagnosis', async (req, res) => {
         try {

package/dist/tools/admin-tools.js CHANGED Viewed

@@ -1881,5 +1881,69 @@ export function registerAdminTools(server) {
         const result = await runConnectNonInteractive({ allowQuitChrome: !!force_quit });
         return textResult(result.message);
     });
+    // ── Broken-job diagnosis + fix-application (chat-equivalent of dashboard buttons) ──
+    //
+    // Before this, when the user asked "fix audit-inbox-check" in chat,
+    // Clementine could read run logs and describe the failure but had no
+    // tool to actually APPLY the stored fix — so she'd just keep returning
+    // the same diagnosis text on every retry. The dashboard had the
+    // "Apply Fix" button; the agent had nothing equivalent. These two
+    // tools close that gap.
+    server.tool('list_broken_jobs', 'List cron jobs that are currently failing repeatedly, with their cached diagnosis (if any) and whether each has an auto-applicable fix proposal. Use this when the user asks "what\'s broken?" or "what jobs are failing?" — it surfaces the same data the dashboard\'s broken-jobs panel shows.', {}, async () => {
+        const { computeBrokenJobs } = await import('../gateway/failure-monitor.js');
+        const { getDiagnosisIfFresh } = await import('../gateway/failure-diagnostics.js');
+        const broken = computeBrokenJobs();
+        if (broken.length === 0) {
+            return textResult('No cron jobs are currently flagged as broken.');
+        }
+        const lines = [`${broken.length} cron job${broken.length === 1 ? '' : 's'} flagged as broken:`];
+        for (const b of broken) {
+            const d = getDiagnosisIfFresh(b.jobName);
+            const fix = d?.proposedFix;
+            const autoApplyAvailable = !!fix?.autoApply && d?.riskLevel === 'low';
+            lines.push(`\n• \`${b.jobName}\``, `  failures last 48h: ${b.errorCount48h}/${b.totalRuns48h}`, b.lastErrors[0] ? `  last error: ${String(b.lastErrors[0]).slice(0, 200)}` : '', d ? `  diagnosis: ${d.rootCause?.slice(0, 200) ?? "(no root cause)"}` : '  diagnosis: pending — wait for next failure-monitor sweep', d ? `  proposed fix: type=${fix?.type ?? 'unknown'} confidence=${d.confidence ?? 'unknown'} risk=${d.riskLevel ?? 'unknown'}` : '', autoApplyAvailable
+                ? `  ✓ auto-applicable — call apply_broken_job_fix with jobName="${b.jobName}"`
+                : '  ✗ not auto-applicable — manual review or dashboard intervention needed');
+        }
+        return textResult(lines.filter(Boolean).join('\n'));
+    });
+    server.tool('apply_broken_job_fix', 'Apply the cached auto-applicable fix for a broken cron job. Use this when the user explicitly asks to "fix" a job that has a confirmed diagnosis with autoApply=true and risk=low. Pass dryRun=true to preview without writing. Returns the applied operations, or refuses with a clear reason when the diagnosis is missing/risky/non-auto-applicable.', {
+        jobName: z.string().describe('The job name as shown in CRON.md or list_broken_jobs output (e.g. "audit-inbox-check" or "ross-the-sdr:reply-detection").'),
+        dryRun: z.boolean().optional().describe('If true, validate + show what would change but do not write. Default false.'),
+    }, async ({ jobName, dryRun }) => {
+        const { getDiagnosisIfFresh, clearDiagnosis } = await import('../gateway/failure-diagnostics.js');
+        const { applyFix } = await import('../gateway/fix-applier.js');
+        const d = getDiagnosisIfFresh(jobName);
+        if (!d) {
+            return textResult(`No fresh diagnosis for \`${jobName}\`. The failure-monitor sweep hasn't produced one yet, ` +
+                `or the diagnosis expired. Wait for the next sweep, or dig into ~/.clementine/cron/runs/${jobName}.jsonl ` +
+                `and the run-trace files for the actual error.`);
+        }
+        if (!d.proposedFix?.autoApply) {
+            return textResult(`Diagnosis for \`${jobName}\` has no auto-applicable operations. ` +
+                `Type: ${d.proposedFix?.type ?? 'unknown'}. ` +
+                `This usually means the fix needs manual review — surface the diagnosis to the owner ` +
+                `(${d.rootCause ?? "(no root cause)"}) instead of attempting auto-fix.`);
+        }
+        if (d.riskLevel !== 'low') {
+            return textResult(`Diagnosis for \`${jobName}\` has riskLevel=${d.riskLevel}. ` +
+                `Auto-apply is gated to risk=low only. ` +
+                `Show the proposed fix to the owner for explicit approval.`);
+        }
+        const isDryRun = dryRun === true;
+        const result = applyFix(jobName, d.proposedFix.autoApply, { dryRun: isDryRun });
+        if (result.ok && !isDryRun)
+            clearDiagnosis(jobName);
+        if (!result.ok) {
+            return textResult(`Apply failed for \`${jobName}\`: ${'error' in result ? result.error : 'unknown error'}`);
+        }
+        const opsCount = 'operations' in result ? result.operations.length : 0;
+        return textResult(isDryRun
+            ? `[DRY RUN] Would apply ${opsCount} operation${opsCount === 1 ? '' : 's'} to fix \`${jobName}\`. ` +
+                `Root cause: ${d.rootCause?.slice(0, 200) ?? ""}. Re-run without dryRun to commit.`
+            : `Applied fix for \`${jobName}\` (${opsCount} operation${opsCount === 1 ? '' : 's'}). ` +
+                `The fix-verification tracker will roll it back automatically if the next runs don't improve. ` +
+                `Root cause: ${d.rootCause?.slice(0, 200) ?? ""}.`);
+    });
 }
 //# sourceMappingURL=admin-tools.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.41",
+  "version": "1.18.43",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",