npm - clementine-agent - Versions diffs - 1.18.42 → 1.18.44 - Mend

clementine-agent 1.18.42 → 1.18.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/agent/agent-definitions.d.ts +43 -0
package/dist/agent/agent-definitions.js +149 -0
package/dist/agent/run-agent.d.ts +96 -0
package/dist/agent/run-agent.js +267 -0
package/dist/cli/dashboard.js +55 -1
package/dist/tools/admin-tools.js +8 -141
package/package.json +1 -1

package/dist/agent/agent-definitions.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * Clementine TypeScript — AgentDefinition factory.
+ *
+ * The canonical Claude Agent SDK pattern is to pass `agents: { ... }`
+ * to `query()`, where each entry is an `AgentDefinition`. Claude routes
+ * subwork to subagents based on each definition's `description` field.
+ *
+ * Today's Clementine has multiple parallel orchestration paths
+ * (PlanOrchestrator, runUnleashedTask phases, fanout-policy directive,
+ * pre-LLM plan routing). This file is the start of consolidating all
+ * of that into the SDK-native subagent pattern.
+ *
+ * Usage:
+ *   const agents = buildAgentMap({ profileManager, isAutonomous: false });
+ *   query({ prompt, options: { agents, ... } })
+ *
+ * Phase 1 (1.18.43): this file is created but not wired into production
+ * yet. The dashboard's /api/runagent/test endpoint exercises it for
+ * verification before any real migration.
+ */
+import type { AgentDefinition } from '@anthropic-ai/claude-agent-sdk';
+import type { AgentManager } from './agent-manager.js';
+export interface BuildAgentMapOptions {
+    /** Source of hired-agent profiles. When undefined, only the system subagents are returned. */
+    profileManager?: AgentManager;
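+    /** Intended to restrict the surface to safe-for-cron subagents (no chat-only ones). NOTE: as of 1.18.44 buildAgentMap accepts this flag but does not read it, so it currently has no effect. */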
+    isAutonomous?: boolean;
+    /** Active agent slug — when set, hired agents OTHER than this one still get definitions
+     *  but the active one's profile-as-system-prompt is handled by the caller. */
+    activeAgentSlug?: string;
+}
+/**
+ * Build the AgentDefinition map for a runAgent call. Mix of system
+ * subagents (planner, researcher, cron-fixer) and hired-agent profiles.
+ *
+ * The system subagents are intentionally minimal — they exist so Claude
+ * can route specific kinds of work cleanly. Add new ones (per the
+ * migration plan) as we collapse other orchestration paths.
+ */
+export declare function buildAgentMap(opts?: BuildAgentMapOptions): Record<string, AgentDefinition>;
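+/** Returns true when `slug` is an own key of `map` (inherited keys such as "toString" do not count). Plain boolean check, not a TypeScript type guard. */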
+export declare function hasAgent(map: Record<string, AgentDefinition>, slug: string): boolean;
+//# sourceMappingURL=agent-definitions.d.ts.map

package/dist/agent/agent-definitions.js ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Clementine TypeScript — AgentDefinition factory.
+ *
+ * The canonical Claude Agent SDK pattern is to pass `agents: { ... }`
+ * to `query()`, where each entry is an `AgentDefinition`. Claude routes
+ * subwork to subagents based on each definition's `description` field.
+ *
+ * Today's Clementine has multiple parallel orchestration paths
+ * (PlanOrchestrator, runUnleashedTask phases, fanout-policy directive,
+ * pre-LLM plan routing). This file is the start of consolidating all
+ * of that into the SDK-native subagent pattern.
+ *
+ * Usage:
+ *   const agents = buildAgentMap({ profileManager, isAutonomous: false });
+ *   query({ prompt, options: { agents, ... } })
+ *
+ * Phase 1 (1.18.43): this file is created but not wired into production
+ * yet. The dashboard's /api/runagent/test endpoint exercises it for
+ * verification before any real migration.
+ */
+// System prompt for the `planner` subagent. Built as an array of lines joined
+// with '\n' so each instruction stays readable; the text is sent to the model
+// verbatim, so any edit here changes planner behavior.
+const PLANNER_PROMPT = [
+    'You are a task planner for Clementine. You receive a multi-step request from the parent agent.',
+    '',
+    'Your job: decompose the request into ATOMIC, parallel-safe steps, then return a JSON plan.',
+    '',
+    'Output ONLY a JSON object (no markdown fences, no prose):',
+    '{',
+    '  "steps": [',
+    '    { "id": "step-1", "description": "...", "subagent": "researcher|cron-fixer|...|null", "prompt": "...", "model": "haiku|sonnet", "dependsOn": [] }',
+    '  ],',
+    '  "synthesisHint": "How the parent should combine step outputs"',
+    '}',
+    '',
+    'Rules:',
+    '- 2-8 steps. Atomic = completes in 5-30 tool calls.',
+    '- MAXIMIZE parallelism: independent steps have empty dependsOn.',
+    '- Pick the right subagent per step:',
+    '  - `researcher` for per-item lookups (1 lead, 1 account, 1 file): model=haiku',
+    '  - `cron-fixer` for diagnose-and-apply on broken cron jobs: model=sonnet',
+    '  - null (parent runs the step) for synthesis or when no specialist fits',
+    '- Each step prompt is SELF-CONTAINED — the sub agent sees no parent history.',
+    '- End each step prompt with "Deliver: <one-line return shape>".',
+].join('\n');
+// System prompt for the `researcher` subagent: one item in, one paragraph out.
+const RESEARCHER_PROMPT = [
+    'You are a per-item research specialist. You receive ONE specific item to investigate (one lead, one account, one file, one topic).',
+    '',
+    'Use your bounded tools to gather the requested information. Return a ONE-PARAGRAPH summary in the format the parent specified.',
+    '',
+    'NEVER return raw tool output, full lists, or unbounded data. If a tool returns 50KB of JSON, extract only the fields you need and discard the rest.',
+    '',
+    'If you cannot find the requested data, say so in one line. Do not speculate.',
+].join('\n');
+// System prompt for the `cron-fixer` subagent. It refers to the MCP tools by
+// their bare names (`list_broken_jobs`, `apply_broken_job_fix`); the fully
+// qualified tool names are listed on the subagent definition in buildAgentMap.
+const CRON_FIXER_PROMPT = [
+    'You are the cron-fix specialist. You diagnose and apply fixes to broken cron jobs.',
+    '',
+    'Workflow:',
+    '1. Call `list_broken_jobs` to see what is currently broken with their cached diagnoses.',
+    '2. For each job the user/parent asked about, check the proposed fix:',
+    '   - confidence=high + risk=low + autoApply=true → call `apply_broken_job_fix`.',
+    '   - Otherwise → describe the diagnosis and ask the parent for explicit approval.',
+    '3. After applying a fix, the verification system auto-rolls-back if the next 3 runs do not improve. You do NOT need to monitor manually.',
+    '',
+    'Return: a one-paragraph summary of what you applied (or what is blocking apply), per job.',
+].join('\n');
+/** Map a hired-agent profile to an AgentDefinition.
+ *  Used when Clementine wants to delegate to Ross/Sasha/Nora etc. */
+function profileToAgentDefinition(p) {
+    return {
+        description: p.description ?? `${p.name} (hired agent: ${p.slug})`,
+        prompt: p.systemPromptBody ?? `You are ${p.name}.`,
+        // Honor explicit allowlist when present; otherwise inherit from parent.
+        ...(p.team?.allowedTools?.length ? { tools: p.team.allowedTools } : {}),
+        // Hired agents keep their configured model (Sonnet by default).
+        ...(p.model ? { model: p.model } : { model: 'sonnet' }),
+        // Effort: hired agents do real work, default medium. Caller can override.
+        effort: 'medium',
+    };
+}
+/**
+ * Build the AgentDefinition map for a runAgent call. Mix of system
+ * subagents (planner, researcher, cron-fixer) and hired-agent profiles.
+ *
+ * The system subagents are intentionally minimal — they exist so Claude
+ * can route specific kinds of work cleanly. Add new ones (per the
+ * migration plan) as we collapse other orchestration paths.
+ */
+export function buildAgentMap(opts = {}) {
+    const map = {};
+    // ── System subagents ────────────────────────────────────────────
+    // Planner: opus, no tools, single turn. Used when the parent agent
+    // sees a multi-step request and wants a decomposition.
+    map['planner'] = {
+        description: 'Decompose a multi-step user request into atomic, parallel-safe steps. Use for "research these N items", "build a comprehensive X", "for each Y do Z", or any request that obviously involves multiple distinct sub-tasks. Returns a JSON plan; the parent then executes the steps (often by spawning more subagents per step).',
+        prompt: PLANNER_PROMPT,
+        model: 'opus',
+        tools: [], // pure reasoning, no tools
+        effort: 'high',
+        maxTurns: 1,
+    };
+    // Researcher: haiku, per-item investigation. Cheap fan-out target.
+    map['researcher'] = {
+        description: 'Investigate ONE specific item (one lead, one account, one file, one topic) and return a one-paragraph summary. Use for per-item parallel work spawned by the planner. Cheap and fast.',
+        prompt: RESEARCHER_PROMPT,
+        model: 'haiku',
+        tools: ['Read', 'Grep', 'Glob', 'Bash', 'WebSearch', 'WebFetch'],
+        effort: 'low',
+        maxTurns: 15,
+    };
+    // Cron-fixer: sonnet, owns the broken-job diagnose+apply path.
+    // Tools restricted to the canonical fix path (no parallel mechanisms).
+    map['cron-fixer'] = {
+        description: 'Diagnose and apply fixes to broken cron jobs. Use when the user says "fix X" referring to a job, asks "what jobs are failing", or asks to re-run/repair a cron. Owns the canonical diagnosis-to-apply flow.',
+        prompt: CRON_FIXER_PROMPT,
+        model: 'sonnet',
+        tools: [
+            'mcp__clementine-tools__list_broken_jobs',
+            'mcp__clementine-tools__apply_broken_job_fix',
+            'mcp__clementine-tools__cron_list',
+            'mcp__clementine-tools__cron_run_history',
+            'Read',
+            'Grep',
+        ],
+        effort: 'medium',
+        maxTurns: 10,
+    };
+    // ── Hired-agent profiles ────────────────────────────────────────
+    // Each becomes a subagent the main agent can delegate to.
+    // The "main" agent for a DM-to-bot session is set by the caller
+    // (still uses the profile's identity); these definitions cover the
+    // case where Clementine wants to invoke them mid-conversation.
+    if (opts.profileManager) {
+        const profiles = opts.profileManager.listAll();
+        for (const profile of profiles) {
+            // Skip clementine herself (she's the main agent, not a subagent)
+            if (profile.slug === 'clementine')
+                continue;
+            // Skip the active agent (don't make them their own subagent)
+            if (opts.activeAgentSlug && profile.slug === opts.activeAgentSlug)
+                continue;
+            map[profile.slug] = profileToAgentDefinition(profile);
+        }
+    }
+    return map;
+}
+/** Type guard helper for callers. */
+export function hasAgent(map, slug) {
+    return Object.prototype.hasOwnProperty.call(map, slug);
+}
+//# sourceMappingURL=agent-definitions.js.map

package/dist/agent/run-agent.d.ts ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * Clementine TypeScript — runAgent: canonical Claude Agent SDK wrapper.
+ *
+ * Phase 1 of the SDK-canonical migration (see
+ * /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md).
+ *
+ * This is the new code path that will eventually replace runCronJob /
+ * runUnleashedTask / runHeartbeat / runTeamTask / chat. For now it
+ * runs in PARALLEL with those — only the dashboard's
+ * /api/runagent/test endpoint exercises it. Production traffic still
+ * uses legacy paths until Phase 2.
+ *
+ * Design principles (from the SDK docs):
+ * 1. ONE query() call — no nested phase wrappers.
+ * 2. Subagents via the `agents` param — not via prompt-injected
+ *    fanout directives.
+ * 3. SDK handles: agent loop, compaction, tool execution, parallel
+ *    sub-spawning, prompt caching, session resume.
+ * 4. App handles: prompt + options assembly, transcript mirroring,
+ *    cost logging, channel delivery.
+ * 5. NO context-thrash recovery, NO manual session rotation, NO
+ *    long-task preflight, NO mode=unleashed wrapper.
+ */
+import { type AgentDefinition } from '@anthropic-ai/claude-agent-sdk';
+import type { AgentProfile } from '../types.js';
+import type { AgentManager } from './agent-manager.js';
+import type { MemoryStore } from '../memory/store.js';
+export interface RunAgentOptions {
+    /** Stable session key for this conversation/run. Used for transcript mirroring + resume. */
+    sessionKey: string;
+    /** Source classification for telemetry: 'chat' | 'cron' | 'heartbeat' | 'team-task' | 'test'. */
+    source: string;
+    /** Optional hired-agent profile. When set, this profile becomes the MAIN
+     *  agent (its system prompt is appended). When unset, Clementine is the main agent. */
+    profile?: AgentProfile | null;
+    /** Optional subagent slug to invoke explicitly (bypasses Claude's automatic routing).
+     *  When set, the prompt is wrapped to direct Claude to use this subagent first. */
+    forceSubagent?: string | null;
+    /** Hired-agent registry — used to construct the AgentDefinition map for delegation. */
+    agentManager?: AgentManager | null;
+    /** Memory store for transcript mirroring + cost logging. */
+    memoryStore?: MemoryStore | null;
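+    /** Optional model override. When unset, no model is passed and the SDK default applies. NOTE(review): the 1.18.44 implementation forwards only this field — the profile's own model is not read for the main agent; confirm whether it should be. */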
+    model?: string;
+    /** Reasoning effort. Defaults vary by source: chat='medium', cron='medium', heartbeat='low'. */
+    effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max';
+    /** Hard budget cap (USD). Default varies by source. SDK aborts the run when hit. */
+    maxBudgetUsd?: number;
+    /** Hard turn cap. Default: no cap (SDK runs until done). */
+    maxTurns?: number;
+    /** Optional resume — when set, the SDK continues from the prior session. */
+    resumeSessionId?: string;
+    /** Streaming callback for partial assistant text. Best-effort. */
+    onText?: (chunk: string) => void | Promise<void>;
+    /** Streaming callback when a tool is invoked (name + input). Best-effort. */
+    onToolActivity?: (info: {
+        tool: string;
+        input: Record<string, unknown>;
+    }) => void | Promise<void>;
+    /** Abort signal — when triggered, the SDK stream is cancelled. */
+    abortSignal?: AbortSignal;
+    /** Optional override of the AgentDefinition map. Mostly for tests. */
+    agents?: Record<string, AgentDefinition>;
+    /** Optional explicit allowedTools list. When unset, falls back to a default of Agent
+     *  (so subagents can be spawned) + core SDK tools. The Clementine MCP server is wired separately via mcpServers; its tools are not named in the default list. */
+    allowedTools?: string[];
+    /** Optional CLAUDE.md / project setting source. Defaults to ['project']. */
+    settingSources?: ('project' | 'user' | 'local')[];
+}
+export interface RunAgentResult {
+    /** Final text response from the agent. */
+    text: string;
+    /** Total cost in USD as reported by the SDK. */
+    totalCostUsd: number;
+    /** Number of agentic turns the loop took. */
+    numTurns: number;
+    /** SDK session ID — capture for resume. */
+    sessionId: string;
+    /** Final stop reason from the SDK (success, error_max_turns, error_max_budget_usd, etc). */
+    subtype: string;
+    /** Token usage breakdown (input, output, cache). */
+    usage?: {
+        input_tokens?: number;
+        output_tokens?: number;
+        cache_read_input_tokens?: number;
+        cache_creation_input_tokens?: number;
+    };
+}
+/**
+ * Run a single agent invocation via the canonical SDK pattern.
+ *
+ * Returns when the SDK loop completes (final assistant message with no
+ * tool calls, OR maxTurns/maxBudget hit, OR error).
+ */
+export declare function runAgent(prompt: string, opts: RunAgentOptions): Promise<RunAgentResult>;
+//# sourceMappingURL=run-agent.d.ts.map

package/dist/agent/run-agent.js ADDED Viewed

@@ -0,0 +1,267 @@
+/**
+ * Clementine TypeScript — runAgent: canonical Claude Agent SDK wrapper.
+ *
+ * Phase 1 of the SDK-canonical migration (see
+ * /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md).
+ *
+ * This is the new code path that will eventually replace runCronJob /
+ * runUnleashedTask / runHeartbeat / runTeamTask / chat. For now it
+ * runs in PARALLEL with those — only the dashboard's
+ * /api/runagent/test endpoint exercises it. Production traffic still
+ * uses legacy paths until Phase 2.
+ *
+ * Design principles (from the SDK docs):
+ * 1. ONE query() call — no nested phase wrappers.
+ * 2. Subagents via the `agents` param — not via prompt-injected
+ *    fanout directives.
+ * 3. SDK handles: agent loop, compaction, tool execution, parallel
+ *    sub-spawning, prompt caching, session resume.
+ * 4. App handles: prompt + options assembly, transcript mirroring,
+ *    cost logging, channel delivery.
+ * 5. NO context-thrash recovery, NO manual session rotation, NO
+ *    long-task preflight, NO mode=unleashed wrapper.
+ */
+import path from 'node:path';
+import { query } from '@anthropic-ai/claude-agent-sdk';
+import pino from 'pino';
+import { BASE_DIR, PKG_DIR, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY as CONFIG_ANTHROPIC_API_KEY, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
+import { buildAgentMap } from './agent-definitions.js';
+// Path of the compiled MCP server entry point, launched via `node` as a stdio server by runAgent.
+const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
+// Lower-cased assistant name; defaults to "clementine" when ASSISTANT_NAME is unset.
+const ASSISTANT_NAME = (process.env.ASSISTANT_NAME ?? 'Clementine').toLowerCase();
+// Key under which the MCP server is registered in `mcpServers` (e.g. "clementine-tools").
+const TOOLS_SERVER = `${ASSISTANT_NAME}-tools`;
+/**
+ * Build a minimal env for the SDK subprocess. Mirrors the existing
+ * SAFE_ENV pattern in assistant.ts but exposed here so runAgent can be
+ * its own thing without depending on the legacy assistant module.
+ *
+ * Priority: CLAUDE_CODE_OAUTH_TOKEN > ANTHROPIC_AUTH_TOKEN > ANTHROPIC_API_KEY.
+ * When all are absent, HOME lets the subprocess find Keychain OAuth.
+ */
+function buildRunAgentEnv() {
+    const env = {
+        PATH: process.env.PATH ?? '',
+        HOME: process.env.HOME ?? '',
+        LANG: process.env.LANG ?? 'en_US.UTF-8',
+        TERM: process.env.TERM ?? 'xterm-256color',
+        USER: process.env.USER ?? '',
+        SHELL: process.env.SHELL ?? '',
+        CLEMENTINE_HOME: BASE_DIR,
+    };
+    const oauthTok = CLAUDE_CODE_OAUTH_TOKEN || process.env.CLAUDE_CODE_OAUTH_TOKEN;
+    const authTok = process.env.ANTHROPIC_AUTH_TOKEN;
+    const apiKey = CONFIG_ANTHROPIC_API_KEY || process.env.ANTHROPIC_API_KEY;
+    if (oauthTok) {
+        env.CLAUDE_CODE_OAUTH_TOKEN = oauthTok;
+    }
+    else if (authTok) {
+        env.ANTHROPIC_AUTH_TOKEN = authTok;
+    }
+    else if (apiKey) {
+        env.ANTHROPIC_API_KEY = apiKey;
+    }
+    return env;
+}
+// Module-scoped structured logger for all runAgent log lines.
+const logger = pino({ name: 'clementine.run-agent' });
+// Default `maxBudgetUsd` (USD) per source, used when the caller passes none.
+// runAgent falls back to 0.50 for sources not listed here.
+const DEFAULT_BUDGETS = {
+    chat: 0.50,
+    cron: 1.00,
+    heartbeat: 0.25,
+    'team-task': 1.00,
+    test: 2.00,
+};
+// Default reasoning effort per source, used when the caller passes none.
+// runAgent falls back to 'medium' for sources not listed here.
+const DEFAULT_EFFORTS = {
+    chat: 'medium',
+    cron: 'medium',
+    heartbeat: 'low',
+    'team-task': 'medium',
+    test: 'medium',
+};
+// Default `allowedTools` for the parent agent when the caller passes none.
+// Contains only built-in tool names — no MCP tool names are listed.
+const CORE_TOOLS_FOR_AGENT_PARENT = [
+    'Agent', // REQUIRED — without this, subagents can't be invoked
+    'Read',
+    'Write',
+    'Edit',
+    'Glob',
+    'Grep',
+    'Bash',
+    'WebSearch',
+    'WebFetch',
+    'TodoWrite',
+];
+/**
+ * Run a single agent invocation via the canonical SDK pattern.
+ *
+ * Returns when the SDK loop completes (final assistant message with no
+ * tool calls, OR maxTurns/maxBudget hit, OR error).
+ */
+export async function runAgent(prompt, opts) {
+    const source = opts.source ?? 'chat';
+    const effort = opts.effort ?? DEFAULT_EFFORTS[source] ?? 'medium';
+    const maxBudgetUsd = opts.maxBudgetUsd ?? DEFAULT_BUDGETS[source] ?? 0.50;
+    const startedAt = Date.now();
+    // Build the AgentDefinition map. Caller can override; otherwise we
+    // use the standard system subagents + hired-agent profiles.
+    const agents = opts.agents ?? buildAgentMap({
+        profileManager: opts.agentManager ?? undefined,
+        isAutonomous: source === 'cron' || source === 'heartbeat',
+        activeAgentSlug: opts.profile?.slug,
+    });
+    // Wrap prompt to direct Claude to a specific subagent when caller asks.
+    // Per SDK docs: explicit invocation = "Use the X agent to..."
+    const effectivePrompt = opts.forceSubagent && agents[opts.forceSubagent]
+        ? `Use the ${opts.forceSubagent} agent to handle this request:\n\n${prompt}`
+        : prompt;
+    // Compose system prompt. When a hired-agent profile is active, that
+    // becomes the main agent's identity — append to the claude_code preset.
+    const profileAppend = opts.profile?.systemPromptBody
+        ? opts.profile.systemPromptBody
+        : undefined;
+    // Allowed tools. Default to core + Clementine MCP. Per-subagent tool
+    // restrictions live on each AgentDefinition.tools field.
+    const allowedTools = opts.allowedTools ?? CORE_TOOLS_FOR_AGENT_PARENT;
+    // Wire the Clementine MCP server so the agent can reach memory/cron/
+    // broken-job tools. Without this, the cron-fixer subagent's `tools`
+    // list references mcp__clementine-tools__* that don't exist in the
+    // session, and the agent falls back to reading raw JSON files.
+    const subprocessEnv = buildRunAgentEnv();
+    const mcpServers = {
+        [TOOLS_SERVER]: {
+            type: 'stdio',
+            command: 'node',
+            args: [MCP_SERVER_SCRIPT],
+            env: {
+                ...subprocessEnv,
+                CLEMENTINE_HOME: BASE_DIR,
+                ...(opts.profile?.slug ? { CLEMENTINE_TEAM_AGENT: opts.profile.slug } : {}),
+                CLEMENTINE_INTERACTION_SOURCE: source === 'cron' || source === 'heartbeat' ? 'autonomous' : 'interactive',
+            },
+        },
+    };
+    // Apply 1M-context env normalization (existing infra)
+    const sdkOptionsRaw = {
+        systemPrompt: profileAppend
+            ? { type: 'preset', preset: 'claude_code', append: profileAppend }
+            : { type: 'preset', preset: 'claude_code' },
+        settingSources: opts.settingSources ?? ['project'],
+        agents,
+        mcpServers,
+        allowedTools,
+        permissionMode: 'bypassPermissions',
+        cwd: BASE_DIR,
+        env: subprocessEnv,
+        maxBudgetUsd,
+        effort,
+        ...(opts.maxTurns ? { maxTurns: opts.maxTurns } : {}),
+        ...(opts.model ? { model: opts.model } : {}),
+        ...(opts.resumeSessionId ? { resume: opts.resumeSessionId } : {}),
+        ...(opts.abortSignal ? { abortController: { signal: opts.abortSignal } } : {}),
+    };
+    const sdkOptions = normalizeClaudeSdkOptionsForOneMillionContext(sdkOptionsRaw);
+    logger.info({
+        sessionKey: opts.sessionKey,
+        source,
+        profile: opts.profile?.slug,
+        forceSubagent: opts.forceSubagent,
+        effort,
+        maxBudgetUsd,
+        agentCount: Object.keys(agents).length,
+        allowedToolCount: allowedTools.length,
+    }, 'runAgent: starting query');
+    let finalText = '';
+    let sessionId = '';
+    let totalCostUsd = 0;
+    let numTurns = 0;
+    let subtype = 'unknown';
+    let usage;
+    const stream = query({ prompt: effectivePrompt, options: sdkOptions });
+    for await (const message of stream) {
+        if (message.type === 'system' && message.subtype === 'init') {
+            sessionId = message.session_id ?? '';
+            logger.debug({ sessionKey: opts.sessionKey, sdkSessionId: sessionId }, 'runAgent: SDK session initialized');
+            continue;
+        }
+        if (message.type === 'assistant') {
+            const am = message;
+            const blocks = (am.message?.content ?? []);
+            for (const block of blocks) {
+                if (block.type === 'text' && typeof block.text === 'string') {
+                    finalText += block.text;
+                    if (opts.onText) {
+                        try {
+                            await opts.onText(block.text);
+                        }
+                        catch { /* streaming is best-effort */ }
+                    }
+                }
+                else if (block.type === 'tool_use' && typeof block.name === 'string') {
+                    if (opts.onToolActivity) {
+                        try {
+                            await opts.onToolActivity({ tool: block.name, input: block.input ?? {} });
+                        }
+                        catch { /* best-effort */ }
+                    }
+                }
+            }
+            continue;
+        }
+        if (message.type === 'result') {
+            const result = message;
+            sessionId = sessionId || (result.session_id ?? '');
+            subtype = result.subtype ?? 'unknown';
+            numTurns = result.num_turns ?? numTurns;
+            totalCostUsd = result.total_cost_usd ?? 0;
+            const u = result.usage;
+            if (u)
+                usage = u;
+            if (subtype === 'success') {
+                // success carries `result` field with the final text.
+                const r = result.result;
+                if (r)
+                    finalText = r;
+            }
+            // Mirror cost to usage_log. Same shape as the existing
+            // logQueryResult, but standalone so we don't depend on
+            // PersonalAssistant's instance state.
+            const modelUsage = result.modelUsage;
+            if (opts.memoryStore && modelUsage) {
+                try {
+                    opts.memoryStore.logUsage({
+                        sessionKey: `${source}:${opts.sessionKey}`,
+                        source: `runagent.${source}`,
+                        modelUsage,
+                        numTurns,
+                        durationMs: Date.now() - startedAt,
+                        agentSlug: opts.profile?.slug,
+                        totalCostUsd: totalCostUsd,
+                    });
+                }
+                catch (err) {
+                    logger.debug({ err }, 'runAgent: usage logging failed (non-fatal)');
+                }
+            }
+            continue;
+        }
+        // Other message types (UserMessage with tool_result, StreamEvent,
+        // SDKCompactBoundaryMessage) — observed but not acted on. The SDK
+        // handles compaction internally; we just let it run.
+    }
+    logger.info({
+        sessionKey: opts.sessionKey,
+        source,
+        sdkSessionId: sessionId,
+        subtype,
+        numTurns,
+        totalCostUsd: Number(totalCostUsd.toFixed(4)),
+        durationMs: Date.now() - startedAt,
+        finalTextChars: finalText.length,
+    }, 'runAgent: query complete');
+    return {
+        text: finalText,
+        totalCostUsd,
+        numTurns,
+        sessionId,
+        subtype,
+        ...(usage ? { usage } : {}),
+    };
+}
+//# sourceMappingURL=run-agent.js.map

package/dist/cli/dashboard.js CHANGED Viewed

@@ -2347,7 +2347,8 @@ export async function cmdDashboard(opts) {
         const isLongRunning = req.path.startsWith('/brain/')
             || req.path.endsWith('/stream')
             || req.path === '/chat'
-            || req.path === '/builder/chat';
+            || req.path === '/builder/chat'
+            || req.path === '/runagent/test';
         const timeoutMs = isLongRunning ? 10 * 60 * 1000 : 8000;
         const timeout = setTimeout(() => {
             if (!res.headersSent) {
@@ -5428,6 +5429,59 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
             res.status(500).json({ error: String(err) });
         }
     });
+    // ── runAgent test endpoint (Phase 1 of SDK-canonical migration) ──────
+    //
+    // POST /api/runagent/test
+    //   body: { prompt, agentSlug?, forceSubagent?, model?, effort?, maxBudgetUsd?, source? }
+    //
+    // Lightweight endpoint to verify the new canonical SDK call path
+    // without rerouting any production traffic. Owner-only.
+    // Migration plan: /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md
+    app.post('/api/runagent/test', async (req, res) => {
+        const { prompt, agentSlug, forceSubagent, model, effort, maxBudgetUsd, source } = req.body ?? {};
+        if (!prompt || typeof prompt !== 'string') {
+            res.status(400).json({ error: 'prompt is required' });
+            return;
+        }
+        try {
+            const gw = await getGateway();
+            const agentMgr = gw.getAgentManager();
+            const profile = agentSlug ? agentMgr.get(agentSlug) ?? null : null;
+            const memoryStore = gw.assistant.getMemoryStore?.();
+            const { runAgent } = await import('../agent/run-agent.js');
+            const startedAt = Date.now();
+            const toolActivity = [];
+            const result = await runAgent(prompt, {
+                sessionKey: `dashboard:runagent-test:${Date.now()}`,
+                source: typeof source === 'string' ? source : 'test',
+                profile,
+                forceSubagent: typeof forceSubagent === 'string' ? forceSubagent : null,
+                agentManager: agentMgr,
+                memoryStore: memoryStore,
+                model: typeof model === 'string' ? model : undefined,
+                effort: typeof effort === 'string' ? effort : undefined,
+                maxBudgetUsd: typeof maxBudgetUsd === 'number' ? maxBudgetUsd : undefined,
+                onToolActivity: ({ tool, input }) => {
+                    toolActivity.push({ tool, inputPreview: JSON.stringify(input).slice(0, 200) });
+                },
+            });
+            res.json({
+                ok: true,
+                text: result.text,
+                sessionId: result.sessionId,
+                subtype: result.subtype,
+                numTurns: result.numTurns,
+                totalCostUsd: Number(result.totalCostUsd.toFixed(4)),
+                durationMs: Date.now() - startedAt,
+                toolCallCount: toolActivity.length,
+                toolActivity: toolActivity.slice(0, 50), // cap for sanity
+                usage: result.usage,
+            });
+        }
+        catch (err) {
+            res.status(500).json({ error: String(err) });
+        }
+    });
     /** Dismiss a diagnosis without applying — clears the cached result. */
     app.post('/api/cron/broken-jobs/:jobName/dismiss-diagnosis', async (req, res) => {
         try {

package/dist/tools/admin-tools.js CHANGED Viewed

@@ -1170,147 +1170,14 @@ export function registerAdminTools(server) {
         return textResult(`Triggered "${job_name}" — the daemon will pick it up within a few seconds and run it in the background. ` +
             `Results will be delivered via notifications when complete.`);
     });
-    // ── Workflow Tools ──────────────────────────────────────────────────────
-    const WORKFLOWS_DIR = path.join(SYSTEM_DIR, 'workflows');
-    server.tool('workflow_list', 'List all multi-step workflows with name, description, step count, trigger, and enabled status.', { _empty: z.string().optional().describe('(no parameters needed)') }, async () => {
-        if (!existsSync(WORKFLOWS_DIR)) {
-            return textResult('No workflows directory found. Create `vault/00-System/workflows/` and add workflow .md files.');
-        }
-        const { parseAllWorkflows } = await import('../agent/workflow-runner.js');
-        const workflows = parseAllWorkflows(WORKFLOWS_DIR);
-        if (workflows.length === 0) {
-            return textResult('No workflow files found in `vault/00-System/workflows/`.');
-        }
-        const lines = [];
-        for (const wf of workflows) {
-            const status = wf.enabled ? 'enabled' : 'disabled';
-            const trigger = wf.trigger.schedule ? `schedule: \`${wf.trigger.schedule}\`` : 'manual only';
-            lines.push(`**${wf.name}** [${status}]` +
-                `\n  ${wf.description || '(no description)'}` +
-                `\n  Trigger: ${trigger}` +
-                `\n  Steps (${wf.steps.length}): ${wf.steps.map(s => s.id).join(' → ')}` +
-                (Object.keys(wf.inputs).length > 0
-                    ? `\n  Inputs: ${Object.entries(wf.inputs).map(([k, v]) => `${k}${v.default ? `="${v.default}"` : ''}`).join(', ')}`
-                    : ''));
-        }
-        return textResult(lines.join('\n\n'));
-    });
-    server.tool('workflow_create', 'Create a new multi-step workflow file. Validates dependencies and writes to vault/00-System/workflows/. The daemon auto-reloads on file change.', {
-        name: z.string().describe('Workflow name (used as filename and identifier)'),
-        description: z.string().describe('What the workflow does'),
-        steps: z.array(z.object({
-            id: z.string().describe('Unique step identifier'),
-            prompt: z.string().describe('Prompt for the step (supports {{input.*}}, {{steps.*.output}}, {{date}} variables)'),
-            dependsOn: z.array(z.string()).default([]).describe('Step IDs this depends on'),
-            model: z.string().optional().describe('Model tier: haiku or sonnet'),
-            tier: z.number().optional().default(1).describe('Security tier (1-3)'),
-            maxTurns: z.number().optional().default(15).describe('Max agent turns'),
-        })).describe('Workflow steps'),
-        trigger_schedule: z.string().optional().describe('Cron expression for scheduled trigger'),
-        inputs: z.record(z.string(), z.object({
-            type: z.enum(['string', 'number']).default('string'),
-            default: z.string().optional(),
-            description: z.string().optional(),
-        })).optional().default({}).describe('Input parameters with optional defaults'),
-        synthesis_prompt: z.string().optional().describe('Prompt to synthesize final output from all step results'),
-    }, async ({ name, description, steps, trigger_schedule, inputs, synthesis_prompt }) => {
-        // Validate step IDs are unique
-        const ids = new Set(steps.map(s => s.id));
-        if (ids.size !== steps.length) {
-            return textResult('Error: Duplicate step IDs found.');
-        }
-        // Validate dependencies exist
-        for (const step of steps) {
-            for (const dep of step.dependsOn) {
-                if (!ids.has(dep)) {
-                    return textResult(`Error: Step "${step.id}" depends on unknown step "${dep}".`);
-                }
-            }
-        }
-        // Validate cron expression if provided
-        if (trigger_schedule) {
-            const cronMod = await import('node-cron');
-            if (!cronMod.default.validate(trigger_schedule)) {
-                return textResult(`Invalid cron expression: "${trigger_schedule}".`);
-            }
-        }
-        // Build frontmatter
-        const frontmatter = {
-            type: 'workflow',
-            name,
-            description,
-            enabled: true,
-            trigger: {
-                ...(trigger_schedule ? { schedule: trigger_schedule } : {}),
-                manual: true,
-            },
-        };
-        if (Object.keys(inputs).length > 0) {
-            frontmatter.inputs = inputs;
-        }
-        frontmatter.steps = steps.map(s => ({
-            id: s.id,
-            prompt: s.prompt,
-            dependsOn: s.dependsOn,
-            ...(s.model ? { model: s.model } : {}),
-            ...(s.tier && s.tier !== 1 ? { tier: s.tier } : {}),
-            ...(s.maxTurns && s.maxTurns !== 15 ? { maxTurns: s.maxTurns } : {}),
-        }));
-        if (synthesis_prompt) {
-            frontmatter.synthesis = { prompt: synthesis_prompt };
-        }
-        // Write file
-        if (!existsSync(WORKFLOWS_DIR)) {
-            mkdirSync(WORKFLOWS_DIR, { recursive: true });
-        }
-        const matterMod = await import('gray-matter');
-        const safeName = name.replace(/[^a-zA-Z0-9_-]/g, '-').toLowerCase();
-        const filePath = path.join(WORKFLOWS_DIR, `${safeName}.md`);
-        if (existsSync(filePath)) {
-            return textResult(`Workflow file already exists: ${safeName}.md. Delete or rename it first.`);
-        }
-        const body = `# ${name}\n\n${description}\n`;
-        const output = matterMod.default.stringify(body, frontmatter);
-        writeFileSync(filePath, output);
-        logger.info({ name, steps: steps.length }, 'Created workflow via MCP tool');
-        const goalHint = `\n\n💡 **Goal tracking:** What goal does this workflow serve? Consider creating a persistent goal (\`goal_create\`) and linking related cron jobs so self-improvement can optimize this workflow against measurable outcomes.`;
-        return textResult(`Created workflow "${name}" with ${steps.length} steps.\n` +
-            `File: vault/00-System/workflows/${safeName}.md\n` +
-            `Steps: ${steps.map(s => s.id).join(' → ')}\n` +
-            (trigger_schedule ? `Schedule: ${trigger_schedule}\n` : 'Trigger: manual\n') +
-            'The daemon will auto-detect it via file watcher.' +
-            goalHint);
-    });
-    server.tool('workflow_run', 'Trigger a workflow by name with optional input overrides. Returns the workflow result.', {
-        name: z.string().describe('Workflow name'),
-        inputs: z.record(z.string(), z.string()).optional().default({}).describe('Input overrides (key=value pairs)'),
-    }, async ({ name: workflowName, inputs }) => {
-        const { parseAllWorkflows } = await import('../agent/workflow-runner.js');
-        const { WorkflowRunner } = await import('../agent/workflow-runner.js');
-        const workflows = parseAllWorkflows(WORKFLOWS_DIR);
-        const wf = workflows.find(w => w.name === workflowName);
-        if (!wf) {
-            const available = workflows.map(w => w.name).join(', ');
-            return textResult(`Workflow "${workflowName}" not found. Available: ${available || 'none'}`);
-        }
-        if (!wf.enabled) {
-            return textResult(`Workflow "${workflowName}" is disabled.`);
-        }
-        // Build a minimal assistant for standalone MCP execution
-        // In daemon mode, the CronScheduler.runWorkflow() path is preferred
-        // For MCP standalone, we need to create an assistant instance
-        try {
-            const { PersonalAssistant } = await import('../agent/assistant.js');
-            const assistant = new PersonalAssistant();
-            const runner = new WorkflowRunner(assistant);
-            const result = await runner.run(wf, inputs);
-            return textResult(`**Workflow: ${workflowName}** — ${result.status}\n\n${result.output.slice(0, 3000)}`);
-        }
-        catch (err) {
-            logger.error({ err, workflow: workflowName }, 'Workflow execution failed');
-            return textResult(`Workflow "${workflowName}" failed: ${err instanceof Error ? err.message : err}`);
-        }
-    });
+    // ── Workflow Tools moved to builder-tools.ts ────────────────────────────
+    //
+    // `workflow_list`, `workflow_create`, and `workflow_run` were duplicated
+    // here AND in builder-tools.ts (the newer Trick Builder). The duplicate
+    // registration was crashing the MCP server on startup with
+    // "Tool X is already registered" — silently breaking every fresh MCP
+    // subprocess and forcing fallback to manual file reads.
+    // All three live in builder-tools.ts now.
     // ── Analyze Image ───────────────────────────────────────────────────────
     server.tool('analyze_image', 'Analyze an image by URL. Fetches the image, converts to base64, and uses Claude vision to describe it. Works with any image URL — channel attachments, email attachments, web images.', {
         url: z.string().describe('URL of the image to analyze'),

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.42",
+  "version": "1.18.44",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",