clementine-agent 1.18.42 → 1.18.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Clementine TypeScript — AgentDefinition factory.
3
+ *
4
+ * The canonical Claude Agent SDK pattern is to pass `agents: { ... }`
5
+ * to `query()`, where each entry is an `AgentDefinition`. Claude routes
6
+ * subwork to subagents based on each definition's `description` field.
7
+ *
8
+ * Today's Clementine has multiple parallel orchestration paths
9
+ * (PlanOrchestrator, runUnleashedTask phases, fanout-policy directive,
10
+ * pre-LLM plan routing). This file is the start of consolidating all
11
+ * of that into the SDK-native subagent pattern.
12
+ *
13
+ * Usage:
14
+ * const agents = buildAgentMap({ profileManager, isAutonomous: false });
15
+ * query({ prompt, options: { agents, ... } })
16
+ *
17
+ * Phase 1 (1.18.43): this file is created but not wired into production
18
+ * yet. The dashboard's /api/runagent/test endpoint exercises it for
19
+ * verification before any real migration.
20
+ */
21
+ import type { AgentDefinition } from '@anthropic-ai/claude-agent-sdk';
22
+ import type { AgentManager } from './agent-manager.js';
23
/** Options accepted by `buildAgentMap`. Every field is optional. */
export interface BuildAgentMapOptions {
    /**
     * Registry that supplies hired-agent profiles. If it is omitted, the
     * resulting map contains only the built-in system subagents.
     */
    profileManager?: AgentManager;
    /**
     * Marks the run as autonomous (cron-style) so the subagent surface can be
     * limited to entries that are safe without a human in the chat.
     * NOTE(review): the implementation shipped next to this declaration does
     * not appear to read this flag yet — confirm before relying on it.
     */
    isAutonomous?: boolean;
    /**
     * Slug of the agent currently acting as the main agent. All OTHER hired
     * agents still receive a definition; the active agent's profile is applied
     * as the system prompt by the caller instead.
     */
    activeAgentSlug?: string;
}
32
/**
 * Assemble the `AgentDefinition` map handed to a runAgent call.
 *
 * The map combines the built-in system subagents (planner, researcher,
 * cron-fixer) with one entry per hired-agent profile. The system subagents
 * are kept deliberately small: their only purpose is to give Claude clean
 * routing targets for specific kinds of work. More of them are expected to
 * be added as other orchestration paths are folded into this one.
 *
 * @param opts Optional settings; see `BuildAgentMapOptions`.
 * @returns A map from subagent slug to its definition.
 */
export declare function buildAgentMap(opts?: BuildAgentMapOptions): Record<string, AgentDefinition>;
41
/**
 * Report whether `map` has an entry for `slug`.
 *
 * @returns A plain boolean (this does not narrow any type).
 */
export declare function hasAgent(map: Record<string, AgentDefinition>, slug: string): boolean;
43
+ //# sourceMappingURL=agent-definitions.d.ts.map
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Clementine TypeScript — AgentDefinition factory.
3
+ *
4
+ * The canonical Claude Agent SDK pattern is to pass `agents: { ... }`
5
+ * to `query()`, where each entry is an `AgentDefinition`. Claude routes
6
+ * subwork to subagents based on each definition's `description` field.
7
+ *
8
+ * Today's Clementine has multiple parallel orchestration paths
9
+ * (PlanOrchestrator, runUnleashedTask phases, fanout-policy directive,
10
+ * pre-LLM plan routing). This file is the start of consolidating all
11
+ * of that into the SDK-native subagent pattern.
12
+ *
13
+ * Usage:
14
+ * const agents = buildAgentMap({ profileManager, isAutonomous: false });
15
+ * query({ prompt, options: { agents, ... } })
16
+ *
17
+ * Phase 1 (1.18.43): this file is created but not wired into production
18
+ * yet. The dashboard's /api/runagent/test endpoint exercises it for
19
+ * verification before any real migration.
20
+ */
21
/**
 * System prompt for the `planner` subagent: asks for a JSON-only plan made of
 * atomic steps, each optionally routed to a named subagent.
 * Assembled from three sections and joined with single newlines.
 */
const PLANNER_PROMPT = (() => {
    // Who the planner is and what it must produce.
    const role = [
        'You are a task planner for Clementine. You receive a multi-step request from the parent agent.',
        '',
        'Your job: decompose the request into ATOMIC, parallel-safe steps, then return a JSON plan.',
        '',
    ];
    // The exact JSON shape the parent expects back.
    const outputContract = [
        'Output ONLY a JSON object (no markdown fences, no prose):',
        '{',
        ' "steps": [',
        ' { "id": "step-1", "description": "...", "subagent": "researcher|cron-fixer|...|null", "prompt": "...", "model": "haiku|sonnet", "dependsOn": [] }',
        ' ],',
        ' "synthesisHint": "How the parent should combine step outputs"',
        '}',
        '',
    ];
    // Constraints on step count, parallelism and subagent choice.
    const rules = [
        'Rules:',
        '- 2-8 steps. Atomic = completes in 5-30 tool calls.',
        '- MAXIMIZE parallelism: independent steps have empty dependsOn.',
        '- Pick the right subagent per step:',
        ' - `researcher` for per-item lookups (1 lead, 1 account, 1 file): model=haiku',
        ' - `cron-fixer` for diagnose-and-apply on broken cron jobs: model=sonnet',
        ' - null (parent runs the step) for synthesis or when no specialist fits',
        '- Each step prompt is SELF-CONTAINED — the sub agent sees no parent history.',
        '- End each step prompt with "Deliver: <one-line return shape>".',
    ];
    return [...role, ...outputContract, ...rules].join('\n');
})();
44
/**
 * System prompt for the `researcher` subagent: investigate one item and
 * answer with a single bounded paragraph.
 * Four paragraphs separated by one blank line each.
 */
const RESEARCHER_PROMPT = [
    'You are a per-item research specialist. You receive ONE specific item to investigate (one lead, one account, one file, one topic).',
    'Use your bounded tools to gather the requested information. Return a ONE-PARAGRAPH summary in the format the parent specified.',
    'NEVER return raw tool output, full lists, or unbounded data. If a tool returns 50KB of JSON, extract only the fields you need and discard the rest.',
    'If you cannot find the requested data, say so in one line. Do not speculate.',
].join('\n\n');
53
/**
 * System prompt for the `cron-fixer` subagent: list broken jobs, apply only
 * fixes flagged as safe, otherwise ask the parent for approval.
 * Three sections (intro, workflow, return shape) separated by blank lines.
 */
const CRON_FIXER_PROMPT = (() => {
    const intro = 'You are the cron-fix specialist. You diagnose and apply fixes to broken cron jobs.';
    const workflow = [
        'Workflow:',
        '1. Call `list_broken_jobs` to see what is currently broken with their cached diagnoses.',
        '2. For each job the user/parent asked about, check the proposed fix:',
        ' - confidence=high + risk=low + autoApply=true → call `apply_broken_job_fix`.',
        ' - Otherwise → describe the diagnosis and ask the parent for explicit approval.',
        '3. After applying a fix, the verification system auto-rolls-back if the next 3 runs do not improve. You do NOT need to monitor manually.',
    ].join('\n');
    const returnShape = 'Return: a one-paragraph summary of what you applied (or what is blocking apply), per job.';
    return [intro, workflow, returnShape].join('\n\n');
})();
65
/**
 * Convert a hired-agent profile into the AgentDefinition shape so the main
 * agent can delegate to that hire (Ross/Sasha/Nora etc.).
 *
 * - `description` and `prompt` fall back to text derived from the profile's
 *   name/slug when the profile does not define them.
 * - `tools` is only emitted for a non-empty `team.allowedTools`; when the key
 *   is absent the subagent inherits the parent's tools.
 * - `model` falls back to 'sonnet'; `effort` is always 'medium'.
 */
function profileToAgentDefinition(p) {
    const { name, slug, description, systemPromptBody, model, team } = p;
    const allowlist = team?.allowedTools;
    // Spread an empty object when there is no explicit allowlist so that the
    // `tools` key is omitted entirely rather than set to undefined.
    const toolsOverride = allowlist?.length ? { tools: allowlist } : {};
    return {
        description: description ?? `${name} (hired agent: ${slug})`,
        prompt: systemPromptBody ?? `You are ${name}.`,
        ...toolsOverride,
        model: model ? model : 'sonnet',
        effort: 'medium',
    };
}
79
/**
 * Build the AgentDefinition map for a runAgent call. Mix of system
 * subagents (planner, researcher, cron-fixer) and hired-agent profiles.
 *
 * The system subagents are intentionally minimal — they exist so Claude
 * can route specific kinds of work cleanly. Add new ones (per the
 * migration plan) as we collapse other orchestration paths.
 *
 * Hired-agent profiles never replace an existing entry: a profile whose slug
 * matches a system subagent (or an earlier profile) is skipped, so the
 * restricted tool/model settings of the system subagents cannot be silently
 * overwritten by a hire that happens to share the name.
 *
 * @param opts Optional settings. `profileManager` supplies hired-agent
 *   profiles via `listAll()`; `activeAgentSlug` is excluded from the map.
 *   `isAutonomous` is accepted but not consulted here yet.
 * @returns A plain object mapping subagent slug to its definition.
 */
export function buildAgentMap(opts = {}) {
    const map = {};
    // ── System subagents ────────────────────────────────────────────
    // Planner: opus, no tools, single turn. Used when the parent agent
    // sees a multi-step request and wants a decomposition.
    map['planner'] = {
        description: 'Decompose a multi-step user request into atomic, parallel-safe steps. Use for "research these N items", "build a comprehensive X", "for each Y do Z", or any request that obviously involves multiple distinct sub-tasks. Returns a JSON plan; the parent then executes the steps (often by spawning more subagents per step).',
        prompt: PLANNER_PROMPT,
        model: 'opus',
        tools: [], // pure reasoning, no tools
        effort: 'high',
        maxTurns: 1,
    };
    // Researcher: haiku, per-item investigation. Cheap fan-out target.
    map['researcher'] = {
        description: 'Investigate ONE specific item (one lead, one account, one file, one topic) and return a one-paragraph summary. Use for per-item parallel work spawned by the planner. Cheap and fast.',
        prompt: RESEARCHER_PROMPT,
        model: 'haiku',
        tools: ['Read', 'Grep', 'Glob', 'Bash', 'WebSearch', 'WebFetch'],
        effort: 'low',
        maxTurns: 15,
    };
    // Cron-fixer: sonnet, owns the broken-job diagnose+apply path.
    // Tools restricted to the canonical fix path (no parallel mechanisms).
    map['cron-fixer'] = {
        description: 'Diagnose and apply fixes to broken cron jobs. Use when the user says "fix X" referring to a job, asks "what jobs are failing", or asks to re-run/repair a cron. Owns the canonical diagnosis-to-apply flow.',
        prompt: CRON_FIXER_PROMPT,
        model: 'sonnet',
        tools: [
            'mcp__clementine-tools__list_broken_jobs',
            'mcp__clementine-tools__apply_broken_job_fix',
            'mcp__clementine-tools__cron_list',
            'mcp__clementine-tools__cron_run_history',
            'Read',
            'Grep',
        ],
        effort: 'medium',
        maxTurns: 10,
    };
    // ── Hired-agent profiles ────────────────────────────────────────
    // Each becomes a subagent the main agent can delegate to.
    // The "main" agent for a DM-to-bot session is set by the caller
    // (still uses the profile's identity); these definitions cover the
    // case where Clementine wants to invoke them mid-conversation.
    if (opts.profileManager) {
        for (const profile of opts.profileManager.listAll()) {
            const slug = profile.slug;
            // Skip clementine herself (she's the main agent, not a subagent)
            if (slug === 'clementine')
                continue;
            // Skip the active agent (don't make them their own subagent)
            if (opts.activeAgentSlug && slug === opts.activeAgentSlug)
                continue;
            // Assigning to `__proto__` on a plain object would swap the map's
            // prototype instead of adding an entry, so never use it as a key.
            if (slug === '__proto__')
                continue;
            // Keep the first definition for a slug: system subagents (added
            // above) must not be clobbered by a same-named hired agent.
            if (Object.prototype.hasOwnProperty.call(map, slug))
                continue;
            map[slug] = profileToAgentDefinition(profile);
        }
    }
    return map;
}
145
/**
 * Report whether `map` carries its own (non-inherited) entry for `slug`.
 * Inherited keys such as `toString` are therefore reported as absent.
 */
export function hasAgent(map, slug) {
    const ownsKey = Object.prototype.hasOwnProperty;
    return ownsKey.call(map, slug);
}
149
+ //# sourceMappingURL=agent-definitions.js.map
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Clementine TypeScript — runAgent: canonical Claude Agent SDK wrapper.
3
+ *
4
+ * Phase 1 of the SDK-canonical migration (see
5
+ * /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md).
6
+ *
7
+ * This is the new code path that will eventually replace runCronJob /
8
+ * runUnleashedTask / runHeartbeat / runTeamTask / chat. For now it
9
+ * runs in PARALLEL with those — only the dashboard's
10
+ * /api/runagent/test endpoint exercises it. Production traffic still
11
+ * uses legacy paths until Phase 2.
12
+ *
13
+ * Design principles (from the SDK docs):
14
+ * 1. ONE query() call — no nested phase wrappers.
15
+ * 2. Subagents via the `agents` param — not via prompt-injected
16
+ * fanout directives.
17
+ * 3. SDK handles: agent loop, compaction, tool execution, parallel
18
+ * sub-spawning, prompt caching, session resume.
19
+ * 4. App handles: prompt + options assembly, transcript mirroring,
20
+ * cost logging, channel delivery.
21
+ * 5. NO context-thrash recovery, NO manual session rotation, NO
22
+ * long-task preflight, NO mode=unleashed wrapper.
23
+ */
24
+ import { type AgentDefinition } from '@anthropic-ai/claude-agent-sdk';
25
+ import type { AgentProfile } from '../types.js';
26
+ import type { AgentManager } from './agent-manager.js';
27
+ import type { MemoryStore } from '../memory/store.js';
28
/** Settings for a single `runAgent` invocation. Only `sessionKey` and `source` are required. */
export interface RunAgentOptions {
    /** Stable key identifying this conversation/run; it is attached to log lines and usage records. */
    sessionKey: string;
    /**
     * Telemetry classification of the caller: 'chat' | 'cron' | 'heartbeat' |
     * 'team-task' | 'test'. Also selects the default effort and budget.
     */
    source: string;
    /**
     * Hired-agent profile to run as the MAIN agent; its system prompt body is
     * appended to the base system prompt. Leave unset (or null) to run as
     * Clementine.
     */
    profile?: AgentProfile | null;
    /**
     * Slug of a subagent to invoke explicitly instead of relying on Claude's
     * automatic routing. When it names a known subagent, the prompt is
     * wrapped with an instruction to use that subagent.
     */
    forceSubagent?: string | null;
    /** Registry of hired agents, used to build the AgentDefinition map for delegation. */
    agentManager?: AgentManager | null;
    /** Memory store that receives the usage/cost record for the run. */
    memoryStore?: MemoryStore | null;
    /** Model override. When unset, the SDK default applies. */
    model?: string;
    /** Reasoning effort. Per-source defaults: chat='medium', cron='medium', heartbeat='low'. */
    effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max';
    /** Hard spend cap in USD; the default depends on `source`. The SDK aborts the run once it is reached. */
    maxBudgetUsd?: number;
    /** Hard cap on agentic turns. Unset means no cap (the SDK runs until done). */
    maxTurns?: number;
    /** SDK session ID to resume; when set, the run continues from that prior session. */
    resumeSessionId?: string;
    /** Best-effort callback invoked with each piece of assistant text as it arrives. */
    onText?: (chunk: string) => void | Promise<void>;
    /** Best-effort callback invoked whenever the agent calls a tool (tool name + input). */
    onToolActivity?: (info: {
        tool: string;
        input: Record<string, unknown>;
    }) => void | Promise<void>;
    /** Signal that cancels the SDK stream when it fires. */
    abortSignal?: AbortSignal;
    /** Replacement for the generated AgentDefinition map. Mostly for tests. */
    agents?: Record<string, AgentDefinition>;
    /**
     * Explicit allowedTools list. When unset, a default list of core SDK
     * tools is used, including Agent so that subagents can be spawned.
     */
    allowedTools?: string[];
    /** Which CLAUDE.md / settings sources the SDK should load. Defaults to ['project']. */
    settingSources?: ('project' | 'user' | 'local')[];
}
70
/** Outcome of a `runAgent` call, assembled from the SDK's message stream. */
export interface RunAgentResult {
    /** The agent's final text response. */
    text: string;
    /** Total cost of the run in USD, as reported by the SDK. */
    totalCostUsd: number;
    /** How many agentic turns the loop took. */
    numTurns: number;
    /** SDK session ID; keep it if the session should be resumed later. */
    sessionId: string;
    /** Final stop reason reported by the SDK (success, error_max_turns, error_max_budget_usd, etc). */
    subtype: string;
    /** Token usage breakdown (input, output, cache), when the SDK reported one. */
    usage?: {
        input_tokens?: number;
        output_tokens?: number;
        cache_read_input_tokens?: number;
        cache_creation_input_tokens?: number;
    };
}
89
/**
 * Execute one agent invocation through the canonical SDK pattern.
 *
 * The promise settles once the SDK loop is over: either the agent produced a
 * final assistant message without tool calls, the maxTurns/maxBudget limit
 * was reached, or an error occurred.
 *
 * @param prompt The request text sent to the agent.
 * @param opts Run settings; see `RunAgentOptions`.
 * @returns Final text plus cost, turn count, session ID and stop reason.
 */
export declare function runAgent(prompt: string, opts: RunAgentOptions): Promise<RunAgentResult>;
96
+ //# sourceMappingURL=run-agent.d.ts.map
@@ -0,0 +1,267 @@
1
+ /**
2
+ * Clementine TypeScript — runAgent: canonical Claude Agent SDK wrapper.
3
+ *
4
+ * Phase 1 of the SDK-canonical migration (see
5
+ * /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md).
6
+ *
7
+ * This is the new code path that will eventually replace runCronJob /
8
+ * runUnleashedTask / runHeartbeat / runTeamTask / chat. For now it
9
+ * runs in PARALLEL with those — only the dashboard's
10
+ * /api/runagent/test endpoint exercises it. Production traffic still
11
+ * uses legacy paths until Phase 2.
12
+ *
13
+ * Design principles (from the SDK docs):
14
+ * 1. ONE query() call — no nested phase wrappers.
15
+ * 2. Subagents via the `agents` param — not via prompt-injected
16
+ * fanout directives.
17
+ * 3. SDK handles: agent loop, compaction, tool execution, parallel
18
+ * sub-spawning, prompt caching, session resume.
19
+ * 4. App handles: prompt + options assembly, transcript mirroring,
20
+ * cost logging, channel delivery.
21
+ * 5. NO context-thrash recovery, NO manual session rotation, NO
22
+ * long-task preflight, NO mode=unleashed wrapper.
23
+ */
24
+ import path from 'node:path';
25
+ import { query } from '@anthropic-ai/claude-agent-sdk';
26
+ import pino from 'pino';
27
+ import { BASE_DIR, PKG_DIR, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY as CONFIG_ANTHROPIC_API_KEY, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
28
+ import { buildAgentMap } from './agent-definitions.js';
29
// Entry script of the bundled MCP tool server, resolved inside the package directory.
const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
// Lower-cased assistant name; taken from the ASSISTANT_NAME env var when it is set.
const ASSISTANT_NAME = (process.env.ASSISTANT_NAME ?? 'Clementine').toLowerCase();
// Key under which the MCP server is registered with the SDK
// ("clementine-tools" unless ASSISTANT_NAME overrides the name).
const TOOLS_SERVER = ASSISTANT_NAME + '-tools';
32
/**
 * Assemble the minimal environment passed to the SDK subprocess.
 *
 * Follows the SAFE_ENV pattern from assistant.ts, but lives here so runAgent
 * does not depend on the legacy assistant module.
 *
 * Exactly one credential is forwarded, chosen in this order:
 * CLAUDE_CODE_OAUTH_TOKEN > ANTHROPIC_AUTH_TOKEN > ANTHROPIC_API_KEY.
 * If none is available, HOME lets the subprocess find Keychain OAuth.
 *
 * @returns A fresh env object containing the base variables plus at most one credential.
 */
function buildRunAgentEnv() {
    const inherited = process.env;
    const env = {
        PATH: inherited.PATH ?? '',
        HOME: inherited.HOME ?? '',
        LANG: inherited.LANG ?? 'en_US.UTF-8',
        TERM: inherited.TERM ?? 'xterm-256color',
        USER: inherited.USER ?? '',
        SHELL: inherited.SHELL ?? '',
        CLEMENTINE_HOME: BASE_DIR,
    };
    // Candidates in priority order; config values win over the raw env var.
    const credentials = [
        ['CLAUDE_CODE_OAUTH_TOKEN', CLAUDE_CODE_OAUTH_TOKEN || inherited.CLAUDE_CODE_OAUTH_TOKEN],
        ['ANTHROPIC_AUTH_TOKEN', inherited.ANTHROPIC_AUTH_TOKEN],
        ['ANTHROPIC_API_KEY', CONFIG_ANTHROPIC_API_KEY || inherited.ANTHROPIC_API_KEY],
    ];
    // Only the first candidate with a non-empty value is forwarded.
    const chosen = credentials.find(([, value]) => Boolean(value));
    if (chosen) {
        const [name, value] = chosen;
        env[name] = value;
    }
    return env;
}
64
// Module-scoped structured logger for this file.
const logger = pino({ name: 'clementine.run-agent' });
// Default spend cap (USD) per source; used when the caller gives no maxBudgetUsd.
const DEFAULT_BUDGETS = {
    chat: 0.5,
    cron: 1,
    heartbeat: 0.25,
    'team-task': 1,
    test: 2,
};
// Default reasoning effort per source; used when the caller gives no effort.
const DEFAULT_EFFORTS = {
    chat: 'medium',
    cron: 'medium',
    heartbeat: 'low',
    'team-task': 'medium',
    test: 'medium',
};
// Default allowedTools for the parent agent.
const CORE_TOOLS_FOR_AGENT_PARENT = [
    // REQUIRED — without this, subagents can't be invoked
    'Agent',
    // File access and search
    'Read',
    'Write',
    'Edit',
    'Glob',
    'Grep',
    // Shell and web
    'Bash',
    'WebSearch',
    'WebFetch',
    // Task tracking
    'TodoWrite',
];
91
/**
 * Run a single agent invocation via the canonical SDK pattern.
 *
 * Returns when the SDK loop completes (final assistant message with no
 * tool calls, OR maxTurns/maxBudget hit, OR error).
 *
 * @param prompt Request text. It is wrapped with a "Use the X agent…"
 *   instruction when `opts.forceSubagent` names an entry of the agent map.
 * @param opts Run settings (session key, source, profile, limits, callbacks).
 * @returns `{ text, totalCostUsd, numTurns, sessionId, subtype, usage? }`.
 *   `text` is the SDK's `result` string on success; otherwise it is the
 *   concatenation of every assistant text block seen on the stream.
 * @throws Whatever the SDK stream throws; streaming-callback and usage-logging
 *   failures are swallowed on purpose (best-effort).
 */
export async function runAgent(prompt, opts) {
    // Look up per-source defaults as OWN properties only. A plain `table[key]`
    // would also resolve inherited members (e.g. source "constructor" or
    // "toString"), which are not nullish and would leak a function into the
    // effort/budget options.
    const ownEntry = (table, key) => (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);
    const source = opts.source ?? 'chat';
    const effort = opts.effort ?? ownEntry(DEFAULT_EFFORTS, source) ?? 'medium';
    const maxBudgetUsd = opts.maxBudgetUsd ?? ownEntry(DEFAULT_BUDGETS, source) ?? 0.50;
    const startedAt = Date.now();
    // Build the AgentDefinition map. Caller can override; otherwise we
    // use the standard system subagents + hired-agent profiles.
    const agents = opts.agents ?? buildAgentMap({
        profileManager: opts.agentManager ?? undefined,
        isAutonomous: source === 'cron' || source === 'heartbeat',
        activeAgentSlug: opts.profile?.slug,
    });
    // Wrap prompt to direct Claude to a specific subagent when caller asks.
    // Per SDK docs: explicit invocation = "Use the X agent to..."
    // Own-property check for the same reason as above: "toString" is not a
    // subagent even though `agents.toString` is truthy.
    const forced = opts.forceSubagent;
    const forcedIsKnown = Boolean(forced)
        && Object.prototype.hasOwnProperty.call(agents, forced)
        && Boolean(agents[forced]);
    const effectivePrompt = forcedIsKnown
        ? `Use the ${forced} agent to handle this request:\n\n${prompt}`
        : prompt;
    // Compose system prompt. When a hired-agent profile is active, that
    // becomes the main agent's identity — append to the claude_code preset.
    const profileAppend = opts.profile?.systemPromptBody
        ? opts.profile.systemPromptBody
        : undefined;
    // Allowed tools. Default to the core tool list. Per-subagent tool
    // restrictions live on each AgentDefinition.tools field.
    const allowedTools = opts.allowedTools ?? CORE_TOOLS_FOR_AGENT_PARENT;
    // Wire the Clementine MCP server so the agent can reach memory/cron/
    // broken-job tools. Without this, the cron-fixer subagent's `tools`
    // list references mcp__clementine-tools__* that don't exist in the
    // session, and the agent falls back to reading raw JSON files.
    const subprocessEnv = buildRunAgentEnv();
    const mcpServers = {
        [TOOLS_SERVER]: {
            type: 'stdio',
            command: 'node',
            args: [MCP_SERVER_SCRIPT],
            env: {
                ...subprocessEnv,
                CLEMENTINE_HOME: BASE_DIR,
                ...(opts.profile?.slug ? { CLEMENTINE_TEAM_AGENT: opts.profile.slug } : {}),
                CLEMENTINE_INTERACTION_SOURCE: source === 'cron' || source === 'heartbeat' ? 'autonomous' : 'interactive',
            },
        },
    };
    // The SDK option is a real AbortController, not just something with a
    // `signal` field. Create one and forward the caller's signal into it so
    // the SDK can also call `.abort()` on it safely.
    let abortController;
    let releaseAbortListener = () => { };
    const callerSignal = opts.abortSignal;
    if (callerSignal) {
        const controller = new AbortController();
        abortController = controller;
        if (callerSignal.aborted) {
            controller.abort();
        }
        else {
            const forwardAbort = () => controller.abort();
            callerSignal.addEventListener('abort', forwardAbort, { once: true });
            releaseAbortListener = () => callerSignal.removeEventListener('abort', forwardAbort);
        }
    }
    // Apply 1M-context env normalization (existing infra)
    const sdkOptionsRaw = {
        systemPrompt: profileAppend
            ? { type: 'preset', preset: 'claude_code', append: profileAppend }
            : { type: 'preset', preset: 'claude_code' },
        settingSources: opts.settingSources ?? ['project'],
        agents,
        mcpServers,
        allowedTools,
        permissionMode: 'bypassPermissions',
        cwd: BASE_DIR,
        env: subprocessEnv,
        maxBudgetUsd,
        effort,
        ...(opts.maxTurns ? { maxTurns: opts.maxTurns } : {}),
        ...(opts.model ? { model: opts.model } : {}),
        ...(opts.resumeSessionId ? { resume: opts.resumeSessionId } : {}),
        ...(abortController ? { abortController } : {}),
    };
    const sdkOptions = normalizeClaudeSdkOptionsForOneMillionContext(sdkOptionsRaw);
    logger.info({
        sessionKey: opts.sessionKey,
        source,
        profile: opts.profile?.slug,
        forceSubagent: opts.forceSubagent,
        effort,
        maxBudgetUsd,
        agentCount: Object.keys(agents).length,
        allowedToolCount: allowedTools.length,
    }, 'runAgent: starting query');
    let finalText = '';
    let sessionId = '';
    let totalCostUsd = 0;
    let numTurns = 0;
    let subtype = 'unknown';
    let usage;
    try {
        const stream = query({ prompt: effectivePrompt, options: sdkOptions });
        for await (const message of stream) {
            if (message.type === 'system' && message.subtype === 'init') {
                sessionId = message.session_id ?? '';
                logger.debug({ sessionKey: opts.sessionKey, sdkSessionId: sessionId }, 'runAgent: SDK session initialized');
                continue;
            }
            if (message.type === 'assistant') {
                const am = message;
                const blocks = (am.message?.content ?? []);
                for (const block of blocks) {
                    if (block.type === 'text' && typeof block.text === 'string') {
                        finalText += block.text;
                        if (opts.onText) {
                            try {
                                await opts.onText(block.text);
                            }
                            catch { /* streaming is best-effort */ }
                        }
                    }
                    else if (block.type === 'tool_use' && typeof block.name === 'string') {
                        if (opts.onToolActivity) {
                            try {
                                await opts.onToolActivity({ tool: block.name, input: block.input ?? {} });
                            }
                            catch { /* best-effort */ }
                        }
                    }
                }
                continue;
            }
            if (message.type === 'result') {
                const result = message;
                // Prefer the ID captured at init; fall back to the result's.
                sessionId = sessionId || (result.session_id ?? '');
                subtype = result.subtype ?? 'unknown';
                numTurns = result.num_turns ?? numTurns;
                totalCostUsd = result.total_cost_usd ?? 0;
                const u = result.usage;
                if (u)
                    usage = u;
                if (subtype === 'success') {
                    // success carries `result` field with the final text.
                    const r = result.result;
                    if (r)
                        finalText = r;
                }
                // Mirror cost to usage_log. Same shape as the existing
                // logQueryResult, but standalone so we don't depend on
                // PersonalAssistant's instance state.
                const modelUsage = result.modelUsage;
                if (opts.memoryStore && modelUsage) {
                    try {
                        opts.memoryStore.logUsage({
                            sessionKey: `${source}:${opts.sessionKey}`,
                            source: `runagent.${source}`,
                            modelUsage,
                            numTurns,
                            durationMs: Date.now() - startedAt,
                            agentSlug: opts.profile?.slug,
                            totalCostUsd: totalCostUsd,
                        });
                    }
                    catch (err) {
                        logger.debug({ err }, 'runAgent: usage logging failed (non-fatal)');
                    }
                }
                continue;
            }
            // Other message types (UserMessage with tool_result, StreamEvent,
            // SDKCompactBoundaryMessage) — observed but not acted on. The SDK
            // handles compaction internally; we just let it run.
        }
    }
    finally {
        // Do not leave our listener attached to a long-lived caller signal.
        releaseAbortListener();
    }
    logger.info({
        sessionKey: opts.sessionKey,
        source,
        sdkSessionId: sessionId,
        subtype,
        numTurns,
        totalCostUsd: Number(totalCostUsd.toFixed(4)),
        durationMs: Date.now() - startedAt,
        finalTextChars: finalText.length,
    }, 'runAgent: query complete');
    return {
        text: finalText,
        totalCostUsd,
        numTurns,
        sessionId,
        subtype,
        ...(usage ? { usage } : {}),
    };
}
267
+ //# sourceMappingURL=run-agent.js.map
@@ -2347,7 +2347,8 @@ export async function cmdDashboard(opts) {
2347
2347
  const isLongRunning = req.path.startsWith('/brain/')
2348
2348
  || req.path.endsWith('/stream')
2349
2349
  || req.path === '/chat'
2350
- || req.path === '/builder/chat';
2350
+ || req.path === '/builder/chat'
2351
+ || req.path === '/runagent/test';
2351
2352
  const timeoutMs = isLongRunning ? 10 * 60 * 1000 : 8000;
2352
2353
  const timeout = setTimeout(() => {
2353
2354
  if (!res.headersSent) {
@@ -5428,6 +5429,59 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
5428
5429
  res.status(500).json({ error: String(err) });
5429
5430
  }
5430
5431
  });
5432
// ── runAgent test endpoint (Phase 1 of SDK-canonical migration) ──────
//
// POST /api/runagent/test
// body: { prompt, agentSlug?, forceSubagent?, model?, effort?, maxBudgetUsd?, source? }
//
// Lightweight endpoint to verify the new canonical SDK call path
// without rerouting any production traffic. Owner-only.
// Migration plan: /Users/nathan.reynolds/.claude/plans/sdk-canonical-migration.md
//
// Responds 400 when `prompt` is missing or not a string, 500 with the
// stringified error when the run throws, and otherwise a JSON summary of
// the run (text, cost, turns, captured tool activity).
app.post('/api/runagent/test', async (req, res) => {
    const { prompt, agentSlug, forceSubagent, model, effort, maxBudgetUsd, source } = req.body ?? {};
    if (!prompt || typeof prompt !== 'string') {
        res.status(400).json({ error: 'prompt is required' });
        return;
    }
    try {
        const gw = await getGateway();
        const agentMgr = gw.getAgentManager();
        // An unknown slug falls back to null, i.e. the run uses the default main agent.
        const profile = agentSlug ? agentMgr.get(agentSlug) ?? null : null;
        const memoryStore = gw.assistant.getMemoryStore?.();
        const { runAgent } = await import('../agent/run-agent.js');
        const startedAt = Date.now();
        const toolActivity = [];
        const result = await runAgent(prompt, {
            sessionKey: `dashboard:runagent-test:${Date.now()}`,
            source: typeof source === 'string' ? source : 'test',
            profile,
            forceSubagent: typeof forceSubagent === 'string' ? forceSubagent : null,
            agentManager: agentMgr,
            memoryStore: memoryStore,
            model: typeof model === 'string' ? model : undefined,
            effort: typeof effort === 'string' ? effort : undefined,
            maxBudgetUsd: typeof maxBudgetUsd === 'number' ? maxBudgetUsd : undefined,
            onToolActivity: ({ tool, input }) => {
                // Keep only a short preview of each input so the response stays small.
                toolActivity.push({ tool, inputPreview: JSON.stringify(input).slice(0, 200) });
            },
        });
        // The request-timeout middleware may already have answered while the
        // agent was still running; writing again would throw.
        if (res.headersSent) {
            return;
        }
        res.json({
            ok: true,
            text: result.text,
            sessionId: result.sessionId,
            subtype: result.subtype,
            numTurns: result.numTurns,
            totalCostUsd: Number(result.totalCostUsd.toFixed(4)),
            durationMs: Date.now() - startedAt,
            toolCallCount: toolActivity.length,
            toolActivity: toolActivity.slice(0, 50), // cap for sanity
            usage: result.usage,
        });
    }
    catch (err) {
        // Same guard as above: never attempt a second response, which would
        // turn into an unhandled rejection inside this async handler.
        if (!res.headersSent) {
            res.status(500).json({ error: String(err) });
        }
    }
});
5431
5485
  /** Dismiss a diagnosis without applying — clears the cached result. */
5432
5486
  app.post('/api/cron/broken-jobs/:jobName/dismiss-diagnosis', async (req, res) => {
5433
5487
  try {
@@ -1170,147 +1170,14 @@ export function registerAdminTools(server) {
1170
1170
  return textResult(`Triggered "${job_name}" — the daemon will pick it up within a few seconds and run it in the background. ` +
1171
1171
  `Results will be delivered via notifications when complete.`);
1172
1172
  });
1173
- // ── Workflow Tools ──────────────────────────────────────────────────────
1174
- const WORKFLOWS_DIR = path.join(SYSTEM_DIR, 'workflows');
1175
- server.tool('workflow_list', 'List all multi-step workflows with name, description, step count, trigger, and enabled status.', { _empty: z.string().optional().describe('(no parameters needed)') }, async () => {
1176
- if (!existsSync(WORKFLOWS_DIR)) {
1177
- return textResult('No workflows directory found. Create `vault/00-System/workflows/` and add workflow .md files.');
1178
- }
1179
- const { parseAllWorkflows } = await import('../agent/workflow-runner.js');
1180
- const workflows = parseAllWorkflows(WORKFLOWS_DIR);
1181
- if (workflows.length === 0) {
1182
- return textResult('No workflow files found in `vault/00-System/workflows/`.');
1183
- }
1184
- const lines = [];
1185
- for (const wf of workflows) {
1186
- const status = wf.enabled ? 'enabled' : 'disabled';
1187
- const trigger = wf.trigger.schedule ? `schedule: \`${wf.trigger.schedule}\`` : 'manual only';
1188
- lines.push(`**${wf.name}** [${status}]` +
1189
- `\n ${wf.description || '(no description)'}` +
1190
- `\n Trigger: ${trigger}` +
1191
- `\n Steps (${wf.steps.length}): ${wf.steps.map(s => s.id).join(' → ')}` +
1192
- (Object.keys(wf.inputs).length > 0
1193
- ? `\n Inputs: ${Object.entries(wf.inputs).map(([k, v]) => `${k}${v.default ? `="${v.default}"` : ''}`).join(', ')}`
1194
- : ''));
1195
- }
1196
- return textResult(lines.join('\n\n'));
1197
- });
1198
- server.tool('workflow_create', 'Create a new multi-step workflow file. Validates dependencies and writes to vault/00-System/workflows/. The daemon auto-reloads on file change.', {
1199
- name: z.string().describe('Workflow name (used as filename and identifier)'),
1200
- description: z.string().describe('What the workflow does'),
1201
- steps: z.array(z.object({
1202
- id: z.string().describe('Unique step identifier'),
1203
- prompt: z.string().describe('Prompt for the step (supports {{input.*}}, {{steps.*.output}}, {{date}} variables)'),
1204
- dependsOn: z.array(z.string()).default([]).describe('Step IDs this depends on'),
1205
- model: z.string().optional().describe('Model tier: haiku or sonnet'),
1206
- tier: z.number().optional().default(1).describe('Security tier (1-3)'),
1207
- maxTurns: z.number().optional().default(15).describe('Max agent turns'),
1208
- })).describe('Workflow steps'),
1209
- trigger_schedule: z.string().optional().describe('Cron expression for scheduled trigger'),
1210
- inputs: z.record(z.string(), z.object({
1211
- type: z.enum(['string', 'number']).default('string'),
1212
- default: z.string().optional(),
1213
- description: z.string().optional(),
1214
- })).optional().default({}).describe('Input parameters with optional defaults'),
1215
- synthesis_prompt: z.string().optional().describe('Prompt to synthesize final output from all step results'),
1216
- }, async ({ name, description, steps, trigger_schedule, inputs, synthesis_prompt }) => {
1217
- // Validate step IDs are unique
1218
- const ids = new Set(steps.map(s => s.id));
1219
- if (ids.size !== steps.length) {
1220
- return textResult('Error: Duplicate step IDs found.');
1221
- }
1222
- // Validate dependencies exist
1223
- for (const step of steps) {
1224
- for (const dep of step.dependsOn) {
1225
- if (!ids.has(dep)) {
1226
- return textResult(`Error: Step "${step.id}" depends on unknown step "${dep}".`);
1227
- }
1228
- }
1229
- }
1230
- // Validate cron expression if provided
1231
- if (trigger_schedule) {
1232
- const cronMod = await import('node-cron');
1233
- if (!cronMod.default.validate(trigger_schedule)) {
1234
- return textResult(`Invalid cron expression: "${trigger_schedule}".`);
1235
- }
1236
- }
1237
- // Build frontmatter
1238
- const frontmatter = {
1239
- type: 'workflow',
1240
- name,
1241
- description,
1242
- enabled: true,
1243
- trigger: {
1244
- ...(trigger_schedule ? { schedule: trigger_schedule } : {}),
1245
- manual: true,
1246
- },
1247
- };
1248
- if (Object.keys(inputs).length > 0) {
1249
- frontmatter.inputs = inputs;
1250
- }
1251
- frontmatter.steps = steps.map(s => ({
1252
- id: s.id,
1253
- prompt: s.prompt,
1254
- dependsOn: s.dependsOn,
1255
- ...(s.model ? { model: s.model } : {}),
1256
- ...(s.tier && s.tier !== 1 ? { tier: s.tier } : {}),
1257
- ...(s.maxTurns && s.maxTurns !== 15 ? { maxTurns: s.maxTurns } : {}),
1258
- }));
1259
- if (synthesis_prompt) {
1260
- frontmatter.synthesis = { prompt: synthesis_prompt };
1261
- }
1262
- // Write file
1263
- if (!existsSync(WORKFLOWS_DIR)) {
1264
- mkdirSync(WORKFLOWS_DIR, { recursive: true });
1265
- }
1266
- const matterMod = await import('gray-matter');
1267
- const safeName = name.replace(/[^a-zA-Z0-9_-]/g, '-').toLowerCase();
1268
- const filePath = path.join(WORKFLOWS_DIR, `${safeName}.md`);
1269
- if (existsSync(filePath)) {
1270
- return textResult(`Workflow file already exists: ${safeName}.md. Delete or rename it first.`);
1271
- }
1272
- const body = `# ${name}\n\n${description}\n`;
1273
- const output = matterMod.default.stringify(body, frontmatter);
1274
- writeFileSync(filePath, output);
1275
- logger.info({ name, steps: steps.length }, 'Created workflow via MCP tool');
1276
- const goalHint = `\n\n💡 **Goal tracking:** What goal does this workflow serve? Consider creating a persistent goal (\`goal_create\`) and linking related cron jobs so self-improvement can optimize this workflow against measurable outcomes.`;
1277
- return textResult(`Created workflow "${name}" with ${steps.length} steps.\n` +
1278
- `File: vault/00-System/workflows/${safeName}.md\n` +
1279
- `Steps: ${steps.map(s => s.id).join(' → ')}\n` +
1280
- (trigger_schedule ? `Schedule: ${trigger_schedule}\n` : 'Trigger: manual\n') +
1281
- 'The daemon will auto-detect it via file watcher.' +
1282
- goalHint);
1283
- });
1284
- server.tool('workflow_run', 'Trigger a workflow by name with optional input overrides. Returns the workflow result.', {
1285
- name: z.string().describe('Workflow name'),
1286
- inputs: z.record(z.string(), z.string()).optional().default({}).describe('Input overrides (key=value pairs)'),
1287
- }, async ({ name: workflowName, inputs }) => {
1288
- const { parseAllWorkflows } = await import('../agent/workflow-runner.js');
1289
- const { WorkflowRunner } = await import('../agent/workflow-runner.js');
1290
- const workflows = parseAllWorkflows(WORKFLOWS_DIR);
1291
- const wf = workflows.find(w => w.name === workflowName);
1292
- if (!wf) {
1293
- const available = workflows.map(w => w.name).join(', ');
1294
- return textResult(`Workflow "${workflowName}" not found. Available: ${available || 'none'}`);
1295
- }
1296
- if (!wf.enabled) {
1297
- return textResult(`Workflow "${workflowName}" is disabled.`);
1298
- }
1299
- // Build a minimal assistant for standalone MCP execution
1300
- // In daemon mode, the CronScheduler.runWorkflow() path is preferred
1301
- // For MCP standalone, we need to create an assistant instance
1302
- try {
1303
- const { PersonalAssistant } = await import('../agent/assistant.js');
1304
- const assistant = new PersonalAssistant();
1305
- const runner = new WorkflowRunner(assistant);
1306
- const result = await runner.run(wf, inputs);
1307
- return textResult(`**Workflow: ${workflowName}** — ${result.status}\n\n${result.output.slice(0, 3000)}`);
1308
- }
1309
- catch (err) {
1310
- logger.error({ err, workflow: workflowName }, 'Workflow execution failed');
1311
- return textResult(`Workflow "${workflowName}" failed: ${err instanceof Error ? err.message : err}`);
1312
- }
1313
- });
1173
+ // ── Workflow Tools moved to builder-tools.ts ────────────────────────────
1174
+ //
1175
+ // `workflow_list`, `workflow_create`, and `workflow_run` were duplicated
1176
+ // here AND in builder-tools.ts (the newer Trick Builder). The duplicate
1177
+ // registration was crashing the MCP server on startup with
1178
+ // "Tool X is already registered" — silently breaking every fresh MCP
1179
+ // subprocess and forcing fallback to manual file reads.
1180
+ // All three live in builder-tools.ts now.
1314
1181
  // ── Analyze Image ───────────────────────────────────────────────────────
1315
1182
  server.tool('analyze_image', 'Analyze an image by URL. Fetches the image, converts to base64, and uses Claude vision to describe it. Works with any image URL — channel attachments, email attachments, web images.', {
1316
1183
  url: z.string().describe('URL of the image to analyze'),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.42",
3
+ "version": "1.18.44",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",