npm - clementine-agent - Versions diffs - 1.18.192 → 1.18.194 - Mend

clementine-agent 1.18.192 → 1.18.194

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/agent/bg-planner.js +5 -1
package/dist/agent/daily-planner.js +3 -1
package/dist/agent/intent-classifier.d.ts +28 -0
package/dist/agent/intent-classifier.js +10 -0
package/dist/agent/mcp-bridge.js +3 -1
package/dist/agent/run-agent-context.js +8 -1
package/dist/agent/strategic-planner.js +3 -1
package/dist/brain/adapters/pdf.js +3 -1
package/dist/cli/dashboard.js +5 -1
package/dist/config.d.ts +25 -0
package/dist/config.js +45 -0
package/dist/gateway/router.d.ts +0 -1
package/dist/gateway/router.js +76 -118
package/dist/index.js +2 -0
package/package.json +1 -1

package/dist/agent/bg-planner.js CHANGED Viewed

@@ -44,7 +44,7 @@ import fs from 'node:fs';
 import path from 'node:path';
 import { randomUUID } from 'node:crypto';
 import pino from 'pino';
-import { BASE_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext } from '../config.js';
+import { BASE_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext } from '../config.js';
 const logger = pino({ name: 'clementine.bg-planner' });
 // ── Persistence ──────────────────────────────────────────────────────
 /**
@@ -287,6 +287,10 @@ async function runPlannerLlm(userPrompt, systemPrompt, model) {
             // from knowing the working directory + git status so it can decompose
             // accurately. See claudeCodeSystemPrompt() in config.ts.
             systemPrompt: claudeCodeSystemPrompt(systemPrompt),
+            // 1.18.194 — pass OAuth token to the SDK subprocess. Without this,
+            // process.env doesn't have CLAUDE_CODE_OAUTH_TOKEN (config.ts keeps
+            // secrets out of process.env) and the SDK fails with "Not logged in".
+            env: claudeCodeSubprocessEnv(),
         }),
     });
     for await (const msg of stream) {

package/dist/agent/daily-planner.js CHANGED Viewed

@@ -8,7 +8,7 @@
 import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
 import path from 'node:path';
 import pino from 'pino';
-import { BASE_DIR, CRON_REFLECTIONS_DIR, TASKS_FILE, INBOX_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
+import { BASE_DIR, CRON_REFLECTIONS_DIR, TASKS_FILE, INBOX_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
 import { listAllGoals } from '../tools/shared.js';
 const logger = pino({ name: 'clementine.daily-planner' });
 const PLANS_DIR = path.join(BASE_DIR, 'plans', 'daily');
@@ -259,6 +259,8 @@ Rules:
                     // 1.18.192 — preset form so SDK uses Claude Code subscription auth
                     // (raw string → API-key auth → "Not logged in" failure for Max users).
                     systemPrompt: claudeCodeSystemPrompt('You are a planning assistant. Analyze the context and produce a prioritized daily plan as JSON. Return only valid JSON, no markdown fencing.', { minimal: true }),
+                    // 1.18.194 — propagate OAuth token to SDK subprocess.
+                    env: claudeCodeSubprocessEnv(),
                 }),
             });
             for await (const msg of stream) {

package/dist/agent/intent-classifier.d.ts CHANGED Viewed

@@ -76,6 +76,34 @@ export declare function classifyMessageShape(text: string, opts?: {
  */
 export type PlanApprovalSignal = 'approve' | 'revise' | 'cancel' | 'other';
 export declare function detectPlanApproval(message: string): PlanApprovalSignal;
+/**
+ * 1.18.193 — plan-mode opt-in detector.
+ *
+ * Plan-mode used to auto-trigger when `classifyMessageShape` flagged a
+ * message as 'multi-step'. That was too aggressive — Nora's April 28-29
+ * work (38 Bash calls in one chat session) would have been routed through
+ * the planner unnecessarily. Comparison vs friend's 1.18.62 install showed
+ * the auto-route was the main behavior divergence.
+ *
+ * Now plan-mode is opt-in via explicit owner intent:
+ *   - Message starts with `/plan` (case-insensitive)
+ *   - Message contains the `[plan-mode]` token anywhere
+ *
+ * Note (1.18.194): the chat-overflow recovery path no longer routes to the
+ * planner — queueBackgroundTaskAfterContextOverflow was removed. On overflow
+ * the chat-error handler now suggests rephrasing or using `/plan` instead.
+ *
+ * Returns `{ requested: true, cleaned }` if the owner asked for plan mode,
+ * where `cleaned` is the message with the trigger token stripped.
+ * Returns `{ requested: false }` otherwise.
+ */
+export type PlanModeRequest = {
+    requested: true;
+    cleaned: string;
+} | {
+    requested: false;
+};
+export declare function detectPlanModeRequest(message: string): PlanModeRequest;
 /**
  * Generate a follow-up suggestion prompt suffix based on completed work.
  *

package/dist/agent/intent-classifier.js CHANGED Viewed

@@ -332,6 +332,16 @@ export function detectPlanApproval(message) {
         return 'revise';
     return 'other';
 }
+const PLAN_MODE_TRIGGER = /^\s*\/plan\b|\[plan-mode\]/i;
+export function detectPlanModeRequest(message) {
+    if (!message || !PLAN_MODE_TRIGGER.test(message))
+        return { requested: false };
+    const cleaned = message
+        .replace(/^\s*\/plan\b\s*/i, '')
+        .replace(/\[plan-mode\]/gi, '')
+        .trim();
+    return { requested: true, cleaned };
+}
 /**
  * Generate a follow-up suggestion prompt suffix based on completed work.
  *

package/dist/agent/mcp-bridge.js CHANGED Viewed

@@ -10,7 +10,7 @@ import { existsSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
 import os from 'node:os';
 import path from 'node:path';
 import pino from 'pino';
-import { BASE_DIR, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
+import { BASE_DIR, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
 const logger = pino({ name: 'clementine.mcp-bridge' });
 const MCP_SERVERS_FILE = path.join(BASE_DIR, 'mcp-servers.json');
 const INTEGRATIONS_FILE = path.join(BASE_DIR, 'claude-integrations.json');
@@ -453,6 +453,8 @@ export async function probeAvailableTools(force = false) {
             options: normalizeClaudeSdkOptionsForOneMillionContext({
                 // 1.18.192 — preset form for Claude Code subscription auth.
                 systemPrompt: claudeCodeSystemPrompt('Reply ok.', { minimal: true }),
+                // 1.18.194 — propagate OAuth token to SDK subprocess.
+                env: claudeCodeSubprocessEnv(),
                 model: 'claude-haiku-4-5',
                 permissionMode: 'dontAsk',
                 mcpServers: externalMcpServers,

package/dist/agent/run-agent-context.js CHANGED Viewed

@@ -123,7 +123,14 @@ const BEHAVIORAL_POSTURE = `## How you operate
 **Discovering new projects.** If the owner mentions a project by name that isn't in your registry, don't free-float — call \`project_discover\` with the name. It searches common locations (~/Downloads, ~/Desktop, ~/Projects, ~/Documents) and returns ranked candidates. Confirm the right one with the owner, then call \`project_link\` to register it. Future turns will then resolve it automatically.
-**Verification posture for disputed claims.** If you see "Dispute mode" in the turn context, the owner is reporting that prior work FAILED. Past \`done\` claims in memory are NOT authoritative — your recall is biased. Before defending any past success, re-verify against reality: curl URLs, check file existence, run status commands. Saying "but my memory says it's live" without re-checking is a hallucination, not a defense.`;
+**Verification posture for disputed claims.** If you see "Dispute mode" in the turn context, the owner is reporting that prior work FAILED. Past \`done\` claims in memory are NOT authoritative — your recall is biased. Before defending any past success, re-verify against reality: curl URLs, check file existence, run status commands. Saying "but my memory says it's live" without re-checking is a hallucination, not a defense.
+**Fan-out posture (1.18.194).** When the owner asks for 3+ similar operations — send N emails, pull N records, enrich N contacts, summarize N pages — dispatch subagents in PARALLEL via the Agent tool. One subagent per item. Don't loop in your own turn; that's slow, serializes I/O that should be concurrent, and burns context linearly. Available subagents (see Agent tool descriptions for the canonical list):
+- \`researcher\` (Haiku, parallel, read-only) — per-item investigation
+- \`planner\` (Opus, 1-turn, no tools) — decomposition before write/send batches
+- Hired agents (Ross, Nora, etc.) — cross-delegation when relevant
+A 25-contact enrichment that fans out to 25 \`researcher\` calls finishes in ~30s. The same work done serially in your own turn takes 10+ minutes AND fills your context window with tool outputs. Default to fan-out for batch work.`;
 /**
  * Read the long-term memory block for an autonomous run (cron, team-task).
  * Returns the agent-specific MEMORY.md when a hired agent is active, the

package/dist/agent/strategic-planner.js CHANGED Viewed

@@ -12,7 +12,7 @@
 import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
 import path from 'node:path';
 import pino from 'pino';
-import { BASE_DIR, GOALS_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
+import { BASE_DIR, GOALS_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
 import { listAllGoals } from '../tools/shared.js';
 const logger = pino({ name: 'clementine.strategic-planner' });
 const DAILY_PLANS_DIR = path.join(BASE_DIR, 'plans', 'daily');
@@ -31,6 +31,8 @@ async function llmJsonCall(prompt, systemPrompt) {
             // failures on Max-only installs. Logs confirmed weekly review was
             // silently falling through to the fallback path here since the bug landed.
             systemPrompt: claudeCodeSystemPrompt(systemPrompt, { minimal: true }),
+            // 1.18.194 — propagate OAuth token to SDK subprocess.
+            env: claudeCodeSubprocessEnv(),
         }),
     });
     for await (const msg of stream) {

package/dist/brain/adapters/pdf.js CHANGED Viewed

@@ -14,7 +14,7 @@ import { readFileSync } from 'node:fs';
 import path from 'node:path';
 import pdfParse from 'pdf-parse';
 import { contentHash } from './common.js';
-import { MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../../config.js';
+import { MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../../config.js';
 export async function* parsePdf(filePath) {
     let buf;
     try {
@@ -98,6 +98,8 @@ async function ocrPdfViaClaude(filePath) {
                 // Without this, every scanned-PDF ingest hit "Not logged in" and
                 // silently fell back to empty OCR output.
                 systemPrompt: claudeCodeSystemPrompt('You are a faithful OCR transcriber. Copy text exactly as written. When the PDF has images or scans, read the text from them using vision. Never invent content.', { minimal: true }),
+                // 1.18.194 — propagate OAuth token to SDK subprocess.
+                env: claudeCodeSubprocessEnv(),
                 // Claude Code's built-in Read tool handles PDFs (text + vision)
                 tools: ['Read'],
                 allowedTools: ['Read'],

package/dist/cli/dashboard.js CHANGED Viewed

@@ -19,7 +19,7 @@ import { TunnelManager } from './tunnel.js';
 import { AgentManager } from '../agent/agent-manager.js';
 import { discoverMcpServers, getClaudeIntegrations, KNOWN_MCP_DESCRIPTIONS } from '../agent/mcp-bridge.js';
 import { buildBuilderEnrichedMessage, builderSessionKey } from '../dashboard/builder/prompt.js';
-import { AGENTS_DIR, MEMORY_FILE, MODELS, SESSIONS_FILE, TIMEZONE, applyOneMillionContextRecovery, claudeCodeSystemPrompt, currentTimeZone, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, setEnvOverride, } from '../config.js';
+import { AGENTS_DIR, MEMORY_FILE, MODELS, SESSIONS_FILE, TIMEZONE, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, currentTimeZone, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, setEnvOverride, } from '../config.js';
 import { parseTasks } from '../tools/shared.js';
 // 1.18.160 — also pull parseCronJobs + parseAgentCronJobs so getCronJobs()
 // returns the same merged set the runtime fires (CRON.md + agent CRON +
@@ -6401,6 +6401,8 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
                     maxTurns: 3,
                     // 1.18.192 — preset form for Claude Code subscription auth.
                     systemPrompt: claudeCodeSystemPrompt('You are a data enumerator. You call the given tool once, extract the items from its response, and emit a strict JSON array. No commentary.', { minimal: true }),
+                    // 1.18.194 — propagate OAuth token to SDK subprocess.
+                    env: claudeCodeSubprocessEnv(),
                     allowedTools: [tool],
                     mcpServers,
                     permissionMode: 'dontAsk',
@@ -9728,6 +9730,8 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
                             maxTurns: 1,
                             // 1.18.192 — preset form for Claude Code subscription auth.
                             systemPrompt: claudeCodeSystemPrompt('You are a memory consolidation assistant. Extract only facts directly evidenced by the corpus. Be terse. Output exactly the requested format.', { minimal: true }),
+                            // 1.18.194 — propagate OAuth token to SDK subprocess.
+                            env: claudeCodeSubprocessEnv(),
                         }),
                     });
                     for await (const msg of stream) {

package/dist/config.d.ts CHANGED Viewed

@@ -63,6 +63,31 @@ export declare function claudeCodeSystemPrompt(append: string, opts?: {
     append: string;
     excludeDynamicSections?: boolean;
 };
+/**
+ * 1.18.194 — Build the env Record for a direct SDK `query()` call so the
+ * Claude Code OAuth token reaches the SDK subprocess.
+ *
+ * Why this matters: `getSecret('CLAUDE_CODE_OAUTH_TOKEN')` reads
+ * ~/.clementine/.env and stores the value in the in-memory constant
+ * `CLAUDE_CODE_OAUTH_TOKEN` — it intentionally does NOT write to
+ * process.env (config.ts:478 keeps secrets out of process.env to prevent
+ * leakage). When a direct `query()` call omits `env:`, the SDK defaults
+ * to `process.env` — which doesn't have the token — and authentication
+ * silently falls back to API-key mode and fails with "Not logged in".
+ *
+ * 1.18.192 fixed the systemPrompt shape (preset vs raw string) but missed
+ * this env-propagation half of the same auth bug. Daily-planner, weekly
+ * review, bg-planner, and several other Haiku utility paths were still
+ * silently failing AFTER 1.18.192 because env: wasn't being passed.
+ *
+ * Use this alongside `claudeCodeSystemPrompt()` for every direct `query()`
+ * call. The runAgent path already builds its own env via buildRunAgentEnv;
+ * the assistant.ts auto-memory + verifier already use SAFE_ENV (same
+ * pattern). This helper exists for the lightweight utility callers.
+ *
+ * Priority order matches buildRunAgentEnv: OAuth > ANTHROPIC_AUTH_TOKEN > API key.
+ */
+export declare function claudeCodeSubprocessEnv(): Record<string, string>;
 export declare function normalizeClaudeModelForOneMillionContext(model: string, mode?: OneMillionContextMode): string;
 export declare function usesOneMillionContext(model: string | null | undefined, mode?: OneMillionContextMode, plan?: ClaudePlan): boolean;
 /**

package/dist/config.js CHANGED Viewed

@@ -225,6 +225,51 @@ export function claudeCodeSystemPrompt(append, opts) {
         ...(opts?.minimal ? { excludeDynamicSections: true } : {}),
     };
 }
+/**
+ * 1.18.194 — Build the env Record for a direct SDK `query()` call so the
+ * Claude Code OAuth token reaches the SDK subprocess.
+ *
+ * Why this matters: `getSecret('CLAUDE_CODE_OAUTH_TOKEN')` reads
+ * ~/.clementine/.env and stores the value in the in-memory constant
+ * `CLAUDE_CODE_OAUTH_TOKEN` — it intentionally does NOT write to
+ * process.env (config.ts:478 keeps secrets out of process.env to prevent
+ * leakage). When a direct `query()` call omits `env:`, the SDK defaults
+ * to `process.env` — which doesn't have the token — and authentication
+ * silently falls back to API-key mode and fails with "Not logged in".
+ *
+ * 1.18.192 fixed the systemPrompt shape (preset vs raw string) but missed
+ * this env-propagation half of the same auth bug. Daily-planner, weekly
+ * review, bg-planner, and several other Haiku utility paths were still
+ * silently failing AFTER 1.18.192 because env: wasn't being passed.
+ *
+ * Use this alongside `claudeCodeSystemPrompt()` for every direct `query()`
+ * call. The runAgent path already builds its own env via buildRunAgentEnv;
+ * the assistant.ts auto-memory + verifier already use SAFE_ENV (same
+ * pattern). This helper exists for the lightweight utility callers.
+ *
+ * Priority order matches buildRunAgentEnv: OAuth > ANTHROPIC_AUTH_TOKEN > API key.
+ */
+export function claudeCodeSubprocessEnv() {
+    const env = {
+        PATH: process.env.PATH ?? '',
+        HOME: process.env.HOME ?? '',
+        LANG: process.env.LANG ?? 'en_US.UTF-8',
+        TERM: process.env.TERM ?? 'xterm-256color',
+        USER: process.env.USER ?? '',
+        SHELL: process.env.SHELL ?? '',
+        CLEMENTINE_HOME: BASE_DIR,
+    };
+    const oauthTok = CLAUDE_CODE_OAUTH_TOKEN || process.env.CLAUDE_CODE_OAUTH_TOKEN;
+    const authTok = process.env.ANTHROPIC_AUTH_TOKEN;
+    const apiKey = ANTHROPIC_API_KEY || process.env.ANTHROPIC_API_KEY;
+    if (oauthTok)
+        env.CLAUDE_CODE_OAUTH_TOKEN = oauthTok;
+    else if (authTok)
+        env.ANTHROPIC_AUTH_TOKEN = authTok;
+    else if (apiKey)
+        env.ANTHROPIC_API_KEY = apiKey;
+    return env;
+}
 export function normalizeClaudeModelForOneMillionContext(model, mode = currentOneMillionContextMode()) {
     const family = modelFamily(model);
     if (mode === 'on')

package/dist/gateway/router.d.ts CHANGED Viewed

@@ -87,7 +87,6 @@ export declare class Gateway {
     private createBackgroundOffer;
     private queueBackgroundOffer;
     private formatBackgroundQueuedResponse;
-    private queueBackgroundTaskAfterContextOverflow;
     /**
      * 1.18.191 — chat-side plan mode state machine.
      *

package/dist/gateway/router.js CHANGED Viewed

@@ -49,15 +49,13 @@ const CHAT_TIMEOUT_MS = 10 * 60 * 1000;
  *  Safety net so no session runs forever, even if active.
  *  Primary guardrail is cost budget (maxBudgetUsd), not this timer. */
 const CHAT_MAX_WALL_MS = 30 * 60 * 1000;
-// 1.18.189 — tightened from 6_000 / 16_000 because the recovery prompt
-// was eating ~22KB of the bg-task worker's context window before any
-// real work started. On 2026-05-12 the worker autocompact-thrashed while
-// reading project files; the new tighter caps give it ~10KB more headroom
-// to do actual tool calls. The dropped content (older memory recall,
-// less-relevant bg-task headlines) is recoverable via memory_search if
-// the model actually needs it.
+// 1.18.189 — tightened cap on retry-recovery context. 1.18.194 — only
+// CHAT_CONTEXT_RETRY_CONTEXT_MAX_CHARS still has a caller (the legacy
+// buildContextOverflowRetryPrompt, exported for an existing unit test
+// but no longer invoked from the chat path). The system-prompt-cap was
+// removed when we deleted the in-line retry loop in favor of trusting
+// the SDK's own autocompact.
 const CHAT_CONTEXT_RETRY_CONTEXT_MAX_CHARS = 3_000;
-const CHAT_CONTEXT_RETRY_SYSTEM_MAX_CHARS = 8_000;
 const BACKGROUND_TASK_ID_RE = /\bbg-[a-z0-9]+-[a-f0-9]{6}\b/i;
 function collectRunToolNames(runId) {
     if (!runId)
@@ -429,46 +427,12 @@ export class Gateway {
             `Use \`status ${task.id}\` or check the dashboard Background Tasks panel for progress.`,
         ].join('\n');
     }
-    queueBackgroundTaskAfterContextOverflow(sessionKey, prompt) {
-        // 1.18.190 — the chat-overflow recovery path is now the canonical
-        // entry to the planner-orchestrator chain. Instead of queuing a
-        // monolithic bg-task that tries to do everything in one worker
-        // (which thrashed when the worker's context filled), we queue a
-        // planner task. The planner is a tiny Sonnet LLM call that
-        // decomposes the user's request into 3-7 PlanSteps; the
-        // orchestrator then dispatches one step at a time, each with
-        // its own fresh 200K worker window. See agent/bg-planner.ts +
-        // agent/bg-orchestrator.ts for the full pattern.
-        //
-        // The legacy `detectComplexTaskForBackground` heuristic is no
-        // longer used here — the planner itself decides how to decompose,
-        // and per-step maxMinutes is governed by orchestrator settings.
-        // Planner tasks get a tight 5-minute cap; total chain wall-clock
-        // is the sum of each step's own maxMinutes.
-        const task = createBackgroundTask({
-            fromAgent: this.backgroundAgentForSession(sessionKey),
-            prompt,
-            maxMinutes: 5, // planner needs minutes, not hours
-            sessionKey,
-            kind: 'planner',
-        });
-        logger.warn({
-            taskId: task.id,
-            sessionKey,
-            fromAgent: task.fromAgent,
-            kind: 'planner',
-        }, 'Queued planner task after repeated chat context overflow');
-        return {
-            task,
-            response: [
-                `The live chat context hit the limit, so I'm decomposing your request into chained steps via background task **${task.id}**.`,
-                '',
-                `Step 1: a Sonnet planner reads the request and emits a plan (~30 seconds).`,
-                `Then each step runs as its own fresh task — you'll see step-by-step updates rather than one big "done" at the end.`,
-                `Use \`status ${task.id}\` or the dashboard Background Tasks panel for progress.`,
-            ].join('\n'),
-        };
-    }
+    // 1.18.194 — `queueBackgroundTaskAfterContextOverflow` removed.
+    // The chat-overflow path no longer auto-fires the planner. Instead
+    // the chat-error handler surfaces a clean "rephrase or `/plan`"
+    // message and trusts the SDK's own autocompact. The planner +
+    // orchestrator stay available as the implementation behind explicit
+    // `/plan` (see `_maybeHandlePlanMode`).
     /**
      * 1.18.191 — chat-side plan mode state machine.
      *
@@ -493,7 +457,7 @@ export class Gateway {
      */
     async _maybeHandlePlanMode(opts) {
         const sess = this.sessions.get(opts.sessionKey);
-        const { detectPlanApproval } = await import('../agent/intent-classifier.js');
+        const { detectPlanApproval, detectPlanModeRequest } = await import('../agent/intent-classifier.js');
         const { planRequest, savePlan, loadPlan } = await import('../agent/bg-planner.js');
         const { dispatchChain } = await import('../agent/bg-orchestrator.js');
         // ── Path A: approval-pending ────────────────────────────────────
@@ -564,11 +528,31 @@ export class Gateway {
             // pending state. The model will see the user message normally.
             return { handled: false };
         }
-        // ── Path B: multi-step entry ────────────────────────────────────
-        if (opts.shape === 'multi-step' && sess) {
+        // ── Path B: explicit plan-mode entry (1.18.193) ─────────────────
+        //
+        // Plan mode is now OPT-IN, not auto-triggered by message shape.
+        // Default chat behavior matches 1.18.62: the SDK query runs the
+        // work in one continuous Sonnet session, like Nora did on April
+        // 28-29 (38 Bash calls in one session — no decomposition needed).
+        //
+        // The planner-orchestrator remains available for genuinely huge
+        // jobs, but the owner has to opt in:
+        //
+        //   - Message starts with `/plan` (case-insensitive)
+        //   - Message contains `[plan-mode]` token anywhere
+        //
+        // 1.18.194 — the old chat-overflow escape hatch
+        // (queueBackgroundTaskAfterContextOverflow → planner) is gone. When
+        // the SDK session overflows, the chat-error handler now tells the
+        // owner to rephrase or use `/plan`; nothing auto-fires the planner.
+        //
+        // We keep shape classification (it still gates turn-context
+        // density for token savings on 'simple' messages), but shape no
+        // longer routes execution.
+        const planRequestSignal = detectPlanModeRequest(opts.userMessage);
+        if (planRequestSignal.requested && sess) {
+            const cleanedRequest = planRequestSignal.cleaned;
             try {
-                // Stream a "thinking..." update so the user knows planning is
-                // happening rather than seeing 30s of silence.
                 if (opts.onText) {
                     try {
                         opts.onText('🤔 Planning the steps...');
@@ -576,7 +560,7 @@ export class Gateway {
                     catch { /* non-fatal */ }
                 }
                 const plan = await planRequest({
-                    userRequest: opts.userMessage,
+                    userRequest: cleanedRequest || opts.userMessage,
                     originatingSessionKey: opts.sessionKey,
                     ...(opts.activeProject ? { project: opts.activeProject } : {}),
                 });
@@ -592,12 +576,14 @@ export class Gateway {
                 };
             }
             catch (err) {
-                logger.warn({ err, sessionKey: opts.sessionKey }, 'Plan mode: planRequest failed at entry');
+                logger.warn({ err, sessionKey: opts.sessionKey }, 'Plan mode: planRequest failed at explicit entry');
                 // Fall through to normal chat. Better than blocking the owner.
                 return { handled: false };
             }
         }
         // Not a plan-mode case — fall through to normal chat.
+        // This is the path 99% of messages take. Like 1.18.62 — the SDK
+        // query runs the work in one continuous Sonnet session.
         return { handled: false };
     }
     /** Format a plan for owner approval in chat. */
@@ -2328,7 +2314,6 @@ export class Gateway {
                     // Interrupt flag was set but no useful partial text — just clear it.
                     delete sessState.pendingInterrupt;
                 }
-                let contextOverflowRecoveryPrompt = '';
                 try {
                     // ── Canonical SDK chat path (Phase 5) ────────────────────────
                     // runAgent() owns chat. No legacy fallback — errors propagate
@@ -2471,7 +2456,6 @@ export class Gateway {
                     const chatSystemAppend = resolvedSkills && resolvedSkills.promptBlock
                         ? (baseSystemAppend ? `${baseSystemAppend}\n\n${resolvedSkills.promptBlock}` : resolvedSkills.promptBlock)
                         : baseSystemAppend;
-                    const retrySystemAppend = trimContextRecoveryText(chatSystemAppend, CHAT_CONTEXT_RETRY_SYSTEM_MAX_CHARS);
                     // Per-turn context (recall + persistent learnings + silent
                     // blocks + security/toolset directives) — real chat only.
                     // Builder doesn't need recall of unrelated transcripts.
@@ -2616,34 +2600,12 @@ export class Gateway {
                         },
                         abortSignal: chatAc.signal,
                     });
-                    let didContextOverflowRetry = false;
-                    const contextOverflowAfterRetryError = () => new Error('rapid_refill_breaker after context overflow retry');
-                    const retryAfterContextOverflow = async () => {
-                        if (didContextOverflowRetry)
-                            throw contextOverflowAfterRetryError();
-                        didContextOverflowRetry = true;
-                        const retryPrompt = buildContextOverflowRetryPrompt({
-                            chatPrompt,
-                            turnContextPrefix,
-                            project: sess?.project ?? null,
-                        });
-                        contextOverflowRecoveryPrompt = retryPrompt;
-                        logger.info({
-                            sessionKey: effectiveSessionKey,
-                            hadResume: !!priorSdkSessionId,
-                            promptChars: finalPrompt.length,
-                            retryPromptChars: retryPrompt.length,
-                            systemAppendChars: chatSystemAppend.length,
-                            retrySystemAppendChars: retrySystemAppend.length,
-                        }, 'Context overflow — retrying current message in fresh SDK session');
-                        if (onProgress) {
-                            await onProgress('refreshing conversation context...').catch(() => { });
-                        }
-                        this.assistant.clearSession(effectiveSessionKey);
-                        return runAgent(retryPrompt, buildRunAgentChatOptions({
-                            ...(retrySystemAppend ? { systemPromptAppend: retrySystemAppend } : {}),
-                        }));
-                    };
+                    // 1.18.194 — single SDK call. The SDK does its own autocompact
+                    // internally; we don't layer our own compress-and-retry on top.
+                    // If the SDK returns context_overflow (either thrown or as a
+                    // result with terminalReason), we surface a clean "rephrase or
+                    // /plan" message via the `case 'context_overflow':` handler
+                    // below. No more "Planning failed" half-finished chains.
                     let runAgentResult;
                     try {
                         runAgentResult = await runAgent(finalPrompt, buildRunAgentChatOptions({
@@ -2655,34 +2617,18 @@ export class Gateway {
                         if (chatAc.signal.aborted || classifyChatError(err) !== 'context_overflow') {
                             throw err;
                         }
-                        runAgentResult = await retryAfterContextOverflow();
+                        // Re-throw so the outer catch's classifyChatError gets it
+                        // and routes to the 'context_overflow' case.
+                        throw err;
                     }
                     if (!chatAc.signal.aborted && runAgentResultIndicatesContextOverflow(runAgentResult)) {
-                        if (didContextOverflowRetry) {
-                            logger.info({
-                                sessionKey: effectiveSessionKey,
-                                subtype: runAgentResult.subtype,
-                                terminalReason: runAgentResult.terminalReason,
-                                textPreview: runAgentResult.text?.slice(0, 240),
-                            }, 'Context overflow result after retry — queueing background task');
-                            throw contextOverflowAfterRetryError();
-                        }
                         logger.info({
                             sessionKey: effectiveSessionKey,
                             subtype: runAgentResult.subtype,
                             terminalReason: runAgentResult.terminalReason,
                             textPreview: runAgentResult.text?.slice(0, 240),
-                        }, 'Context overflow result — retrying current message in fresh SDK session');
-                        runAgentResult = await retryAfterContextOverflow();
-                        if (runAgentResultIndicatesContextOverflow(runAgentResult)) {
-                            logger.info({
-                                sessionKey: effectiveSessionKey,
-                                subtype: runAgentResult.subtype,
-                                terminalReason: runAgentResult.terminalReason,
-                                textPreview: runAgentResult.text?.slice(0, 240),
-                            }, 'Context overflow result after retry — queueing background task');
-                            throw contextOverflowAfterRetryError();
-                        }
+                        }, 'Context overflow result — autocompact ceiling reached, surfacing recovery message');
+                        throw new Error('context_overflow_after_autocompact');
                     }
                     if (ledgerRunMetadata) {
                         ledgerRunMetadata.runId = runAgentResult.runId;
@@ -2767,19 +2713,31 @@ export class Gateway {
                             applyOneMillionContextRecovery();
                             this.clearSession(effectiveSessionKey);
                             return oneMillionContextRecoveryMessage();
-                        case 'context_overflow':
-                            logger.info({ sessionKey }, 'Context overflow after retry — queueing background task');
+                        case 'context_overflow': {
+                            // 1.18.194 — trust the SDK. By the time we see context_overflow
+                            // here, the SDK has ALREADY tried autocompact (it's built-in).
+                            // Our previous behavior was to compress + retry + queue a
+                            // separate planner background task. That layered our own
+                            // retry on top of the SDK's, and when any step in the planner
+                            // pipeline failed (auth, planning, chain dispatch), users saw
+                            // a confusing "Planning failed" message — that's what bit
+                            // Zach. SDK best practice: when autocompact + retry have both
+                            // failed, that's a real context ceiling. Surface a clean
+                            // message that gives the owner two recovery options, clear
+                            // the session pointer, and trust them to resend smaller or
+                            // opt in to explicit plan mode.
+                            logger.info({ sessionKey }, 'Context overflow — autocompact ceiling reached, resetting');
                             this.assistant.clearSession(effectiveSessionKey);
-                            {
-                                const promptForBackground = contextOverflowRecoveryPrompt || chatPrompt;
-                                const { response, task } = this.queueBackgroundTaskAfterContextOverflow(sessionKey, promptForBackground);
-                                if (ledgerRunMetadata) {
-                                    ledgerRunMetadata.executionMode = 'background_queued';
-                                    ledgerRunMetadata.backgroundTaskId = task.id;
-                                }
-                                this.mirrorChatExchange(sessionKey, originalText, response, { model: 'chat-control' });
-                                return response;
-                            }
+                            const response = [
+                                "That work pushed past the context limit even with autocompact.",
+                                "I've reset our conversation. Two ways forward:",
+                                "",
+                                "1. Rephrase the task in smaller scope (e.g. 'just the first 10' instead of 'all 100')",
+                                "2. Use `/plan` to have me decompose it into chained workers before running",
+                            ].join('\n');
+                            this.mirrorChatExchange(sessionKey, originalText, response, { model: 'chat-control' });
+                            return response;
+                        }
                         case 'auth':
                             this.recordAuthFailure();
                             return "I'm temporarily offline due to an authentication issue. The owner needs to re-authenticate — I'll recover automatically once it's resolved.";

package/dist/index.js CHANGED Viewed

@@ -638,6 +638,8 @@ async function asyncMain() {
                             // 1.18.192 — preset form so SDK uses Claude Code subscription
                             // auth (raw string → API-key path → "Not logged in" for Max users).
                             systemPrompt: config.claudeCodeSystemPrompt('You are a memory consolidation assistant. Be concise.', { minimal: true }),
+                            // 1.18.194 — propagate OAuth token to SDK subprocess.
+                            env: config.claudeCodeSubprocessEnv(),
                         }),
                     });
                     for await (const msg of stream) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.192",
+  "version": "1.18.194",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",