clementine-agent 1.18.192 → 1.18.194

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,7 +44,7 @@ import fs from 'node:fs';
44
44
  import path from 'node:path';
45
45
  import { randomUUID } from 'node:crypto';
46
46
  import pino from 'pino';
47
- import { BASE_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext } from '../config.js';
47
+ import { BASE_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext } from '../config.js';
48
48
  const logger = pino({ name: 'clementine.bg-planner' });
49
49
  // ── Persistence ──────────────────────────────────────────────────────
50
50
  /**
@@ -287,6 +287,10 @@ async function runPlannerLlm(userPrompt, systemPrompt, model) {
287
287
  // from knowing the working directory + git status so it can decompose
288
288
  // accurately. See claudeCodeSystemPrompt() in config.ts.
289
289
  systemPrompt: claudeCodeSystemPrompt(systemPrompt),
290
+ // 1.18.194 — pass OAuth token to the SDK subprocess. Without this,
291
+ // process.env doesn't have CLAUDE_CODE_OAUTH_TOKEN (config.ts keeps
292
+ // secrets out of process.env) and the SDK fails with "Not logged in".
293
+ env: claudeCodeSubprocessEnv(),
290
294
  }),
291
295
  });
292
296
  for await (const msg of stream) {
@@ -8,7 +8,7 @@
8
8
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
9
9
  import path from 'node:path';
10
10
  import pino from 'pino';
11
- import { BASE_DIR, CRON_REFLECTIONS_DIR, TASKS_FILE, INBOX_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
11
+ import { BASE_DIR, CRON_REFLECTIONS_DIR, TASKS_FILE, INBOX_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
12
12
  import { listAllGoals } from '../tools/shared.js';
13
13
  const logger = pino({ name: 'clementine.daily-planner' });
14
14
  const PLANS_DIR = path.join(BASE_DIR, 'plans', 'daily');
@@ -259,6 +259,8 @@ Rules:
259
259
  // 1.18.192 — preset form so SDK uses Claude Code subscription auth
260
260
  // (raw string → API-key auth → "Not logged in" failure for Max users).
261
261
  systemPrompt: claudeCodeSystemPrompt('You are a planning assistant. Analyze the context and produce a prioritized daily plan as JSON. Return only valid JSON, no markdown fencing.', { minimal: true }),
262
+ // 1.18.194 — propagate OAuth token to SDK subprocess.
263
+ env: claudeCodeSubprocessEnv(),
262
264
  }),
263
265
  });
264
266
  for await (const msg of stream) {
@@ -76,6 +76,34 @@ export declare function classifyMessageShape(text: string, opts?: {
76
76
  */
77
77
  export type PlanApprovalSignal = 'approve' | 'revise' | 'cancel' | 'other';
78
78
  export declare function detectPlanApproval(message: string): PlanApprovalSignal;
79
+ /**
80
+ * 1.18.193 — plan-mode opt-in detector.
81
+ *
82
+ * Plan-mode used to auto-trigger when `classifyMessageShape` flagged a
83
+ * message as 'multi-step'. That was too aggressive — Nora's April 28-29
84
+ * work (38 Bash calls in one chat session) would have been routed through
85
+ * the planner unnecessarily. Comparison vs friend's 1.18.62 install showed
86
+ * the auto-route was the main behavior divergence.
87
+ *
88
+ * Now plan-mode is opt-in via explicit owner intent:
89
+ * - Message starts with `/plan` (case-insensitive)
90
+ * - Message contains the `[plan-mode]` token anywhere
91
+ *
92
+ * As of 1.18.194 a chat context overflow no longer routes to the
93
+ * planner: queueBackgroundTaskAfterContextOverflow was removed and the
94
+ * chat-error handler replies with a "rephrase or `/plan`" message.
95
+ *
96
+ * Returns `{ requested: true, cleaned }` if the owner asked for plan mode,
97
+ * where `cleaned` is the message with the trigger token stripped.
98
+ * Returns `{ requested: false }` otherwise.
99
+ */
100
+ export type PlanModeRequest = {
101
+ requested: true;
102
+ cleaned: string;
103
+ } | {
104
+ requested: false;
105
+ };
106
+ export declare function detectPlanModeRequest(message: string): PlanModeRequest;
79
107
  /**
80
108
  * Generate a follow-up suggestion prompt suffix based on completed work.
81
109
  *
@@ -332,6 +332,16 @@ export function detectPlanApproval(message) {
332
332
  return 'revise';
333
333
  return 'other';
334
334
  }
335
+ const PLAN_MODE_TRIGGER = /^\s*\/plan\b|\[plan-mode\]/i;
336
+ export function detectPlanModeRequest(message) {
337
+ if (!message || !PLAN_MODE_TRIGGER.test(message))
338
+ return { requested: false };
339
+ const cleaned = message
340
+ .replace(/^\s*\/plan\b\s*/i, '')
341
+ .replace(/\[plan-mode\]/gi, '')
342
+ .trim();
343
+ return { requested: true, cleaned };
344
+ }
335
345
  /**
336
346
  * Generate a follow-up suggestion prompt suffix based on completed work.
337
347
  *
@@ -10,7 +10,7 @@ import { existsSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
10
10
  import os from 'node:os';
11
11
  import path from 'node:path';
12
12
  import pino from 'pino';
13
- import { BASE_DIR, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
13
+ import { BASE_DIR, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
14
14
  const logger = pino({ name: 'clementine.mcp-bridge' });
15
15
  const MCP_SERVERS_FILE = path.join(BASE_DIR, 'mcp-servers.json');
16
16
  const INTEGRATIONS_FILE = path.join(BASE_DIR, 'claude-integrations.json');
@@ -453,6 +453,8 @@ export async function probeAvailableTools(force = false) {
453
453
  options: normalizeClaudeSdkOptionsForOneMillionContext({
454
454
  // 1.18.192 — preset form for Claude Code subscription auth.
455
455
  systemPrompt: claudeCodeSystemPrompt('Reply ok.', { minimal: true }),
456
+ // 1.18.194 — propagate OAuth token to SDK subprocess.
457
+ env: claudeCodeSubprocessEnv(),
456
458
  model: 'claude-haiku-4-5',
457
459
  permissionMode: 'dontAsk',
458
460
  mcpServers: externalMcpServers,
@@ -123,7 +123,14 @@ const BEHAVIORAL_POSTURE = `## How you operate
123
123
 
124
124
  **Discovering new projects.** If the owner mentions a project by name that isn't in your registry, don't free-float — call \`project_discover\` with the name. It searches common locations (~/Downloads, ~/Desktop, ~/Projects, ~/Documents) and returns ranked candidates. Confirm the right one with the owner, then call \`project_link\` to register it. Future turns will then resolve it automatically.
125
125
 
126
- **Verification posture for disputed claims.** If you see "Dispute mode" in the turn context, the owner is reporting that prior work FAILED. Past \`done\` claims in memory are NOT authoritative — your recall is biased. Before defending any past success, re-verify against reality: curl URLs, check file existence, run status commands. Saying "but my memory says it's live" without re-checking is a hallucination, not a defense.`;
126
+ **Verification posture for disputed claims.** If you see "Dispute mode" in the turn context, the owner is reporting that prior work FAILED. Past \`done\` claims in memory are NOT authoritative — your recall is biased. Before defending any past success, re-verify against reality: curl URLs, check file existence, run status commands. Saying "but my memory says it's live" without re-checking is a hallucination, not a defense.
127
+
128
+ **Fan-out posture (1.18.194).** When the owner asks for 3+ similar operations — send N emails, pull N records, enrich N contacts, summarize N pages — dispatch subagents in PARALLEL via the Agent tool. One subagent per item. Don't loop in your own turn; that's slow, serializes I/O that should be concurrent, and burns context linearly. Available subagents (see Agent tool descriptions for the canonical list):
129
+ - \`researcher\` (Haiku, parallel, read-only) — per-item investigation
130
+ - \`planner\` (Opus, 1-turn, no tools) — decomposition before write/send batches
131
+ - Hired agents (Ross, Nora, etc.) — cross-delegation when relevant
132
+
133
+ A 25-contact enrichment that fans out to 25 \`researcher\` calls finishes in ~30s. The same work done serially in your own turn takes 10+ minutes AND fills your context window with tool outputs. Default to fan-out for batch work.`;
127
134
  /**
128
135
  * Read the long-term memory block for an autonomous run (cron, team-task).
129
136
  * Returns the agent-specific MEMORY.md when a hired agent is active, the
@@ -12,7 +12,7 @@
12
12
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
13
13
  import path from 'node:path';
14
14
  import pino from 'pino';
15
- import { BASE_DIR, GOALS_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
15
+ import { BASE_DIR, GOALS_DIR, MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../config.js';
16
16
  import { listAllGoals } from '../tools/shared.js';
17
17
  const logger = pino({ name: 'clementine.strategic-planner' });
18
18
  const DAILY_PLANS_DIR = path.join(BASE_DIR, 'plans', 'daily');
@@ -31,6 +31,8 @@ async function llmJsonCall(prompt, systemPrompt) {
31
31
  // failures on Max-only installs. Logs confirmed weekly review was
32
32
  // silently falling through to the fallback path here since the bug landed.
33
33
  systemPrompt: claudeCodeSystemPrompt(systemPrompt, { minimal: true }),
34
+ // 1.18.194 — propagate OAuth token to SDK subprocess.
35
+ env: claudeCodeSubprocessEnv(),
34
36
  }),
35
37
  });
36
38
  for await (const msg of stream) {
@@ -14,7 +14,7 @@ import { readFileSync } from 'node:fs';
14
14
  import path from 'node:path';
15
15
  import pdfParse from 'pdf-parse';
16
16
  import { contentHash } from './common.js';
17
- import { MODELS, applyOneMillionContextRecovery, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../../config.js';
17
+ import { MODELS, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, } from '../../config.js';
18
18
  export async function* parsePdf(filePath) {
19
19
  let buf;
20
20
  try {
@@ -98,6 +98,8 @@ async function ocrPdfViaClaude(filePath) {
98
98
  // Without this, every scanned-PDF ingest hit "Not logged in" and
99
99
  // silently fell back to empty OCR output.
100
100
  systemPrompt: claudeCodeSystemPrompt('You are a faithful OCR transcriber. Copy text exactly as written. When the PDF has images or scans, read the text from them using vision. Never invent content.', { minimal: true }),
101
+ // 1.18.194 — propagate OAuth token to SDK subprocess.
102
+ env: claudeCodeSubprocessEnv(),
101
103
  // Claude Code's built-in Read tool handles PDFs (text + vision)
102
104
  tools: ['Read'],
103
105
  allowedTools: ['Read'],
@@ -19,7 +19,7 @@ import { TunnelManager } from './tunnel.js';
19
19
  import { AgentManager } from '../agent/agent-manager.js';
20
20
  import { discoverMcpServers, getClaudeIntegrations, KNOWN_MCP_DESCRIPTIONS } from '../agent/mcp-bridge.js';
21
21
  import { buildBuilderEnrichedMessage, builderSessionKey } from '../dashboard/builder/prompt.js';
22
- import { AGENTS_DIR, MEMORY_FILE, MODELS, SESSIONS_FILE, TIMEZONE, applyOneMillionContextRecovery, claudeCodeSystemPrompt, currentTimeZone, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, setEnvOverride, } from '../config.js';
22
+ import { AGENTS_DIR, MEMORY_FILE, MODELS, SESSIONS_FILE, TIMEZONE, applyOneMillionContextRecovery, claudeCodeSubprocessEnv, claudeCodeSystemPrompt, currentTimeZone, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext, setEnvOverride, } from '../config.js';
23
23
  import { parseTasks } from '../tools/shared.js';
24
24
  // 1.18.160 — also pull parseCronJobs + parseAgentCronJobs so getCronJobs()
25
25
  // returns the same merged set the runtime fires (CRON.md + agent CRON +
@@ -6401,6 +6401,8 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
6401
6401
  maxTurns: 3,
6402
6402
  // 1.18.192 — preset form for Claude Code subscription auth.
6403
6403
  systemPrompt: claudeCodeSystemPrompt('You are a data enumerator. You call the given tool once, extract the items from its response, and emit a strict JSON array. No commentary.', { minimal: true }),
6404
+ // 1.18.194 — propagate OAuth token to SDK subprocess.
6405
+ env: claudeCodeSubprocessEnv(),
6404
6406
  allowedTools: [tool],
6405
6407
  mcpServers,
6406
6408
  permissionMode: 'dontAsk',
@@ -9728,6 +9730,8 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
9728
9730
  maxTurns: 1,
9729
9731
  // 1.18.192 — preset form for Claude Code subscription auth.
9730
9732
  systemPrompt: claudeCodeSystemPrompt('You are a memory consolidation assistant. Extract only facts directly evidenced by the corpus. Be terse. Output exactly the requested format.', { minimal: true }),
9733
+ // 1.18.194 — propagate OAuth token to SDK subprocess.
9734
+ env: claudeCodeSubprocessEnv(),
9731
9735
  }),
9732
9736
  });
9733
9737
  for await (const msg of stream) {
package/dist/config.d.ts CHANGED
@@ -63,6 +63,31 @@ export declare function claudeCodeSystemPrompt(append: string, opts?: {
63
63
  append: string;
64
64
  excludeDynamicSections?: boolean;
65
65
  };
66
+ /**
67
+ * 1.18.194 — Build the env Record for a direct SDK `query()` call so the
68
+ * Claude Code OAuth token reaches the SDK subprocess.
69
+ *
70
+ * Why this matters: `getSecret('CLAUDE_CODE_OAUTH_TOKEN')` reads
71
+ * ~/.clementine/.env and stores the value in the in-memory constant
72
+ * `CLAUDE_CODE_OAUTH_TOKEN` — it intentionally does NOT write to
73
+ * process.env (config.ts:478 keeps secrets out of process.env to prevent
74
+ * leakage). When a direct `query()` call omits `env:`, the SDK defaults
75
+ * to `process.env` — which doesn't have the token — and authentication
76
+ * silently falls back to API-key mode and fails with "Not logged in".
77
+ *
78
+ * 1.18.192 fixed the systemPrompt shape (preset vs raw string) but missed
79
+ * this env-propagation half of the same auth bug. Daily-planner, weekly
80
+ * review, bg-planner, and several other Haiku utility paths were still
81
+ * silently failing AFTER 1.18.192 because env: wasn't being passed.
82
+ *
83
+ * Use this alongside `claudeCodeSystemPrompt()` for every direct `query()`
84
+ * call. The runAgent path already builds its own env via buildRunAgentEnv;
85
+ * the assistant.ts auto-memory + verifier already use SAFE_ENV (same
86
+ * pattern). This helper exists for the lightweight utility callers.
87
+ *
88
+ * At most one credential is exported, chosen in buildRunAgentEnv's priority order: OAuth > ANTHROPIC_AUTH_TOKEN > API key.
89
+ */
90
+ export declare function claudeCodeSubprocessEnv(): Record<string, string>;
66
91
  export declare function normalizeClaudeModelForOneMillionContext(model: string, mode?: OneMillionContextMode): string;
67
92
  export declare function usesOneMillionContext(model: string | null | undefined, mode?: OneMillionContextMode, plan?: ClaudePlan): boolean;
68
93
  /**
package/dist/config.js CHANGED
@@ -225,6 +225,51 @@ export function claudeCodeSystemPrompt(append, opts) {
225
225
  ...(opts?.minimal ? { excludeDynamicSections: true } : {}),
226
226
  };
227
227
  }
228
+ /**
229
+ * 1.18.194 — Build the env Record for a direct SDK `query()` call so the
230
+ * Claude Code OAuth token reaches the SDK subprocess.
231
+ *
232
+ * Why this matters: `getSecret('CLAUDE_CODE_OAUTH_TOKEN')` reads
233
+ * ~/.clementine/.env and stores the value in the in-memory constant
234
+ * `CLAUDE_CODE_OAUTH_TOKEN` — it intentionally does NOT write to
235
+ * process.env (config.ts:478 keeps secrets out of process.env to prevent
236
+ * leakage). When a direct `query()` call omits `env:`, the SDK defaults
237
+ * to `process.env` — which doesn't have the token — and authentication
238
+ * silently falls back to API-key mode and fails with "Not logged in".
239
+ *
240
+ * 1.18.192 fixed the systemPrompt shape (preset vs raw string) but missed
241
+ * this env-propagation half of the same auth bug. Daily-planner, weekly
242
+ * review, bg-planner, and several other Haiku utility paths were still
243
+ * silently failing AFTER 1.18.192 because env: wasn't being passed.
244
+ *
245
+ * Use this alongside `claudeCodeSystemPrompt()` for every direct `query()`
246
+ * call. The runAgent path already builds its own env via buildRunAgentEnv;
247
+ * the assistant.ts auto-memory + verifier already use SAFE_ENV (same
248
+ * pattern). This helper exists for the lightweight utility callers.
249
+ *
250
+ * At most one credential is exported, chosen in buildRunAgentEnv's priority order: OAuth > ANTHROPIC_AUTH_TOKEN > API key.
251
+ */
252
+ export function claudeCodeSubprocessEnv() {
253
+ const env = {
254
+ PATH: process.env.PATH ?? '',
255
+ HOME: process.env.HOME ?? '',
256
+ LANG: process.env.LANG ?? 'en_US.UTF-8',
257
+ TERM: process.env.TERM ?? 'xterm-256color',
258
+ USER: process.env.USER ?? '',
259
+ SHELL: process.env.SHELL ?? '',
260
+ CLEMENTINE_HOME: BASE_DIR,
261
+ };
262
+ const oauthTok = CLAUDE_CODE_OAUTH_TOKEN || process.env.CLAUDE_CODE_OAUTH_TOKEN;
263
+ const authTok = process.env.ANTHROPIC_AUTH_TOKEN;
264
+ const apiKey = ANTHROPIC_API_KEY || process.env.ANTHROPIC_API_KEY;
265
+ if (oauthTok)
266
+ env.CLAUDE_CODE_OAUTH_TOKEN = oauthTok;
267
+ else if (authTok)
268
+ env.ANTHROPIC_AUTH_TOKEN = authTok;
269
+ else if (apiKey)
270
+ env.ANTHROPIC_API_KEY = apiKey;
271
+ return env;
272
+ }
228
273
  export function normalizeClaudeModelForOneMillionContext(model, mode = currentOneMillionContextMode()) {
229
274
  const family = modelFamily(model);
230
275
  if (mode === 'on')
@@ -87,7 +87,6 @@ export declare class Gateway {
87
87
  private createBackgroundOffer;
88
88
  private queueBackgroundOffer;
89
89
  private formatBackgroundQueuedResponse;
90
- private queueBackgroundTaskAfterContextOverflow;
91
90
  /**
92
91
  * 1.18.191 — chat-side plan mode state machine.
93
92
  *
@@ -49,15 +49,13 @@ const CHAT_TIMEOUT_MS = 10 * 60 * 1000;
49
49
  * Safety net so no session runs forever, even if active.
50
50
  * Primary guardrail is cost budget (maxBudgetUsd), not this timer. */
51
51
  const CHAT_MAX_WALL_MS = 30 * 60 * 1000;
52
- // 1.18.189 — tightened from 6_000 / 16_000 because the recovery prompt
53
- // was eating ~22KB of the bg-task worker's context window before any
54
- // real work started. On 2026-05-12 the worker autocompact-thrashed while
55
- // reading project files; the new tighter caps give it ~10KB more headroom
56
- // to do actual tool calls. The dropped content (older memory recall,
57
- // less-relevant bg-task headlines) is recoverable via memory_search if
58
- // the model actually needs it.
52
+ // 1.18.189 — tightened cap on retry-recovery context. As of 1.18.194,
53
+ // only CHAT_CONTEXT_RETRY_CONTEXT_MAX_CHARS still has a caller (the legacy
54
+ // buildContextOverflowRetryPrompt, exported for an existing unit test
55
+ // but no longer invoked from the chat path). The system-prompt cap
56
+ // (CHAT_CONTEXT_RETRY_SYSTEM_MAX_CHARS) was removed when we deleted the
57
+ // in-line retry loop in favor of trusting the SDK's own autocompact.
59
58
  const CHAT_CONTEXT_RETRY_CONTEXT_MAX_CHARS = 3_000;
60
- const CHAT_CONTEXT_RETRY_SYSTEM_MAX_CHARS = 8_000;
61
59
  const BACKGROUND_TASK_ID_RE = /\bbg-[a-z0-9]+-[a-f0-9]{6}\b/i;
62
60
  function collectRunToolNames(runId) {
63
61
  if (!runId)
@@ -429,46 +427,12 @@ export class Gateway {
429
427
  `Use \`status ${task.id}\` or check the dashboard Background Tasks panel for progress.`,
430
428
  ].join('\n');
431
429
  }
432
- queueBackgroundTaskAfterContextOverflow(sessionKey, prompt) {
433
- // 1.18.190 — the chat-overflow recovery path is now the canonical
434
- // entry to the planner-orchestrator chain. Instead of queuing a
435
- // monolithic bg-task that tries to do everything in one worker
436
- // (which thrashed when the worker's context filled), we queue a
437
- // planner task. The planner is a tiny Sonnet LLM call that
438
- // decomposes the user's request into 3-7 PlanSteps; the
439
- // orchestrator then dispatches one step at a time, each with
440
- // its own fresh 200K worker window. See agent/bg-planner.ts +
441
- // agent/bg-orchestrator.ts for the full pattern.
442
- //
443
- // The legacy `detectComplexTaskForBackground` heuristic is no
444
- // longer used here — the planner itself decides how to decompose,
445
- // and per-step maxMinutes is governed by orchestrator settings.
446
- // Planner tasks get a tight 5-minute cap; total chain wall-clock
447
- // is the sum of each step's own maxMinutes.
448
- const task = createBackgroundTask({
449
- fromAgent: this.backgroundAgentForSession(sessionKey),
450
- prompt,
451
- maxMinutes: 5, // planner needs minutes, not hours
452
- sessionKey,
453
- kind: 'planner',
454
- });
455
- logger.warn({
456
- taskId: task.id,
457
- sessionKey,
458
- fromAgent: task.fromAgent,
459
- kind: 'planner',
460
- }, 'Queued planner task after repeated chat context overflow');
461
- return {
462
- task,
463
- response: [
464
- `The live chat context hit the limit, so I'm decomposing your request into chained steps via background task **${task.id}**.`,
465
- '',
466
- `Step 1: a Sonnet planner reads the request and emits a plan (~30 seconds).`,
467
- `Then each step runs as its own fresh task — you'll see step-by-step updates rather than one big "done" at the end.`,
468
- `Use \`status ${task.id}\` or the dashboard Background Tasks panel for progress.`,
469
- ].join('\n'),
470
- };
471
- }
430
+ // 1.18.194 — `queueBackgroundTaskAfterContextOverflow` removed.
431
+ // The chat-overflow path no longer auto-fires the planner. Instead
432
+ // the chat-error handler surfaces a clean "rephrase or `/plan`"
433
+ // message and trusts the SDK's own autocompact. The planner +
434
+ // orchestrator stay available as the implementation behind explicit
435
+ // `/plan` (see `_maybeHandlePlanMode`).
472
436
  /**
473
437
  * 1.18.191 — chat-side plan mode state machine.
474
438
  *
@@ -493,7 +457,7 @@ export class Gateway {
493
457
  */
494
458
  async _maybeHandlePlanMode(opts) {
495
459
  const sess = this.sessions.get(opts.sessionKey);
496
- const { detectPlanApproval } = await import('../agent/intent-classifier.js');
460
+ const { detectPlanApproval, detectPlanModeRequest } = await import('../agent/intent-classifier.js');
497
461
  const { planRequest, savePlan, loadPlan } = await import('../agent/bg-planner.js');
498
462
  const { dispatchChain } = await import('../agent/bg-orchestrator.js');
499
463
  // ── Path A: approval-pending ────────────────────────────────────
@@ -564,11 +528,31 @@ export class Gateway {
564
528
  // pending state. The model will see the user message normally.
565
529
  return { handled: false };
566
530
  }
567
- // ── Path B: multi-step entry ────────────────────────────────────
568
- if (opts.shape === 'multi-step' && sess) {
531
+ // ── Path B: explicit plan-mode entry (1.18.193) ─────────────────
532
+ //
533
+ // Plan mode is now OPT-IN, not auto-triggered by message shape.
534
+ // Default chat behavior matches 1.18.62: the SDK query runs the
535
+ // work in one continuous Sonnet session, like Nora did on April
536
+ // 28-29 (38 Bash calls in one session — no decomposition needed).
537
+ //
538
+ // The planner-orchestrator remains available for genuinely huge
539
+ // jobs, but the owner has to opt in:
540
+ //
541
+ // - Message starts with `/plan` (case-insensitive)
542
+ // - Message contains `[plan-mode]` token anywhere
543
+ //
544
+ // As of 1.18.194 there is no chat-overflow escape hatch into the
545
+ // planner: queueBackgroundTaskAfterContextOverflow was removed, and
546
+ // a context overflow now surfaces a "rephrase or `/plan`" message.
547
+ // The explicit opt-in below is how plan mode is entered.
548
+ //
549
+ // We keep shape classification (it still gates turn-context
550
+ // density for token savings on 'simple' messages), but shape no
551
+ // longer routes execution.
552
+ const planRequestSignal = detectPlanModeRequest(opts.userMessage);
553
+ if (planRequestSignal.requested && sess) {
554
+ const cleanedRequest = planRequestSignal.cleaned;
569
555
  try {
570
- // Stream a "thinking..." update so the user knows planning is
571
- // happening rather than seeing 30s of silence.
572
556
  if (opts.onText) {
573
557
  try {
574
558
  opts.onText('🤔 Planning the steps...');
@@ -576,7 +560,7 @@ export class Gateway {
576
560
  catch { /* non-fatal */ }
577
561
  }
578
562
  const plan = await planRequest({
579
- userRequest: opts.userMessage,
563
+ userRequest: cleanedRequest || opts.userMessage,
580
564
  originatingSessionKey: opts.sessionKey,
581
565
  ...(opts.activeProject ? { project: opts.activeProject } : {}),
582
566
  });
@@ -592,12 +576,14 @@ export class Gateway {
592
576
  };
593
577
  }
594
578
  catch (err) {
595
- logger.warn({ err, sessionKey: opts.sessionKey }, 'Plan mode: planRequest failed at entry');
579
+ logger.warn({ err, sessionKey: opts.sessionKey }, 'Plan mode: planRequest failed at explicit entry');
596
580
  // Fall through to normal chat. Better than blocking the owner.
597
581
  return { handled: false };
598
582
  }
599
583
  }
600
584
  // Not a plan-mode case — fall through to normal chat.
585
+ // This is the path 99% of messages take. Like 1.18.62 — the SDK
586
+ // query runs the work in one continuous Sonnet session.
601
587
  return { handled: false };
602
588
  }
603
589
  /** Format a plan for owner approval in chat. */
@@ -2328,7 +2314,6 @@ export class Gateway {
2328
2314
  // Interrupt flag was set but no useful partial text — just clear it.
2329
2315
  delete sessState.pendingInterrupt;
2330
2316
  }
2331
- let contextOverflowRecoveryPrompt = '';
2332
2317
  try {
2333
2318
  // ── Canonical SDK chat path (Phase 5) ────────────────────────
2334
2319
  // runAgent() owns chat. No legacy fallback — errors propagate
@@ -2471,7 +2456,6 @@ export class Gateway {
2471
2456
  const chatSystemAppend = resolvedSkills && resolvedSkills.promptBlock
2472
2457
  ? (baseSystemAppend ? `${baseSystemAppend}\n\n${resolvedSkills.promptBlock}` : resolvedSkills.promptBlock)
2473
2458
  : baseSystemAppend;
2474
- const retrySystemAppend = trimContextRecoveryText(chatSystemAppend, CHAT_CONTEXT_RETRY_SYSTEM_MAX_CHARS);
2475
2459
  // Per-turn context (recall + persistent learnings + silent
2476
2460
  // blocks + security/toolset directives) — real chat only.
2477
2461
  // Builder doesn't need recall of unrelated transcripts.
@@ -2616,34 +2600,12 @@ export class Gateway {
2616
2600
  },
2617
2601
  abortSignal: chatAc.signal,
2618
2602
  });
2619
- let didContextOverflowRetry = false;
2620
- const contextOverflowAfterRetryError = () => new Error('rapid_refill_breaker after context overflow retry');
2621
- const retryAfterContextOverflow = async () => {
2622
- if (didContextOverflowRetry)
2623
- throw contextOverflowAfterRetryError();
2624
- didContextOverflowRetry = true;
2625
- const retryPrompt = buildContextOverflowRetryPrompt({
2626
- chatPrompt,
2627
- turnContextPrefix,
2628
- project: sess?.project ?? null,
2629
- });
2630
- contextOverflowRecoveryPrompt = retryPrompt;
2631
- logger.info({
2632
- sessionKey: effectiveSessionKey,
2633
- hadResume: !!priorSdkSessionId,
2634
- promptChars: finalPrompt.length,
2635
- retryPromptChars: retryPrompt.length,
2636
- systemAppendChars: chatSystemAppend.length,
2637
- retrySystemAppendChars: retrySystemAppend.length,
2638
- }, 'Context overflow — retrying current message in fresh SDK session');
2639
- if (onProgress) {
2640
- await onProgress('refreshing conversation context...').catch(() => { });
2641
- }
2642
- this.assistant.clearSession(effectiveSessionKey);
2643
- return runAgent(retryPrompt, buildRunAgentChatOptions({
2644
- ...(retrySystemAppend ? { systemPromptAppend: retrySystemAppend } : {}),
2645
- }));
2646
- };
2603
+ // 1.18.194 — single SDK call. The SDK does its own autocompact
2604
+ // internally; we don't layer our own compress-and-retry on top.
2605
+ // If the SDK returns context_overflow (either thrown or as a
2606
+ // result with terminalReason), we surface a clean "rephrase or
2607
+ // /plan" message via the `case 'context_overflow':` handler
2608
+ // below. No more "Planning failed" half-finished chains.
2647
2609
  let runAgentResult;
2648
2610
  try {
2649
2611
  runAgentResult = await runAgent(finalPrompt, buildRunAgentChatOptions({
@@ -2655,34 +2617,18 @@ export class Gateway {
2655
2617
  if (chatAc.signal.aborted || classifyChatError(err) !== 'context_overflow') {
2656
2618
  throw err;
2657
2619
  }
2658
- runAgentResult = await retryAfterContextOverflow();
2620
+ // Re-throw so the outer catch's classifyChatError gets it
2621
+ // and routes to the 'context_overflow' case.
2622
+ throw err;
2659
2623
  }
2660
2624
  if (!chatAc.signal.aborted && runAgentResultIndicatesContextOverflow(runAgentResult)) {
2661
- if (didContextOverflowRetry) {
2662
- logger.info({
2663
- sessionKey: effectiveSessionKey,
2664
- subtype: runAgentResult.subtype,
2665
- terminalReason: runAgentResult.terminalReason,
2666
- textPreview: runAgentResult.text?.slice(0, 240),
2667
- }, 'Context overflow result after retry — queueing background task');
2668
- throw contextOverflowAfterRetryError();
2669
- }
2670
2625
  logger.info({
2671
2626
  sessionKey: effectiveSessionKey,
2672
2627
  subtype: runAgentResult.subtype,
2673
2628
  terminalReason: runAgentResult.terminalReason,
2674
2629
  textPreview: runAgentResult.text?.slice(0, 240),
2675
- }, 'Context overflow result — retrying current message in fresh SDK session');
2676
- runAgentResult = await retryAfterContextOverflow();
2677
- if (runAgentResultIndicatesContextOverflow(runAgentResult)) {
2678
- logger.info({
2679
- sessionKey: effectiveSessionKey,
2680
- subtype: runAgentResult.subtype,
2681
- terminalReason: runAgentResult.terminalReason,
2682
- textPreview: runAgentResult.text?.slice(0, 240),
2683
- }, 'Context overflow result after retry — queueing background task');
2684
- throw contextOverflowAfterRetryError();
2685
- }
2630
+ }, 'Context overflow result — autocompact ceiling reached, surfacing recovery message');
2631
+ throw new Error('context_overflow_after_autocompact');
2686
2632
  }
2687
2633
  if (ledgerRunMetadata) {
2688
2634
  ledgerRunMetadata.runId = runAgentResult.runId;
@@ -2767,19 +2713,31 @@ export class Gateway {
2767
2713
  applyOneMillionContextRecovery();
2768
2714
  this.clearSession(effectiveSessionKey);
2769
2715
  return oneMillionContextRecoveryMessage();
2770
- case 'context_overflow':
2771
- logger.info({ sessionKey }, 'Context overflow after retry queueing background task');
2716
+ case 'context_overflow': {
2717
+ // 1.18.194 — trust the SDK. By the time we see context_overflow
2718
+ // here, the SDK has ALREADY tried autocompact (it's built-in).
2719
+ // Our previous behavior was to compress + retry + queue a
2720
+ // separate planner background task. That layered our own
2721
+ // retry on top of the SDK's, and when any step in the planner
2722
+ // pipeline failed (auth, planning, chain dispatch), users saw
2723
+ // a confusing "Planning failed" message — that's what bit
2724
+ // Zach. SDK best practice: when autocompact + retry have both
2725
+ // failed, that's a real context ceiling. Surface a clean
2726
+ // message that gives the owner two recovery options, clear
2727
+ // the session pointer, and trust them to resend smaller or
2728
+ // opt in to explicit plan mode.
2729
+ logger.info({ sessionKey }, 'Context overflow — autocompact ceiling reached, resetting');
2772
2730
  this.assistant.clearSession(effectiveSessionKey);
2773
- {
2774
- const promptForBackground = contextOverflowRecoveryPrompt || chatPrompt;
2775
- const { response, task } = this.queueBackgroundTaskAfterContextOverflow(sessionKey, promptForBackground);
2776
- if (ledgerRunMetadata) {
2777
- ledgerRunMetadata.executionMode = 'background_queued';
2778
- ledgerRunMetadata.backgroundTaskId = task.id;
2779
- }
2780
- this.mirrorChatExchange(sessionKey, originalText, response, { model: 'chat-control' });
2781
- return response;
2782
- }
2731
+ const response = [
2732
+ "That work pushed past the context limit even with autocompact.",
2733
+ "I've reset our conversation. Two ways forward:",
2734
+ "",
2735
+ "1. Rephrase the task in smaller scope (e.g. 'just the first 10' instead of 'all 100')",
2736
+ "2. Use `/plan` to have me decompose it into chained workers before running",
2737
+ ].join('\n');
2738
+ this.mirrorChatExchange(sessionKey, originalText, response, { model: 'chat-control' });
2739
+ return response;
2740
+ }
2783
2741
  case 'auth':
2784
2742
  this.recordAuthFailure();
2785
2743
  return "I'm temporarily offline due to an authentication issue. The owner needs to re-authenticate — I'll recover automatically once it's resolved.";
package/dist/index.js CHANGED
@@ -638,6 +638,8 @@ async function asyncMain() {
638
638
  // 1.18.192 — preset form so SDK uses Claude Code subscription
639
639
  // auth (raw string → API-key path → "Not logged in" for Max users).
640
640
  systemPrompt: config.claudeCodeSystemPrompt('You are a memory consolidation assistant. Be concise.', { minimal: true }),
641
+ // 1.18.194 — propagate OAuth token to SDK subprocess.
642
+ env: config.claudeCodeSubprocessEnv(),
641
643
  }),
642
644
  });
643
645
  for await (const msg of stream) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.192",
3
+ "version": "1.18.194",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",