npm - clementine-agent - Versions diffs - 1.18.11 → 1.18.13 - Mend

clementine-agent 1.18.11 → 1.18.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43):

package/README.md +14 -3
package/dist/agent/assistant.d.ts +2 -0
package/dist/agent/assistant.js +171 -36
package/dist/agent/complexity-classifier.js +3 -0
package/dist/agent/self-improve-loop.d.ts +8 -2
package/dist/agent/self-improve-loop.js +35 -2
package/dist/agent/tool-router.d.ts +1 -0
package/dist/agent/tool-router.js +25 -2
package/dist/agent/turn-policy.d.ts +10 -0
package/dist/agent/turn-policy.js +34 -1
package/dist/brain/connector-recipes.d.ts +7 -5
package/dist/brain/connector-recipes.js +176 -4
package/dist/cli/dashboard.js +104 -12
package/dist/cli/index.js +332 -3
package/dist/config/config-doctor.d.ts +12 -0
package/dist/config/config-doctor.js +100 -1
package/dist/config/effective-config.js +3 -3
package/dist/config.js +3 -3
package/dist/gateway/credit-guard.d.ts +12 -0
package/dist/gateway/credit-guard.js +46 -0
package/dist/gateway/cron-scheduler.js +26 -0
package/dist/gateway/failure-diagnostics.d.ts +3 -0
package/dist/gateway/failure-diagnostics.js +49 -16
package/dist/gateway/failure-monitor.d.ts +3 -1
package/dist/gateway/failure-monitor.js +50 -4
package/dist/gateway/heartbeat-scheduler.js +29 -3
package/dist/gateway/job-health.d.ts +14 -0
package/dist/gateway/job-health.js +108 -0
package/dist/gateway/lanes.js +1 -1
package/dist/gateway/notification-context.d.ts +40 -0
package/dist/gateway/notification-context.js +159 -0
package/dist/gateway/router.d.ts +9 -1
package/dist/gateway/router.js +127 -55
package/dist/integrations/composio/mcp-bridge.d.ts +1 -0
package/dist/integrations/composio/mcp-bridge.js +29 -5
package/dist/memory/seed-user-model.d.ts +2 -0
package/dist/memory/seed-user-model.js +13 -4
package/dist/memory/store.d.ts +58 -15
package/dist/memory/store.js +189 -13
package/dist/tools/memory-tools.js +10 -2
package/dist/tools/shared.d.ts +19 -0
package/dist/types.d.ts +1 -0
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -324,14 +324,25 @@ clementine restart                    # apply changes
 Your overrides live in `~/.clementine/.env` — **they survive every `npm update -g` / `clementine update`** because they're in your data home, not the package directory.
+For spend/context tuning, `clementine budgets` gives a safer shortcut:
+```bash
+clementine budgets              # show chat/cron/heartbeat caps and 1M context state
+clementine budgets safe         # lower background budgets and disable Claude 1M context
+clementine budgets 1m on        # enable 1M context for eligible accounts / Extra Usage
+clementine budgets 1m off       # disable 1M context for maximum compatibility
+clementine budgets set chat 10  # raise one budget cap
+```
 **Commonly tuned knobs:**
 | Key | Default | What it does |
 |-----|---------|--------------|
 | `BUDGET_CHAT_USD` | `5.00` | Max spend per interactive chat message |
-| `BUDGET_CRON_T1_USD` | `2.00` | Max spend per tier-1 cron job |
-| `BUDGET_CRON_T2_USD` | `5.00` | Max spend per tier-2 cron job |
-| `BUDGET_HEARTBEAT_USD` | `0.50` | Max spend per heartbeat tick |
+| `BUDGET_CRON_T1_USD` | `0.75` | Max spend per tier-1 cron job |
+| `BUDGET_CRON_T2_USD` | `1.50` | Max spend per tier-2 cron job |
+| `BUDGET_HEARTBEAT_USD` | `0.25` | Max spend per heartbeat tick |
+| `CLAUDE_CODE_DISABLE_1M_CONTEXT` | `true` | `true`/`1` keeps Claude Code on 200K context unless the user explicitly enables 1M |
 | `DEFAULT_MODEL_TIER` | `sonnet` | Default model: `haiku` / `sonnet` / `opus` |
 | `HEARTBEAT_INTERVAL_MINUTES` | `30` | How often the agent auto-checks in |
 | `HEARTBEAT_ACTIVE_START` | `8` | First hour of the active window (0–23) |

package/dist/agent/assistant.d.ts CHANGED Viewed

@@ -30,6 +30,8 @@ export declare function estimateTokens(text: string): number;
 export declare function looksLikeContextThrashText(value: unknown): boolean;
 export declare function contextThrashRecoveryNotice(): string;
 export declare function buildContextThrashRecoveryPrompt(userRequest: string, priorFailureText?: string): string;
+export declare function looksLikeOneMillionContextError(value: unknown): boolean;
+export declare function looksLikeNoResponseRequested(value: unknown): boolean;
 /** Autonomous jobs use this sentinel to mean "completed, but do not notify the owner." */
 export declare function isAutonomousNothingOutput(response: string): boolean;
 export interface ProjectMeta {

package/dist/agent/assistant.js CHANGED Viewed

@@ -28,14 +28,14 @@ import { StallGuard } from './stall-guard.js';
 import { collectToolCalls, detectContradiction, buildCorrectionPrompt } from './contradiction-validator.js';
 import { recordToolOutcome as recordMcpToolOutcome } from './mcp-circuit-breaker.js';
 import { assembleContext } from '../memory/context-assembler.js';
-import * as embeddingsModule from '../memory/embeddings.js';
 import { PromptCache } from './prompt-cache.js';
 import { searchSkills as searchSkillsSync } from './skill-extractor.js';
 import { classifyIntent, getStrategyGuidance } from './intent-classifier.js';
 import { getEventLog } from './session-event-log.js';
-import { routeToolSurface, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
-import { decideTurnPolicy } from './turn-policy.js';
+import { routeToolSurface, TOOL_SURFACE_HARD_LIMIT, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
+import { decideTurn } from './turn-policy.js';
 import { loadClementineJson } from '../config/clementine-json.js';
+import { isCreditBalanceError, markBackgroundCreditBlocked } from '../gateway/credit-guard.js';
 // ── Channel capabilities ────────────────────────────────────────────
 /** Map channel label to its capabilities so the agent adapts its responses. */
 function getChannelCapabilities(channel) {
@@ -311,6 +311,8 @@ function formatTimeAgo(ms) {
 const CONTEXT_GUARD_MIN_TOKENS = 16_000;
 /** Warn threshold — context is getting tight. */
 const CONTEXT_GUARD_WARN_TOKENS = 32_000;
+/** Rotate SDK sessions before hidden resume history approaches the 200K cap. */
+const SESSION_ROTATE_INPUT_TOKENS = 140_000;
 /** Approximate context window sizes by model family. */
 const MODEL_CONTEXT_WINDOWS = {
     'haiku': 200_000,
@@ -324,6 +326,30 @@ function getContextWindow(model) {
     }
     return 200_000; // safe default
 }
+function resultInputTokens(result) {
+    let total = 0;
+    const modelUsage = result.modelUsage;
+    if (!modelUsage)
+        return 0;
+    for (const usage of Object.values(modelUsage)) {
+        total += usage.inputTokens ?? 0;
+        total += usage.cacheReadInputTokens ?? 0;
+        total += usage.cacheCreationInputTokens ?? 0;
+    }
+    return total;
+}
+function oneMillionContextDisabled() {
+    const value = process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT;
+    return value === undefined || !/^(0|false|no)$/i.test(value);
+}
+export function looksLikeOneMillionContextError(value) {
+    const text = String(value ?? '');
+    return /extra usage.*1m context|1m context.*extra usage|context-1m/i.test(text);
+}
+export function looksLikeNoResponseRequested(value) {
+    const text = String(value ?? '').trim();
+    return /^no response requested\.?$/i.test(text);
+}
 // ── Constants ────────────────────────────────────────────────────────
 const logger = pino({ name: 'clementine.assistant' });
 const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
@@ -685,6 +711,8 @@ export function isAutonomousNothingOutput(response) {
         return true;
     if (/^(_*NOTHING_*\s*)?\[MONITORING\]\s*$/i.test(trimmed))
         return true;
+    if (looksLikeNoResponseRequested(trimmed))
+        return true;
     if (trimmed.length > 80)
         return false;
     const lower = trimmed.toLowerCase();
@@ -1929,7 +1957,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         const profileScopeText = [profile?.description, profile?.systemPromptBody]
             .filter(Boolean)
             .join('\n');
-        const directScopeText = [promptScopeText, profileScopeText].filter(Boolean).join('\n');
+        const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
+        const directScopeText = [promptScopeText, autonomousToolRun ? profileScopeText : ''].filter(Boolean).join('\n');
         const emptyToolRoute = () => ({
             bundles: [],
             externalMcpServers: [],
@@ -1959,7 +1988,6 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 reason: bundles.length > 0 ? 'matched' : 'empty',
             };
         };
-        const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
         const promptToolRoute = routeToolSurface(promptScopeText);
         const profileToolRoute = routeToolSurface(profileScopeText);
         const contextToolRoute = routeToolSurface(contextRoutingText);
@@ -1967,7 +1995,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         const directFollowupNeedsContextTools = intentClassification?.type === 'followup'
             || /^(yes|yep|yeah|go|go ahead|do it|continue|pick up|use that|run it|send it|same thing)\b/i.test(promptScopeText.trim());
         const allowContextToolRoute = autonomousToolRun || (!promptHasToolRoute && directFollowupNeedsContextTools);
-        const safeProfileToolRoute = profileToolRoute.fullSurface ? emptyToolRoute() : profileToolRoute;
+        const safeProfileToolRoute = autonomousToolRun && !profileToolRoute.fullSurface
+            ? profileToolRoute
+            : emptyToolRoute();
         const safeContextToolRoute = allowContextToolRoute && !contextToolRoute.fullSurface
             ? contextToolRoute
             : emptyToolRoute();
@@ -2309,6 +2339,24 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 whitelist.add(mcpTool('goal_work'));
                 allowedTools = allowedTools.filter(t => whitelist.has(t));
             }
+            if (!toolRoute.fullSurface
+                && !adminNeeded
+                && !autonomousToolRun
+                && allowedTools.length > TOOL_SURFACE_HARD_LIMIT) {
+                const beforeAllowedToolCount = allowedTools.length;
+                const coreSdkTools = new Set(['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep', 'WebSearch', 'WebFetch']);
+                const clementineToolPrefixForCap = `mcp__${TOOLS_SERVER}__`;
+                allowedTools = allowedTools.filter(tool => coreSdkTools.has(tool) || tool.startsWith(clementineToolPrefixForCap));
+                externalMcpServers = {};
+                composioMcpServers = {};
+                logger.warn({
+                    sessionKey,
+                    beforeAllowedToolCount,
+                    afterAllowedToolCount: allowedTools.length,
+                    hardLimit: TOOL_SURFACE_HARD_LIMIT,
+                    bundles: toolRoute.bundles,
+                }, 'SDK allowed tool surface exceeded hard limit; falling back to core Clementine tools for this interactive turn');
+            }
         }
         // Permission mode: always 'bypassPermissions' — this is a daemon/harness with no interactive
         // terminal, so 'auto' mode (which requires plan support + human approval) doesn't apply.
@@ -2373,6 +2421,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             systemPrompt: fullSystemPrompt,
             model: resolvedModel,
             ...(fallback ? { fallbackModel: fallback } : {}),
+            ...(oneMillionContextDisabled() ? { betas: [] } : {}),
             permissionMode: effectivePermissionMode,
             allowDangerouslySkipPermissions: true,
             ...(sessionStore ? { sessionStore } : {}),
@@ -2423,7 +2472,14 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             // env only when the prompt/job mentions a connector-backed service.
             // Per-MCP-server env isolation still happens inside each mcpServers
             // entry; this only affects the Claude Code subprocess itself.
-            ...(shouldInheritClaudeEnv ? {} : { env: SAFE_ENV }),
+            ...(shouldInheritClaudeEnv ? {} : {
+                env: {
+                    ...SAFE_ENV,
+                    ...(process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT !== undefined
+                        ? { CLAUDE_CODE_DISABLE_1M_CONTEXT: process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT }
+                        : {}),
+                },
+            }),
             // Avoid ambient Claude Code user/project/local settings and plugins by
             // default. Those can silently attach hundreds of tools. Explicit MCP
             // servers above still work; "all integrations/full tool surface" keeps
@@ -2485,28 +2541,18 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             const useSearch = tier === 'search' || tier === 'full';
             const useDense = tier === 'full';
             const useProceduralAndGraph = tier === 'full';
-            // Pre-compute dense query embedding if the model is ready. Done outside
-            // searchContext (which is sync) so the dense path doesn't force the
-            // entire call chain to be async. If embedDense fails or isn't available,
-            // searchContext falls back to TF-IDF.
-            let queryDenseVec;
-            try {
-                if (useDense && embeddingsModule.isDenseReady()) {
-                    const v = await embeddingsModule.embedDense(enrichedQuery, true);
-                    if (v)
-                        queryDenseVec = v;
-                }
-            }
-            catch { /* fallback to sparse */ }
+            const searchOpts = {
+                limit: tier === 'full' ? SEARCH_CONTEXT_LIMIT : Math.min(SEARCH_CONTEXT_LIMIT, 4),
+                recencyLimit: tier === 'full' ? SEARCH_RECENCY_LIMIT : Math.min(SEARCH_RECENCY_LIMIT, 2),
+                agentSlug,
+                strict: strictIsolation,
+                sessionKey: sessionKey ?? undefined,
+                useDense,
+            };
             const results = useSearch
-                ? this.memoryStore.searchContext(enrichedQuery, {
-                    limit: tier === 'full' ? SEARCH_CONTEXT_LIMIT : Math.min(SEARCH_CONTEXT_LIMIT, 4),
-                    recencyLimit: tier === 'full' ? SEARCH_RECENCY_LIMIT : Math.min(SEARCH_RECENCY_LIMIT, 2),
-                    agentSlug,
-                    strict: strictIsolation,
-                    sessionKey: sessionKey ?? undefined,
-                    queryDenseVec,
-                })
+                ? await (this.memoryStore.searchContextAsync
+                    ? this.memoryStore.searchContextAsync(enrichedQuery, searchOpts)
+                    : Promise.resolve(this.memoryStore.searchContext(enrichedQuery, searchOpts)))
                 : [];
             if (results?.length > 0) {
                 const accessedIds = results
@@ -2776,11 +2822,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         let effectivePrompt = text;
         const recentExchangesForIntent = key ? this.lastExchanges.get(key) : undefined;
         const intent = classifyIntent(text, recentExchangesForIntent);
-        const turnPolicy = decideTurnPolicy({
+        const turnDecision = decideTurn({
             text,
             intent,
             hasRecentContext: !!(recentExchangesForIntent?.length || (key && this.sessions.has(key))),
         });
+        const turnPolicy = turnDecision.policy;
         const suppressContextInjection = turnPolicy.suppressContextInjection === true;
         if (key && turnPolicy.suppressSessionResume) {
             this.sessions.delete(key);
@@ -2905,7 +2952,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             intent: intent.type,
             confidence: intent.confidence,
             strategy: intent.suggestedStrategy,
-            turnPolicy,
+            turnDecision,
         }, 'Intent classified');
         // If caller explicitly passed maxTurns (e.g. cron), respect it.
         // Otherwise apply the turn policy. Complex/routed turns still get their
@@ -3034,11 +3081,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         // If a project override is set, skip auto-matching entirely
         const hasActiveSession = !!(sessionKey && this.sessions.has(sessionKey));
         const effectiveTurnPolicy = turnPolicy ?? (intentClassification
-            ? decideTurnPolicy({
+            ? decideTurn({
                 text: prompt,
                 intent: intentClassification,
                 hasRecentContext: hasActiveSession || ((sessionKey ? this.lastExchanges.get(sessionKey)?.length : 0) ?? 0) > 0,
-            })
+            }).policy
             : undefined);
         const retrievalTier = effectiveTurnPolicy?.retrievalTier ?? 'full';
         const [rawContext, autoMatchedProject, linkContexts] = await Promise.all([
@@ -3117,6 +3164,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         // un-validated (but still logged).
         let contradictionRetried = false;
         let contextRecoveryRetries = 0;
+        let noResponseRetried = false;
+        let rotateSessionAfterTurn = false;
         try {
             for (let attempt = 0; attempt <= PersonalAssistant.RATE_LIMIT_MAX_RETRIES; attempt++) {
                 const sdkOptions = await this.buildOptions({
@@ -3342,6 +3391,15 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                             sessionId = result.session_id;
                             this._lastTerminalReason = result.terminal_reason ?? undefined;
                             this.logQueryResult(result, 'chat', sessionKey ?? 'unknown', undefined, profile?.slug);
+                            const hiddenSessionTokens = resultInputTokens(result);
+                            if (sessionKey && hiddenSessionTokens >= SESSION_ROTATE_INPUT_TOKENS) {
+                                rotateSessionAfterTurn = true;
+                                logger.warn({
+                                    sessionKey,
+                                    inputTokens: hiddenSessionTokens,
+                                    threshold: SESSION_ROTATE_INPUT_TOKENS,
+                                }, 'SDK session near context ceiling — will rotate after this turn');
+                            }
                             if (result.is_error) {
                                 // Error subtypes have `errors` array; success subtype has `result` string
                                 const errorText = 'errors' in result ? result.errors.join('; ') : ('result' in result ? result.result : '');
@@ -3358,6 +3416,19 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                                             `• Reply "deep mode" to queue this as a background task with a bigger budget\n` +
                                             `• Raise the cap permanently: \`clementine config set BUDGET_CHAT_USD 10\` then \`clementine restart\``);
                                     }
+                                    else if (isCreditBalanceError(errorText)) {
+                                        markBackgroundCreditBlocked(errorText);
+                                        responseText = responseText || ('Claude says the account credit balance is too low. I paused background jobs for a few hours so they stop draining/retrying, but interactive chat will also fail until credits are available again.');
+                                    }
+                                    else if (looksLikeOneMillionContextError(errorText)) {
+                                        process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
+                                        if (sessionKey) {
+                                            this.sessions.delete(sessionKey);
+                                            this.exchangeCounts.set(sessionKey, 0);
+                                            this._compactedSessions.delete(sessionKey);
+                                        }
+                                        responseText = responseText || ("Claude rejected the 1M context beta for this account. I've disabled 1M context for this process and reset the session. To persist the fix across restarts, run `clementine config doctor --fix`, then `clementine restart`.");
+                                    }
                                     else if (lower.includes('rate') && lower.includes('limit')) {
                                         hitRateLimit = true;
                                     }
@@ -3476,6 +3547,19 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                             responseText += '\n\nI ran out of time but here\'s what I have so far. Want me to continue?';
                         }
                     }
+                    else if (isCreditBalanceError(e)) {
+                        markBackgroundCreditBlocked(e);
+                        responseText = responseText || ('Claude says the account credit balance is too low. I paused background jobs for a few hours so they stop draining/retrying, but interactive chat will also fail until credits are available again.');
+                    }
+                    else if (looksLikeOneMillionContextError(e)) {
+                        process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
+                        if (sessionKey) {
+                            this.sessions.delete(sessionKey);
+                            this.exchangeCounts.set(sessionKey, 0);
+                            this._compactedSessions.delete(sessionKey);
+                        }
+                        responseText = responseText || ("Claude rejected the 1M context beta for this account. I've disabled 1M context for this process and reset the session. To persist the fix across restarts, run `clementine config doctor --fix`, then `clementine restart`.");
+                    }
                     else if (errStr.includes('rate') && (errStr.includes('limit') || errStr.includes('rate_limit'))) {
                         hitRateLimit = true;
                         // Try to respect any retry hint the server surfaced in the error text.
@@ -3616,6 +3700,27 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     }
                     responseText = contextThrashRecoveryNotice();
                 }
+                if (looksLikeNoResponseRequested(responseText)) {
+                    logger.warn({ sessionKey, attempt }, 'SDK/model returned no-response sentinel during interactive chat');
+                    if (!noResponseRetried && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
+                        noResponseRetried = true;
+                        if (sessionKey) {
+                            this.sessions.delete(sessionKey);
+                            this.exchangeCounts.set(sessionKey, 0);
+                            this._compactedSessions.delete(sessionKey);
+                        }
+                        prompt =
+                            `[RESPONSE REQUIRED]\n` +
+                                `This is an interactive user message. The previous attempt returned "No response requested", which is invalid for a direct chat turn.\n\n` +
+                                `Answer the user's message directly and briefly. If you need more information, ask one clear question.\n\n` +
+                                `User message:\n${prompt}`;
+                        responseText = '';
+                        sessionId = '';
+                        rotateSessionAfterTurn = false;
+                        continue;
+                    }
+                    responseText = "I'm here. What would you like me to do?";
+                }
                 // ── Response guarantee ─────────────────────────────────────────
                 // The model often generates 30+ tool calls with minimal/no text. Ensure
                 // the user always gets a substantive response after real work is done.
@@ -3636,9 +3741,15 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                         responseText = `I started working on that (${toolCalls.length} tool calls). The gateway should be continuing this in the background.`;
                     }
                 }
-                if (sessionKey && sessionId) {
+                if (sessionKey && sessionId && !rotateSessionAfterTurn) {
                     this.sessions.set(sessionKey, sessionId);
                 }
+                else if (sessionKey && rotateSessionAfterTurn) {
+                    this.sessions.delete(sessionKey);
+                    this.exchangeCounts.set(sessionKey, 0);
+                    this._compactedSessions.delete(sessionKey);
+                    logger.info({ sessionKey }, 'Rotated SDK session after high-token turn');
+                }
                 // Log tool calls to transcript for audit trail
                 if (sessionKey && toolCalls.length > 0 && this.memoryStore) {
                     try {
@@ -4693,7 +4804,21 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 }
             }
             else if (message.type === 'result') {
-                this.logQueryResult(message, 'heartbeat', 'heartbeat');
+                const result = message;
+                if (result.is_error) {
+                    const errText = 'errors' in result
+                        ? result.errors.join('; ')
+                        : String(result.result ?? '');
+                    if (isCreditBalanceError(errText)) {
+                        markBackgroundCreditBlocked(errText);
+                        throw new Error(errText);
+                    }
+                    if (looksLikeOneMillionContextError(errText)) {
+                        process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
+                        throw new Error(errText);
+                    }
+                }
+                this.logQueryResult(result, 'heartbeat', 'heartbeat');
             }
             else if (message.type === 'system') {
                 this.captureMcpStatus(message);
@@ -5024,12 +5149,22 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                         // "budget" was catching Anthropic's unrelated "does not support
                         // user-configurable task budgets" error and pinning perfectly
                         // healthy Haiku jobs as permanent failures.
-                        if (result.is_error && 'result' in result) {
-                            const exitText = String(result.result ?? '');
+                        if (result.is_error) {
+                            const exitText = 'errors' in result
+                                ? result.errors.join('; ')
+                                : String(result.result ?? '');
                             if (exitText.includes('max_budget_usd')) {
                                 logger.warn({ job: jobName }, 'Cron job hit dollar budget cap — treating as permanent error');
                                 throw new Error(`Budget exceeded for cron job '${jobName}'`);
                             }
+                            if (isCreditBalanceError(exitText)) {
+                                markBackgroundCreditBlocked(exitText);
+                                throw new Error(exitText);
+                            }
+                            if (looksLikeOneMillionContextError(exitText)) {
+                                process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
+                                throw new Error(exitText);
+                            }
                         }
                         this.logQueryResult(result, 'cron', `cron:${jobName}`, jobName, sdkOptions.env?.CLEMENTINE_TEAM_AGENT || undefined);
                     }

package/dist/agent/complexity-classifier.js CHANGED Viewed

@@ -22,6 +22,9 @@ const DEEP_MODE_ASKS = [
     /\brun\s+in\s+the\s+background\b/i,
     /\bdeep\s+(mode|dive|work)\b/i,
     /\bbackground\s+(task|work|job)\b/i,
+    /\bkeep\s+working\b/i,
+    /\bdon'?t\s+stop\b/i,
+    /\buntil\s+(it'?s\s+)?(done|finished|complete|fixed)\b/i,
     /\btake\s+your\s+time\b/i,
 ];
 /**

package/dist/agent/self-improve-loop.d.ts CHANGED Viewed

@@ -7,8 +7,8 @@
  * when consecutiveErrors >= 3), classifies the failure pattern from
  * recentErrors, and either:
  *
- *   - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
- *     and DMs the OWNING agent via their bot
+ *   - Writes a proposal for safe cron-config fixes by default so the owner
+ *     can approve before Clementine edits CRON.md
  *   - Writes a proposal to self-improve/pending-changes/ and DMs the
  *     owning agent the diagnosis (full audit-inbox button approval is
  *     a separate Phase 8b ship)
@@ -91,6 +91,11 @@ export interface SelfImproveLoopOptions {
      * call tick() directly without racing the watcher.
      */
     disableWatch?: boolean;
+    /**
+     * Opt into the legacy behavior where recognized low-risk CRON.md scalar
+     * edits are applied immediately. Default false: write a pending proposal.
+     */
+    allowAutoApplySafeFixes?: boolean;
 }
 export declare function classifyFailure(recentErrors: string[]): FixRecipe;
 export declare class SelfImproveLoop {
@@ -101,6 +106,7 @@ export declare class SelfImproveLoop {
     private readonly agentsDir;
     private readonly dispatcher;
     private readonly watchEnabled;
+    private readonly allowAutoApplySafeFixes;
     private timer;
     private watcher;
     private debounceTimer;

package/dist/agent/self-improve-loop.js CHANGED Viewed

@@ -7,8 +7,8 @@
  * when consecutiveErrors >= 3), classifies the failure pattern from
  * recentErrors, and either:
  *
- *   - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
- *     and DMs the OWNING agent via their bot
+ *   - Writes a proposal for safe cron-config fixes by default so the owner
+ *     can approve before Clementine edits CRON.md
  *   - Writes a proposal to self-improve/pending-changes/ and DMs the
  *     owning agent the diagnosis (full audit-inbox button approval is
  *     a separate Phase 8b ship)
@@ -239,6 +239,7 @@ export class SelfImproveLoop {
     agentsDir;
     dispatcher;
     watchEnabled;
+    allowAutoApplySafeFixes;
     timer = null;
     watcher = null;
     debounceTimer = null;
@@ -252,6 +253,7 @@ export class SelfImproveLoop {
         this.cronPath = opts.cronPath ?? CRON_PATH;
         this.agentsDir = opts.agentsDir ?? AGENTS_ROOT;
         this.watchEnabled = opts.disableWatch !== true;
+        this.allowAutoApplySafeFixes = opts.allowAutoApplySafeFixes === true;
     }
     start() {
         if (this.running)
@@ -394,6 +396,37 @@ export class SelfImproveLoop {
                 logger.warn({ jobName: trigger.jobName, agentSlug }, 'Job not found in any CRON.md — cannot apply fix');
                 return;
             }
+            const wouldChange = recipe.apply ? recipe.apply({ ...lookup.job }) : true;
+            if (!wouldChange) {
+                counts.noop++;
+                logger.info({ jobName: trigger.jobName, agentSlug }, 'Fix recipe is already in place — trigger removed without further action');
+                this.logAutonomy('fix_noop', trigger, { reason: 'already-applied' });
+                return;
+            }
+            if (!this.allowAutoApplySafeFixes) {
+                const id = `proposal-${Date.now()}-${trigger.jobName.replace(/[^a-z0-9-]/gi, '_')}`;
+                const record = {
+                    id,
+                    jobName: trigger.jobName,
+                    ...(agentSlug ? { agentSlug } : {}),
+                    category: recipe.category,
+                    description: recipe.description,
+                    recentErrors: trigger.recentErrors,
+                    consecutiveErrors: trigger.consecutiveErrors,
+                    proposedAt: new Date().toISOString(),
+                };
+                const file = writePendingChange(record, this.pendingDir);
+                counts.pending++;
+                this.logAutonomy('proposal_written', trigger, { category: recipe.category, proposalId: id, autoApplyAllowed: false });
+                await this.notifyAgent(agentSlug, [
+                    `⚠️ **${trigger.jobName}** has failed ${trigger.consecutiveErrors} times in a row.`,
+                    '',
+                    recipe.description,
+                    '',
+                    `Fix proposal saved to \`${file}\`. Review and approve before editing CRON.md.`,
+                ].join('\n'));
+                return;
+            }
             const prevFields = applyCronEdit(lookup, recipe);
             if (prevFields) {
                 counts.applied++;

package/dist/agent/tool-router.d.ts CHANGED Viewed

@@ -28,6 +28,7 @@ interface ToolBundleDefinition {
     inheritFullClaudeEnv?: boolean;
 }
 export declare const TOOL_SURFACE_WARN_THRESHOLD = 150;
+export declare const TOOL_SURFACE_HARD_LIMIT = 220;
 export declare const TOOL_BUNDLES: readonly ToolBundleDefinition[];
 export declare function routeToolSurface(text: string | undefined): ToolRouteDecision;
 export {};

package/dist/agent/tool-router.js CHANGED Viewed

@@ -7,6 +7,7 @@
  * broader access.
  */
 export const TOOL_SURFACE_WARN_THRESHOLD = 150;
+export const TOOL_SURFACE_HARD_LIMIT = 220;
 export const TOOL_BUNDLES = [
     {
         id: 'email_outlook',
@@ -78,7 +79,7 @@ export const TOOL_BUNDLES = [
     },
     {
         id: 'browser',
-        patterns: [/\b(browser|playwright|localhost|web page|webpage|screenshot|click|fill form|navigate)\b/i],
+        patterns: [/\b(playwright|localhost|127\.0\.0\.1|web\s?page|webpage|website|screenshot|click|fill(?: out)? form|navigate to|open .*browser|use .*browser|inspect .*page)\b/i],
         externalMcpServers: ['browser-harness', 'browsermcp', 'playwright', 'kernel', 'plugin:playwright:playwright'],
     },
     {
@@ -125,6 +126,15 @@ export const TOOL_BUNDLES = [
 function uniqueStrings(values) {
     return [...new Set([...values].filter((v) => !!v && v.trim().length > 0))];
 }
+function explicitMcpServers(scopeText) {
+    const servers = new Set();
+    const re = /\bmcp__([A-Za-z0-9_-]+)__[A-Za-z0-9_.:-]+\b/g;
+    let match;
+    while ((match = re.exec(scopeText)) !== null) {
+        servers.add(match[1]);
+    }
+    return uniqueStrings(servers);
+}
 export function routeToolSurface(text) {
     const scopeText = text?.trim() ?? '';
     if (!scopeText) {
@@ -161,13 +171,26 @@ export function routeToolSurface(text) {
             composio.add(slug);
         inheritFullClaudeEnv = inheritFullClaudeEnv || bundle.inheritFullClaudeEnv === true;
     }
+    for (const server of explicitMcpServers(scopeText)) {
+        if (server.startsWith('claude_ai_')) {
+            external.add(server.slice('claude_ai_'.length));
+        }
+        else {
+            // Exact `mcp__<server>__<tool>` mentions are authoritative. Add the
+            // name as both a direct MCP server and a Composio toolkit; whichever
+            // source is actually connected will mount, and the other path no-ops.
+            external.add(server);
+            composio.add(server);
+        }
+        inheritFullClaudeEnv = true;
+    }
     return {
         bundles: uniqueStrings(bundles),
         externalMcpServers: uniqueStrings(external),
         composioToolkits: uniqueStrings(composio),
         inheritFullClaudeEnv,
         fullSurface: false,
-        reason: bundles.size > 0 ? 'matched' : 'empty',
+        reason: bundles.size > 0 || external.size > 0 || composio.size > 0 ? 'matched' : 'empty',
     };
 }
 //# sourceMappingURL=tool-router.js.map

package/dist/agent/turn-policy.d.ts CHANGED Viewed

@@ -5,8 +5,10 @@
  * small prompt, but any sign of memory dependence, tool work, or ambiguity
  * promotes the request to a richer path.
  */
+import { type ToolRouteDecision } from './tool-router.js';
 import type { IntentClassification } from './intent-classifier.js';
 export type RetrievalTier = 'none' | 'core' | 'search' | 'full';
+export type TurnExecutionMode = 'local' | 'lightweight_llm' | 'tool_llm' | 'background';
 export interface TurnPolicy {
     retrievalTier: RetrievalTier;
     disableAllTools: boolean;
@@ -27,6 +29,14 @@ export interface TurnPolicyInput {
     hasRecentContext: boolean;
     isAutonomous?: boolean;
 }
+export interface TurnDecision {
+    mode: TurnExecutionMode;
+    policy: TurnPolicy;
+    toolRoute: ToolRouteDecision;
+    userVisibleStatus: string;
+    reason: string;
+}
 export declare function isStandaloneGreeting(text: string): boolean;
 export declare function decideTurnPolicy(input: TurnPolicyInput): TurnPolicy;
+export declare function decideTurn(input: TurnPolicyInput): TurnDecision;
 //# sourceMappingURL=turn-policy.d.ts.map