clementine-agent 1.18.11 → 1.18.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +14 -3
  2. package/dist/agent/assistant.d.ts +2 -0
  3. package/dist/agent/assistant.js +171 -36
  4. package/dist/agent/complexity-classifier.js +3 -0
  5. package/dist/agent/self-improve-loop.d.ts +8 -2
  6. package/dist/agent/self-improve-loop.js +35 -2
  7. package/dist/agent/tool-router.d.ts +1 -0
  8. package/dist/agent/tool-router.js +25 -2
  9. package/dist/agent/turn-policy.d.ts +10 -0
  10. package/dist/agent/turn-policy.js +34 -1
  11. package/dist/brain/connector-recipes.d.ts +7 -5
  12. package/dist/brain/connector-recipes.js +176 -4
  13. package/dist/cli/dashboard.js +104 -12
  14. package/dist/cli/index.js +332 -3
  15. package/dist/config/config-doctor.d.ts +12 -0
  16. package/dist/config/config-doctor.js +100 -1
  17. package/dist/config/effective-config.js +3 -3
  18. package/dist/config.js +3 -3
  19. package/dist/gateway/credit-guard.d.ts +12 -0
  20. package/dist/gateway/credit-guard.js +46 -0
  21. package/dist/gateway/cron-scheduler.js +26 -0
  22. package/dist/gateway/failure-diagnostics.d.ts +3 -0
  23. package/dist/gateway/failure-diagnostics.js +49 -16
  24. package/dist/gateway/failure-monitor.d.ts +3 -1
  25. package/dist/gateway/failure-monitor.js +50 -4
  26. package/dist/gateway/heartbeat-scheduler.js +29 -3
  27. package/dist/gateway/job-health.d.ts +14 -0
  28. package/dist/gateway/job-health.js +108 -0
  29. package/dist/gateway/lanes.js +1 -1
  30. package/dist/gateway/notification-context.d.ts +40 -0
  31. package/dist/gateway/notification-context.js +159 -0
  32. package/dist/gateway/router.d.ts +9 -1
  33. package/dist/gateway/router.js +127 -55
  34. package/dist/integrations/composio/mcp-bridge.d.ts +1 -0
  35. package/dist/integrations/composio/mcp-bridge.js +29 -5
  36. package/dist/memory/seed-user-model.d.ts +2 -0
  37. package/dist/memory/seed-user-model.js +13 -4
  38. package/dist/memory/store.d.ts +58 -15
  39. package/dist/memory/store.js +189 -13
  40. package/dist/tools/memory-tools.js +10 -2
  41. package/dist/tools/shared.d.ts +19 -0
  42. package/dist/types.d.ts +1 -0
  43. package/package.json +1 -1
package/README.md CHANGED
@@ -324,14 +324,25 @@ clementine restart # apply changes
324
324
 
325
325
  Your overrides live in `~/.clementine/.env` — **they survive every `npm update -g` / `clementine update`** because they're in your data home, not the package directory.
326
326
 
327
+ For spend/context tuning, `clementine budgets` gives a safer shortcut:
328
+
329
+ ```bash
330
+ clementine budgets # show chat/cron/heartbeat caps and 1M context state
331
+ clementine budgets safe # lower background budgets and disable Claude 1M context
332
+ clementine budgets 1m on # enable 1M context for eligible accounts / Extra Usage
333
+ clementine budgets 1m off # disable 1M context for maximum compatibility
334
+ clementine budgets set chat 10 # raise one budget cap
335
+ ```
336
+
327
337
  **Commonly tuned knobs:**
328
338
 
329
339
  | Key | Default | What it does |
330
340
  |-----|---------|--------------|
331
341
  | `BUDGET_CHAT_USD` | `5.00` | Max spend per interactive chat message |
332
- | `BUDGET_CRON_T1_USD` | `2.00` | Max spend per tier-1 cron job |
333
- | `BUDGET_CRON_T2_USD` | `5.00` | Max spend per tier-2 cron job |
334
- | `BUDGET_HEARTBEAT_USD` | `0.50` | Max spend per heartbeat tick |
342
+ | `BUDGET_CRON_T1_USD` | `0.75` | Max spend per tier-1 cron job |
343
+ | `BUDGET_CRON_T2_USD` | `1.50` | Max spend per tier-2 cron job |
344
+ | `BUDGET_HEARTBEAT_USD` | `0.25` | Max spend per heartbeat tick |
345
+ | `CLAUDE_CODE_DISABLE_1M_CONTEXT` | `true` | `true`/`1` keeps Claude Code on 200K context unless the user explicitly enables 1M |
335
346
  | `DEFAULT_MODEL_TIER` | `sonnet` | Default model: `haiku` / `sonnet` / `opus` |
336
347
  | `HEARTBEAT_INTERVAL_MINUTES` | `30` | How often the agent auto-checks in |
337
348
  | `HEARTBEAT_ACTIVE_START` | `8` | First hour of the active window (0–23) |
@@ -30,6 +30,8 @@ export declare function estimateTokens(text: string): number;
30
30
  export declare function looksLikeContextThrashText(value: unknown): boolean;
31
31
  export declare function contextThrashRecoveryNotice(): string;
32
32
  export declare function buildContextThrashRecoveryPrompt(userRequest: string, priorFailureText?: string): string;
33
+ export declare function looksLikeOneMillionContextError(value: unknown): boolean;
34
+ export declare function looksLikeNoResponseRequested(value: unknown): boolean;
33
35
  /** Autonomous jobs use this sentinel to mean "completed, but do not notify the owner." */
34
36
  export declare function isAutonomousNothingOutput(response: string): boolean;
35
37
  export interface ProjectMeta {
@@ -28,14 +28,14 @@ import { StallGuard } from './stall-guard.js';
28
28
  import { collectToolCalls, detectContradiction, buildCorrectionPrompt } from './contradiction-validator.js';
29
29
  import { recordToolOutcome as recordMcpToolOutcome } from './mcp-circuit-breaker.js';
30
30
  import { assembleContext } from '../memory/context-assembler.js';
31
- import * as embeddingsModule from '../memory/embeddings.js';
32
31
  import { PromptCache } from './prompt-cache.js';
33
32
  import { searchSkills as searchSkillsSync } from './skill-extractor.js';
34
33
  import { classifyIntent, getStrategyGuidance } from './intent-classifier.js';
35
34
  import { getEventLog } from './session-event-log.js';
36
- import { routeToolSurface, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
37
- import { decideTurnPolicy } from './turn-policy.js';
35
+ import { routeToolSurface, TOOL_SURFACE_HARD_LIMIT, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
36
+ import { decideTurn } from './turn-policy.js';
38
37
  import { loadClementineJson } from '../config/clementine-json.js';
38
+ import { isCreditBalanceError, markBackgroundCreditBlocked } from '../gateway/credit-guard.js';
39
39
  // ── Channel capabilities ────────────────────────────────────────────
40
40
  /** Map channel label to its capabilities so the agent adapts its responses. */
41
41
  function getChannelCapabilities(channel) {
@@ -311,6 +311,8 @@ function formatTimeAgo(ms) {
311
311
  const CONTEXT_GUARD_MIN_TOKENS = 16_000;
312
312
  /** Warn threshold — context is getting tight. */
313
313
  const CONTEXT_GUARD_WARN_TOKENS = 32_000;
314
+ /** Rotate SDK sessions before hidden resume history approaches the 200K cap. */
315
+ const SESSION_ROTATE_INPUT_TOKENS = 140_000;
314
316
  /** Approximate context window sizes by model family. */
315
317
  const MODEL_CONTEXT_WINDOWS = {
316
318
  'haiku': 200_000,
@@ -324,6 +326,30 @@ function getContextWindow(model) {
324
326
  }
325
327
  return 200_000; // safe default
326
328
  }
329
+ function resultInputTokens(result) {
330
+ let total = 0;
331
+ const modelUsage = result.modelUsage;
332
+ if (!modelUsage)
333
+ return 0;
334
+ for (const usage of Object.values(modelUsage)) {
335
+ total += usage.inputTokens ?? 0;
336
+ total += usage.cacheReadInputTokens ?? 0;
337
+ total += usage.cacheCreationInputTokens ?? 0;
338
+ }
339
+ return total;
340
+ }
341
+ function oneMillionContextDisabled() {
342
+ const value = process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT;
343
+ return value === undefined || !/^(0|false|no)$/i.test(value);
344
+ }
345
+ export function looksLikeOneMillionContextError(value) {
346
+ const text = String(value ?? '');
347
+ return /extra usage.*1m context|1m context.*extra usage|context-1m/i.test(text);
348
+ }
349
+ export function looksLikeNoResponseRequested(value) {
350
+ const text = String(value ?? '').trim();
351
+ return /^no response requested\.?$/i.test(text);
352
+ }
327
353
  // ── Constants ────────────────────────────────────────────────────────
328
354
  const logger = pino({ name: 'clementine.assistant' });
329
355
  const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
@@ -685,6 +711,8 @@ export function isAutonomousNothingOutput(response) {
685
711
  return true;
686
712
  if (/^(_*NOTHING_*\s*)?\[MONITORING\]\s*$/i.test(trimmed))
687
713
  return true;
714
+ if (looksLikeNoResponseRequested(trimmed))
715
+ return true;
688
716
  if (trimmed.length > 80)
689
717
  return false;
690
718
  const lower = trimmed.toLowerCase();
@@ -1929,7 +1957,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
1929
1957
  const profileScopeText = [profile?.description, profile?.systemPromptBody]
1930
1958
  .filter(Boolean)
1931
1959
  .join('\n');
1932
- const directScopeText = [promptScopeText, profileScopeText].filter(Boolean).join('\n');
1960
+ const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
1961
+ const directScopeText = [promptScopeText, autonomousToolRun ? profileScopeText : ''].filter(Boolean).join('\n');
1933
1962
  const emptyToolRoute = () => ({
1934
1963
  bundles: [],
1935
1964
  externalMcpServers: [],
@@ -1959,7 +1988,6 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
1959
1988
  reason: bundles.length > 0 ? 'matched' : 'empty',
1960
1989
  };
1961
1990
  };
1962
- const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
1963
1991
  const promptToolRoute = routeToolSurface(promptScopeText);
1964
1992
  const profileToolRoute = routeToolSurface(profileScopeText);
1965
1993
  const contextToolRoute = routeToolSurface(contextRoutingText);
@@ -1967,7 +1995,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
1967
1995
  const directFollowupNeedsContextTools = intentClassification?.type === 'followup'
1968
1996
  || /^(yes|yep|yeah|go|go ahead|do it|continue|pick up|use that|run it|send it|same thing)\b/i.test(promptScopeText.trim());
1969
1997
  const allowContextToolRoute = autonomousToolRun || (!promptHasToolRoute && directFollowupNeedsContextTools);
1970
- const safeProfileToolRoute = profileToolRoute.fullSurface ? emptyToolRoute() : profileToolRoute;
1998
+ const safeProfileToolRoute = autonomousToolRun && !profileToolRoute.fullSurface
1999
+ ? profileToolRoute
2000
+ : emptyToolRoute();
1971
2001
  const safeContextToolRoute = allowContextToolRoute && !contextToolRoute.fullSurface
1972
2002
  ? contextToolRoute
1973
2003
  : emptyToolRoute();
@@ -2309,6 +2339,24 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2309
2339
  whitelist.add(mcpTool('goal_work'));
2310
2340
  allowedTools = allowedTools.filter(t => whitelist.has(t));
2311
2341
  }
2342
+ if (!toolRoute.fullSurface
2343
+ && !adminNeeded
2344
+ && !autonomousToolRun
2345
+ && allowedTools.length > TOOL_SURFACE_HARD_LIMIT) {
2346
+ const beforeAllowedToolCount = allowedTools.length;
2347
+ const coreSdkTools = new Set(['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep', 'WebSearch', 'WebFetch']);
2348
+ const clementineToolPrefixForCap = `mcp__${TOOLS_SERVER}__`;
2349
+ allowedTools = allowedTools.filter(tool => coreSdkTools.has(tool) || tool.startsWith(clementineToolPrefixForCap));
2350
+ externalMcpServers = {};
2351
+ composioMcpServers = {};
2352
+ logger.warn({
2353
+ sessionKey,
2354
+ beforeAllowedToolCount,
2355
+ afterAllowedToolCount: allowedTools.length,
2356
+ hardLimit: TOOL_SURFACE_HARD_LIMIT,
2357
+ bundles: toolRoute.bundles,
2358
+ }, 'SDK allowed tool surface exceeded hard limit; falling back to core Clementine tools for this interactive turn');
2359
+ }
2312
2360
  }
2313
2361
  // Permission mode: always 'bypassPermissions' — this is a daemon/harness with no interactive
2314
2362
  // terminal, so 'auto' mode (which requires plan support + human approval) doesn't apply.
@@ -2373,6 +2421,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2373
2421
  systemPrompt: fullSystemPrompt,
2374
2422
  model: resolvedModel,
2375
2423
  ...(fallback ? { fallbackModel: fallback } : {}),
2424
+ ...(oneMillionContextDisabled() ? { betas: [] } : {}),
2376
2425
  permissionMode: effectivePermissionMode,
2377
2426
  allowDangerouslySkipPermissions: true,
2378
2427
  ...(sessionStore ? { sessionStore } : {}),
@@ -2423,7 +2472,14 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2423
2472
  // env only when the prompt/job mentions a connector-backed service.
2424
2473
  // Per-MCP-server env isolation still happens inside each mcpServers
2425
2474
  // entry; this only affects the Claude Code subprocess itself.
2426
- ...(shouldInheritClaudeEnv ? {} : { env: SAFE_ENV }),
2475
+ ...(shouldInheritClaudeEnv ? {} : {
2476
+ env: {
2477
+ ...SAFE_ENV,
2478
+ ...(process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT !== undefined
2479
+ ? { CLAUDE_CODE_DISABLE_1M_CONTEXT: process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT }
2480
+ : {}),
2481
+ },
2482
+ }),
2427
2483
  // Avoid ambient Claude Code user/project/local settings and plugins by
2428
2484
  // default. Those can silently attach hundreds of tools. Explicit MCP
2429
2485
  // servers above still work; "all integrations/full tool surface" keeps
@@ -2485,28 +2541,18 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2485
2541
  const useSearch = tier === 'search' || tier === 'full';
2486
2542
  const useDense = tier === 'full';
2487
2543
  const useProceduralAndGraph = tier === 'full';
2488
- // Pre-compute dense query embedding if the model is ready. Done outside
2489
- // searchContext (which is sync) so the dense path doesn't force the
2490
- // entire call chain to be async. If embedDense fails or isn't available,
2491
- // searchContext falls back to TF-IDF.
2492
- let queryDenseVec;
2493
- try {
2494
- if (useDense && embeddingsModule.isDenseReady()) {
2495
- const v = await embeddingsModule.embedDense(enrichedQuery, true);
2496
- if (v)
2497
- queryDenseVec = v;
2498
- }
2499
- }
2500
- catch { /* fallback to sparse */ }
2544
+ const searchOpts = {
2545
+ limit: tier === 'full' ? SEARCH_CONTEXT_LIMIT : Math.min(SEARCH_CONTEXT_LIMIT, 4),
2546
+ recencyLimit: tier === 'full' ? SEARCH_RECENCY_LIMIT : Math.min(SEARCH_RECENCY_LIMIT, 2),
2547
+ agentSlug,
2548
+ strict: strictIsolation,
2549
+ sessionKey: sessionKey ?? undefined,
2550
+ useDense,
2551
+ };
2501
2552
  const results = useSearch
2502
- ? this.memoryStore.searchContext(enrichedQuery, {
2503
- limit: tier === 'full' ? SEARCH_CONTEXT_LIMIT : Math.min(SEARCH_CONTEXT_LIMIT, 4),
2504
- recencyLimit: tier === 'full' ? SEARCH_RECENCY_LIMIT : Math.min(SEARCH_RECENCY_LIMIT, 2),
2505
- agentSlug,
2506
- strict: strictIsolation,
2507
- sessionKey: sessionKey ?? undefined,
2508
- queryDenseVec,
2509
- })
2553
+ ? await (this.memoryStore.searchContextAsync
2554
+ ? this.memoryStore.searchContextAsync(enrichedQuery, searchOpts)
2555
+ : Promise.resolve(this.memoryStore.searchContext(enrichedQuery, searchOpts)))
2510
2556
  : [];
2511
2557
  if (results?.length > 0) {
2512
2558
  const accessedIds = results
@@ -2776,11 +2822,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2776
2822
  let effectivePrompt = text;
2777
2823
  const recentExchangesForIntent = key ? this.lastExchanges.get(key) : undefined;
2778
2824
  const intent = classifyIntent(text, recentExchangesForIntent);
2779
- const turnPolicy = decideTurnPolicy({
2825
+ const turnDecision = decideTurn({
2780
2826
  text,
2781
2827
  intent,
2782
2828
  hasRecentContext: !!(recentExchangesForIntent?.length || (key && this.sessions.has(key))),
2783
2829
  });
2830
+ const turnPolicy = turnDecision.policy;
2784
2831
  const suppressContextInjection = turnPolicy.suppressContextInjection === true;
2785
2832
  if (key && turnPolicy.suppressSessionResume) {
2786
2833
  this.sessions.delete(key);
@@ -2905,7 +2952,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2905
2952
  intent: intent.type,
2906
2953
  confidence: intent.confidence,
2907
2954
  strategy: intent.suggestedStrategy,
2908
- turnPolicy,
2955
+ turnDecision,
2909
2956
  }, 'Intent classified');
2910
2957
  // If caller explicitly passed maxTurns (e.g. cron), respect it.
2911
2958
  // Otherwise apply the turn policy. Complex/routed turns still get their
@@ -3034,11 +3081,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3034
3081
  // If a project override is set, skip auto-matching entirely
3035
3082
  const hasActiveSession = !!(sessionKey && this.sessions.has(sessionKey));
3036
3083
  const effectiveTurnPolicy = turnPolicy ?? (intentClassification
3037
- ? decideTurnPolicy({
3084
+ ? decideTurn({
3038
3085
  text: prompt,
3039
3086
  intent: intentClassification,
3040
3087
  hasRecentContext: hasActiveSession || ((sessionKey ? this.lastExchanges.get(sessionKey)?.length : 0) ?? 0) > 0,
3041
- })
3088
+ }).policy
3042
3089
  : undefined);
3043
3090
  const retrievalTier = effectiveTurnPolicy?.retrievalTier ?? 'full';
3044
3091
  const [rawContext, autoMatchedProject, linkContexts] = await Promise.all([
@@ -3117,6 +3164,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3117
3164
  // un-validated (but still logged).
3118
3165
  let contradictionRetried = false;
3119
3166
  let contextRecoveryRetries = 0;
3167
+ let noResponseRetried = false;
3168
+ let rotateSessionAfterTurn = false;
3120
3169
  try {
3121
3170
  for (let attempt = 0; attempt <= PersonalAssistant.RATE_LIMIT_MAX_RETRIES; attempt++) {
3122
3171
  const sdkOptions = await this.buildOptions({
@@ -3342,6 +3391,15 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3342
3391
  sessionId = result.session_id;
3343
3392
  this._lastTerminalReason = result.terminal_reason ?? undefined;
3344
3393
  this.logQueryResult(result, 'chat', sessionKey ?? 'unknown', undefined, profile?.slug);
3394
+ const hiddenSessionTokens = resultInputTokens(result);
3395
+ if (sessionKey && hiddenSessionTokens >= SESSION_ROTATE_INPUT_TOKENS) {
3396
+ rotateSessionAfterTurn = true;
3397
+ logger.warn({
3398
+ sessionKey,
3399
+ inputTokens: hiddenSessionTokens,
3400
+ threshold: SESSION_ROTATE_INPUT_TOKENS,
3401
+ }, 'SDK session near context ceiling — will rotate after this turn');
3402
+ }
3345
3403
  if (result.is_error) {
3346
3404
  // Error subtypes have `errors` array; success subtype has `result` string
3347
3405
  const errorText = 'errors' in result ? result.errors.join('; ') : ('result' in result ? result.result : '');
@@ -3358,6 +3416,19 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3358
3416
  `• Reply "deep mode" to queue this as a background task with a bigger budget\n` +
3359
3417
  `• Raise the cap permanently: \`clementine config set BUDGET_CHAT_USD 10\` then \`clementine restart\``);
3360
3418
  }
3419
+ else if (isCreditBalanceError(errorText)) {
3420
+ markBackgroundCreditBlocked(errorText);
3421
+ responseText = responseText || ('Claude says the account credit balance is too low. I paused background jobs for a few hours so they stop draining/retrying, but interactive chat will also fail until credits are available again.');
3422
+ }
3423
+ else if (looksLikeOneMillionContextError(errorText)) {
3424
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
3425
+ if (sessionKey) {
3426
+ this.sessions.delete(sessionKey);
3427
+ this.exchangeCounts.set(sessionKey, 0);
3428
+ this._compactedSessions.delete(sessionKey);
3429
+ }
3430
+ responseText = responseText || ("Claude rejected the 1M context beta for this account. I've disabled 1M context for this process and reset the session. To persist the fix across restarts, run `clementine config doctor --fix`, then `clementine restart`.");
3431
+ }
3361
3432
  else if (lower.includes('rate') && lower.includes('limit')) {
3362
3433
  hitRateLimit = true;
3363
3434
  }
@@ -3476,6 +3547,19 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3476
3547
  responseText += '\n\nI ran out of time but here\'s what I have so far. Want me to continue?';
3477
3548
  }
3478
3549
  }
3550
+ else if (isCreditBalanceError(e)) {
3551
+ markBackgroundCreditBlocked(e);
3552
+ responseText = responseText || ('Claude says the account credit balance is too low. I paused background jobs for a few hours so they stop draining/retrying, but interactive chat will also fail until credits are available again.');
3553
+ }
3554
+ else if (looksLikeOneMillionContextError(e)) {
3555
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
3556
+ if (sessionKey) {
3557
+ this.sessions.delete(sessionKey);
3558
+ this.exchangeCounts.set(sessionKey, 0);
3559
+ this._compactedSessions.delete(sessionKey);
3560
+ }
3561
+ responseText = responseText || ("Claude rejected the 1M context beta for this account. I've disabled 1M context for this process and reset the session. To persist the fix across restarts, run `clementine config doctor --fix`, then `clementine restart`.");
3562
+ }
3479
3563
  else if (errStr.includes('rate') && (errStr.includes('limit') || errStr.includes('rate_limit'))) {
3480
3564
  hitRateLimit = true;
3481
3565
  // Try to respect any retry hint the server surfaced in the error text.
@@ -3616,6 +3700,27 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3616
3700
  }
3617
3701
  responseText = contextThrashRecoveryNotice();
3618
3702
  }
3703
+ if (looksLikeNoResponseRequested(responseText)) {
3704
+ logger.warn({ sessionKey, attempt }, 'SDK/model returned no-response sentinel during interactive chat');
3705
+ if (!noResponseRetried && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
3706
+ noResponseRetried = true;
3707
+ if (sessionKey) {
3708
+ this.sessions.delete(sessionKey);
3709
+ this.exchangeCounts.set(sessionKey, 0);
3710
+ this._compactedSessions.delete(sessionKey);
3711
+ }
3712
+ prompt =
3713
+ `[RESPONSE REQUIRED]\n` +
3714
+ `This is an interactive user message. The previous attempt returned "No response requested", which is invalid for a direct chat turn.\n\n` +
3715
+ `Answer the user's message directly and briefly. If you need more information, ask one clear question.\n\n` +
3716
+ `User message:\n${prompt}`;
3717
+ responseText = '';
3718
+ sessionId = '';
3719
+ rotateSessionAfterTurn = false;
3720
+ continue;
3721
+ }
3722
+ responseText = "I'm here. What would you like me to do?";
3723
+ }
3619
3724
  // ── Response guarantee ─────────────────────────────────────────
3620
3725
  // The model often generates 30+ tool calls with minimal/no text. Ensure
3621
3726
  // the user always gets a substantive response after real work is done.
@@ -3636,9 +3741,15 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3636
3741
  responseText = `I started working on that (${toolCalls.length} tool calls). The gateway should be continuing this in the background.`;
3637
3742
  }
3638
3743
  }
3639
- if (sessionKey && sessionId) {
3744
+ if (sessionKey && sessionId && !rotateSessionAfterTurn) {
3640
3745
  this.sessions.set(sessionKey, sessionId);
3641
3746
  }
3747
+ else if (sessionKey && rotateSessionAfterTurn) {
3748
+ this.sessions.delete(sessionKey);
3749
+ this.exchangeCounts.set(sessionKey, 0);
3750
+ this._compactedSessions.delete(sessionKey);
3751
+ logger.info({ sessionKey }, 'Rotated SDK session after high-token turn');
3752
+ }
3642
3753
  // Log tool calls to transcript for audit trail
3643
3754
  if (sessionKey && toolCalls.length > 0 && this.memoryStore) {
3644
3755
  try {
@@ -4693,7 +4804,21 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
4693
4804
  }
4694
4805
  }
4695
4806
  else if (message.type === 'result') {
4696
- this.logQueryResult(message, 'heartbeat', 'heartbeat');
4807
+ const result = message;
4808
+ if (result.is_error) {
4809
+ const errText = 'errors' in result
4810
+ ? result.errors.join('; ')
4811
+ : String(result.result ?? '');
4812
+ if (isCreditBalanceError(errText)) {
4813
+ markBackgroundCreditBlocked(errText);
4814
+ throw new Error(errText);
4815
+ }
4816
+ if (looksLikeOneMillionContextError(errText)) {
4817
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
4818
+ throw new Error(errText);
4819
+ }
4820
+ }
4821
+ this.logQueryResult(result, 'heartbeat', 'heartbeat');
4697
4822
  }
4698
4823
  else if (message.type === 'system') {
4699
4824
  this.captureMcpStatus(message);
@@ -5024,12 +5149,22 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
5024
5149
  // "budget" was catching Anthropic's unrelated "does not support
5025
5150
  // user-configurable task budgets" error and pinning perfectly
5026
5151
  // healthy Haiku jobs as permanent failures.
5027
- if (result.is_error && 'result' in result) {
5028
- const exitText = String(result.result ?? '');
5152
+ if (result.is_error) {
5153
+ const exitText = 'errors' in result
5154
+ ? result.errors.join('; ')
5155
+ : String(result.result ?? '');
5029
5156
  if (exitText.includes('max_budget_usd')) {
5030
5157
  logger.warn({ job: jobName }, 'Cron job hit dollar budget cap — treating as permanent error');
5031
5158
  throw new Error(`Budget exceeded for cron job '${jobName}'`);
5032
5159
  }
5160
+ if (isCreditBalanceError(exitText)) {
5161
+ markBackgroundCreditBlocked(exitText);
5162
+ throw new Error(exitText);
5163
+ }
5164
+ if (looksLikeOneMillionContextError(exitText)) {
5165
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
5166
+ throw new Error(exitText);
5167
+ }
5033
5168
  }
5034
5169
  this.logQueryResult(result, 'cron', `cron:${jobName}`, jobName, sdkOptions.env?.CLEMENTINE_TEAM_AGENT || undefined);
5035
5170
  }
@@ -22,6 +22,9 @@ const DEEP_MODE_ASKS = [
22
22
  /\brun\s+in\s+the\s+background\b/i,
23
23
  /\bdeep\s+(mode|dive|work)\b/i,
24
24
  /\bbackground\s+(task|work|job)\b/i,
25
+ /\bkeep\s+working\b/i,
26
+ /\bdon'?t\s+stop\b/i,
27
+ /\buntil\s+(it'?s\s+)?(done|finished|complete|fixed)\b/i,
25
28
  /\btake\s+your\s+time\b/i,
26
29
  ];
27
30
  /**
@@ -7,8 +7,8 @@
7
7
  * when consecutiveErrors >= 3), classifies the failure pattern from
8
8
  * recentErrors, and either:
9
9
  *
10
- * - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
11
- * and DMs the OWNING agent via their bot
10
+ * - Writes a proposal for safe cron-config fixes by default so the owner
11
+ * can approve before Clementine edits CRON.md
12
12
  * - Writes a proposal to self-improve/pending-changes/ and DMs the
13
13
  * owning agent the diagnosis (full audit-inbox button approval is
14
14
  * a separate Phase 8b ship)
@@ -91,6 +91,11 @@ export interface SelfImproveLoopOptions {
91
91
  * call tick() directly without racing the watcher.
92
92
  */
93
93
  disableWatch?: boolean;
94
+ /**
95
+ * Opt into the legacy behavior where recognized low-risk CRON.md scalar
96
+ * edits are applied immediately. Default false: write a pending proposal.
97
+ */
98
+ allowAutoApplySafeFixes?: boolean;
94
99
  }
95
100
  export declare function classifyFailure(recentErrors: string[]): FixRecipe;
96
101
  export declare class SelfImproveLoop {
@@ -101,6 +106,7 @@ export declare class SelfImproveLoop {
101
106
  private readonly agentsDir;
102
107
  private readonly dispatcher;
103
108
  private readonly watchEnabled;
109
+ private readonly allowAutoApplySafeFixes;
104
110
  private timer;
105
111
  private watcher;
106
112
  private debounceTimer;
@@ -7,8 +7,8 @@
7
7
  * when consecutiveErrors >= 3), classifies the failure pattern from
8
8
  * recentErrors, and either:
9
9
  *
10
- * - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
11
- * and DMs the OWNING agent via their bot
10
+ * - Writes a proposal for safe cron-config fixes by default so the owner
11
+ * can approve before Clementine edits CRON.md
12
12
  * - Writes a proposal to self-improve/pending-changes/ and DMs the
13
13
  * owning agent the diagnosis (full audit-inbox button approval is
14
14
  * a separate Phase 8b ship)
@@ -239,6 +239,7 @@ export class SelfImproveLoop {
239
239
  agentsDir;
240
240
  dispatcher;
241
241
  watchEnabled;
242
+ allowAutoApplySafeFixes;
242
243
  timer = null;
243
244
  watcher = null;
244
245
  debounceTimer = null;
@@ -252,6 +253,7 @@ export class SelfImproveLoop {
252
253
  this.cronPath = opts.cronPath ?? CRON_PATH;
253
254
  this.agentsDir = opts.agentsDir ?? AGENTS_ROOT;
254
255
  this.watchEnabled = opts.disableWatch !== true;
256
+ this.allowAutoApplySafeFixes = opts.allowAutoApplySafeFixes === true;
255
257
  }
256
258
  start() {
257
259
  if (this.running)
@@ -394,6 +396,37 @@ export class SelfImproveLoop {
394
396
  logger.warn({ jobName: trigger.jobName, agentSlug }, 'Job not found in any CRON.md — cannot apply fix');
395
397
  return;
396
398
  }
399
+ const wouldChange = recipe.apply ? recipe.apply({ ...lookup.job }) : true;
400
+ if (!wouldChange) {
401
+ counts.noop++;
402
+ logger.info({ jobName: trigger.jobName, agentSlug }, 'Fix recipe is already in place — trigger removed without further action');
403
+ this.logAutonomy('fix_noop', trigger, { reason: 'already-applied' });
404
+ return;
405
+ }
406
+ if (!this.allowAutoApplySafeFixes) {
407
+ const id = `proposal-${Date.now()}-${trigger.jobName.replace(/[^a-z0-9-]/gi, '_')}`;
408
+ const record = {
409
+ id,
410
+ jobName: trigger.jobName,
411
+ ...(agentSlug ? { agentSlug } : {}),
412
+ category: recipe.category,
413
+ description: recipe.description,
414
+ recentErrors: trigger.recentErrors,
415
+ consecutiveErrors: trigger.consecutiveErrors,
416
+ proposedAt: new Date().toISOString(),
417
+ };
418
+ const file = writePendingChange(record, this.pendingDir);
419
+ counts.pending++;
420
+ this.logAutonomy('proposal_written', trigger, { category: recipe.category, proposalId: id, autoApplyAllowed: false });
421
+ await this.notifyAgent(agentSlug, [
422
+ `⚠️ **${trigger.jobName}** has failed ${trigger.consecutiveErrors} times in a row.`,
423
+ '',
424
+ recipe.description,
425
+ '',
426
+ `Fix proposal saved to \`${file}\`. Review and approve before editing CRON.md.`,
427
+ ].join('\n'));
428
+ return;
429
+ }
397
430
  const prevFields = applyCronEdit(lookup, recipe);
398
431
  if (prevFields) {
399
432
  counts.applied++;
@@ -28,6 +28,7 @@ interface ToolBundleDefinition {
28
28
  inheritFullClaudeEnv?: boolean;
29
29
  }
30
30
  export declare const TOOL_SURFACE_WARN_THRESHOLD = 150;
31
+ export declare const TOOL_SURFACE_HARD_LIMIT = 220;
31
32
  export declare const TOOL_BUNDLES: readonly ToolBundleDefinition[];
32
33
  export declare function routeToolSurface(text: string | undefined): ToolRouteDecision;
33
34
  export {};
@@ -7,6 +7,7 @@
7
7
  * broader access.
8
8
  */
9
9
  export const TOOL_SURFACE_WARN_THRESHOLD = 150;
10
+ export const TOOL_SURFACE_HARD_LIMIT = 220;
10
11
  export const TOOL_BUNDLES = [
11
12
  {
12
13
  id: 'email_outlook',
@@ -78,7 +79,7 @@ export const TOOL_BUNDLES = [
78
79
  },
79
80
  {
80
81
  id: 'browser',
81
- patterns: [/\b(browser|playwright|localhost|web page|webpage|screenshot|click|fill form|navigate)\b/i],
82
+ patterns: [/\b(playwright|localhost|127\.0\.0\.1|web\s?page|webpage|website|screenshot|click|fill(?: out)? form|navigate to|open .*browser|use .*browser|inspect .*page)\b/i],
82
83
  externalMcpServers: ['browser-harness', 'browsermcp', 'playwright', 'kernel', 'plugin:playwright:playwright'],
83
84
  },
84
85
  {
@@ -125,6 +126,15 @@ export const TOOL_BUNDLES = [
125
126
  function uniqueStrings(values) {
126
127
  return [...new Set([...values].filter((v) => !!v && v.trim().length > 0))];
127
128
  }
129
+ function explicitMcpServers(scopeText) {
130
+ const servers = new Set();
131
+ const re = /\bmcp__([A-Za-z0-9_-]+)__[A-Za-z0-9_.:-]+\b/g;
132
+ let match;
133
+ while ((match = re.exec(scopeText)) !== null) {
134
+ servers.add(match[1]);
135
+ }
136
+ return uniqueStrings(servers);
137
+ }
128
138
  export function routeToolSurface(text) {
129
139
  const scopeText = text?.trim() ?? '';
130
140
  if (!scopeText) {
@@ -161,13 +171,26 @@ export function routeToolSurface(text) {
161
171
  composio.add(slug);
162
172
  inheritFullClaudeEnv = inheritFullClaudeEnv || bundle.inheritFullClaudeEnv === true;
163
173
  }
174
+ for (const server of explicitMcpServers(scopeText)) {
175
+ if (server.startsWith('claude_ai_')) {
176
+ external.add(server.slice('claude_ai_'.length));
177
+ }
178
+ else {
179
+ // Exact `mcp__<server>__<tool>` mentions are authoritative. Add the
180
+ // name as both a direct MCP server and a Composio toolkit; whichever
181
+ // source is actually connected will mount, and the other path no-ops.
182
+ external.add(server);
183
+ composio.add(server);
184
+ }
185
+ inheritFullClaudeEnv = true;
186
+ }
164
187
  return {
165
188
  bundles: uniqueStrings(bundles),
166
189
  externalMcpServers: uniqueStrings(external),
167
190
  composioToolkits: uniqueStrings(composio),
168
191
  inheritFullClaudeEnv,
169
192
  fullSurface: false,
170
- reason: bundles.size > 0 ? 'matched' : 'empty',
193
+ reason: bundles.size > 0 || external.size > 0 || composio.size > 0 ? 'matched' : 'empty',
171
194
  };
172
195
  }
173
196
  //# sourceMappingURL=tool-router.js.map
@@ -5,8 +5,10 @@
5
5
  * small prompt, but any sign of memory dependence, tool work, or ambiguity
6
6
  * promotes the request to a richer path.
7
7
  */
8
+ import { type ToolRouteDecision } from './tool-router.js';
8
9
  import type { IntentClassification } from './intent-classifier.js';
9
10
  export type RetrievalTier = 'none' | 'core' | 'search' | 'full';
11
+ export type TurnExecutionMode = 'local' | 'lightweight_llm' | 'tool_llm' | 'background';
10
12
  export interface TurnPolicy {
11
13
  retrievalTier: RetrievalTier;
12
14
  disableAllTools: boolean;
@@ -27,6 +29,14 @@ export interface TurnPolicyInput {
27
29
  hasRecentContext: boolean;
28
30
  isAutonomous?: boolean;
29
31
  }
32
+ export interface TurnDecision {
33
+ mode: TurnExecutionMode;
34
+ policy: TurnPolicy;
35
+ toolRoute: ToolRouteDecision;
36
+ userVisibleStatus: string;
37
+ reason: string;
38
+ }
30
39
  export declare function isStandaloneGreeting(text: string): boolean;
31
40
  export declare function decideTurnPolicy(input: TurnPolicyInput): TurnPolicy;
41
+ export declare function decideTurn(input: TurnPolicyInput): TurnDecision;
32
42
  //# sourceMappingURL=turn-policy.d.ts.map