clementine-agent 1.18.10 → 1.18.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +14 -3
  2. package/dist/agent/assistant.d.ts +2 -0
  3. package/dist/agent/assistant.js +201 -48
  4. package/dist/agent/complexity-classifier.js +3 -0
  5. package/dist/agent/self-improve-loop.d.ts +8 -2
  6. package/dist/agent/self-improve-loop.js +35 -2
  7. package/dist/agent/tool-router.d.ts +1 -0
  8. package/dist/agent/tool-router.js +2 -1
  9. package/dist/agent/turn-policy.d.ts +10 -0
  10. package/dist/agent/turn-policy.js +34 -1
  11. package/dist/brain/connector-recipes.d.ts +6 -4
  12. package/dist/brain/connector-recipes.js +76 -4
  13. package/dist/cli/dashboard.js +67 -11
  14. package/dist/cli/index.js +334 -3
  15. package/dist/config/config-doctor.d.ts +12 -0
  16. package/dist/config/config-doctor.js +100 -1
  17. package/dist/config/effective-config.js +3 -3
  18. package/dist/config.js +3 -3
  19. package/dist/gateway/credit-guard.d.ts +12 -0
  20. package/dist/gateway/credit-guard.js +46 -0
  21. package/dist/gateway/cron-diagnostic-turn.d.ts +11 -0
  22. package/dist/gateway/cron-diagnostic-turn.js +242 -0
  23. package/dist/gateway/cron-scheduler.js +26 -0
  24. package/dist/gateway/failure-diagnostics.d.ts +4 -0
  25. package/dist/gateway/failure-diagnostics.js +171 -23
  26. package/dist/gateway/failure-monitor.d.ts +3 -1
  27. package/dist/gateway/failure-monitor.js +50 -4
  28. package/dist/gateway/heartbeat-scheduler.js +29 -3
  29. package/dist/gateway/job-health.d.ts +14 -0
  30. package/dist/gateway/job-health.js +108 -0
  31. package/dist/gateway/lanes.js +1 -1
  32. package/dist/gateway/notification-context.d.ts +40 -0
  33. package/dist/gateway/notification-context.js +159 -0
  34. package/dist/gateway/router.d.ts +9 -1
  35. package/dist/gateway/router.js +155 -55
  36. package/dist/index.js +3 -0
  37. package/dist/integrations/composio/mcp-bridge.d.ts +1 -0
  38. package/dist/integrations/composio/mcp-bridge.js +29 -5
  39. package/dist/memory/seed-user-model.d.ts +2 -0
  40. package/dist/memory/seed-user-model.js +13 -4
  41. package/dist/memory/store.d.ts +58 -15
  42. package/dist/memory/store.js +189 -13
  43. package/dist/tools/memory-tools.js +10 -2
  44. package/dist/tools/shared.d.ts +19 -0
  45. package/dist/types.d.ts +1 -0
  46. package/package.json +1 -1
package/README.md CHANGED
@@ -324,14 +324,25 @@ clementine restart # apply changes
324
324
 
325
325
  Your overrides live in `~/.clementine/.env` — **they survive every `npm update -g` / `clementine update`** because they're in your data home, not the package directory.
326
326
 
327
+ For spend/context tuning, `clementine budgets` gives a safer shortcut:
328
+
329
+ ```bash
330
+ clementine budgets # show chat/cron/heartbeat caps and 1M context state
331
+ clementine budgets safe # lower background budgets and disable Claude 1M context
332
+ clementine budgets 1m on # enable 1M context for eligible accounts / Extra Usage
333
+ clementine budgets 1m off # disable 1M context for maximum compatibility
334
+ clementine budgets set chat 10 # raise one budget cap
335
+ ```
336
+
327
337
  **Commonly tuned knobs:**
328
338
 
329
339
  | Key | Default | What it does |
330
340
  |-----|---------|--------------|
331
341
  | `BUDGET_CHAT_USD` | `5.00` | Max spend per interactive chat message |
332
- | `BUDGET_CRON_T1_USD` | `2.00` | Max spend per tier-1 cron job |
333
- | `BUDGET_CRON_T2_USD` | `5.00` | Max spend per tier-2 cron job |
334
- | `BUDGET_HEARTBEAT_USD` | `0.50` | Max spend per heartbeat tick |
342
+ | `BUDGET_CRON_T1_USD` | `0.75` | Max spend per tier-1 cron job |
343
+ | `BUDGET_CRON_T2_USD` | `1.50` | Max spend per tier-2 cron job |
344
+ | `BUDGET_HEARTBEAT_USD` | `0.25` | Max spend per heartbeat tick |
345
+ | `CLAUDE_CODE_DISABLE_1M_CONTEXT` | `true` | `true`/`1` keeps Claude Code on 200K context unless the user explicitly enables 1M |
335
346
  | `DEFAULT_MODEL_TIER` | `sonnet` | Default model: `haiku` / `sonnet` / `opus` |
336
347
  | `HEARTBEAT_INTERVAL_MINUTES` | `30` | How often the agent auto-checks in |
337
348
  | `HEARTBEAT_ACTIVE_START` | `8` | First hour of the active window (0–23) |
package/dist/agent/assistant.d.ts CHANGED
@@ -30,6 +30,8 @@ export declare function estimateTokens(text: string): number;
30
30
  export declare function looksLikeContextThrashText(value: unknown): boolean;
31
31
  export declare function contextThrashRecoveryNotice(): string;
32
32
  export declare function buildContextThrashRecoveryPrompt(userRequest: string, priorFailureText?: string): string;
33
+ export declare function looksLikeOneMillionContextError(value: unknown): boolean;
34
+ export declare function looksLikeNoResponseRequested(value: unknown): boolean;
33
35
  /** Autonomous jobs use this sentinel to mean "completed, but do not notify the owner." */
34
36
  export declare function isAutonomousNothingOutput(response: string): boolean;
35
37
  export interface ProjectMeta {
package/dist/agent/assistant.js CHANGED
@@ -28,14 +28,14 @@ import { StallGuard } from './stall-guard.js';
28
28
  import { collectToolCalls, detectContradiction, buildCorrectionPrompt } from './contradiction-validator.js';
29
29
  import { recordToolOutcome as recordMcpToolOutcome } from './mcp-circuit-breaker.js';
30
30
  import { assembleContext } from '../memory/context-assembler.js';
31
- import * as embeddingsModule from '../memory/embeddings.js';
32
31
  import { PromptCache } from './prompt-cache.js';
33
32
  import { searchSkills as searchSkillsSync } from './skill-extractor.js';
34
33
  import { classifyIntent, getStrategyGuidance } from './intent-classifier.js';
35
34
  import { getEventLog } from './session-event-log.js';
36
- import { routeToolSurface, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
37
- import { decideTurnPolicy } from './turn-policy.js';
35
+ import { routeToolSurface, TOOL_SURFACE_HARD_LIMIT, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
36
+ import { decideTurn } from './turn-policy.js';
38
37
  import { loadClementineJson } from '../config/clementine-json.js';
38
+ import { isCreditBalanceError, markBackgroundCreditBlocked } from '../gateway/credit-guard.js';
39
39
  // ── Channel capabilities ────────────────────────────────────────────
40
40
  /** Map channel label to its capabilities so the agent adapts its responses. */
41
41
  function getChannelCapabilities(channel) {
@@ -311,6 +311,8 @@ function formatTimeAgo(ms) {
311
311
  const CONTEXT_GUARD_MIN_TOKENS = 16_000;
312
312
  /** Warn threshold — context is getting tight. */
313
313
  const CONTEXT_GUARD_WARN_TOKENS = 32_000;
314
+ /** Rotate SDK sessions before hidden resume history approaches the 200K cap. */
315
+ const SESSION_ROTATE_INPUT_TOKENS = 140_000;
314
316
  /** Approximate context window sizes by model family. */
315
317
  const MODEL_CONTEXT_WINDOWS = {
316
318
  'haiku': 200_000,
@@ -324,6 +326,30 @@ function getContextWindow(model) {
324
326
  }
325
327
  return 200_000; // safe default
326
328
  }
329
+ function resultInputTokens(result) {
330
+ let total = 0;
331
+ const modelUsage = result.modelUsage;
332
+ if (!modelUsage)
333
+ return 0;
334
+ for (const usage of Object.values(modelUsage)) {
335
+ total += usage.inputTokens ?? 0;
336
+ total += usage.cacheReadInputTokens ?? 0;
337
+ total += usage.cacheCreationInputTokens ?? 0;
338
+ }
339
+ return total;
340
+ }
341
+ function oneMillionContextDisabled() {
342
+ const value = process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT;
343
+ return value === undefined || !/^(0|false|no)$/i.test(value);
344
+ }
345
+ export function looksLikeOneMillionContextError(value) {
346
+ const text = String(value ?? '');
347
+ return /extra usage.*1m context|1m context.*extra usage|context-1m/i.test(text);
348
+ }
349
+ export function looksLikeNoResponseRequested(value) {
350
+ const text = String(value ?? '').trim();
351
+ return /^no response requested\.?$/i.test(text);
352
+ }
327
353
  // ── Constants ────────────────────────────────────────────────────────
328
354
  const logger = pino({ name: 'clementine.assistant' });
329
355
  const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
@@ -685,6 +711,8 @@ export function isAutonomousNothingOutput(response) {
685
711
  return true;
686
712
  if (/^(_*NOTHING_*\s*)?\[MONITORING\]\s*$/i.test(trimmed))
687
713
  return true;
714
+ if (looksLikeNoResponseRequested(trimmed))
715
+ return true;
688
716
  if (trimmed.length > 80)
689
717
  return false;
690
718
  const lower = trimmed.toLowerCase();
@@ -1929,7 +1957,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
1929
1957
  const profileScopeText = [profile?.description, profile?.systemPromptBody]
1930
1958
  .filter(Boolean)
1931
1959
  .join('\n');
1932
- const directScopeText = [promptScopeText, profileScopeText].filter(Boolean).join('\n');
1960
+ const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
1961
+ const directScopeText = [promptScopeText, autonomousToolRun ? profileScopeText : ''].filter(Boolean).join('\n');
1933
1962
  const emptyToolRoute = () => ({
1934
1963
  bundles: [],
1935
1964
  externalMcpServers: [],
@@ -1962,8 +1991,16 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
1962
1991
  const promptToolRoute = routeToolSurface(promptScopeText);
1963
1992
  const profileToolRoute = routeToolSurface(profileScopeText);
1964
1993
  const contextToolRoute = routeToolSurface(contextRoutingText);
1965
- const safeProfileToolRoute = profileToolRoute.fullSurface ? emptyToolRoute() : profileToolRoute;
1966
- const safeContextToolRoute = contextToolRoute.fullSurface ? emptyToolRoute() : contextToolRoute;
1994
+ const promptHasToolRoute = promptToolRoute.fullSurface || promptToolRoute.bundles.length > 0;
1995
+ const directFollowupNeedsContextTools = intentClassification?.type === 'followup'
1996
+ || /^(yes|yep|yeah|go|go ahead|do it|continue|pick up|use that|run it|send it|same thing)\b/i.test(promptScopeText.trim());
1997
+ const allowContextToolRoute = autonomousToolRun || (!promptHasToolRoute && directFollowupNeedsContextTools);
1998
+ const safeProfileToolRoute = autonomousToolRun && !profileToolRoute.fullSurface
1999
+ ? profileToolRoute
2000
+ : emptyToolRoute();
2001
+ const safeContextToolRoute = allowContextToolRoute && !contextToolRoute.fullSurface
2002
+ ? contextToolRoute
2003
+ : emptyToolRoute();
1967
2004
  const toolRoute = mergeToolRoutes(promptToolRoute, mergeToolRoutes(safeProfileToolRoute, safeContextToolRoute));
1968
2005
  let allowedTools = [];
1969
2006
  const addAllowed = (...tools) => {
@@ -1977,10 +2014,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
1977
2014
  };
1978
2015
  const scopeText = [
1979
2016
  directScopeText,
1980
- contextRoutingText,
2017
+ allowContextToolRoute ? contextRoutingText : '',
1981
2018
  ].filter(Boolean).join('\n').toLowerCase();
1982
2019
  const promptScopeLower = promptScopeText.toLowerCase();
1983
- const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
1984
2020
  const taskIntent = intentClassification?.type === 'task' || autonomousToolRun;
1985
2021
  const memoryNeeded = autonomousToolRun
1986
2022
  || retrievalContext.trim().length > 0
@@ -2303,6 +2339,24 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2303
2339
  whitelist.add(mcpTool('goal_work'));
2304
2340
  allowedTools = allowedTools.filter(t => whitelist.has(t));
2305
2341
  }
2342
+ if (!toolRoute.fullSurface
2343
+ && !adminNeeded
2344
+ && !autonomousToolRun
2345
+ && allowedTools.length > TOOL_SURFACE_HARD_LIMIT) {
2346
+ const beforeAllowedToolCount = allowedTools.length;
2347
+ const coreSdkTools = new Set(['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep', 'WebSearch', 'WebFetch']);
2348
+ const clementineToolPrefixForCap = `mcp__${TOOLS_SERVER}__`;
2349
+ allowedTools = allowedTools.filter(tool => coreSdkTools.has(tool) || tool.startsWith(clementineToolPrefixForCap));
2350
+ externalMcpServers = {};
2351
+ composioMcpServers = {};
2352
+ logger.warn({
2353
+ sessionKey,
2354
+ beforeAllowedToolCount,
2355
+ afterAllowedToolCount: allowedTools.length,
2356
+ hardLimit: TOOL_SURFACE_HARD_LIMIT,
2357
+ bundles: toolRoute.bundles,
2358
+ }, 'SDK allowed tool surface exceeded hard limit; falling back to core Clementine tools for this interactive turn');
2359
+ }
2306
2360
  }
2307
2361
  // Permission mode: always 'bypassPermissions' — this is a daemon/harness with no interactive
2308
2362
  // terminal, so 'auto' mode (which requires plan support + human approval) doesn't apply.
@@ -2367,6 +2421,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2367
2421
  systemPrompt: fullSystemPrompt,
2368
2422
  model: resolvedModel,
2369
2423
  ...(fallback ? { fallbackModel: fallback } : {}),
2424
+ ...(oneMillionContextDisabled() ? { betas: [] } : {}),
2370
2425
  permissionMode: effectivePermissionMode,
2371
2426
  allowDangerouslySkipPermissions: true,
2372
2427
  ...(sessionStore ? { sessionStore } : {}),
@@ -2417,7 +2472,14 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2417
2472
  // env only when the prompt/job mentions a connector-backed service.
2418
2473
  // Per-MCP-server env isolation still happens inside each mcpServers
2419
2474
  // entry; this only affects the Claude Code subprocess itself.
2420
- ...(shouldInheritClaudeEnv ? {} : { env: SAFE_ENV }),
2475
+ ...(shouldInheritClaudeEnv ? {} : {
2476
+ env: {
2477
+ ...SAFE_ENV,
2478
+ ...(process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT !== undefined
2479
+ ? { CLAUDE_CODE_DISABLE_1M_CONTEXT: process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT }
2480
+ : {}),
2481
+ },
2482
+ }),
2421
2483
  // Avoid ambient Claude Code user/project/local settings and plugins by
2422
2484
  // default. Those can silently attach hundreds of tools. Explicit MCP
2423
2485
  // servers above still work; "all integrations/full tool surface" keeps
@@ -2479,28 +2541,18 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2479
2541
  const useSearch = tier === 'search' || tier === 'full';
2480
2542
  const useDense = tier === 'full';
2481
2543
  const useProceduralAndGraph = tier === 'full';
2482
- // Pre-compute dense query embedding if the model is ready. Done outside
2483
- // searchContext (which is sync) so the dense path doesn't force the
2484
- // entire call chain to be async. If embedDense fails or isn't available,
2485
- // searchContext falls back to TF-IDF.
2486
- let queryDenseVec;
2487
- try {
2488
- if (useDense && embeddingsModule.isDenseReady()) {
2489
- const v = await embeddingsModule.embedDense(enrichedQuery, true);
2490
- if (v)
2491
- queryDenseVec = v;
2492
- }
2493
- }
2494
- catch { /* fallback to sparse */ }
2544
+ const searchOpts = {
2545
+ limit: tier === 'full' ? SEARCH_CONTEXT_LIMIT : Math.min(SEARCH_CONTEXT_LIMIT, 4),
2546
+ recencyLimit: tier === 'full' ? SEARCH_RECENCY_LIMIT : Math.min(SEARCH_RECENCY_LIMIT, 2),
2547
+ agentSlug,
2548
+ strict: strictIsolation,
2549
+ sessionKey: sessionKey ?? undefined,
2550
+ useDense,
2551
+ };
2495
2552
  const results = useSearch
2496
- ? this.memoryStore.searchContext(enrichedQuery, {
2497
- limit: tier === 'full' ? SEARCH_CONTEXT_LIMIT : Math.min(SEARCH_CONTEXT_LIMIT, 4),
2498
- recencyLimit: tier === 'full' ? SEARCH_RECENCY_LIMIT : Math.min(SEARCH_RECENCY_LIMIT, 2),
2499
- agentSlug,
2500
- strict: strictIsolation,
2501
- sessionKey: sessionKey ?? undefined,
2502
- queryDenseVec,
2503
- })
2553
+ ? await (this.memoryStore.searchContextAsync
2554
+ ? this.memoryStore.searchContextAsync(enrichedQuery, searchOpts)
2555
+ : Promise.resolve(this.memoryStore.searchContext(enrichedQuery, searchOpts)))
2504
2556
  : [];
2505
2557
  if (results?.length > 0) {
2506
2558
  const accessedIds = results
@@ -2770,11 +2822,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2770
2822
  let effectivePrompt = text;
2771
2823
  const recentExchangesForIntent = key ? this.lastExchanges.get(key) : undefined;
2772
2824
  const intent = classifyIntent(text, recentExchangesForIntent);
2773
- const turnPolicy = decideTurnPolicy({
2825
+ const turnDecision = decideTurn({
2774
2826
  text,
2775
2827
  intent,
2776
2828
  hasRecentContext: !!(recentExchangesForIntent?.length || (key && this.sessions.has(key))),
2777
2829
  });
2830
+ const turnPolicy = turnDecision.policy;
2778
2831
  const suppressContextInjection = turnPolicy.suppressContextInjection === true;
2779
2832
  if (key && turnPolicy.suppressSessionResume) {
2780
2833
  this.sessions.delete(key);
@@ -2899,7 +2952,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2899
2952
  intent: intent.type,
2900
2953
  confidence: intent.confidence,
2901
2954
  strategy: intent.suggestedStrategy,
2902
- turnPolicy,
2955
+ turnDecision,
2903
2956
  }, 'Intent classified');
2904
2957
  // If caller explicitly passed maxTurns (e.g. cron), respect it.
2905
2958
  // Otherwise apply the turn policy. Complex/routed turns still get their
@@ -3028,11 +3081,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3028
3081
  // If a project override is set, skip auto-matching entirely
3029
3082
  const hasActiveSession = !!(sessionKey && this.sessions.has(sessionKey));
3030
3083
  const effectiveTurnPolicy = turnPolicy ?? (intentClassification
3031
- ? decideTurnPolicy({
3084
+ ? decideTurn({
3032
3085
  text: prompt,
3033
3086
  intent: intentClassification,
3034
3087
  hasRecentContext: hasActiveSession || ((sessionKey ? this.lastExchanges.get(sessionKey)?.length : 0) ?? 0) > 0,
3035
- })
3088
+ }).policy
3036
3089
  : undefined);
3037
3090
  const retrievalTier = effectiveTurnPolicy?.retrievalTier ?? 'full';
3038
3091
  const [rawContext, autoMatchedProject, linkContexts] = await Promise.all([
@@ -3110,6 +3163,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3110
3163
  // Flipped true on the first intervention; subsequent replies go through
3111
3164
  // un-validated (but still logged).
3112
3165
  let contradictionRetried = false;
3166
+ let contextRecoveryRetries = 0;
3167
+ let noResponseRetried = false;
3168
+ let rotateSessionAfterTurn = false;
3113
3169
  try {
3114
3170
  for (let attempt = 0; attempt <= PersonalAssistant.RATE_LIMIT_MAX_RETRIES; attempt++) {
3115
3171
  const sdkOptions = await this.buildOptions({
@@ -3335,6 +3391,15 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3335
3391
  sessionId = result.session_id;
3336
3392
  this._lastTerminalReason = result.terminal_reason ?? undefined;
3337
3393
  this.logQueryResult(result, 'chat', sessionKey ?? 'unknown', undefined, profile?.slug);
3394
+ const hiddenSessionTokens = resultInputTokens(result);
3395
+ if (sessionKey && hiddenSessionTokens >= SESSION_ROTATE_INPUT_TOKENS) {
3396
+ rotateSessionAfterTurn = true;
3397
+ logger.warn({
3398
+ sessionKey,
3399
+ inputTokens: hiddenSessionTokens,
3400
+ threshold: SESSION_ROTATE_INPUT_TOKENS,
3401
+ }, 'SDK session near context ceiling — will rotate after this turn');
3402
+ }
3338
3403
  if (result.is_error) {
3339
3404
  // Error subtypes have `errors` array; success subtype has `result` string
3340
3405
  const errorText = 'errors' in result ? result.errors.join('; ') : ('result' in result ? result.result : '');
@@ -3351,6 +3416,19 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3351
3416
  `• Reply "deep mode" to queue this as a background task with a bigger budget\n` +
3352
3417
  `• Raise the cap permanently: \`clementine config set BUDGET_CHAT_USD 10\` then \`clementine restart\``);
3353
3418
  }
3419
+ else if (isCreditBalanceError(errorText)) {
3420
+ markBackgroundCreditBlocked(errorText);
3421
+ responseText = responseText || ('Claude says the account credit balance is too low. I paused background jobs for a few hours so they stop draining/retrying, but interactive chat will also fail until credits are available again.');
3422
+ }
3423
+ else if (looksLikeOneMillionContextError(errorText)) {
3424
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
3425
+ if (sessionKey) {
3426
+ this.sessions.delete(sessionKey);
3427
+ this.exchangeCounts.set(sessionKey, 0);
3428
+ this._compactedSessions.delete(sessionKey);
3429
+ }
3430
+ responseText = responseText || ("Claude rejected the 1M context beta for this account. I've disabled 1M context for this process and reset the session. To persist the fix across restarts, run `clementine config doctor --fix`, then `clementine restart`.");
3431
+ }
3354
3432
  else if (lower.includes('rate') && lower.includes('limit')) {
3355
3433
  hitRateLimit = true;
3356
3434
  }
@@ -3469,6 +3547,19 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3469
3547
  responseText += '\n\nI ran out of time but here\'s what I have so far. Want me to continue?';
3470
3548
  }
3471
3549
  }
3550
+ else if (isCreditBalanceError(e)) {
3551
+ markBackgroundCreditBlocked(e);
3552
+ responseText = responseText || ('Claude says the account credit balance is too low. I paused background jobs for a few hours so they stop draining/retrying, but interactive chat will also fail until credits are available again.');
3553
+ }
3554
+ else if (looksLikeOneMillionContextError(e)) {
3555
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
3556
+ if (sessionKey) {
3557
+ this.sessions.delete(sessionKey);
3558
+ this.exchangeCounts.set(sessionKey, 0);
3559
+ this._compactedSessions.delete(sessionKey);
3560
+ }
3561
+ responseText = responseText || ("Claude rejected the 1M context beta for this account. I've disabled 1M context for this process and reset the session. To persist the fix across restarts, run `clementine config doctor --fix`, then `clementine restart`.");
3562
+ }
3472
3563
  else if (errStr.includes('rate') && (errStr.includes('limit') || errStr.includes('rate_limit'))) {
3473
3564
  hitRateLimit = true;
3474
3565
  // Try to respect any retry hint the server surfaced in the error text.
@@ -3501,7 +3592,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3501
3592
  this.exchangeCounts.set(sessionKey, 0);
3502
3593
  this._compactedSessions.delete(sessionKey);
3503
3594
  }
3504
- if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
3595
+ if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES && contextRecoveryRetries < 1) {
3596
+ contextRecoveryRetries++;
3505
3597
  prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
3506
3598
  preRotationSnapshot = null;
3507
3599
  responseText = '';
@@ -3554,11 +3646,22 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3554
3646
  if (staleSession && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
3555
3647
  responseText = '';
3556
3648
  if (contextRecovery) {
3557
- prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
3558
- preRotationSnapshot = null;
3559
- contextRecovery = false;
3649
+ if (contextRecoveryRetries >= 1) {
3650
+ responseText = contextThrashRecoveryNotice();
3651
+ staleSession = false;
3652
+ contextRecovery = false;
3653
+ }
3654
+ else {
3655
+ contextRecoveryRetries++;
3656
+ prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
3657
+ preRotationSnapshot = null;
3658
+ contextRecovery = false;
3659
+ continue;
3660
+ }
3661
+ }
3662
+ else {
3663
+ continue;
3560
3664
  }
3561
- continue;
3562
3665
  }
3563
3666
  if (staleSession && contextRecovery && !responseText.trim()) {
3564
3667
  responseText = contextThrashRecoveryNotice();
@@ -3588,16 +3691,36 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3588
3691
  this.exchangeCounts.set(sessionKey, 0);
3589
3692
  this._compactedSessions.delete(sessionKey);
3590
3693
  }
3591
- if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
3592
- prompt = buildContextRecoveredPrompt(prompt, {
3593
- toolCalls: stallGuard?.getToolCalls() ?? [],
3594
- partialText: '',
3595
- });
3694
+ if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES && contextRecoveryRetries < 1) {
3695
+ contextRecoveryRetries++;
3696
+ prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
3697
+ preRotationSnapshot = null;
3596
3698
  responseText = '';
3597
3699
  continue;
3598
3700
  }
3599
3701
  responseText = contextThrashRecoveryNotice();
3600
3702
  }
3703
+ if (looksLikeNoResponseRequested(responseText)) {
3704
+ logger.warn({ sessionKey, attempt }, 'SDK/model returned no-response sentinel during interactive chat');
3705
+ if (!noResponseRetried && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
3706
+ noResponseRetried = true;
3707
+ if (sessionKey) {
3708
+ this.sessions.delete(sessionKey);
3709
+ this.exchangeCounts.set(sessionKey, 0);
3710
+ this._compactedSessions.delete(sessionKey);
3711
+ }
3712
+ prompt =
3713
+ `[RESPONSE REQUIRED]\n` +
3714
+ `This is an interactive user message. The previous attempt returned "No response requested", which is invalid for a direct chat turn.\n\n` +
3715
+ `Answer the user's message directly and briefly. If you need more information, ask one clear question.\n\n` +
3716
+ `User message:\n${prompt}`;
3717
+ responseText = '';
3718
+ sessionId = '';
3719
+ rotateSessionAfterTurn = false;
3720
+ continue;
3721
+ }
3722
+ responseText = "I'm here. What would you like me to do?";
3723
+ }
3601
3724
  // ── Response guarantee ─────────────────────────────────────────
3602
3725
  // The model often generates 30+ tool calls with minimal/no text. Ensure
3603
3726
  // the user always gets a substantive response after real work is done.
@@ -3618,9 +3741,15 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3618
3741
  responseText = `I started working on that (${toolCalls.length} tool calls). The gateway should be continuing this in the background.`;
3619
3742
  }
3620
3743
  }
3621
- if (sessionKey && sessionId) {
3744
+ if (sessionKey && sessionId && !rotateSessionAfterTurn) {
3622
3745
  this.sessions.set(sessionKey, sessionId);
3623
3746
  }
3747
+ else if (sessionKey && rotateSessionAfterTurn) {
3748
+ this.sessions.delete(sessionKey);
3749
+ this.exchangeCounts.set(sessionKey, 0);
3750
+ this._compactedSessions.delete(sessionKey);
3751
+ logger.info({ sessionKey }, 'Rotated SDK session after high-token turn');
3752
+ }
3624
3753
  // Log tool calls to transcript for audit trail
3625
3754
  if (sessionKey && toolCalls.length > 0 && this.memoryStore) {
3626
3755
  try {
@@ -4675,7 +4804,21 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
4675
4804
  }
4676
4805
  }
4677
4806
  else if (message.type === 'result') {
4678
- this.logQueryResult(message, 'heartbeat', 'heartbeat');
4807
+ const result = message;
4808
+ if (result.is_error) {
4809
+ const errText = 'errors' in result
4810
+ ? result.errors.join('; ')
4811
+ : String(result.result ?? '');
4812
+ if (isCreditBalanceError(errText)) {
4813
+ markBackgroundCreditBlocked(errText);
4814
+ throw new Error(errText);
4815
+ }
4816
+ if (looksLikeOneMillionContextError(errText)) {
4817
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
4818
+ throw new Error(errText);
4819
+ }
4820
+ }
4821
+ this.logQueryResult(result, 'heartbeat', 'heartbeat');
4679
4822
  }
4680
4823
  else if (message.type === 'system') {
4681
4824
  this.captureMcpStatus(message);
@@ -5006,12 +5149,22 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
5006
5149
  // "budget" was catching Anthropic's unrelated "does not support
5007
5150
  // user-configurable task budgets" error and pinning perfectly
5008
5151
  // healthy Haiku jobs as permanent failures.
5009
- if (result.is_error && 'result' in result) {
5010
- const exitText = String(result.result ?? '');
5152
+ if (result.is_error) {
5153
+ const exitText = 'errors' in result
5154
+ ? result.errors.join('; ')
5155
+ : String(result.result ?? '');
5011
5156
  if (exitText.includes('max_budget_usd')) {
5012
5157
  logger.warn({ job: jobName }, 'Cron job hit dollar budget cap — treating as permanent error');
5013
5158
  throw new Error(`Budget exceeded for cron job '${jobName}'`);
5014
5159
  }
5160
+ if (isCreditBalanceError(exitText)) {
5161
+ markBackgroundCreditBlocked(exitText);
5162
+ throw new Error(exitText);
5163
+ }
5164
+ if (looksLikeOneMillionContextError(exitText)) {
5165
+ process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT = '1';
5166
+ throw new Error(exitText);
5167
+ }
5015
5168
  }
5016
5169
  this.logQueryResult(result, 'cron', `cron:${jobName}`, jobName, sdkOptions.env?.CLEMENTINE_TEAM_AGENT || undefined);
5017
5170
  }
package/dist/agent/complexity-classifier.js CHANGED
@@ -22,6 +22,9 @@ const DEEP_MODE_ASKS = [
22
22
  /\brun\s+in\s+the\s+background\b/i,
23
23
  /\bdeep\s+(mode|dive|work)\b/i,
24
24
  /\bbackground\s+(task|work|job)\b/i,
25
+ /\bkeep\s+working\b/i,
26
+ /\bdon'?t\s+stop\b/i,
27
+ /\buntil\s+(it'?s\s+)?(done|finished|complete|fixed)\b/i,
25
28
  /\btake\s+your\s+time\b/i,
26
29
  ];
27
30
  /**
package/dist/agent/self-improve-loop.d.ts CHANGED
@@ -7,8 +7,8 @@
7
7
  * when consecutiveErrors >= 3), classifies the failure pattern from
8
8
  * recentErrors, and either:
9
9
  *
10
- * - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
11
- * and DMs the OWNING agent via their bot
10
+ * - Writes a proposal for safe cron-config fixes by default so the owner
11
+ * can approve before Clementine edits CRON.md
12
12
  * - Writes a proposal to self-improve/pending-changes/ and DMs the
13
13
  * owning agent the diagnosis (full audit-inbox button approval is
14
14
  * a separate Phase 8b ship)
@@ -91,6 +91,11 @@ export interface SelfImproveLoopOptions {
91
91
  * call tick() directly without racing the watcher.
92
92
  */
93
93
  disableWatch?: boolean;
94
+ /**
95
+ * Opt into the legacy behavior where recognized low-risk CRON.md scalar
96
+ * edits are applied immediately. Default false: write a pending proposal.
97
+ */
98
+ allowAutoApplySafeFixes?: boolean;
94
99
  }
95
100
  export declare function classifyFailure(recentErrors: string[]): FixRecipe;
96
101
  export declare class SelfImproveLoop {
@@ -101,6 +106,7 @@ export declare class SelfImproveLoop {
101
106
  private readonly agentsDir;
102
107
  private readonly dispatcher;
103
108
  private readonly watchEnabled;
109
+ private readonly allowAutoApplySafeFixes;
104
110
  private timer;
105
111
  private watcher;
106
112
  private debounceTimer;
package/dist/agent/self-improve-loop.js CHANGED
@@ -7,8 +7,8 @@
7
7
  * when consecutiveErrors >= 3), classifies the failure pattern from
8
8
  * recentErrors, and either:
9
9
  *
10
- * - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
11
- * and DMs the OWNING agent via their bot
10
+ * - Writes a proposal for safe cron-config fixes by default so the owner
11
+ * can approve before Clementine edits CRON.md
12
12
  * - Writes a proposal to self-improve/pending-changes/ and DMs the
13
13
  * owning agent the diagnosis (full audit-inbox button approval is
14
14
  * a separate Phase 8b ship)
@@ -239,6 +239,7 @@ export class SelfImproveLoop {
239
239
  agentsDir;
240
240
  dispatcher;
241
241
  watchEnabled;
242
+ allowAutoApplySafeFixes;
242
243
  timer = null;
243
244
  watcher = null;
244
245
  debounceTimer = null;
@@ -252,6 +253,7 @@ export class SelfImproveLoop {
252
253
  this.cronPath = opts.cronPath ?? CRON_PATH;
253
254
  this.agentsDir = opts.agentsDir ?? AGENTS_ROOT;
254
255
  this.watchEnabled = opts.disableWatch !== true;
256
+ this.allowAutoApplySafeFixes = opts.allowAutoApplySafeFixes === true;
255
257
  }
256
258
  start() {
257
259
  if (this.running)
@@ -394,6 +396,37 @@ export class SelfImproveLoop {
394
396
  logger.warn({ jobName: trigger.jobName, agentSlug }, 'Job not found in any CRON.md — cannot apply fix');
395
397
  return;
396
398
  }
399
+ const wouldChange = recipe.apply ? recipe.apply({ ...lookup.job }) : true;
400
+ if (!wouldChange) {
401
+ counts.noop++;
402
+ logger.info({ jobName: trigger.jobName, agentSlug }, 'Fix recipe is already in place — trigger removed without further action');
403
+ this.logAutonomy('fix_noop', trigger, { reason: 'already-applied' });
404
+ return;
405
+ }
406
+ if (!this.allowAutoApplySafeFixes) {
407
+ const id = `proposal-${Date.now()}-${trigger.jobName.replace(/[^a-z0-9-]/gi, '_')}`;
408
+ const record = {
409
+ id,
410
+ jobName: trigger.jobName,
411
+ ...(agentSlug ? { agentSlug } : {}),
412
+ category: recipe.category,
413
+ description: recipe.description,
414
+ recentErrors: trigger.recentErrors,
415
+ consecutiveErrors: trigger.consecutiveErrors,
416
+ proposedAt: new Date().toISOString(),
417
+ };
418
+ const file = writePendingChange(record, this.pendingDir);
419
+ counts.pending++;
420
+ this.logAutonomy('proposal_written', trigger, { category: recipe.category, proposalId: id, autoApplyAllowed: false });
421
+ await this.notifyAgent(agentSlug, [
422
+ `⚠️ **${trigger.jobName}** has failed ${trigger.consecutiveErrors} times in a row.`,
423
+ '',
424
+ recipe.description,
425
+ '',
426
+ `Fix proposal saved to \`${file}\`. Review and approve before editing CRON.md.`,
427
+ ].join('\n'));
428
+ return;
429
+ }
397
430
  const prevFields = applyCronEdit(lookup, recipe);
398
431
  if (prevFields) {
399
432
  counts.applied++;
package/dist/agent/tool-router.d.ts CHANGED
@@ -28,6 +28,7 @@ interface ToolBundleDefinition {
28
28
  inheritFullClaudeEnv?: boolean;
29
29
  }
30
30
  export declare const TOOL_SURFACE_WARN_THRESHOLD = 150;
31
+ export declare const TOOL_SURFACE_HARD_LIMIT = 220;
31
32
  export declare const TOOL_BUNDLES: readonly ToolBundleDefinition[];
32
33
  export declare function routeToolSurface(text: string | undefined): ToolRouteDecision;
33
34
  export {};
package/dist/agent/tool-router.js CHANGED
@@ -7,6 +7,7 @@
7
7
  * broader access.
8
8
  */
9
9
  export const TOOL_SURFACE_WARN_THRESHOLD = 150;
10
+ export const TOOL_SURFACE_HARD_LIMIT = 220;
10
11
  export const TOOL_BUNDLES = [
11
12
  {
12
13
  id: 'email_outlook',
@@ -78,7 +79,7 @@ export const TOOL_BUNDLES = [
78
79
  },
79
80
  {
80
81
  id: 'browser',
81
- patterns: [/\b(browser|playwright|localhost|web page|webpage|screenshot|click|fill form|navigate)\b/i],
82
+ patterns: [/\b(playwright|localhost|127\.0\.0\.1|web\s?page|webpage|website|screenshot|click|fill(?: out)? form|navigate to|open .*browser|use .*browser|inspect .*page)\b/i],
82
83
  externalMcpServers: ['browser-harness', 'browsermcp', 'playwright', 'kernel', 'plugin:playwright:playwright'],
83
84
  },
84
85
  {
package/dist/agent/turn-policy.d.ts CHANGED
@@ -5,8 +5,10 @@
5
5
  * small prompt, but any sign of memory dependence, tool work, or ambiguity
6
6
  * promotes the request to a richer path.
7
7
  */
8
+ import { type ToolRouteDecision } from './tool-router.js';
8
9
  import type { IntentClassification } from './intent-classifier.js';
9
10
  export type RetrievalTier = 'none' | 'core' | 'search' | 'full';
11
+ export type TurnExecutionMode = 'local' | 'lightweight_llm' | 'tool_llm' | 'background';
10
12
  export interface TurnPolicy {
11
13
  retrievalTier: RetrievalTier;
12
14
  disableAllTools: boolean;
@@ -27,6 +29,14 @@ export interface TurnPolicyInput {
27
29
  hasRecentContext: boolean;
28
30
  isAutonomous?: boolean;
29
31
  }
32
+ export interface TurnDecision {
33
+ mode: TurnExecutionMode;
34
+ policy: TurnPolicy;
35
+ toolRoute: ToolRouteDecision;
36
+ userVisibleStatus: string;
37
+ reason: string;
38
+ }
30
39
  export declare function isStandaloneGreeting(text: string): boolean;
31
40
  export declare function decideTurnPolicy(input: TurnPolicyInput): TurnPolicy;
41
+ export declare function decideTurn(input: TurnPolicyInput): TurnDecision;
32
42
  //# sourceMappingURL=turn-policy.d.ts.map