npm - clementine-agent - Versions diffs - 1.18.19 → 1.18.21 - Mend

clementine-agent 1.18.19 → 1.18.21

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (37)

package/README.md +17 -0
package/dist/agent/action-enforcer.d.ts +29 -0
package/dist/agent/action-enforcer.js +120 -0
package/dist/agent/assistant.d.ts +14 -0
package/dist/agent/assistant.js +190 -35
package/dist/agent/auto-update.js +46 -2
package/dist/agent/local-turn.d.ts +16 -0
package/dist/agent/local-turn.js +54 -1
package/dist/agent/route-classifier.d.ts +1 -0
package/dist/agent/route-classifier.js +30 -3
package/dist/agent/toolsets.d.ts +14 -0
package/dist/agent/toolsets.js +68 -0
package/dist/brain/ingestion-pipeline.d.ts +7 -0
package/dist/brain/ingestion-pipeline.js +107 -21
package/dist/channels/discord.js +38 -7
package/dist/channels/telegram.js +5 -6
package/dist/cli/dashboard.js +112 -6
package/dist/cli/index.js +174 -0
package/dist/cli/ingest.js +8 -2
package/dist/gateway/context-hygiene.d.ts +17 -0
package/dist/gateway/context-hygiene.js +31 -0
package/dist/gateway/heartbeat-scheduler.d.ts +20 -0
package/dist/gateway/heartbeat-scheduler.js +27 -10
package/dist/gateway/router.d.ts +8 -1
package/dist/gateway/router.js +326 -12
package/dist/gateway/turn-ledger.d.ts +32 -0
package/dist/gateway/turn-ledger.js +55 -0
package/dist/memory/embeddings.d.ts +2 -0
package/dist/memory/embeddings.js +8 -1
package/dist/memory/store.d.ts +88 -1
package/dist/memory/store.js +349 -18
package/dist/memory/write-queue.d.ts +16 -0
package/dist/memory/write-queue.js +5 -0
package/dist/tools/shared.d.ts +89 -0
package/dist/types.d.ts +11 -0
package/package.json +1 -1
package/scripts/postinstall.js +56 -6

package/dist/agent/assistant.js CHANGED

@@ -21,7 +21,7 @@ import { detectFrustrationSignals, detectRepeatedTopics } from './insight-engine
 import { DEFAULT_CHANNEL_CAPABILITIES } from '../types.js';
 import { enforceToolPermissions, getSecurityPrompt, getHeartbeatSecurityPrompt, getCronSecurityPrompt, getHeartbeatDisallowedTools, logToolUse, setProfileTier, setProfileAllowedTools, setAgentDir, setSendPolicy, setInteractionSource, logAuditJsonl, } from './hooks.js';
 import { scanner } from '../security/scanner.js';
-import { agentWorkingMemoryFile, listAllGoals } from '../tools/shared.js';
+import { agentWorkingMemoryFile, capOutput, listAllGoals } from '../tools/shared.js';
 import { AgentManager } from './agent-manager.js';
 import { extractLinks } from './link-extractor.js';
 import { StallGuard } from './stall-guard.js';
@@ -33,6 +33,8 @@ import { searchSkills as searchSkillsSync } from './skill-extractor.js';
 import { classifyIntent, getStrategyGuidance } from './intent-classifier.js';
 import { getEventLog } from './session-event-log.js';
 import { routeToolSurface, TOOL_SURFACE_HARD_LIMIT, TOOL_SURFACE_WARN_THRESHOLD } from './tool-router.js';
+import { isRestrictedToolset, toolsetAllowsLocalWrites } from './toolsets.js';
+import { looksLikeApprovalPrompt } from './local-turn.js';
 import { decideTurn } from './turn-policy.js';
 import { loadClementineJson } from '../config/clementine-json.js';
 import { isCreditBalanceError, markBackgroundCreditBlocked } from '../gateway/credit-guard.js';
@@ -294,9 +296,23 @@ const query = ((args) => {
     }
     return rawQuery(args);
 });
+function parseMemoryTimestampMs(value) {
+    const text = String(value ?? '').trim();
+    if (!text)
+        return NaN;
+    // SQLite datetime('now') returns UTC as "YYYY-MM-DD HH:mm:ss" with no zone.
+    // Parse it explicitly as UTC so summaries don't appear hours in the future.
+    if (/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$/.test(text)) {
+        return Date.parse(`${text.replace(' ', 'T')}Z`);
+    }
+    return Date.parse(text);
+}
 /** Format a millisecond duration as a human-friendly "X ago" string. */
-function formatTimeAgo(ms) {
-    const minutes = Math.floor(ms / 60_000);
+export function formatTimeAgo(ms) {
+    const safeMs = Number.isFinite(ms) ? Math.max(0, ms) : 0;
+    if (safeMs < 60_000)
+        return 'just now';
+    const minutes = Math.floor(safeMs / 60_000);
     if (minutes < 60)
         return `${minutes}m ago`;
     const hours = Math.floor(minutes / 60);
@@ -311,6 +327,11 @@ function formatTimeAgo(ms) {
 const CONTEXT_GUARD_MIN_TOKENS = 16_000;
 /** Warn threshold — context is getting tight. */
 const CONTEXT_GUARD_WARN_TOKENS = 32_000;
+const PENDING_CONTEXT_USER_MAX_CHARS = 1000;
+const PENDING_CONTEXT_ASSISTANT_MAX_CHARS = 3000;
+const CRON_PROGRESS_NOTES_MAX_CHARS = 2000;
+const CRON_PROGRESS_PENDING_MAX_ITEMS = 20;
+const CRON_PROGRESS_ITEM_MAX_CHARS = 300;
 /** Rotate SDK sessions before hidden resume history approaches the 200K cap. */
 const SESSION_ROTATE_INPUT_TOKENS = 140_000;
 /** Approximate context window sizes by model family. */
@@ -328,6 +349,12 @@ function getContextWindow(model) {
     }
     return 200_000; // safe default
 }
+function capContextBlock(text, maxChars) {
+    return capOutput(String(text ?? ''), maxChars);
+}
+function capContextItem(text) {
+    return capContextBlock(text, CRON_PROGRESS_ITEM_MAX_CHARS).replace(/\s+/g, ' ').trim();
+}
 function resultInputTokens(result) {
     let total = 0;
     const modelUsage = result.modelUsage;
@@ -343,6 +370,15 @@ function resultInputTokens(result) {
 export function looksLikeOneMillionContextError(value) {
     return looksLikeClaudeOneMillionContextError(value);
 }
+export function oneMillionContextRecoveryMessage() {
+    return "Claude rejected 1M context for this account. I've switched Clementine to persistent 200K recovery mode and reset the session. Restart Clementine once so every background worker starts with the same safe setting.";
+}
+export function looksLikeProviderApiErrorResponse(value) {
+    const text = String(value ?? '').trim();
+    return /^api error:/i.test(text)
+        || /^error:\s*api error:/i.test(text)
+        || looksLikeOneMillionContextError(text);
+}
 export function looksLikeNoResponseRequested(value) {
     const text = String(value ?? '').trim();
     return /^no response requested\.?$/i.test(text);
@@ -1310,6 +1346,10 @@ export class PersonalAssistant {
     getExchangeCount(sessionKey) {
         return this.exchangeCounts.get(sessionKey) ?? 0;
     }
+    hasRecentApprovalPrompt(sessionKey) {
+        const lastAssistant = this.lastExchanges.get(sessionKey)?.at(-1)?.assistant ?? '';
+        return looksLikeApprovalPrompt(lastAssistant);
+    }
     getMemoryChunkCount() {
         if (!this.memoryStore)
             return 0;
@@ -1941,7 +1981,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
     }
     // ── Build SDK Options ─────────────────────────────────────────────
     async buildOptions(opts = {}) {
-        const { isHeartbeat = false, cronTier = null, maxTurns = null, model = null, enableTeams = true, retrievalContext = '', profile = null, sessionKey = null, streaming = false, isPlanStep = false, isUnleashed = false, sourceOverride, disableAllTools = false, verboseLevel, abortController, effort, maxBudgetUsd, toolScopeText, thinking, outputFormat, stallGuard, intentClassification, turnPolicy, contextRoutingText, } = opts;
+        const { isHeartbeat = false, cronTier = null, maxTurns = null, model = null, enableTeams = true, retrievalContext = '', profile = null, sessionKey = null, streaming = false, isPlanStep = false, isUnleashed = false, sourceOverride, disableAllTools = false, verboseLevel, abortController, effort, maxBudgetUsd, toolScopeText, thinking, outputFormat, stallGuard, intentClassification, turnPolicy, contextRoutingText, toolset = 'auto', } = opts;
         const isCron = cronTier !== null;
         const toolsDisabledForCall = disableAllTools || (isHeartbeat && !isCron);
         const promptScopeText = toolScopeText ?? '';
@@ -1992,7 +2032,27 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         const safeContextToolRoute = allowContextToolRoute && !contextToolRoute.fullSurface
             ? contextToolRoute
             : emptyToolRoute();
-        const toolRoute = mergeToolRoutes(promptToolRoute, mergeToolRoutes(safeProfileToolRoute, safeContextToolRoute));
+        let toolRoute = mergeToolRoutes(promptToolRoute, mergeToolRoutes(safeProfileToolRoute, safeContextToolRoute));
+        if (toolset === 'full') {
+            toolRoute = {
+                bundles: [],
+                externalMcpServers: undefined,
+                composioToolkits: undefined,
+                inheritFullClaudeEnv: true,
+                fullSurface: true,
+                reason: 'full_surface',
+            };
+        }
+        else if (isRestrictedToolset(toolset)) {
+            toolRoute = {
+                ...toolRoute,
+                bundles: [],
+                externalMcpServers: [],
+                composioToolkits: [],
+                inheritFullClaudeEnv: false,
+                fullSurface: false,
+            };
+        }
         let allowedTools = [];
         const addAllowed = (...tools) => {
             for (const tool of tools) {
@@ -2012,9 +2072,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         const memoryNeeded = autonomousToolRun
             || retrievalContext.trim().length > 0
             || (turnPolicy?.retrievalTier !== undefined && turnPolicy.retrievalTier !== 'none');
-        const localReadNeeded = taskIntent || /\b(repo|repository|code|file|files|folder|directory|path|log|logs|config|read|show|grep|diff|search)\b/i.test(promptScopeLower);
-        const localWriteNeeded = taskIntent || /\b(write|edit|fix|implement|refactor|build|test|run|npm|git|commit|push|pull|deploy|install|configure)\b/i.test(promptScopeLower);
-        const adminNeeded = toolRoute.fullSurface || /\b(self[- ]?update|restart|daemon|doctor|env|credential|integration|setup|set up|configure|npm publish|publish to npm)\b/i.test(promptScopeLower);
+        const localReadNeeded = taskIntent || toolset === 'diagnostic' || /\b(repo|repository|code|file|files|folder|directory|path|log|logs|config|read|show|grep|diff|search)\b/i.test(promptScopeLower);
+        const diagnosticCommandNeeded = toolset === 'diagnostic'
+            && /\b(run|test|npm|pnpm|yarn|node|git|logs?|tail|ps|status|diagnos(?:e|tic)|check)\b/i.test(promptScopeLower);
+        const localWriteNeeded = diagnosticCommandNeeded
+            || (toolsetAllowsLocalWrites(toolset) && (taskIntent || /\b(write|edit|fix|implement|refactor|build|test|run|npm|git|commit|push|pull|deploy|install|configure)\b/i.test(promptScopeLower)));
+        const adminNeeded = toolRoute.fullSurface
+            || (toolsetAllowsLocalWrites(toolset) && /\b(self[- ]?update|restart|daemon|doctor|env|credential|integration|setup|set up|configure|npm publish|publish to npm)\b/i.test(promptScopeLower));
         if (!toolsDisabledForCall) {
             if (toolRoute.fullSurface) {
                 addAllowed('Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep', 'WebSearch', 'WebFetch');
@@ -2023,8 +2087,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             else {
                 if (localReadNeeded)
                     addAllowed('Read', 'Glob', 'Grep');
-                if (localWriteNeeded)
-                    addAllowed('Write', 'Edit', 'Bash');
+                if (localWriteNeeded) {
+                    if (toolset === 'diagnostic')
+                        addAllowed('Bash');
+                    else
+                        addAllowed('Write', 'Edit', 'Bash');
+                }
                 if (toolRoute.bundles.includes('web_research') || toolRoute.bundles.includes('docs_lookup')) {
                     addAllowed('WebSearch', 'WebFetch');
                 }
@@ -2032,7 +2100,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     addClementineTools(CLEMENTINE_CORE_TOOL_NAMES);
                     addClementineTools(CLEMENTINE_RELATIONSHIP_TOOL_NAMES);
                 }
-                if (taskIntent || intentClassification?.type === 'correction') {
+                const clementineMemoryWritesAllowed = toolset === 'auto'
+                    || toolset === 'full'
+                    || toolset === 'communications'
+                    || intentClassification?.type === 'feedback'
+                    || intentClassification?.type === 'correction';
+                if ((taskIntent || intentClassification?.type === 'correction') && clementineMemoryWritesAllowed) {
                     addClementineTools(CLEMENTINE_MEMORY_WRITE_TOOL_NAMES);
                     addClementineTools(CLEMENTINE_WORKSPACE_TOOL_NAMES);
                 }
@@ -2049,20 +2122,22 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     addClementineTools(CLEMENTINE_INTEGRATION_TOOL_NAMES);
                     addClementineTools(CLEMENTINE_ADMIN_TOOL_NAMES);
                 }
-                if (toolRoute.bundles.includes('email_outlook') || /\b(outlook|email|mailbox|inbox|calendar|follow-?up)\b/i.test(scopeText)) {
+                if ((toolset === 'auto' || toolset === 'full' || toolset === 'communications')
+                    && (toolRoute.bundles.includes('email_outlook') || /\b(outlook|email|mailbox|inbox|calendar|follow-?up)\b/i.test(scopeText))) {
                     addClementineTools(CLEMENTINE_COMM_TOOL_NAMES);
                 }
-                if (toolRoute.bundles.includes('github') || toolRoute.bundles.includes('browser') || toolRoute.bundles.includes('web_research')) {
+                if ((toolset === 'auto' || toolset === 'full')
+                    && (toolRoute.bundles.includes('github') || toolRoute.bundles.includes('browser') || toolRoute.bundles.includes('web_research'))) {
                     addClementineTools(CLEMENTINE_RESEARCH_TOOL_NAMES);
                 }
-                if (enableTeams) {
+                if (enableTeams && (toolset === 'auto' || toolset === 'full')) {
                     addAllowed('Task', 'Agent');
                     addClementineTools(CLEMENTINE_TEAM_TOOL_NAMES);
                     addClementineTools(CLEMENTINE_JOB_TOOL_NAMES);
                 }
             }
             // Include local user scripts/plugins for task-like or explicit full-surface turns.
-            if (taskIntent || toolRoute.fullSurface || adminNeeded) {
+            if (toolsetAllowsLocalWrites(toolset) && (taskIntent || toolRoute.fullSurface || adminNeeded)) {
                 try {
                     const toolsDir = path.join(BASE_DIR, 'tools');
                     const pluginsDir = path.join(BASE_DIR, 'plugins');
@@ -2405,6 +2480,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             isolateClaudeConfig,
             inheritFullClaudeEnv: shouldInheritClaudeEnv,
             maxBudgetUsd: enforcedBudget,
+            toolset,
             isCron,
             cronTier,
             isPlanStep,
@@ -2797,6 +2873,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         const projectOverride = options?.projectOverride;
         const verboseLevel = options?.verboseLevel;
         const abortController = options?.abortController;
+        const toolset = options?.toolset ?? 'auto';
         const key = sessionKey ?? undefined;
         this._lastUserMessage = text;
         let sessionRotated = false;
@@ -2897,11 +2974,14 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             const exchanges = this.lastExchanges.get(key) ?? [];
             if (exchanges.length === 0 && this.memoryStore) {
                 try {
-                    const recentSummaries = this.memoryStore.getRecentSummaries(1);
+                    const recentSummaries = typeof this.memoryStore.getRecentSummariesForSession === 'function'
+                        ? this.memoryStore.getRecentSummariesForSession(key, 1)
+                        : this.memoryStore.getRecentSummaries(5).filter((s) => s.sessionKey === key).slice(0, 1);
                     if (recentSummaries.length > 0) {
                         const last = recentSummaries[0];
-                        const ageMs = Date.now() - new Date(last.createdAt).getTime();
-                        if (ageMs < 7 * 24 * 60 * 60 * 1000) { // within 7 days
+                        const createdAtMs = parseMemoryTimestampMs(last.createdAt);
+                        const ageMs = Date.now() - createdAtMs;
+                        if (Number.isFinite(ageMs) && ageMs >= -5 * 60_000 && ageMs < 7 * 24 * 60 * 60 * 1000) { // within 7 days
                             const ago = formatTimeAgo(ageMs);
                             effectivePrompt =
                                 `[Last conversation (${ago}):\n${last.summary.slice(0, 600)}]\n\n` +
@@ -2937,7 +3017,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             if (allPending.length > 0) {
                 const contextLines = [];
                 for (const ctx of allPending) {
-                    contextLines.push(`[${ctx.user}]\n${ctx.assistant}`);
+                    const user = capContextBlock(ctx.user, PENDING_CONTEXT_USER_MAX_CHARS);
+                    const assistant = capContextBlock(ctx.assistant, PENDING_CONTEXT_ASSISTANT_MAX_CHARS);
+                    contextLines.push(`[${user}]\n${assistant}`);
                 }
                 effectivePrompt =
                     `[Since we last talked, you did some background work. Naturally mention what happened — lead with anything that needs attention, briefly note routine completions. Don't dump raw tool calls or list job names. Be conversational.\nBackground:\n${contextLines.join('\n\n')}]\n\n${effectivePrompt}`;
@@ -2966,7 +3048,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         const effectiveMaxTurns = maxTurns ?? turnPolicy.maxTurns;
         const CHAT_TIMEOUT_MS = 30 * 60 * 1000;
         const guard = new StallGuard();
-        let [responseText, sessionId] = await this.runQuery(effectivePrompt, key, onText, model, profile, securityAnnotation, effectiveMaxTurns, projectOverride, onToolActivity, verboseLevel, abortController, guard, CHAT_TIMEOUT_MS, intent, turnPolicy);
+        let [responseText, sessionId] = await this.runQuery(effectivePrompt, key, onText, model, profile, securityAnnotation, effectiveMaxTurns, projectOverride, onToolActivity, verboseLevel, abortController, guard, CHAT_TIMEOUT_MS, intent, turnPolicy, toolset);
         // If we got a context-length / prompt-too-long error, retry with a fresh session
         const errLower = responseText.toLowerCase();
         const isContextOverflow = errLower.includes('prompt is too long') ||
@@ -2987,12 +3069,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                         `If this task involves pulling data for multiple entities, delegate each to a sub-agent using the Agent tool ` +
                         `instead of calling data-heavy tools directly.\n\n${text}`;
             }
-            [responseText, sessionId] = await this.runQuery(retryPrompt, key, onText, model, profile, securityAnnotation, maxTurns, undefined, onToolActivity, verboseLevel, abortController, undefined, CHAT_TIMEOUT_MS, intent, turnPolicy);
+            [responseText, sessionId] = await this.runQuery(retryPrompt, key, onText, model, profile, securityAnnotation, maxTurns, undefined, onToolActivity, verboseLevel, abortController, undefined, CHAT_TIMEOUT_MS, intent, turnPolicy, toolset);
         }
         // Track exchange count, timestamp, and last exchange.
         // Never store API error responses — they poison session history and create
         // a self-reinforcing loop where every subsequent request replays the errors.
-        const isApiError = responseText.startsWith('Error:') && responseText.includes('API Error:');
+        const isApiError = looksLikeProviderApiErrorResponse(responseText);
         if (key && !isApiError) {
             this.exchangeCounts.set(key, (this.exchangeCounts.get(key) ?? 0) + 1);
             this.sessionTimestamps.set(key, new Date());
@@ -3081,7 +3163,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
     // ── Run Query ─────────────────────────────────────────────────────
     static RATE_LIMIT_MAX_RETRIES = 3;
     static RATE_LIMIT_BACKOFF = [5000, 15000, 30000];
-    async runQuery(prompt, sessionKey, onText, model, profile, securityAnnotation, maxTurnsOverride, projectOverride, onToolActivity, verboseLevel, abortController, stallGuard, timeoutMs, intentClassification, turnPolicy) {
+    async runQuery(prompt, sessionKey, onText, model, profile, securityAnnotation, maxTurnsOverride, projectOverride, onToolActivity, verboseLevel, abortController, stallGuard, timeoutMs, intentClassification, turnPolicy, toolset = 'auto') {
         // Parallelize context retrieval and project matching — they're independent
         // If a project override is set, skip auto-matching entirely
         const hasActiveSession = !!(sessionKey && this.sessions.has(sessionKey));
@@ -3188,6 +3270,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     intentClassification,
                     turnPolicy: effectiveTurnPolicy,
                     effort: effectiveTurnPolicy?.effort ?? intentClassification?.suggestedEffort,
+                    toolset,
                     // Route destructive/admin/local write decisions from the direct user
                     // request only. Retrieved memory may still contribute integration
                     // continuity via contextRoutingText, but stale memories should not
@@ -3432,7 +3515,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                                             this.exchangeCounts.set(sessionKey, 0);
                                             this._compactedSessions.delete(sessionKey);
                                         }
-                                        responseText = responseText || ("Claude rejected 1M context for this account. I've switched Clementine to persistent 200K recovery mode and reset the session. Restart Clementine once so every background worker starts with the same safe setting.");
+                                        responseText = responseText || (oneMillionContextRecoveryMessage());
                                     }
                                     else if (lower.includes('rate') && lower.includes('limit')) {
                                         hitRateLimit = true;
@@ -3485,7 +3568,19 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                             else if ('result' in result && result.result) {
                                 // Success: use SDK result text if streaming didn't capture a substantive response
                                 const sdkResult = result.result;
-                                if (looksLikeContextThrashText(sdkResult)) {
+                                if (looksLikeOneMillionContextError(sdkResult)) {
+                                    logger.warn({ sessionKey }, '1M context error surfaced as SDK result text — forcing recovery');
+                                    applyOneMillionContextRecovery();
+                                    if (sessionKey) {
+                                        this.sessions.delete(sessionKey);
+                                        this.exchangeCounts.set(sessionKey, 0);
+                                        this._compactedSessions.delete(sessionKey);
+                                    }
+                                    responseText = oneMillionContextRecoveryMessage();
+                                    if (onText)
+                                        await onText(responseText);
+                                }
+                                else if (looksLikeContextThrashText(sdkResult)) {
                                     logger.warn({ sessionKey }, 'Autocompact thrashing surfaced as SDK result text — rotating session');
                                     preRotationSnapshot = {
                                         toolCalls: stallGuard?.getToolCalls() ?? [],
@@ -3563,7 +3658,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                             this.exchangeCounts.set(sessionKey, 0);
                             this._compactedSessions.delete(sessionKey);
                         }
-                        responseText = responseText || ("Claude rejected 1M context for this account. I've switched Clementine to persistent 200K recovery mode and reset the session. Restart Clementine once so every background worker starts with the same safe setting.");
+                        responseText = responseText || (oneMillionContextRecoveryMessage());
                     }
                     else if (errStr.includes('rate') && (errStr.includes('limit') || errStr.includes('rate_limit'))) {
                         hitRateLimit = true;
@@ -3960,18 +4055,20 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
      *
      * No LLM call — uses buildLocalSummary for instant summarization.
      */
-    compactContext(sessionKey) {
-        const summary = this.buildLocalSummary(sessionKey);
+    compactContext(sessionKey, reason = 'context_guard') {
+        const summary = this.buildStructuredCompactionSummary(sessionKey);
         if (!summary)
-            return;
+            return null;
         // Build compaction block for working memory
         const exchangeCount = this.exchangeCounts.get(sessionKey) ?? 0;
+        const parentSessionId = this.sessions.get(sessionKey) ?? null;
         const COMPACTION_START = '<!-- COMPACTION_START -->';
         const COMPACTION_END = '<!-- COMPACTION_END -->';
         const compactionBlock = [
             COMPACTION_START,
             `## Session Compaction (auto-generated)`,
             `Session ${sessionKey} compacted at ${exchangeCount} exchanges.`,
+            `Reason: ${reason}.`,
             ``,
             summary,
             ``,
@@ -4009,6 +4106,20 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         catch {
             // If working memory write fails, still rotate — better than hitting the hard limit
         }
+        try {
+            this.memoryStore?.saveSessionSummary?.(sessionKey, summary, exchangeCount);
+            this.memoryStore?.recordSessionLineage?.({
+                sessionKey,
+                parentSessionId,
+                childSessionId: null,
+                reason,
+                summary,
+                exchangeCount,
+            });
+        }
+        catch {
+            // Durable lineage is helpful, not required for compaction safety.
+        }
         // Rotate session — clear the session ID so next query starts fresh
         // The working memory summary will provide continuity
         this.sessions.delete(sessionKey);
@@ -4017,6 +4128,14 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         this.sessionTimestamps.delete(sessionKey);
         this.stallNudges.delete(sessionKey);
         this.saveSessions();
+        return summary;
+    }
+    compactSessionForGateway(sessionKey, reason = 'gateway_preflight') {
+        const exchangeCount = this.exchangeCounts.get(sessionKey) ?? 0;
+        const summary = this.compactContext(sessionKey, reason);
+        return summary
+            ? { compacted: true, exchangeCount, summary, reason }
+            : { compacted: false, exchangeCount, reason };
     }
     /**
      * Expire sessions inactive for more than 24 hours.
@@ -4038,7 +4157,39 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
      * to avoid blocking the user's query.
      */
     buildLocalSummary(sessionKey) {
-        return this.buildLocalSummaryFromTurns(this.lastExchanges.get(sessionKey) ?? []);
+        let exchanges = this.lastExchanges.get(sessionKey) ?? [];
+        if (exchanges.length === 0 && this.memoryStore && typeof this.memoryStore.getTranscriptTail === 'function') {
+            try {
+                const recent = this.memoryStore.getTranscriptTail(sessionKey, 0, SESSION_EXCHANGE_HISTORY_SIZE * 2);
+                exchanges = this.pairTranscriptTurns(recent ?? []);
+            }
+            catch {
+                exchanges = [];
+            }
+        }
+        return this.buildLocalSummaryFromTurns(exchanges);
+    }
+    buildStructuredCompactionSummary(sessionKey) {
+        const exchanges = this.lastExchanges.get(sessionKey) ?? [];
+        const summary = this.buildLocalSummary(sessionKey);
+        if (!summary)
+            return '';
+        const latest = exchanges.at(-1);
+        const lastUser = latest?.user
+            ? latest.user.slice(0, 400).replace(/\s+/g, ' ')
+            : '';
+        const continuity = [
+            '- Exact details remain in transcripts; use transcript_search before relying on this handoff for names, dates, IDs, files, or sent-message status.',
+            '- Keep tool outputs bounded and prefer targeted reads over full log dumps.',
+            lastUser ? `- Last visible user request: ${lastUser}` : '',
+        ].filter(Boolean);
+        return [
+            '### Recent Conversation',
+            summary,
+            '',
+            '### Continuity Notes',
+            continuity.join('\n'),
+        ].join('\n');
     }
     buildLocalSummaryFromTurns(turns, opts) {
         if (turns.length === 0)
@@ -4950,13 +5101,17 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 const progress = JSON.parse(fs.readFileSync(progressFile, 'utf-8'));
                 const parts = [`## Previous Progress (run #${progress.runCount}, ${progress.lastRunAt})`];
                 if (progress.completedItems?.length > 0) {
-                    parts.push(`Completed: ${progress.completedItems.slice(-10).join(', ')}`);
+                    parts.push(`Completed: ${progress.completedItems.slice(-10).map(capContextItem).join(', ')}`);
                 }
                 if (progress.pendingItems?.length > 0) {
-                    parts.push(`Pending: ${progress.pendingItems.join(', ')}`);
+                    const pendingItems = progress.pendingItems.slice(0, CRON_PROGRESS_PENDING_MAX_ITEMS).map(capContextItem);
+                    const suffix = progress.pendingItems.length > CRON_PROGRESS_PENDING_MAX_ITEMS
+                        ? ` (${progress.pendingItems.length - CRON_PROGRESS_PENDING_MAX_ITEMS} more omitted)`
+                        : '';
+                    parts.push(`Pending: ${pendingItems.join(', ')}${suffix}`);
                 }
                 if (progress.notes) {
-                    parts.push(`Notes: ${progress.notes}`);
+                    parts.push(`Notes: ${capContextBlock(progress.notes, CRON_PROGRESS_NOTES_MAX_CHARS)}`);
                 }
                 progressContext = parts.join('\n') + '\n\n' +
                     'Continue from where you left off. Use `cron_progress_write` at the end to save what you completed and what\'s pending.\n\n';
@@ -5978,8 +6133,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
      * so follow-up conversation has context.
      */
     injectContext(sessionKey, userText, assistantText) {
-        const trimmedUser = userText.slice(0, INJECTED_CONTEXT_MAX_CHARS);
-        const trimmedAssistant = assistantText.slice(0, INJECTED_CONTEXT_MAX_CHARS);
+        const trimmedUser = capContextBlock(userText, INJECTED_CONTEXT_MAX_CHARS);
+        const trimmedAssistant = capContextBlock(assistantText, INJECTED_CONTEXT_MAX_CHARS);
         // Add to in-memory exchange history
         const history = this.lastExchanges.get(sessionKey) ?? [];
         history.push({ user: trimmedUser, assistant: trimmedAssistant });

package/dist/agent/auto-update.js CHANGED Viewed

@@ -5,14 +5,41 @@
  * Source modifications from self-improve are tracked in ~/.clementine/ (not git),
  * so git pull is always clean. After pulling, source mods are reconciled.
  */
-import { execSync } from 'node:child_process';
-import { writeFileSync } from 'node:fs';
+import { execFileSync, execSync } from 'node:child_process';
+import { existsSync, readFileSync, writeFileSync } from 'node:fs';
 import path from 'node:path';
 import pino from 'pino';
 import { BASE_DIR } from '../config.js';
 import { reconcileSourceMods } from './source-mods.js';
 const logger = pino({ name: 'clementine.auto-update' });
 const SENTINEL_PATH = path.join(BASE_DIR, '.restart-sentinel.json');
+function readDataEnv() {
+    const envPath = path.join(BASE_DIR, '.env');
+    if (!existsSync(envPath))
+        return {};
+    try {
+        return Object.fromEntries(readFileSync(envPath, 'utf-8')
+            .split(/\r?\n/)
+            .map((line) => line.trim())
+            .filter((line) => line && !line.startsWith('#') && line.includes('='))
+            .map((line) => {
+            const idx = line.indexOf('=');
+            return [line.slice(0, idx).trim(), line.slice(idx + 1).trim().replace(/^["']|["']$/g, '')];
+        }));
+    }
+    catch {
+        return {};
+    }
+}
+function flagEnabled(name, envFile) {
+    const raw = process.env[name] ?? envFile[name];
+    return /^(1|true|yes|on)$/i.test(String(raw ?? ''));
+}
+function shouldPrefetchEmbeddings() {
+    const envFile = readDataEnv();
+    return flagEnabled('CLEMENTINE_INSTALL_EMBEDDINGS', envFile)
+        || flagEnabled('CLEMENTINE_PREFETCH_EMBEDDINGS', envFile);
+}
 /**
  * Check if upstream has new commits. Safe to call from cron — no side effects.
  */
@@ -121,6 +148,23 @@ export async function applyUpdate(pkgDir) {
             logger.error({ err }, 'Build failed after update');
             return { success: false, error: `Build failed after update: ${String(err)}` };
         }
+        // 4b. Optional embedding model prefetch. npm postinstall may run before
+        // the freshly pulled TypeScript has been built; this second pass uses the
+        // just-built CLI so repo updates and npm-style updates behave the same.
+        if (shouldPrefetchEmbeddings()) {
+            try {
+                execFileSync(process.execPath, [path.join(pkgDir, 'dist', 'cli', 'index.js'), 'memory', 'model', 'install'], {
+                    cwd: pkgDir,
+                    stdio: 'pipe',
+                    env: { ...process.env, CLEMENTINE_HOME: BASE_DIR },
+                    timeout: 10 * 60_000,
+                });
+                logger.info('Local embedding model prefetch succeeded after update');
+            }
+            catch (err) {
+                logger.warn({ err }, 'Local embedding model prefetch failed after update');
+            }
+        }
         // 5. Reconcile source modifications
         const reconcileResult = reconcileSourceMods(pkgDir);
         logger.info({

package/dist/agent/local-turn.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { ClementineJson } from '../config/clementine-json.js';
+import { type ToolsetName } from './toolsets.js';
 export type ProactivityMode = 'quiet' | 'balanced' | 'proactive' | 'operator';
 export type ResponseStyle = 'concise' | 'balanced' | 'detailed';
 export type ProgressVisibility = 'quiet' | 'normal' | 'detailed';
@@ -19,14 +20,29 @@ export type LocalTurnIntent = {
     kind: 'stop';
 } | {
     kind: 'status';
+} | {
+    kind: 'last_action';
+} | {
+    kind: 'compress_context';
+} | {
+    kind: 'debug_status';
+} | {
+    kind: 'toolset';
+    toolset: ToolsetName;
 } | {
     kind: 'preference_update';
     updates: AssistantExperienceUpdate;
     summary: string;
 };
+export type ApprovalReply = true | false | 'always' | null;
 export declare function isStopRequest(text: string): boolean;
 export declare function isStatusRequest(text: string): boolean;
+export declare function isLastActionRequest(text: string): boolean;
+export declare function isCompressContextRequest(text: string): boolean;
+export declare function isDebugStatusRequest(text: string): boolean;
 export declare function isTinyAcknowledgment(text: string): boolean;
+export declare function detectApprovalReply(text: string): ApprovalReply;
+export declare function looksLikeApprovalPrompt(text: string): boolean;
 export declare function detectLocalTurn(text: string): LocalTurnIntent;
 export declare function applyAssistantExperienceUpdate(cfg: ClementineJson, updates: AssistantExperienceUpdate): ClementineJson;
 //# sourceMappingURL=local-turn.d.ts.map

package/dist/agent/local-turn.js CHANGED Viewed

@@ -1,8 +1,10 @@
 import { isStandaloneGreeting } from './turn-policy.js';
+import { normalizeToolsetName } from './toolsets.js';
 /**
  * Canonicalize user text before it is matched against the short-command
  * patterns in this module.
  *
  * Steps: trim, lowercase, map curly single quotes and backticks to a plain
  * apostrophe, drop trailing sentence punctuation together with any whitespace
  * around it, then collapse internal whitespace runs to single spaces.
  *
  * Trailing whitespace is removed in the same pass as the punctuation so that
  * input such as "status !" normalizes to "status" rather than "status " —
  * a dangling space would defeat the `^...$` anchored matchers.
  *
  * @param {string} text - Raw user input.
  * @returns {string} The normalized text.
  */
 function normalize(text) {
     return text
         .trim()
         .toLowerCase()
         .replace(/[\u2018\u2019`]/g, "'")
         .replace(/[\s.!?]+$/, '')
         .replace(/\s+/g, ' ');
 }
@@ -20,7 +22,31 @@ export function isStatusRequest(text) {
     const n = normalize(text);
     if (wordCount(n) > 8)
         return false;
-    return /^(status|task status|deep status|progress|what'?s happening|what'?s going on|what are you doing|are you working|anything running|what'?s running|background status|check status|where are we)$/.test(n);
+    return /^(status|task status|deep status|progress|what'?s happening|what'?s going on|what are you doing|what are you working on|what are you running|are you working|anything running|what'?s runnin?g?(?: now| right now)?|what is runnin?g?(?: now| right now)?|background status|check status|where are we)$/.test(n);
+}
+export function isLastActionRequest(text) {
+    const n = normalize(text);
+    if (wordCount(n) > 10)
+        return false;
+    return /^(last action|last turn|what happened last turn|what did you do|did you do it|did that actually run|did you actually do it|why didn'?t you do it|why did that not run|what happened)$/.test(n);
+}
+export function isCompressContextRequest(text) {
+    const n = normalize(text);
+    if (wordCount(n) > 8)
+        return false;
+    return /^(compress context|compact context|compress session|compact session|context compact|context compress|save and reset context|reset context but keep memory)$/.test(n);
+}
+export function isDebugStatusRequest(text) {
+    const n = normalize(text);
+    if (wordCount(n) > 6)
+        return false;
+    return /^(debug|debug status|session debug|agent debug|diagnostics|show diagnostics)$/.test(n);
+}
+function parseToolsetRequest(text) {
+    const n = normalize(text);
+    const match = n.match(/^(?:set |switch |use |enable )?(?:toolset|tool set|tools mode|tool mode)(?: to|:)? ([a-z _-]+)$/)
+        ?? n.match(/^toolset ([a-z _-]+)$/);
+    return match ? normalizeToolsetName(match[1]) : null;
 }
 export function isTinyAcknowledgment(text) {
     const n = normalize(text);
@@ -28,6 +54,24 @@ export function isTinyAcknowledgment(text) {
         return false;
     return /^(thanks|thank you|thx|ty|nice|great|perfect|awesome|cool|ok|okay|sounds good|got it|makes sense|love it)$/.test(n);
 }
+export function detectApprovalReply(text) {
+    const n = normalize(text);
+    if (wordCount(n) > 4)
+        return null;
+    if (/^(always)$/.test(n))
+        return 'always';
+    if (/^(no|nope|deny|denied|skip)$/.test(n))
+        return false;
+    if (/^(yes|y|yep|yeah|ok|okay|approve|approved|go|go ahead|do it|send it|perfect|sounds good|looks good|lgtm)$/.test(n)) {
+        return true;
+    }
+    return null;
+}
+export function looksLikeApprovalPrompt(text) {
+    const n = normalize(text);
+    return /\b(good to go|okay to send|ok to send|ready to send|should i send|want me to send|approve|confirm|fire it off)\b/.test(n)
+        || /\b(send|email|message|post|publish|delete|change|update|run|execute)\b[\s\S]{0,120}\?$/i.test(text.trim());
+}
 function parseProactivity(text) {
     if (/\b(operator mode|operator)\b/i.test(text))
         return 'operator';
@@ -71,6 +115,15 @@ export function detectLocalTurn(text) {
         return { kind: 'stop' };
     if (isStatusRequest(text))
         return { kind: 'status' };
+    if (isLastActionRequest(text))
+        return { kind: 'last_action' };
+    if (isCompressContextRequest(text))
+        return { kind: 'compress_context' };
+    if (isDebugStatusRequest(text))
+        return { kind: 'debug_status' };
+    const toolset = parseToolsetRequest(text);
+    if (toolset)
+        return { kind: 'toolset', toolset };
     if (isStandaloneGreeting(text))
         return { kind: 'greeting' };
     if (isTinyAcknowledgment(text))