npm - clementine-agent - Versions diffs - 1.0.66 → 1.0.68 - Mend

clementine-agent 1.0.66 → 1.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/agent/assistant.js +91 -57
package/package.json +1 -1

package/dist/agent/assistant.js CHANGED Viewed

@@ -174,6 +174,55 @@ function stripLoneSurrogates(s) {
     // Replace any surrogate not properly paired with the Unicode replacement char
     return s.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, '\uFFFD');
 }
+/**
+ * Build the retry prompt sent after an autocompact-rotation so the fresh
+ * session carries mid-task state forward: it restates the output rules,
+ * lists the tools already called this turn (so they are not redone), quotes
+ * the tail of any partial reply, and ends with the original request.
+ *
+ * @param {string} originalPrompt - The user request being retried.
+ * @param {{toolCalls?: string[], partialText?: string}|null} snapshot -
+ *   State captured before rotation; null or missing fields count as empty.
+ * @returns {string} The newline-joined recovery prompt.
+ */
+function buildContextRecoveredPrompt(originalPrompt, snapshot) {
+    const parts = [
+        '[CONTEXT RECOVERED] Your previous session was rotated because tool outputs filled the context window. A fresh session has been started.',
+        '',
+        '**Rules for this session (non-negotiable):**',
+        '- Add `LIMIT 20` to every SQL query unless you need a count (use `SELECT COUNT(*)`).',
+        '- Pipe long Bash / API / log output through `head -50` (or redirect to a file and read the path in a later turn).',
+        '- Break multi-entity work into batches of ≤ 20 and deliver partial results between batches.',
+    ];
+    // Tolerate a null snapshot or one with missing / malformed fields.
+    const toolCalls = Array.isArray(snapshot?.toolCalls) ? snapshot.toolCalls.map(String) : [];
+    const partialText = String(snapshot?.partialText ?? '').trim();
+    if (toolCalls.length > 0) {
+        // Collapse each call to its tool name (text before the first "(");
+        // a Set de-duplicates while preserving first-seen order.
+        const uniqueTools = [...new Set(toolCalls.map((c) => c.split('(', 1)[0].trim()))];
+        const recent = toolCalls.slice(-20);
+        parts.push('');
+        parts.push('**Progress from the rotated session (DO NOT repeat these calls):**');
+        parts.push(`- ${toolCalls.length} tool calls across ${uniqueTools.length} distinct tools: ${uniqueTools.slice(0, 12).join(', ')}${uniqueTools.length > 12 ? ', …' : ''}`);
+        // Only the tail of the sequence is listed, so label it as such
+        // rather than as the total.
+        parts.push(`- Most recent calls (last ${recent.length}): ${recent.join(' → ')}`);
+    }
+    if (partialText.length > 0) {
+        // Keep the END of the partial reply (the point to resume from) and
+        // cap it before quoting so "> " prefixes cannot push the tail out.
+        const tail = partialText.slice(-1000);
+        parts.push('');
+        parts.push(`**Partial response you had already started (last ${tail.length} chars):**`);
+        parts.push(`> ${tail.replace(/\n/g, '\n> ')}`);
+        parts.push('Continue from where that left off — don\'t restart the reasoning.');
+    }
+    parts.push('');
+    parts.push('**Original user request to continue working on:**');
+    parts.push(originalPrompt);
+    return parts.join('\n');
+}
 /**
  * Wrapper around the SDK's query() that sanitizes lone Unicode surrogates in
  * prompt, systemPrompt, and appendSystemPrompt. Covers every call site in one
@@ -961,6 +1010,23 @@ export class PersonalAssistant {
                 parts.push(isAutonomous ? soulEntry.content.slice(0, 1500) : soulEntry.content);
             }
         }
+        // Universal output discipline — applies to Clementine AND every team agent.
+        // Autocompact thrashing (SDK mid-turn session rotation from too-large
+        // tool outputs) is almost always caused by unbounded Bash / SQL / API
+        // responses filling the context window. The `[CONTEXT RECOVERED]`
+        // prefix already tells agents these rules, but only AFTER thrash. This
+        // block lands them in the cacheable prefix so they're active from turn 1.
+        parts.push(`## Output discipline (required to avoid context thrashing)
+Large tool outputs blow the context window and rotate your session mid-task — you lose state and start over. Prevent it:
+- **Bash / shell**: always pipe to \`head -50\` (or \`tail -50\`) for logs, JSON dumps, SQL rows, API blobs. If you need the full output, redirect to a file under \`~/.clementine/vault/07-Inbox/\` or a dedicated scratch dir, then read the path in a later turn.
+- **SQL**: add \`LIMIT 20\` to every query unless you genuinely need more. If you need a count, use \`SELECT COUNT(*)\` not \`SELECT * \`.
+- **Web scrapes / API fetches**: paginate instead of asking for everything at once. Page size ≤ 20 rows / 5 pages at a time.
+- **File reads**: for anything bigger than ~300 lines, read with an offset+limit or grep for what you need rather than reading whole.
+- **Summarize as you go**: if you've done 5+ tool calls in a turn, write a one-line progress note to working memory before the next call. That state survives if the session rotates.
+**If you see "[CONTEXT RECOVERED]"** in your next prompt: the session was just rotated mid-work because output ballooned. Read the "progress so far" notes, DO NOT repeat completed work, and continue from where you left off with tighter outputs.`);
         // Skip AGENTS.md for autonomous runs — not relevant for heartbeats/cron
         if (!isAutonomous) {
             const agentsEntry = this.promptCache.get(AGENTS_FILE);
@@ -2256,6 +2322,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 // blocks) so we can pair them and compare against the outgoing reply.
                 const collectedSdkMessages = [];
                 const queryStartMs = Date.now();
+                // Mid-task state snapshotted when autocompact thrashing rotates the
+                // session. Captures the tool-call sequence + partial responseText
+                // so the retried session gets a "you've already done X, Y, Z —
+                // continue from Z+1" note instead of starting over and redoing
+                // the same Bash/API calls that blew the context in the first place.
+                // Cleared once consumed in the retry prompt.
+                let preRotationSnapshot = null;
                 // Event log: track query lifecycle
                 const eventLog = getEventLog();
                 if (sessionKey) {
@@ -2268,39 +2341,6 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     // always-on canaries for future SDK regressions.)
                     const stream = query({ prompt, options: sdkOptions });
                     let gotStreamEvents = false;
-                    // Live status text shown to the user while model is thinking / calling
-                    // tools. Rendered as italic markdown lines prepended to the reply.
-                    // Stripped from the final `responseText` before return so transcripts
-                    // stay clean. Feels like motion — a 30s turn no longer looks frozen.
-                    let statusText = '';
-                    const hasStreamingSurface = typeof onText === 'function';
-                    const flushStatus = async () => {
-                        if (!hasStreamingSurface)
-                            return;
-                        const combined = statusText
-                            ? (responseText ? `${statusText}\n\n${responseText}` : statusText)
-                            : responseText;
-                        try {
-                            await onText(combined);
-                        }
-                        catch { /* non-fatal */ }
-                    };
-                    // Pre-first-token status: show something within the first ~2s so the
-                    // user knows the daemon got the message and is working. Derived from
-                    // intent classifier type → short phrase; generic otherwise.
-                    if (hasStreamingSurface) {
-                        const hintMap = {
-                            question: 'Looking into that',
-                            task: 'On it',
-                            feedback: 'Got it',
-                            casual: 'One sec',
-                            followup: 'Picking that up',
-                            correction: 'Got it — correcting',
-                        };
-                        const hint = (intentClassification?.type && hintMap[intentClassification.type]) || 'Working on it';
-                        statusText = `_${hint}…_`;
-                        await flushStatus();
-                    }
                     for await (const message of stream) {
                         // Capture assistant + user messages for post-turn contradiction
                         // validation. Must happen before the switch below so we catch
@@ -2317,20 +2357,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                                     // received stream_event deltas (which already accumulated text)
                                     responseText += block.text;
                                     if (onText)
-                                        await onText((statusText ? `${statusText}\n\n` : '') + responseText);
+                                        await onText(responseText);
                                 }
                                 else if (block.type === 'tool_use' && block.name) {
                                     logToolUse(block.name, (block.input ?? {}));
                                     if (sessionKey)
                                         eventLog.emitToolCall(sessionKey, block.name, (block.input ?? {}));
-                                    // Append a one-line tool-use status to the live stream so
-                                    // the user sees real progress during multi-turn ops.
-                                    if (hasStreamingSurface) {
-                                        const shortName = block.name.replace(/^mcp__[^_]+(?:_[^_]+)*__/, '').slice(0, 50);
-                                        const line = `_→ ${shortName}_`;
-                                        statusText = statusText ? `${statusText}\n${line}` : line;
-                                        await flushStatus();
-                                    }
                                     if (onToolActivity) {
                                         try {
                                             await onToolActivity(block.name, (block.input ?? {}));
@@ -2359,7 +2391,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                             if (evt.type === 'content_block_delta' && evt.delta?.type === 'text_delta' && evt.delta.text) {
                                 responseText += evt.delta.text;
                                 if (onText)
-                                    await onText((statusText ? `${statusText}\n\n` : '') + responseText);
+                                    await onText(responseText);
                             }
                         }
                         else if (message.type === 'result') {
@@ -2393,6 +2425,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                                     else if (lower.includes('autocompact') || lower.includes('thrash') || lower.includes('context refilled to the limit')) {
                                         // Autocompact thrashing — treat like the exception path
                                         logger.warn({ sessionKey }, 'Autocompact thrashing (result error) — will rotate session');
+                                        // Capture mid-task state BEFORE rotating, so the retry
+                                        // prompt can tell the new session what's already done.
+                                        preRotationSnapshot = {
+                                            toolCalls: stallGuard?.getToolCalls() ?? [],
+                                            partialText: responseText.slice(-1000),
+                                        };
                                         if (sessionKey) {
                                             try {
                                                 this.compactContext(sessionKey);
@@ -2488,6 +2526,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                         // SDK autocompact thrashing — tool outputs are too large for the context window.
                         // Rotate session and retry with a fresh context so the agent can continue.
                         logger.warn({ sessionKey }, 'Autocompact thrashing — rotating session and retrying');
+                        // Capture mid-task state BEFORE rotating so the retry prompt
+                        // can reference completed work and avoid redoing it.
+                        preRotationSnapshot = {
+                            toolCalls: stallGuard?.getToolCalls() ?? [],
+                            partialText: responseText.slice(-1000),
+                        };
                         if (sessionKey) {
                             try {
                                 this.compactContext(sessionKey);
@@ -2498,13 +2542,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                             this._compactedSessions.delete(sessionKey);
                         }
                         if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
-                            // Prepend a warning so the agent knows to use smaller queries
-                            prompt = `[CONTEXT RECOVERED] Your previous session ran out of context space because tool outputs were too large. ` +
-                                `A fresh session has been started. Key rules for this session:\n` +
-                                `- Add LIMIT clauses to database queries (max 20 rows)\n` +
-                                `- Pipe large command output through \`head -50\` or similar\n` +
-                                `- If a task needs many queries, break it into smaller batches and deliver partial results between batches\n\n` +
-                                `Continue with the user's request: ${prompt}`;
+                            prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
+                            preRotationSnapshot = null;
                             responseText = '';
                             continue;
                         }
@@ -2554,13 +2593,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 if (staleSession && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
                     responseText = '';
                     if (contextRecovery) {
-                        // Inject guidance so the agent avoids repeating the same large-output pattern
-                        prompt = `[CONTEXT RECOVERED] Your previous session ran out of context space because tool outputs were too large. ` +
-                            `A fresh session has been started. Key rules for this session:\n` +
-                            `- Add LIMIT clauses to database queries (max 20 rows)\n` +
-                            `- Pipe large command output through \`head -50\` or similar\n` +
-                            `- If a task needs many queries, break it into smaller batches and deliver partial results between batches\n\n` +
-                            `Continue with the user's request: ${prompt}`;
+                        prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
+                        preRotationSnapshot = null;
                         contextRecovery = false;
                     }
                     continue;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.0.66",
+  "version": "1.0.68",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",