clementine-agent 1.0.66 → 1.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -174,6 +174,55 @@ function stripLoneSurrogates(s) {
174
174
  // Replace any surrogate not properly paired with the Unicode replacement char
175
175
  return s.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, '\uFFFD');
176
176
  }
177
/**
 * Build the retry prompt used after an autocompact rotation, carrying
 * mid-task state from the rotated session into the fresh one.
 *
 * The prompt always contains the output-discipline rules and the original
 * request. When a snapshot is supplied it additionally lists the tool calls
 * already made (so the new session does not redo them) and quotes the tail
 * of the partial response (so the new session can continue from it).
 *
 * @param {string} originalPrompt - The prompt that was being processed when
 *   the session was rotated; appended verbatim at the end.
 * @param {?{toolCalls?: string[], partialText?: string}} snapshot - State
 *   captured before rotation. May be null/undefined, and either field may be
 *   missing or empty; the matching section is then omitted.
 * @returns {string} The newline-joined recovery prompt.
 */
function buildContextRecoveredPrompt(originalPrompt, snapshot) {
  const MAX_LISTED_TOOLS = 12;
  const MAX_RECENT_CALLS = 20;
  const MAX_PARTIAL_CHARS = 1000;

  // Reduce a call description such as "Bash(ls -la)" to its tool name.
  // Cutting at the first "(" with indexOf (rather than /\(.*$/) also works
  // when the arguments span several lines, where "." would stop at the
  // first newline and leave the whole string as the "name".
  const toolNameOf = (call) => {
    const parenAt = call.indexOf('(');
    return (parenAt === -1 ? call : call.slice(0, parenAt)).trim();
  };

  const parts = [
    '[CONTEXT RECOVERED] Your previous session was rotated because tool outputs filled the context window. A fresh session has been started.',
    '',
    '**Rules for this session (non-negotiable):**',
    '- Add `LIMIT 20` to every SQL query unless you need a count (use `SELECT COUNT(*)`).',
    '- Pipe long Bash / API / log output through `head -50` (or redirect to a file and read the path in a later turn).',
    '- Break multi-entity work into batches of ≤ 20 and deliver partial results between batches.',
  ];

  // Tolerate snapshots that lack a field instead of throwing mid-retry.
  const toolCalls = Array.isArray(snapshot?.toolCalls)
    ? snapshot.toolCalls.map(String)
    : [];
  if (toolCalls.length > 0) {
    // A Set keeps first-seen order, so this de-duplicates while preserving
    // the order in which tools were first used.
    const uniqueTools = [...new Set(toolCalls.map(toolNameOf))];
    const listedTools = uniqueTools.slice(0, MAX_LISTED_TOOLS).join(', ');
    const overflowMark = uniqueTools.length > MAX_LISTED_TOOLS ? ', …' : '';
    const recentCalls = toolCalls.slice(-MAX_RECENT_CALLS);
    parts.push('');
    parts.push('**Progress from the rotated session (DO NOT repeat these calls):**');
    parts.push(`- ${toolCalls.length} tool calls across ${uniqueTools.length} distinct tools: ${listedTools}${overflowMark}`);
    // Only the most recent calls are listed, so label them as such; the
    // full count is already stated on the line above.
    parts.push(`- Most recent calls (last ${recentCalls.length}, in order): ${recentCalls.join(' → ')}`);
  }

  const partialText = typeof snapshot?.partialText === 'string'
    ? snapshot.partialText.trim()
    : '';
  if (partialText.length > 0) {
    // Truncate BEFORE quoting and keep the END of the text: the tail is the
    // part the new session must continue from, and the reported length then
    // matches what is actually shown.
    const tail = partialText.slice(-MAX_PARTIAL_CHARS);
    parts.push('');
    parts.push(`**Partial response you had already started (last ${tail.length} chars):**`);
    parts.push(`> ${tail.replace(/\n/g, '\n> ')}`);
    parts.push('Continue from where that left off — don\'t restart the reasoning.');
  }

  parts.push('');
  parts.push('**Original user request to continue working on:**');
  parts.push(originalPrompt);
  return parts.join('\n');
}
177
226
  /**
178
227
  * Wrapper around the SDK's query() that sanitizes lone Unicode surrogates in
179
228
  * prompt, systemPrompt, and appendSystemPrompt. Covers every call site in one
@@ -961,6 +1010,23 @@ export class PersonalAssistant {
961
1010
  parts.push(isAutonomous ? soulEntry.content.slice(0, 1500) : soulEntry.content);
962
1011
  }
963
1012
  }
1013
+ // Universal output discipline — applies to Clementine AND every team agent.
1014
+ // Autocompact thrashing (SDK mid-turn session rotation from too-large
1015
+ // tool outputs) is almost always caused by unbounded Bash / SQL / API
1016
+ // responses filling the context window. The `[CONTEXT RECOVERED]`
1017
+ // prefix already tells agents these rules, but only AFTER thrash. This
1018
+ // block lands them in the cacheable prefix so they're active from turn 1.
1019
+ parts.push(`## Output discipline (required to avoid context thrashing)
1020
+
1021
+ Large tool outputs blow the context window and rotate your session mid-task — you lose state and start over. Prevent it:
1022
+
1023
+ - **Bash / shell**: always pipe to \`head -50\` (or \`tail -50\`) for logs, JSON dumps, SQL rows, API blobs. If you need the full output, redirect to a file under \`~/.clementine/vault/07-Inbox/\` or a dedicated scratch dir, then read the path in a later turn.
1024
+ - **SQL**: add \`LIMIT 20\` to every query unless you genuinely need more. If you need a count, use \`SELECT COUNT(*)\` not \`SELECT * \`.
1025
+ - **Web scrapes / API fetches**: paginate instead of asking for everything at once. Page size ≤ 20 rows / 5 pages at a time.
1026
+ - **File reads**: for anything bigger than ~300 lines, read with an offset+limit or grep for what you need rather than reading whole.
1027
+ - **Summarize as you go**: if you've done 5+ tool calls in a turn, write a one-line progress note to working memory before the next call. That state survives if the session rotates.
1028
+
1029
+ **If you see "[CONTEXT RECOVERED]"** in your next prompt: the session was just rotated mid-work because output ballooned. Read the "progress so far" notes, DO NOT repeat completed work, and continue from where you left off with tighter outputs.`);
964
1030
  // Skip AGENTS.md for autonomous runs — not relevant for heartbeats/cron
965
1031
  if (!isAutonomous) {
966
1032
  const agentsEntry = this.promptCache.get(AGENTS_FILE);
@@ -2256,6 +2322,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2256
2322
  // blocks) so we can pair them and compare against the outgoing reply.
2257
2323
  const collectedSdkMessages = [];
2258
2324
  const queryStartMs = Date.now();
2325
+ // Mid-task state snapshotted when autocompact thrashing rotates the
2326
+ // session. Captures the tool-call sequence + partial responseText
2327
+ // so the retried session gets a "you've already done X, Y, Z —
2328
+ // continue from Z+1" note instead of starting over and redoing
2329
+ // the same Bash/API calls that blew the context in the first place.
2330
+ // Cleared once consumed in the retry prompt.
2331
+ let preRotationSnapshot = null;
2259
2332
  // Event log: track query lifecycle
2260
2333
  const eventLog = getEventLog();
2261
2334
  if (sessionKey) {
@@ -2268,39 +2341,6 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2268
2341
  // always-on canaries for future SDK regressions.)
2269
2342
  const stream = query({ prompt, options: sdkOptions });
2270
2343
  let gotStreamEvents = false;
2271
- // Live status text shown to the user while model is thinking / calling
2272
- // tools. Rendered as italic markdown lines prepended to the reply.
2273
- // Stripped from the final `responseText` before return so transcripts
2274
- // stay clean. Feels like motion — a 30s turn no longer looks frozen.
2275
- let statusText = '';
2276
- const hasStreamingSurface = typeof onText === 'function';
2277
- const flushStatus = async () => {
2278
- if (!hasStreamingSurface)
2279
- return;
2280
- const combined = statusText
2281
- ? (responseText ? `${statusText}\n\n${responseText}` : statusText)
2282
- : responseText;
2283
- try {
2284
- await onText(combined);
2285
- }
2286
- catch { /* non-fatal */ }
2287
- };
2288
- // Pre-first-token status: show something within the first ~2s so the
2289
- // user knows the daemon got the message and is working. Derived from
2290
- // intent classifier type → short phrase; generic otherwise.
2291
- if (hasStreamingSurface) {
2292
- const hintMap = {
2293
- question: 'Looking into that',
2294
- task: 'On it',
2295
- feedback: 'Got it',
2296
- casual: 'One sec',
2297
- followup: 'Picking that up',
2298
- correction: 'Got it — correcting',
2299
- };
2300
- const hint = (intentClassification?.type && hintMap[intentClassification.type]) || 'Working on it';
2301
- statusText = `_${hint}…_`;
2302
- await flushStatus();
2303
- }
2304
2344
  for await (const message of stream) {
2305
2345
  // Capture assistant + user messages for post-turn contradiction
2306
2346
  // validation. Must happen before the switch below so we catch
@@ -2317,20 +2357,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2317
2357
  // received stream_event deltas (which already accumulated text)
2318
2358
  responseText += block.text;
2319
2359
  if (onText)
2320
- await onText((statusText ? `${statusText}\n\n` : '') + responseText);
2360
+ await onText(responseText);
2321
2361
  }
2322
2362
  else if (block.type === 'tool_use' && block.name) {
2323
2363
  logToolUse(block.name, (block.input ?? {}));
2324
2364
  if (sessionKey)
2325
2365
  eventLog.emitToolCall(sessionKey, block.name, (block.input ?? {}));
2326
- // Append a one-line tool-use status to the live stream so
2327
- // the user sees real progress during multi-turn ops.
2328
- if (hasStreamingSurface) {
2329
- const shortName = block.name.replace(/^mcp__[^_]+(?:_[^_]+)*__/, '').slice(0, 50);
2330
- const line = `_→ ${shortName}_`;
2331
- statusText = statusText ? `${statusText}\n${line}` : line;
2332
- await flushStatus();
2333
- }
2334
2366
  if (onToolActivity) {
2335
2367
  try {
2336
2368
  await onToolActivity(block.name, (block.input ?? {}));
@@ -2359,7 +2391,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2359
2391
  if (evt.type === 'content_block_delta' && evt.delta?.type === 'text_delta' && evt.delta.text) {
2360
2392
  responseText += evt.delta.text;
2361
2393
  if (onText)
2362
- await onText((statusText ? `${statusText}\n\n` : '') + responseText);
2394
+ await onText(responseText);
2363
2395
  }
2364
2396
  }
2365
2397
  else if (message.type === 'result') {
@@ -2393,6 +2425,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2393
2425
  else if (lower.includes('autocompact') || lower.includes('thrash') || lower.includes('context refilled to the limit')) {
2394
2426
  // Autocompact thrashing — treat like the exception path
2395
2427
  logger.warn({ sessionKey }, 'Autocompact thrashing (result error) — will rotate session');
2428
+ // Capture mid-task state BEFORE rotating, so the retry
2429
+ // prompt can tell the new session what's already done.
2430
+ preRotationSnapshot = {
2431
+ toolCalls: stallGuard?.getToolCalls() ?? [],
2432
+ partialText: responseText.slice(-1000),
2433
+ };
2396
2434
  if (sessionKey) {
2397
2435
  try {
2398
2436
  this.compactContext(sessionKey);
@@ -2488,6 +2526,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2488
2526
  // SDK autocompact thrashing — tool outputs are too large for the context window.
2489
2527
  // Rotate session and retry with a fresh context so the agent can continue.
2490
2528
  logger.warn({ sessionKey }, 'Autocompact thrashing — rotating session and retrying');
2529
+ // Capture mid-task state BEFORE rotating so the retry prompt
2530
+ // can reference completed work and avoid redoing it.
2531
+ preRotationSnapshot = {
2532
+ toolCalls: stallGuard?.getToolCalls() ?? [],
2533
+ partialText: responseText.slice(-1000),
2534
+ };
2491
2535
  if (sessionKey) {
2492
2536
  try {
2493
2537
  this.compactContext(sessionKey);
@@ -2498,13 +2542,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2498
2542
  this._compactedSessions.delete(sessionKey);
2499
2543
  }
2500
2544
  if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
2501
- // Prepend a warning so the agent knows to use smaller queries
2502
- prompt = `[CONTEXT RECOVERED] Your previous session ran out of context space because tool outputs were too large. ` +
2503
- `A fresh session has been started. Key rules for this session:\n` +
2504
- `- Add LIMIT clauses to database queries (max 20 rows)\n` +
2505
- `- Pipe large command output through \`head -50\` or similar\n` +
2506
- `- If a task needs many queries, break it into smaller batches and deliver partial results between batches\n\n` +
2507
- `Continue with the user's request: ${prompt}`;
2545
+ prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
2546
+ preRotationSnapshot = null;
2508
2547
  responseText = '';
2509
2548
  continue;
2510
2549
  }
@@ -2554,13 +2593,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
2554
2593
  if (staleSession && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
2555
2594
  responseText = '';
2556
2595
  if (contextRecovery) {
2557
- // Inject guidance so the agent avoids repeating the same large-output pattern
2558
- prompt = `[CONTEXT RECOVERED] Your previous session ran out of context space because tool outputs were too large. ` +
2559
- `A fresh session has been started. Key rules for this session:\n` +
2560
- `- Add LIMIT clauses to database queries (max 20 rows)\n` +
2561
- `- Pipe large command output through \`head -50\` or similar\n` +
2562
- `- If a task needs many queries, break it into smaller batches and deliver partial results between batches\n\n` +
2563
- `Continue with the user's request: ${prompt}`;
2596
+ prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
2597
+ preRotationSnapshot = null;
2564
2598
  contextRecovery = false;
2565
2599
  }
2566
2600
  continue;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.66",
3
+ "version": "1.0.68",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",