maestro-agent-sdk 0.1.30 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/core/agent.d.ts +4 -0
  2. package/dist/core/agent.d.ts.map +1 -1
  3. package/dist/core/agent.js +1 -0
  4. package/dist/core/agent.js.map +1 -1
  5. package/dist/core/loop.d.ts.map +1 -1
  6. package/dist/core/loop.js +120 -6
  7. package/dist/core/loop.js.map +1 -1
  8. package/dist/memory/compressor.d.ts +65 -2
  9. package/dist/memory/compressor.d.ts.map +1 -1
  10. package/dist/memory/compressor.js +555 -162
  11. package/dist/memory/compressor.js.map +1 -1
  12. package/dist/memory/prune.d.ts +2 -29
  13. package/dist/memory/prune.d.ts.map +1 -1
  14. package/dist/memory/prune.js +2 -74
  15. package/dist/memory/prune.js.map +1 -1
  16. package/dist/memory/state.d.ts +18 -0
  17. package/dist/memory/state.d.ts.map +1 -0
  18. package/dist/memory/state.js +85 -0
  19. package/dist/memory/state.js.map +1 -0
  20. package/dist/provider.d.ts +6 -0
  21. package/dist/provider.d.ts.map +1 -1
  22. package/dist/provider.js +17 -1
  23. package/dist/provider.js.map +1 -1
  24. package/dist/providers/anthropic.d.ts +12 -1
  25. package/dist/providers/anthropic.d.ts.map +1 -1
  26. package/dist/providers/anthropic.js +23 -9
  27. package/dist/providers/anthropic.js.map +1 -1
  28. package/dist/providers/base.d.ts +29 -0
  29. package/dist/providers/base.d.ts.map +1 -1
  30. package/dist/providers/codex-stream.d.ts.map +1 -1
  31. package/dist/providers/codex-stream.js +31 -16
  32. package/dist/providers/codex-stream.js.map +1 -1
  33. package/dist/providers/codex.d.ts +114 -25
  34. package/dist/providers/codex.d.ts.map +1 -1
  35. package/dist/providers/codex.js +99 -55
  36. package/dist/providers/codex.js.map +1 -1
  37. package/dist/providers/deepseek.d.ts +11 -1
  38. package/dist/providers/deepseek.d.ts.map +1 -1
  39. package/dist/providers/deepseek.js +22 -7
  40. package/dist/providers/deepseek.js.map +1 -1
  41. package/dist/providers/fallback.d.ts +71 -0
  42. package/dist/providers/fallback.d.ts.map +1 -0
  43. package/dist/providers/fallback.js +223 -0
  44. package/dist/providers/fallback.js.map +1 -0
  45. package/dist/providers/node-fetch.d.ts +63 -0
  46. package/dist/providers/node-fetch.d.ts.map +1 -0
  47. package/dist/providers/node-fetch.js +164 -0
  48. package/dist/providers/node-fetch.js.map +1 -0
  49. package/dist/session-store.d.ts.map +1 -1
  50. package/dist/session-store.js +9 -0
  51. package/dist/session-store.js.map +1 -1
  52. package/dist/types.d.ts +3 -0
  53. package/dist/types.d.ts.map +1 -1
  54. package/package.json +1 -1
@@ -1,10 +1,12 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { mkdirSync, unlinkSync, writeFileSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
1
5
  import { ACTIVE_TASK_TEMPLATE, wrapCompactedSummary } from "../memory/active-task-template.js";
2
6
  import { pruneMessages } from "../memory/prune.js";
3
7
  import { estimateTokens } from "../memory/token-estimate.js";
4
8
  import { logger } from "../platform/logger.js";
5
- const compactorAntiThrash = new WeakMap();
6
9
  const COMPACTOR_MIN_SAVINGS_RATIO = 0.1;
7
- const COMPACTOR_ANTI_THRASH_LIMIT = 2;
8
10
  /** Sentinel user message that marks a compaction block pair.
9
11
  * Uses NUL-bytes to make accidental user-content collision extremely unlikely. */
10
12
  const COMPACTION_MARKER = "\x00maestro-compaction\x00";
@@ -25,6 +27,27 @@ function incrementalPrompt(previousSummary) {
25
27
  "</previous-summary>",
26
28
  ].join("\n");
27
29
  }
30
+ /** Hermes-style guided-compaction directive. Appended to the aux system
31
+ * prompt when a focus topic is supplied so the summarizer preserves the live
32
+ * work thread in full and sheds unrelated tangents. Mirrors Hermes'
33
+ * `context_compressor` focus block (full detail for related content, ~60-70%
34
+ * of the budget, secrets always redacted). */
35
+ export function focusInstruction(focusTopic) {
36
+ const trimmed = focusTopic.trim();
37
+ if (!trimmed)
38
+ return "";
39
+ return [
40
+ "",
41
+ "---",
42
+ `FOCUS TOPIC: "${trimmed}"`,
43
+ "PRIORITISE preserving every detail related to the focus topic above —",
44
+ "exact values, file paths, command outputs, error messages, and decisions.",
45
+ "For content NOT related to the focus topic, summarise aggressively",
46
+ "(one-liners, or omit if truly irrelevant). Give the focus topic roughly",
47
+ "60-70% of the summary budget. NEVER preserve API keys, tokens, passwords,",
48
+ "or other credentials even for the focus topic — replace them with [REDACTED].",
49
+ ].join("\n");
50
+ }
28
51
  function defaultContextWindow() {
29
52
  const env = process.env.MAESTRO_CONTEXT_WINDOW;
30
53
  if (env) {
@@ -107,6 +130,33 @@ function hasToolResultBlocks(msg) {
107
130
  return false;
108
131
  return content.some((b) => b.type === "tool_result");
109
132
  }
133
+ /**
134
+ * Build the wire payload reused by the v0.1.32+ effective-token fast-path
135
+ * when a previous compaction's summary + delta is already small enough to
136
+ * fit under threshold.
137
+ *
138
+ * Shape mirrors a freshly-compacted wire — `[ ...head, summary-user, ...post ]`
139
+ * — so downstream code (provider, host event handlers) sees the same
140
+ * structure regardless of whether the aux LLM ran this turn or not.
141
+ *
142
+ * `head` is taken from the compaction-stripped view so sentinel markers
143
+ * never leak onto the wire. The `post` slice is `messages[assistantIdx+1..]`
144
+ * verbatim — it already excludes the sentinel pair and is small by
145
+ * definition (caller verified effective < threshold).
146
+ */
147
+ function buildCompactedWire(cleanMessages, summary, headProtect, tail) {
148
+ const cleanHeadEnd = snapHeadEnd(cleanMessages, Math.min(headProtect, cleanMessages.length));
149
+ const head = cleanMessages.slice(0, cleanHeadEnd);
150
+ const headEndsUser = head.length > 0 && head[head.length - 1].role === "user";
151
+ return [
152
+ ...head,
153
+ ...(headEndsUser
154
+ ? [{ role: "assistant", content: [{ type: "text", text: "" }] }]
155
+ : []),
156
+ { role: "user", content: wrapCompactedSummary(summary) },
157
+ ...tail,
158
+ ];
159
+ }
110
160
  /**
111
161
  * Find the most recent compaction block pair in messages.
112
162
  * Returns indices and the summary text, or undefined.
@@ -126,6 +176,9 @@ function findLastCompaction(messages) {
126
176
  }
127
177
  return undefined;
128
178
  }
179
+ export function findLastCompactionSummary(messages) {
180
+ return findLastCompaction(messages)?.summary;
181
+ }
129
182
  /**
130
183
  * Collect indices of all compaction block pairs in messages.
131
184
  */
@@ -165,165 +218,461 @@ function compactionBlockIndices(messages) {
165
218
  * view of messages so the wire never leaks internal sentinels.
166
219
  */
167
220
  export async function compressIfNeeded(messages, opts = {}) {
168
- const contextWindow = opts.contextWindow ?? defaultContextWindow();
169
- const triggerRatio = opts.triggerRatio ?? 0.6;
170
- const headProtect = opts.headProtect ?? 2;
171
- const tailProtect = opts.tailProtect ?? 6;
172
- const auxModel = opts.auxModel;
173
- // Fast-path: short conversations can't trigger compaction.
174
- const minSize = headProtect + 1 + tailProtect;
175
- if (messages.length < minSize) {
176
- return messages;
177
- }
178
- // Cheap pre-gate: skip prune when well under threshold.
179
- const threshold = contextWindow * triggerRatio;
180
- const rawTokens = estimateTokens(messages);
181
- if (rawTokens < threshold * 0.5) {
182
- return messages;
183
- }
184
- // Step 1: prune.
185
- const pruned = pruneMessages(messages);
186
- const prunedTokens = estimateTokens(pruned);
187
- if (prunedTokens < threshold) {
188
- return pruned;
189
- }
190
- // Anti-thrash check.
191
- const state = compactorAntiThrash.get(messages);
192
- if (state && state.failedCompactions >= COMPACTOR_ANTI_THRASH_LIMIT) {
193
- return pruned;
194
- }
195
- // Step 4: find previous compaction for incremental prompt.
196
- const prevCompaction = findLastCompaction(messages);
197
- const previousSummary = prevCompaction?.summary;
198
- // Build a compaction-free view of canonical messages for all wire
199
- // boundary calculations (FIX #1: head/tail must never contain
200
- // sentinel markers).
201
- const skipIndices = compactionBlockIndices(messages);
202
- const cleanMessages = messages.filter((_, i) => !skipIndices.has(i));
203
- // Snap wire boundaries on the clean view.
204
- const cleanHeadEnd = snapHeadEnd(cleanMessages, Math.min(headProtect, cleanMessages.length));
205
- const cleanTailStart = snapTailStart(cleanMessages, Math.max(cleanMessages.length - tailProtect, 0));
206
- if (cleanTailStart <= cleanHeadEnd) {
207
- return pruned;
208
- }
209
- // Build middle for aux LLM.
210
- // FIX #2: when a previous summary exists, limit the aux input to
211
- // the *delta* after the last compaction (messages *including* the
212
- // sentinel pair are canonical; the delta starts right after the
213
- // summary assistant). Otherwise use the full clean middle.
214
- let auxMiddle;
215
- if (prevCompaction) {
216
- // Delta: everything after the summary assistant up to (but not including) the tail.
217
- const deltaStart = prevCompaction.assistantIdx + 1;
218
- const deltaEnd = messages.length - tailProtect;
219
- auxMiddle = messages.slice(deltaStart, Math.max(deltaStart, deltaEnd));
220
- }
221
- else {
222
- auxMiddle = cleanMessages.slice(cleanHeadEnd, cleanTailStart);
223
- }
224
- // Step 5: aux LLM call.
225
- if (!opts.auxProvider) {
226
- logger.warn({ prunedTokens, threshold }, "compressIfNeeded: no auxProvider — prune-only");
227
- return pruned;
228
- }
229
- if (!auxModel) {
230
- logger.warn({ prunedTokens, threshold }, "compressIfNeeded: no auxModel — prune-only");
231
- return pruned;
232
- }
233
- // FIX #4: incremental prompt now includes the full ACTIVE_TASK_TEMPLATE
234
- // so the schema contract is restated every time.
235
- const systemPrompt = previousSummary
236
- ? incrementalPrompt(previousSummary)
237
- : ACTIVE_TASK_TEMPLATE;
238
- let summaryText;
221
+ // Per-call status meta declared at the outer scope so the outer
222
+ // try/finally below fires `onCompactionResult` exactly once regardless of
223
+ // which return path is taken (fast-paths, prune-only,
224
+ // emergencyTail, or full compaction). See CompressOptions.onCompactionResult.
225
+ let didStartAux = false;
226
+ let didCompact = false;
239
227
  try {
240
- const auxResponse = await opts.auxProvider.complete({
241
- model: auxModel,
242
- // The aux model is summarizing history, not continuing tool execution.
243
- // Send a text-only transcript so provider-specific tool pairing rules
244
- // (notably DeepSeek/OpenAI's assistant tool_calls → tool messages
245
- // invariant) cannot reject a middle slice that starts/ends inside a
246
- // tool round-trip.
247
- messages: linearizeForAuxLLM(auxMiddle),
248
- system: systemPrompt,
249
- maxTokens: 2048,
250
- ...(opts.abortSignal ? { abortSignal: opts.abortSignal } : {}),
251
- });
252
- summaryText = extractText(auxResponse.content).trim();
253
- if (!summaryText) {
254
- throw new Error("aux LLM returned empty summary");
228
+ const contextWindow = opts.contextWindow ?? defaultContextWindow();
229
+ const triggerRatio = opts.triggerRatio ?? 0.6;
230
+ const headProtect = opts.headProtect ?? 2;
231
+ const tailProtect = opts.tailProtect ?? 6;
232
+ const auxModel = opts.auxModel;
233
+ // Fast-path: short conversations can't trigger compaction.
234
+ const minSize = headProtect + 1 + tailProtect;
235
+ if (messages.length < minSize) {
236
+ return messages;
255
237
  }
256
- }
257
- catch (err) {
258
- logger.warn({ err, prunedTokens, threshold }, "compressIfNeeded: aux LLM failed");
259
- if (opts.disablePruneFallback)
238
+ // Cheap pre-gate: skip prune when well under threshold.
239
+ const threshold = contextWindow * triggerRatio;
240
+ const rawTokens = estimateTokens(messages);
241
+ if (rawTokens < threshold * 0.5) {
260
242
  return messages;
261
- const target = opts.emergencyTargetTokens;
262
- const effectiveTarget = target !== undefined && Number.isFinite(target) && target > 0 ? target : 50_000;
263
- if (target === 0)
243
+ }
244
+ // Fast-path: if the wire payload (summary + delta) is already below the threshold, skip the aux call.
245
+ const prevCompaction = findLastCompaction(messages);
246
+ if (prevCompaction) {
247
+ const summaryMsg = {
248
+ role: "assistant",
249
+ content: prevCompaction.summary,
250
+ };
251
+ const post = messages.slice(prevCompaction.assistantIdx + 1);
252
+ const effectiveTokens = estimateTokens([summaryMsg, ...post]);
253
+ if (effectiveTokens < threshold) {
254
+ const skipIndices = compactionBlockIndices(messages);
255
+ const cleanMessages = messages.filter((_, i) => !skipIndices.has(i));
256
+ const tail = messages.slice(prevCompaction.assistantIdx + 1);
257
+ return buildCompactedWire(cleanMessages, prevCompaction.summary, headProtect, tail);
258
+ }
259
+ }
260
+ // Step 1: prune.
261
+ const pruned = pruneMessages(messages);
262
+ const prunedTokens = estimateTokens(pruned);
263
+ if (prunedTokens < threshold) {
264
+ return pruned;
265
+ }
266
+ // Step 4: previous compaction already located by the effective-token
267
+ // fast-path above. Reuse the result for the incremental prompt path.
268
+ const previousSummary = prevCompaction?.summary;
269
+ // Build a compaction-free view of canonical messages for all wire
270
+ // boundary calculations (FIX #1: head/tail must never contain
271
+ // sentinel markers).
272
+ const skipIndices = compactionBlockIndices(messages);
273
+ const cleanMessages = messages.filter((_, i) => !skipIndices.has(i));
274
+ // Snap wire boundaries on the clean view.
275
+ const cleanHeadEnd = snapHeadEnd(cleanMessages, Math.min(headProtect, cleanMessages.length));
276
+ const cleanTailStart = snapTailStart(cleanMessages, Math.max(cleanMessages.length - tailProtect, 0));
277
+ if (cleanTailStart <= cleanHeadEnd) {
278
+ return pruned;
279
+ }
280
+ // Build middle for aux LLM.
281
+ // FIX #2: when a previous summary exists, limit the aux input to
282
+ // the *delta* after the last compaction (messages *including* the
283
+ // sentinel pair are canonical; the delta starts right after the
284
+ // summary assistant). Otherwise use the full clean middle.
285
+ let auxMiddle;
286
+ if (prevCompaction) {
287
+ // Delta: everything after the summary assistant up to (but not including) the tail.
288
+ const deltaStart = prevCompaction.assistantIdx + 1;
289
+ const deltaEnd = messages.length - tailProtect;
290
+ auxMiddle = messages.slice(deltaStart, Math.max(deltaStart, deltaEnd));
291
+ }
292
+ else {
293
+ auxMiddle = cleanMessages.slice(cleanHeadEnd, cleanTailStart);
294
+ }
295
+ // Step 5: aux LLM call.
296
+ if (!opts.auxProvider) {
297
+ logger.warn({ prunedTokens, threshold }, "compressIfNeeded: no auxProvider — prune-only");
298
+ return pruned;
299
+ }
300
+ if (!auxModel) {
301
+ logger.warn({ prunedTokens, threshold }, "compressIfNeeded: no auxModel — prune-only");
264
302
  return pruned;
265
- const notice = "[메모리 압축 실패로 이전 대화 일부가 잘렸습니다. 최근 대화만 모델에 전달됨.]";
266
- if (opts.onEmergencyTrim) {
303
+ }
304
+ // FIX #4: incremental prompt now includes the full ACTIVE_TASK_TEMPLATE
305
+ // so the schema contract is restated every time.
306
+ let summaryText = "";
307
+ let tmpFile;
308
+ // didStartAux / didCompact are now declared at the outermost scope so
309
+ // the outer try/finally at the end of this function fires
310
+ // `onCompactionResult` for every return path, including the fast-paths
311
+ // and short-circuits above this point.
312
+ try {
313
+ didStartAux = true;
267
314
  try {
268
- opts.onEmergencyTrim(notice);
315
+ opts.onCompactionStart?.();
316
+ }
317
+ catch { }
318
+ const maxAuxChars = opts.maxAuxChars ?? 400_000;
319
+ // Linearize first to measure chars, then trim oldest if needed.
320
+ let auxMessages = linearizeForAuxLLM(auxMiddle);
321
+ let auxInputChars = auxMessages.reduce((sum, msg) => sum + (typeof msg.content === "string" ? msg.content.length : 0), 0);
322
+ if (auxInputChars > maxAuxChars && auxMessages.length > 1) {
323
+ // Drop oldest messages until under the cap. Walk forward
324
+ // (deque from front) so we keep the most recent middle.
325
+ const trimmed = [];
326
+ for (let i = 0; i < auxMessages.length; i++) {
327
+ const c = typeof auxMessages[i].content === "string"
328
+ ? auxMessages[i].content.length
329
+ : 0;
330
+ if (auxInputChars - c <= maxAuxChars)
331
+ break;
332
+ auxInputChars -= c;
333
+ trimmed.push(auxMessages[i].content.slice(0, 80));
334
+ }
335
+ auxMessages = auxMessages.slice(trimmed.length);
336
+ logger.info({
337
+ originalChars: auxInputChars + trimmed.reduce((s, t) => s + t.length, 0),
338
+ cappedChars: auxInputChars,
339
+ maxAuxChars,
340
+ droppedMessages: trimmed.length,
341
+ droppedPreviews: trimmed.slice(0, 3),
342
+ }, "compressIfNeeded: aux middle capped");
343
+ }
344
+ // Write linearized transcript to temp file for tool-based chunked reading.
345
+ const tmpDir = join(tmpdir(), ".maestro", "tmp");
346
+ mkdirSync(tmpDir, { recursive: true });
347
+ tmpFile = join(tmpDir, `compaction-${randomUUID()}.txt`);
348
+ const fileText = auxMessages
349
+ .map((m) => `[${m.role}] ${typeof m.content === "string" ? m.content : JSON.stringify(m.content)}`)
350
+ .join("\n");
351
+ writeFileSync(tmpFile, fileText, "utf-8");
352
+ const totalLines = fileText.split("\n").length;
353
+ const readTool = {
354
+ name: "read_compaction_log",
355
+ description: `Read a chunk of the compaction log file. The file contains ${totalLines} lines of linearized conversation messages.
356
+ Each line is prefixed with the role: [user], [assistant], [tool_result id=...], etc.
357
+ Use offset (1-based line number) and limit to read portions sequentially. Start from offset 1 with limit 300, then continue with offset = previous offset + limit until done. When you have enough context, stop reading and provide your summary.`,
358
+ input_schema: {
359
+ type: "object",
360
+ properties: {
361
+ offset: { type: "number", description: "Line number to start from (1-based)" },
362
+ limit: {
363
+ type: "number",
364
+ description: "Number of lines to read (default 300, max 500)",
365
+ },
366
+ },
367
+ required: ["offset"],
368
+ },
369
+ };
370
+ // Mini tool loop: aux reads file in chunks and produces summary.
371
+ const basePrompt = previousSummary
372
+ ? incrementalPrompt(previousSummary)
373
+ : ACTIVE_TASK_TEMPLATE;
374
+ // Append the guided-compaction focus directive (if the loop supplied one).
375
+ // Used by both the tool-loop call and the single-call fallback below.
376
+ const systemPrompt = opts.focusTopic
377
+ ? `${basePrompt}${focusInstruction(opts.focusTopic)}`
378
+ : basePrompt;
379
+ const loopMessages = [
380
+ {
381
+ role: "user",
382
+ content: `A conversation log has been saved to a file. Use the read_compaction_log tool to read it in chunks and produce a comprehensive summary.
383
+
384
+ Instructions:
385
+ 1. Start reading from offset 1 with limit 300.
386
+ 2. Continue reading chunks until you have full context.
387
+ 3. When you've read enough, stop calling the tool and provide your summary.
388
+ 4. If the log is too long, prioritize the most recent messages.
389
+
390
+ ${previousSummary ? `Previous summary for context:\n${previousSummary}` : ""}`,
391
+ },
392
+ ];
393
+ // Fix #4 (maestro review 2026-05-25): split file lines ONCE, outside the
394
+ // tool loop. The previous code re-split fileText for every tool call,
395
+ // which on a long log (multi-MB) wasted a full O(n) string walk every
396
+ // round.
397
+ const fileLines = fileText.split("\n");
398
+ // Fix #2 (maestro review 2026-05-25): hard caps on the aux tool loop so
399
+ // a long log can't blow up request bodies round after round.
400
+ //
401
+ // Each round, the previous chunk(s) ride along inside `loopMessages` as
402
+ // tool_result blocks the aux LLM already saw. Without a cap a 100-round
403
+ // run on a multi-MB log re-sends every prior chunk on every call —
404
+ // exactly the failure mode that prompted v0.1.31's file-based design.
405
+ //
406
+ // The hard caps below cause an early "produce summary now" signal:
407
+ // - MAX_TOTAL_READ_CHARS: stop offering more file content once the
408
+ // aux LLM has read this many characters across all rounds.
409
+ // - MAX_ACCUMULATED_TOOL_RESULT_CHARS: stop offering more file
410
+ // content once the loopMessages tool_results approach the cap.
411
+ // - MAX_ROUNDS: existing absolute ceiling.
412
+ //
413
+ // When any cap trips we append an explicit instruction ("you've read
414
+ // enough — emit the summary now") to that tool_result and answer every
415
+ // later read with the instruction alone, never more raw lines. If the aux
416
+ // LLM still emits no summary, the single-call fallback below runs; if that
417
+ // fails too, the catch block tries a last-good summary, then emergencyTail.
418
+ const MAX_ROUNDS = 15;
419
+ const MAX_TOTAL_READ_CHARS = 800_000;
420
+ const MAX_ACCUMULATED_TOOL_RESULT_CHARS = 600_000;
421
+ let round = 0;
422
+ let totalReadChars = 0;
423
+ let accumulatedToolResultChars = 0;
424
+ let capExhausted = false;
425
+ for (; round < MAX_ROUNDS; round++) {
426
+ if (opts.abortSignal?.aborted) {
427
+ throw new Error("aborted");
428
+ }
429
+ // v0.1.31 H7 (maestro review 2026-05-25): aux tool-loop kept failing on
430
+ // reasoning models. Two failure shapes observed in prod:
431
+ //
432
+ // - DeepSeek `deepseek-v4-flash`: `stopReason: "max_tokens"` with
433
+ // empty content for every round 7+, 15 rounds straight → "aux LLM
434
+ // did not produce summary after max rounds". The 2048-token cap
435
+ // was being burned by the model's internal reasoning before any
436
+ // visible text or tool_call could land.
437
+ // - Codex `gpt-5.4-mini`: `stopReason: "end_turn"` with empty content
438
+ // after 107s of reasoning. Codex 5.x reasoning models always run
439
+ // some reasoning even when the caller omits `reasoning.effort`
440
+ // (they default to medium internally); the 2048 cap left no
441
+ // visible-output budget after that.
442
+ //
443
+ // Fix: pass `effort: "low"` to nudge both providers to the cheapest
444
+ // reasoning tier (deepseek `body.thinking` stays enabled but at low,
445
+ // codex `reasoning.effort = "low"`), and bump `maxTokens` to 8192 so
446
+ // there's headroom for visible output after the reasoning pass. The
447
+ // aux summary itself is bounded by MAX_ROUNDS × per-round budget, so
448
+ // 8192 doesn't blow up cost — but it leaves room for the model to
449
+ // actually emit the tool_call / summary text it owes us.
450
+ const auxResponse = await opts.auxProvider.complete({
451
+ model: auxModel,
452
+ messages: loopMessages,
453
+ system: systemPrompt,
454
+ tools: [readTool],
455
+ maxTokens: 8192,
456
+ effort: "low",
457
+ ...(opts.abortSignal ? { abortSignal: opts.abortSignal } : {}),
458
+ });
459
+ // Append assistant message to loop
460
+ loopMessages.push({ role: "assistant", content: auxResponse.content });
461
+ const toolUses = auxResponse.content.filter((c) => c.type === "tool_use");
462
+ if (toolUses.length === 0) {
463
+ // No more tool calls — extract text as summary
464
+ summaryText = extractText(auxResponse.content).trim();
465
+ if (summaryText)
466
+ break;
467
+ // Empty text + no tools → retry
468
+ logger.warn({ round, stopReason: auxResponse.stopReason }, "compressIfNeeded: empty round, retrying");
469
+ continue;
470
+ }
471
+ // Process tool calls
472
+ const toolResults = [];
473
+ for (const tu of toolUses) {
474
+ if (tu.name === "read_compaction_log") {
475
+ // Fix #5 (maestro review 2026-05-25): explicit Number/isFinite/
476
+ // floor/clamp for the model-supplied inputs. NaN, decimal,
477
+ // negative, string offsets used to slip through `|| 1` /
478
+ // `Math.min(..., 500)` and produce surprising slices.
479
+ const rawOffset = Number(tu.input.offset);
480
+ const offset = Number.isFinite(rawOffset) ? Math.max(1, Math.floor(rawOffset)) : 1;
481
+ const rawLimit = Number(tu.input.limit);
482
+ const limit = Number.isFinite(rawLimit)
483
+ ? Math.min(Math.max(1, Math.floor(rawLimit)), 500)
484
+ : 300;
485
+ let chunk;
486
+ if (capExhausted) {
487
+ // Cap already tripped on an earlier round — keep refusing
488
+ // until the aux LLM emits the summary or we run out of rounds.
489
+ chunk =
490
+ "[compaction log: read budget exhausted — produce the summary now from the chunks already read; further reads will return this same message]";
491
+ }
492
+ else {
493
+ const start = Math.max(0, offset - 1);
494
+ const end = Math.min(fileLines.length, start + limit);
495
+ chunk = fileLines.slice(start, end).join("\n");
496
+ if (!chunk)
497
+ chunk = "(end of file)";
498
+ totalReadChars += chunk.length;
499
+ if (totalReadChars >= MAX_TOTAL_READ_CHARS ||
500
+ accumulatedToolResultChars + chunk.length >= MAX_ACCUMULATED_TOOL_RESULT_CHARS) {
501
+ capExhausted = true;
502
+ chunk +=
503
+ "\n\n[compaction log: read budget exhausted — produce the summary now from this and prior chunks; further read_compaction_log calls will be refused]";
504
+ logger.info({
505
+ round,
506
+ totalReadChars,
507
+ accumulatedToolResultChars: accumulatedToolResultChars + chunk.length,
508
+ MAX_TOTAL_READ_CHARS,
509
+ MAX_ACCUMULATED_TOOL_RESULT_CHARS,
510
+ }, "compressIfNeeded: aux read budget exhausted — instructing summary");
511
+ }
512
+ }
513
+ accumulatedToolResultChars += chunk.length;
514
+ toolResults.push({
515
+ type: "tool_result",
516
+ tool_use_id: tu.id,
517
+ content: chunk,
518
+ });
519
+ }
520
+ else {
521
+ toolResults.push({
522
+ type: "tool_result",
523
+ tool_use_id: tu.id,
524
+ content: `Unknown tool: ${tu.name}`,
525
+ is_error: true,
526
+ });
527
+ }
528
+ }
529
+ loopMessages.push({ role: "user", content: toolResults });
530
+ }
531
+ if (!summaryText) {
532
+ logger.warn({
533
+ model: auxModel,
534
+ rounds: round,
535
+ auxInput: {
536
+ middleMessages: auxMiddle.length,
537
+ linearizedMessages: auxMessages.length,
538
+ chars: auxInputChars,
539
+ },
540
+ }, "compressIfNeeded: aux tool-loop produced no summary — trying single-call fallback");
541
+ const fallbackResponse = await opts.auxProvider.complete({
542
+ model: auxModel,
543
+ messages: [
544
+ {
545
+ role: "user",
546
+ content: [
547
+ "Update the cumulative conversation summary from the transcript below.",
548
+ "Prioritize durable user requirements, decisions, pending work, files, and recent context.",
549
+ "Return only the structured summary required by the system prompt.",
550
+ "",
551
+ "<conversation-transcript>",
552
+ fileText,
553
+ "</conversation-transcript>",
554
+ ].join("\n"),
555
+ },
556
+ ],
557
+ system: systemPrompt,
558
+ maxTokens: 8192,
559
+ effort: "low",
560
+ ...(opts.abortSignal ? { abortSignal: opts.abortSignal } : {}),
561
+ });
562
+ summaryText = extractText(fallbackResponse.content).trim();
563
+ }
564
+ if (!summaryText) {
565
+ logger.warn({
566
+ model: auxModel,
567
+ rounds: round,
568
+ auxInput: {
569
+ middleMessages: auxMiddle.length,
570
+ linearizedMessages: auxMessages.length,
571
+ chars: auxInputChars,
572
+ },
573
+ }, "compressIfNeeded: aux LLM did not produce summary after max rounds");
574
+ throw new Error("aux LLM did not produce a summary");
575
+ }
576
+ // ─── Build / persist compaction (moved inside try for finally meta) ───
577
+ // The post-aux success path used to live below the try/catch/finally,
578
+ // which meant `finally` fired before `didCompact` could be set →
579
+ // onCompactionResult always reported `didCompact: false` on success.
580
+ // Moving it inside the try lets the meta callback see the truth.
581
+ const tail = cleanMessages.slice(cleanTailStart);
582
+ const compacted = buildCompactedWire(cleanMessages, summaryText, headProtect, tail);
583
+ const compactedTokens = estimateTokens(compacted);
584
+ // Degenerate check — MUST run before persisting compaction blocks.
585
+ const savings = prunedTokens - compactedTokens;
586
+ const ratio = savings / prunedTokens;
587
+ if (ratio < COMPACTOR_MIN_SAVINGS_RATIO) {
588
+ logger.info({
589
+ prunedTokens,
590
+ compactedTokens,
591
+ ratio,
592
+ }, "compressIfNeeded: low savings — discarding compacted result");
593
+ return pruned;
594
+ }
595
+ // Persist compaction blocks AFTER savings gate.
596
+ if (prevCompaction) {
597
+ messages[prevCompaction.userIdx] = { role: "user", content: COMPACTION_MARKER };
598
+ messages[prevCompaction.assistantIdx] = { role: "assistant", content: summaryText };
599
+ }
600
+ else {
601
+ messages.push({ role: "user", content: COMPACTION_MARKER });
602
+ messages.push({ role: "assistant", content: summaryText });
603
+ }
604
+ try {
605
+ opts.onCompactionSummary?.(summaryText);
269
606
  }
270
607
  catch (cbErr) {
271
- logger.warn({ err: cbErr }, "onEmergencyTrim threw — swallowed");
608
+ logger.warn({ err: cbErr }, "onCompactionSummary threw — swallowed");
609
+ }
610
+ didCompact = true;
611
+ logger.info({
612
+ prunedTokens,
613
+ compactedTokens,
614
+ ratio,
615
+ incremental: !!previousSummary,
616
+ auxMiddleSize: auxMiddle.length,
617
+ }, "compressIfNeeded: applied compaction");
618
+ return compacted;
619
+ }
620
+ catch (err) {
621
+ logger.warn({ err, prunedTokens, threshold }, "compressIfNeeded: aux LLM failed");
622
+ if (opts.disablePruneFallback)
623
+ return messages;
624
+ const fallbackSummary = opts.lastGoodSummary?.trim() || previousSummary;
625
+ if (fallbackSummary?.trim()) {
626
+ const tail = cleanMessages.slice(cleanTailStart);
627
+ const fallback = buildCompactedWire(cleanMessages, fallbackSummary, headProtect, tail);
628
+ logger.info({
629
+ prunedTokens,
630
+ fallbackTokens: estimateTokens(fallback),
631
+ summaryChars: fallbackSummary.length,
632
+ source: opts.lastGoodSummary?.trim() ? "sidecar" : "prior-compaction",
633
+ }, "compressIfNeeded: using last-good memory summary after aux failure");
634
+ return fallback;
635
+ }
636
+ const target = opts.emergencyTargetTokens;
637
+ const effectiveTarget = target !== undefined && Number.isFinite(target) && target > 0 ? target : 50_000;
638
+ if (target === 0)
639
+ return pruned;
640
+ const notice = "[메모리 압축 실패로 이전 대화 일부가 잘렸습니다. 최근 대화만 모델에 전달됨.]";
641
+ if (opts.onEmergencyTrim) {
642
+ try {
643
+ opts.onEmergencyTrim(notice);
644
+ }
645
+ catch (cbErr) {
646
+ logger.warn({ err: cbErr }, "onEmergencyTrim threw — swallowed");
647
+ }
648
+ }
649
+ return emergencyTail(pruned, effectiveTarget, notice);
650
+ }
651
+ finally {
652
+ if (tmpFile) {
653
+ try {
654
+ unlinkSync(tmpFile);
655
+ }
656
+ catch { }
272
657
  }
273
658
  }
274
- return emergencyTail(pruned, effectiveTarget, notice);
275
- }
276
- // Build wire from clean messages (FIX #1).
277
- const head = cleanMessages.slice(0, cleanHeadEnd);
278
- const tail = cleanMessages.slice(cleanTailStart);
279
- // H2 defense (2026-05-24): if head ends with a user message and the
280
- // summary user is prepended directly after it, we create a user-user
281
- // consecutive pattern that some providers reject. Insert a dummy
282
- // assistant to restore the alternating-role invariant.
283
- const headEndsUser = head.length > 0 && head[head.length - 1].role === "user";
284
- const compacted = [
285
- ...head,
286
- ...(headEndsUser
287
- ? [{ role: "assistant", content: [{ type: "text", text: "" }] }]
288
- : []),
289
- { role: "user", content: wrapCompactedSummary(summaryText) },
290
- ...tail,
291
- ];
292
- const compactedTokens = estimateTokens(compacted);
293
- // Degenerate check — MUST run before persisting compaction blocks (FIX #3).
294
- const savings = prunedTokens - compactedTokens;
295
- const ratio = savings / prunedTokens;
296
- if (ratio < COMPACTOR_MIN_SAVINGS_RATIO) {
297
- const next = state ?? { failedCompactions: 0 };
298
- next.failedCompactions++;
299
- compactorAntiThrash.set(messages, next);
300
- logger.info({
301
- prunedTokens,
302
- compactedTokens,
303
- ratio,
304
- failedCompactions: next.failedCompactions,
305
- }, "compressIfNeeded: low savings — anti-thrash incremented");
306
- return pruned;
307
- }
308
- // Step 6: persist compaction blocks AFTER savings gate (FIX #3).
309
- if (prevCompaction) {
310
- messages[prevCompaction.userIdx] = { role: "user", content: COMPACTION_MARKER };
311
- messages[prevCompaction.assistantIdx] = { role: "assistant", content: summaryText };
312
659
  }
313
- else {
314
- messages.push({ role: "user", content: COMPACTION_MARKER });
315
- messages.push({ role: "assistant", content: summaryText });
660
+ finally {
661
+ // Outermost: truthful per-call status meta. Fires exactly once for
662
+ // every return path: fast-path skips, prune-only,
663
+ // emergencyTail fallback, AND the successful compaction path. The
664
+ // host (loop.ts) uses {didStartAux, didCompact} to decide whether
665
+ // to surface "🔄 압축 완료" without falsely reporting it for turns
666
+ // that took a short-circuit. See CompressOptions.onCompactionResult.
667
+ if (opts.onCompactionResult) {
668
+ try {
669
+ opts.onCompactionResult({ didStartAux, didCompact });
670
+ }
671
+ catch (cbErr) {
672
+ logger.warn({ err: cbErr }, "onCompactionResult threw — swallowed");
673
+ }
674
+ }
316
675
  }
317
- if (state)
318
- compactorAntiThrash.delete(messages);
319
- logger.info({
320
- prunedTokens,
321
- compactedTokens,
322
- ratio,
323
- incremental: !!previousSummary,
324
- auxMiddleSize: auxMiddle.length,
325
- }, "compressIfNeeded: applied compaction");
326
- return compacted;
327
676
  }
328
677
  // ─── helpers ──────────────────────────────────────────────────────────────
329
678
  /**
@@ -400,11 +749,15 @@ function snapHeadEnd(messages, idealEnd) {
400
749
  * Skips user messages that are tool_result carriers (FIX #6).
401
750
  */
402
751
  function snapTailStart(messages, idealStart) {
403
- const floor = Math.max(0, idealStart - 4);
404
- let i = Math.max(idealStart, 0);
405
- while (i > floor &&
406
- messages[i] &&
407
- (messages[i].role !== "user" || hasToolResultBlocks(messages[i]))) {
752
+ let i = Math.min(Math.max(idealStart, 0), messages.length);
753
+ // The tail is spliced directly after the synthetic summary user message.
754
+ // It must therefore start at a boundary that cannot introduce orphaned
755
+ // tool_result/function_call_output blocks. The old implementation only
756
+ // searched back four messages; long tool rounds can exceed that window and
757
+ // return a user(tool_result) boundary anyway, which Codex rejects with
758
+ // "No tool call found for function call output".
759
+ while (i > 0 &&
760
+ (!messages[i] || messages[i].role !== "user" || hasToolResultBlocks(messages[i]))) {
408
761
  i--;
409
762
  }
410
763
  return i;
@@ -429,20 +782,60 @@ function emergencyTail(messages, targetTokens, notice) {
429
782
  }
430
783
  // FIX #5: if history fits entirely within target, return full history.
431
784
  if (!reachedThreshold) {
432
- return [{ role: "user", content: `<emergency-truncation>\n${notice}\n</emergency-truncation>` }, ...messages];
785
+ return [
786
+ { role: "user", content: `<emergency-truncation>\n${notice}\n</emergency-truncation>` },
787
+ ...messages,
788
+ ];
433
789
  }
434
- // Snap cut to a safe user message boundary.
790
+ // Snap cut to a safe user message boundary. Must land on a *plain* user
791
+ // (no tool_result blocks) so the tail doesn't start with orphaned
792
+ // function_call_output items whose matching tool_use was cut off.
793
+ // This mirrors the H1 pattern in snapHeadEnd (v0.1.29).
435
794
  while (cut < messages.length && messages[cut]?.role !== "user")
436
795
  cut++;
437
796
  if (cut >= messages.length)
438
797
  cut = messages.length - 1;
439
798
  while (cut > 0 && messages[cut]?.role !== "user")
440
799
  cut--;
800
+ // H3 defense: skip tool_result-carrying users to avoid Codex/Anthropic 400.
801
+ while (cut > 0 && hasToolResultBlocks(messages[cut])) {
802
+ cut--;
803
+ while (cut > 0 && messages[cut]?.role !== "user")
804
+ cut--;
805
+ }
441
806
  const tail = messages.slice(cut);
442
- return [{ role: "user", content: `<emergency-truncation>\n${notice}\n</emergency-truncation>` }, ...tail];
443
- }
444
- /** Test-only: reset the compactor anti-thrash WeakMap entry for an array. */
445
- export function __resetCompactorState(messages) {
446
- compactorAntiThrash.delete(messages);
807
+ // H3 post-condition: if the tail starts with a tool_result user despite
808
+ // the backward walk (corner case — every user message carries results),
809
+ // drop the tool_result blocks so the wire doesn't 400.
810
+ const sanitized = tail.length > 0 && hasToolResultBlocks(tail[0])
811
+ ? [
812
+ {
813
+ role: "user",
814
+ content: [
815
+ {
816
+ type: "text",
817
+ text: "[truncated: tool results stripped to avoid orphaned function_call_output]",
818
+ },
819
+ ],
820
+ },
821
+ ...tail.slice(1),
822
+ ]
823
+ : tail;
824
+ // H2 defense: the emergency notice is always a user message, and the
825
+ // boundary snap above guarantees tail[0] is also a user. This creates
826
+ // a user-user consecutive pattern that DeepSeek/Codex may reject.
827
+ // Insert a dummy assistant text block to restore alternating-role order.
828
+ const noticeMsg = {
829
+ role: "user",
830
+ content: `<emergency-truncation>\n${notice}\n</emergency-truncation>`,
831
+ };
832
+ const tailStartsUser = sanitized.length > 0 && sanitized[0].role === "user";
833
+ return [
834
+ noticeMsg,
835
+ ...(tailStartsUser
836
+ ? [{ role: "assistant", content: [{ type: "text", text: "" }] }]
837
+ : []),
838
+ ...sanitized,
839
+ ];
447
840
  }
448
841
  //# sourceMappingURL=compressor.js.map