pi-ui-extend 0.1.38 → 0.1.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/app/app.d.ts +0 -1
  2. package/dist/app/app.js +22 -21
  3. package/dist/app/input/input-controller.d.ts +1 -0
  4. package/dist/app/input/input-controller.js +40 -12
  5. package/dist/app/model/model-usage-status.js +4 -2
  6. package/dist/app/session/request-history.js +2 -0
  7. package/dist/app/session/session-event-controller.d.ts +13 -0
  8. package/dist/app/session/session-event-controller.js +27 -0
  9. package/dist/app/session/tabs-controller.d.ts +8 -0
  10. package/dist/app/session/tabs-controller.js +37 -6
  11. package/dist/app/workspace/workspace-actions-controller.d.ts +1 -0
  12. package/dist/app/workspace/workspace-actions-controller.js +2 -1
  13. package/dist/bundled-extensions/terminal-bell/index.js +1 -1
  14. package/dist/markdown-format.js +14 -25
  15. package/dist/terminal-width.d.ts +14 -0
  16. package/dist/terminal-width.js +31 -2
  17. package/dist/theme.js +2 -2
  18. package/external/pi-tools-suite/README.md +34 -9
  19. package/external/pi-tools-suite/package.json +3 -3
  20. package/external/pi-tools-suite/src/async-subagents/async-subagents.sample.jsonc +35 -21
  21. package/external/pi-tools-suite/src/async-subagents/commands.ts +1 -1
  22. package/external/pi-tools-suite/src/async-subagents/core/agent-strategy.ts +2 -2
  23. package/external/pi-tools-suite/src/async-subagents/core/config.ts +70 -12
  24. package/external/pi-tools-suite/src/async-subagents/core/routing.ts +1 -1
  25. package/external/pi-tools-suite/src/async-subagents/core/spawn.ts +1 -1
  26. package/external/pi-tools-suite/src/async-subagents/core/types.ts +1 -1
  27. package/external/pi-tools-suite/src/async-subagents/index.ts +6 -6
  28. package/external/pi-tools-suite/src/async-subagents/lib.ts +1 -1
  29. package/external/pi-tools-suite/src/async-subagents/tools/spawn.ts +4 -2
  30. package/external/pi-tools-suite/src/async-subagents/tools/subagents.ts +2 -2
  31. package/external/pi-tools-suite/src/{glm-coding-discipline → coding-discipline}/index.ts +17 -8
  32. package/external/pi-tools-suite/src/config.ts +1 -1
  33. package/external/pi-tools-suite/src/dcp/auto-compress.ts +368 -0
  34. package/external/pi-tools-suite/src/dcp/compress-tool.ts +3 -0
  35. package/external/pi-tools-suite/src/dcp/config.ts +23 -0
  36. package/external/pi-tools-suite/src/dcp/index.ts +112 -7
  37. package/external/pi-tools-suite/src/dcp/prompts.ts +8 -0
  38. package/external/pi-tools-suite/src/dcp/state.ts +41 -0
  39. package/external/pi-tools-suite/src/default-pi-tools-suite-config.ts +30 -22
  40. package/external/pi-tools-suite/src/index.ts +2 -1
  41. package/external/pi-tools-suite/src/session-name/index.ts +37 -0
  42. package/external/pi-tools-suite/src/tool-descriptions.ts +16 -4
  43. package/package.json +4 -4
@@ -15,7 +15,7 @@ export interface PiToolsSuiteConfig {
15
15
  enabled: boolean;
16
16
  disabledModules: string[];
17
17
  todoThinking: boolean;
18
- /** Vision-capable model used by the GLM lookup tool; unset disables lookup. */
18
+ /** Vision-capable model used by the coding-discipline lookup tool; unset disables lookup. */
19
19
  lookupModel?: string;
20
20
  telegramMirror?: TelegramMirrorConfig;
21
21
  }
@@ -0,0 +1,368 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Dynamic Context Pruning (DCP) — auto-compress fallback
3
+ //
4
+ // When a model ignores repeated context-strong nudges above the emergency
5
+ // threshold (observed with gpt-5.5 in session 019edfe3: 59 strong nudges,
6
+ // 0 compress calls), DCP creates a compression block itself instead of
7
+ // waiting for the model. This is the model-independent safety net.
8
+ //
9
+ // Lossy and irreversible within a session; disabled by default and gated by a
10
+ // patience counter + the emergency threshold. The summary can be produced
11
+ // either by a deterministic programmatic digest (default) or by a configured
12
+ // list of summarizer models (e.g. a cheap model like zai/glm-5.2), with
13
+ // automatic fallback to the programmatic digest on any failure/timeout.
14
+ // ---------------------------------------------------------------------------
15
+
16
+ import { complete } from "@earendil-works/pi-ai"
17
+ import type { Model, Api } from "@earendil-works/pi-ai"
18
+ import type { DcpState } from "./state.js"
19
+ import type { DcpConfig } from "./config.js"
20
+ import type { CompressionCandidate } from "./pruner-types.js"
21
+ import {
22
+ createRangeCompressionBlock,
23
+ resolveAnchorBoundary,
24
+ } from "./compression-blocks.js"
25
+
26
/**
 * Pure decision: should the auto-compress fallback fire on this context pass?
 *
 * Fires only when ALL of the following hold:
 *  - the master switch `autoCompress.enabled` is on,
 *  - `state.consecutiveIgnoredStrongNudges` is strictly greater than
 *    `autoCompress.patience` (the model gets `patience` genuine strong
 *    chances before DCP takes over),
 *  - `contextPercent` is strictly above `maxContextPercent` (the emergency
 *    threshold),
 *  - a compression candidate was supplied.
 *
 * Both numeric gates are written as `!(a > b)` rather than `a <= b` so that a
 * missing or NaN operand fails CLOSED. Auto-compress is lossy, so an unset
 * counter or a config without `patience` must never be treated as "patience
 * exhausted".
 *
 * @param state DCP session state; only `consecutiveIgnoredStrongNudges` is read.
 * @param config DCP config; only `compress.autoCompress` is read.
 * @param contextPercent Current context usage, same unit as `maxContextPercent`.
 * @param maxContextPercent Emergency threshold that usage must exceed.
 * @param candidate Range proposed for compression, or `null` when none exists.
 * @returns `shouldFire` plus a reason code: `"disabled"`, `"below-patience"`,
 *   `"below-emergency-threshold"`, `"no-candidate"` or `"ignored-strongs"`.
 */
export function decideAutoCompress(
  state: DcpState,
  config: DcpConfig,
  contextPercent: number,
  maxContextPercent: number,
  candidate: CompressionCandidate | null,
): { shouldFire: boolean; reason: string } {
  const settings = config?.compress?.autoCompress
  if (!settings?.enabled) return { shouldFire: false, reason: "disabled" }
  // `!(a > b)` instead of `a <= b`: undefined/NaN on either side keeps us below patience.
  if (!(state.consecutiveIgnoredStrongNudges > settings.patience)) {
    return { shouldFire: false, reason: "below-patience" }
  }
  if (!(contextPercent > maxContextPercent)) {
    return { shouldFire: false, reason: "below-emergency-threshold" }
  }
  if (!candidate) return { shouldFire: false, reason: "no-candidate" }
  return { shouldFire: true, reason: "ignored-strongs" }
}
55
+
56
/**
 * Render one message as plain text.
 *
 * String content is returned untouched. Array content is rendered block by
 * block (one line per block, joined with "\n", then trimmed). Anything else
 * yields the empty string.
 */
function messageToText(message: any): string {
  const body = message?.content
  if (typeof body === "string") return body
  if (!Array.isArray(body)) return ""

  const pieces: any[] = []
  for (const part of body) {
    if (typeof part === "string") {
      pieces.push(part)
      continue
    }
    const kind = part?.type
    if (kind === "text") {
      pieces.push(part.text ?? "")
    } else if (kind === "toolCall") {
      // Tool calls are reduced to a marker carrying only the tool name.
      const toolName = part.name ?? part.function?.name ?? "tool"
      pieces.push(`[tool call: ${toolName}]`)
    } else if (kind === "toolResult" || part?.role === "toolResult") {
      pieces.push(part.text ?? "")
    } else {
      // Unrecognised block kinds contribute an empty line.
      pieces.push("")
    }
  }
  return pieces.join("\n").trim()
}
77
+
78
/**
 * Build a short tool-usage digest for the messages in a range, e.g.
 * `"bash×3, read×2"`.
 *
 * Only `toolCall` blocks inside array-shaped `content` are counted. The tool
 * name is resolved as `block.name ?? block.function?.name`, the same lookup
 * `messageToText` uses, so function-style tool calls are not silently left
 * out of the digest. Blocks without a string name are ignored.
 *
 * @param messages Messages to scan; entries without array content are skipped.
 * @returns Comma-separated `name×count` pairs, most-used first (ties keep
 *   first-seen order), or `""` when no tool calls were found.
 */
function toolUsageDigest(messages: any[]): string {
  const counts = new Map<string, number>()
  for (const msg of messages) {
    const content = msg?.content
    if (!Array.isArray(content)) continue
    for (const block of content) {
      if (block?.type !== "toolCall") continue
      const name = block.name ?? block.function?.name
      if (typeof name !== "string") continue
      counts.set(name, (counts.get(name) ?? 0) + 1)
    }
  }
  if (counts.size === 0) return ""
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .map(([name, n]) => `${name}×${n}`)
    .join(", ")
}
94
+
95
/**
 * Model-free, deterministic summary text for an auto-compressed range.
 *
 * The text is intentionally brief: a banner, the topic, the range label and
 * (when any tool calls were seen) a tool-usage line, followed by a note that
 * protected content is preserved by the compression block itself.
 *
 * @param topic Human-readable label for the slice.
 * @param candidate Range descriptor; its ids, message count and token
 *   estimate are echoed into the "Range:" line.
 * @param messagesInRange Messages scanned for the tool-usage line.
 * @returns Newline-joined summary text.
 */
export function buildProgrammaticSummary(
  topic: string,
  candidate: CompressionCandidate,
  messagesInRange: any[],
): string {
  const toolsUsed = toolUsageDigest(messagesInRange)

  const banner = `[Auto-compressed by DCP — model did not compress after repeated context-strong nudges]`
  const topicLine = `Topic: ${topic}`
  const rangeLine = `Range: ${candidate.startId}..${candidate.endId} (${candidate.messageCount} messages, ~${candidate.estimatedTokens} tokens)`
  // The tool line is optional; an empty digest simply drops it.
  const toolLines = toolsUsed ? [`Tool calls in range: ${toolsUsed}`] : []
  const footer = `This slice was summarized automatically to protect the context window. Protected user messages and tool outputs are preserved below by the compression block.`

  return [banner, topicLine, rangeLine, ...toolLines, footer].join("\n")
}
118
+
119
/**
 * System prompt passed as `systemPrompt` on every summarizer-model call made
 * by `generateModelSummary`. It asks for a dense, continuation-focused summary
 * and for the summary text only, since the reply is used verbatim as the
 * compression block's summary.
 */
const SUMMARIZER_SYSTEM_PROMPT = `You summarize a slice of a coding agent's conversation so it can replace the raw messages in context. Produce a dense, continuation-focused summary: preserve user intent, decisions made, files/symbols changed or inspected, exact errors still actionable, verification status, and next steps. Drop full logs, repeated output, and incidental detail. Be concise (roughly 4-10 bullets). Output ONLY the summary text, no preamble.`
120
+
121
/** Outcome of one summarizer-model attempt, surfaced in DCP debug logs. */
export interface ModelSummaryAttempt {
  /** The model ref exactly as it appeared in the configured list. */
  ref: string
  /**
   * What happened for this ref:
   *  - "ok": the model returned non-empty summary text,
   *  - "no-model": the model registry returned no model for the ref,
   *  - "no-auth": credentials could not be obtained (lookup threw, or the
   *    result was not ok / carried no API key),
   *  - "empty": the call succeeded but produced no text,
   *  - "error": the completion call threw (including abort/timeout).
   */
  outcome: "ok" | "no-model" | "no-auth" | "empty" | "error"
  /** Failure message, when one was available for this attempt. */
  error?: string
}
127
+
128
/** Result of {@link generateModelSummary}: optional text plus per-model attempts. */
export interface ModelSummaryResult {
  /** Summary text from the first model that succeeded; absent when none did. */
  text?: string
  /** Model ref that produced {@link text}, if any. */
  usedModelRef?: string
  /** One entry per model ref tried, in order, for debug visibility. */
  attempts: ModelSummaryAttempt[]
}
136
+
137
/**
 * Try to produce a model-generated summary by calling each model in
 * `modelRefs` in order and returning the first non-empty answer.
 *
 * On success returns `{ text, usedModelRef, attempts }`. If every model fails
 * (or the parent signal aborts) it returns `{ attempts }` with `text`
 * undefined, so the caller can fall back to the programmatic digest while
 * still recording which models were tried and why.
 *
 * Never throws: a summarizer failure must never block the agent. Registry
 * lookups, credential lookups and the completion call are all guarded, and
 * each failure is recorded as an attempt instead of propagating.
 *
 * @param modelRefs Ordered `provider/model-id` refs to try.
 * @param modelRegistry Object exposing `find(provider, id)` and
 *   `getApiKeyAndHeaders(model)`; anything else yields an empty result.
 * @param signal Optional parent abort signal; once aborted no further models
 *   are tried.
 * @param topic Label inserted into the user prompt.
 * @param messagesInRange Messages rendered into the transcript.
 * @param timeoutMs Per-call ceiling in ms (floored at 1000; a non-finite
 *   value falls back to 20000).
 * @returns The summary (if any) plus the per-ref attempt log.
 */
export async function generateModelSummary(
  modelRefs: string[],
  modelRegistry: any,
  signal: AbortSignal | undefined,
  topic: string,
  messagesInRange: any[],
  timeoutMs: number,
): Promise<ModelSummaryResult> {
  const attempts: ModelSummaryAttempt[] = []
  if (!Array.isArray(modelRefs) || modelRefs.length === 0) return { attempts }
  if (!modelRegistry || typeof modelRegistry.find !== "function" || typeof modelRegistry.getApiKeyAndHeaders !== "function") {
    return { attempts }
  }

  // Build a compact transcript from the range and cap its size so the
  // summarizer call stays cheap and bounded. When the cap is exceeded the
  // head and tail are kept (original intent + latest state) and the middle is
  // replaced by an explicit omission marker.
  const MAX_TRANSCRIPT_CHARS = 100000
  const fullTranscript = messagesInRange
    .map((msg, i) => {
      const role = msg?.role ?? "message"
      return `### ${role} #${i + 1}\n${messageToText(msg)}`
    })
    .join("\n\n")
  let transcript = fullTranscript
  if (fullTranscript.length > MAX_TRANSCRIPT_CHARS) {
    const half = Math.floor(MAX_TRANSCRIPT_CHARS / 2)
    const omitted = fullTranscript.length - half * 2
    transcript = `${fullTranscript.slice(0, half)}\n\n[... ${omitted} characters omitted ...]\n\n${fullTranscript.slice(-half)}`
  }
  const userPrompt = `Summarize this conversation slice (topic: ${topic}).\n\nTranscript:\n${transcript}`

  // A NaN/undefined timeout would make setTimeout fire almost immediately;
  // fall back to a sane ceiling instead.
  const budgetMs = Number.isFinite(timeoutMs) ? Math.max(1000, timeoutMs) : 20000

  for (const ref of modelRefs) {
    // The agent itself was cancelled: trying more models would only burn calls.
    if (signal?.aborted) break

    const parsed = typeof ref === "string" ? parseModelRef(ref) : undefined
    if (!parsed) {
      // Keep malformed refs visible in the debug log instead of skipping silently.
      attempts.push({ ref: String(ref), outcome: "no-model", error: 'invalid model ref (expected "provider/model-id")' })
      continue
    }

    let model: Model<Api> | undefined
    try {
      model = modelRegistry.find(parsed.provider, parsed.id)
    } catch (error) {
      attempts.push({ ref, outcome: "no-model", error: error instanceof Error ? error.message : String(error) })
      continue
    }
    if (!model) {
      attempts.push({ ref, outcome: "no-model" })
      continue
    }

    let auth
    try {
      auth = await modelRegistry.getApiKeyAndHeaders(model)
    } catch (error) {
      attempts.push({ ref, outcome: "no-auth", error: error instanceof Error ? error.message : String(error) })
      continue
    }
    if (!auth?.ok || !auth.apiKey) {
      attempts.push({ ref, outcome: "no-auth" })
      continue
    }

    // Combine the agent signal with a local timeout so a slow summarizer
    // cannot stall the context event indefinitely.
    const controller = new AbortController()
    const timer = setTimeout(() => controller.abort(), budgetMs)
    const onParentAbort = () => controller.abort()
    if (signal) {
      if (signal.aborted) controller.abort()
      else signal.addEventListener("abort", onParentAbort, { once: true })
    }

    try {
      const result = await complete(
        model,
        { systemPrompt: SUMMARIZER_SYSTEM_PROMPT, messages: [{ role: "user", content: userPrompt, timestamp: Date.now() }] },
        {
          apiKey: auth.apiKey,
          headers: auth.headers,
          env: auth.env,
          signal: controller.signal,
          maxRetries: 0,
        } as any,
      )
      const text = extractAssistantText(result)
      if (text) {
        attempts.push({ ref, outcome: "ok" })
        return { text, usedModelRef: ref, attempts }
      }
      attempts.push({ ref, outcome: "empty" })
    } catch (error) {
      // Recorded, not rethrown: the next model in the fallback list is tried.
      attempts.push({ ref, outcome: "error", error: error instanceof Error ? error.message : String(error) })
    } finally {
      clearTimeout(timer)
      if (signal) signal.removeEventListener("abort", onParentAbort)
    }
  }

  // No model produced text; callers use the programmatic digest floor.
  return { attempts }
}
237
+
238
/**
 * Pull the plain text out of a completion result.
 *
 * Collects every `text` block whose `text` is a string, joins them with
 * newlines and trims the result. Returns `undefined` when `content` is not an
 * array or when nothing but whitespace remains.
 */
function extractAssistantText(result: any): string | undefined {
  const blocks = result?.content
  if (!Array.isArray(blocks)) return undefined

  const fragments: string[] = []
  for (const entry of blocks) {
    if (entry?.type === "text" && typeof entry.text === "string") {
      fragments.push(entry.text)
    }
  }

  const joined = fragments.join("\n").trim()
  return joined === "" ? undefined : joined
}
248
+
249
/**
 * Split a `provider/model-id` reference into its two parts.
 *
 * The split happens at the FIRST slash: the provider is a single segment,
 * while a model id may itself contain slashes
 * (e.g. `openrouter/anthropic/claude-sonnet-4` → provider `openrouter`,
 * id `anthropic/claude-sonnet-4`).
 *
 * @param ref Raw reference; surrounding whitespace is ignored.
 * @returns The parsed parts, or `undefined` when `ref` is not a string, has
 *   no slash, or has an empty provider or id.
 */
function parseModelRef(ref: string): { provider: string; id: string } | undefined {
  if (typeof ref !== "string") return undefined
  const trimmed = ref.trim()
  const slash = trimmed.indexOf("/")
  // slash <= 0 → no slash or empty provider; slash at the end → empty id.
  if (slash <= 0 || slash === trimmed.length - 1) return undefined
  return { provider: trimmed.slice(0, slash), id: trimmed.slice(slash + 1) }
}
255
+
256
/** Inputs for `createAutoCompressionBlock`. */
export interface CreateAutoCompressionBlockOptions {
  /** Range to compress; its start/end ids are resolved to timestamps via `state`. */
  candidate: CompressionCandidate
  /** Label used in the summary text and passed on to the created block. */
  topic: string
  /** DCP session state: supplies the message snapshots and receives the new block. */
  state: DcpState
  /** DCP config; `compress.autoCompress` selects the summary source. */
  config: DcpConfig
  /** Messages of the current context pass; those inside the range feed the summary. */
  messages: any[]
  /** Forwarded to the summarizer to resolve models and credentials. */
  modelRegistry?: any
  /** Forwarded to the summarizer so its model calls can be aborted. */
  signal?: AbortSignal
}
265
+
266
/** What `createAutoCompressionBlock` reports back about the block it created. */
export interface AutoCompressionResult {
  /** Id of the newly created compression block. */
  blockId: number
  /**
   * Where the summary came from: "model" (a configured summarizer produced
   * it), "programmatic" (no summarizer configured) or "programmatic_fallback"
   * (summarizers were configured but none produced text).
   */
  summaryMode: "programmatic" | "model" | "programmatic_fallback"
  /** Token estimate of the stored summary, as reported by block creation. */
  summaryTokens: number
  /** Token estimate of the content removed, as reported by block creation. */
  removedTokenEstimate: number
  /** Model ref that produced the summary; set only when `summaryMode === "model"`. */
  summarizerModelRef?: string
  /** Per-model attempts, surfaced for DCP debug visibility on fallback. */
  summarizerAttempts?: ModelSummaryAttempt[]
}
276
+
277
/**
 * Create the auto-compression block for `candidate`.
 *
 * The summary source is chosen from `config.compress.autoCompress.summarizerModel`:
 *  - empty / missing → deterministic programmatic digest,
 *  - non-empty → model summary, falling back to the programmatic digest when
 *    every configured model fails or returns nothing.
 *
 * Block creation itself is delegated to the shared
 * `createRangeCompressionBlock` path, the same one a model-initiated compress
 * goes through.
 *
 * Config is read defensively: a missing `autoCompress` section, a missing or
 * non-array `summarizerModel`, or a non-finite `timeoutMs` degrades to the
 * programmatic digest / a 20000 ms ceiling instead of throwing. A single
 * string is accepted as a one-element model list, because iterating a bare
 * string would otherwise try each character as a model ref.
 *
 * @param options See {@link CreateAutoCompressionBlockOptions}.
 * @returns Ids and estimates for the created block plus summary provenance.
 * @throws Error when the candidate's start or end id cannot be resolved to a
 *   finite timestamp from the state snapshots.
 */
export async function createAutoCompressionBlock(
  options: CreateAutoCompressionBlockOptions,
): Promise<AutoCompressionResult> {
  const { candidate, topic, state, config, messages, modelRegistry, signal } = options
  const settings = config?.compress?.autoCompress

  // Resolve candidate message IDs (mNNN) to timestamps via the snapshot,
  // preferring the richer meta snapshot and falling back to the id snapshot.
  const startMeta = state.messageMetaSnapshot.get(candidate.startId)
  const endMeta = state.messageMetaSnapshot.get(candidate.endId)
  const startTimestamp = startMeta?.timestamp ?? state.messageIdSnapshot.get(candidate.startId)
  const endTimestamp = endMeta?.timestamp ?? state.messageIdSnapshot.get(candidate.endId)

  if (
    typeof startTimestamp !== "number" ||
    typeof endTimestamp !== "number" ||
    !Number.isFinite(startTimestamp) ||
    !Number.isFinite(endTimestamp)
  ) {
    throw new Error(
      `Auto-compress candidate ${candidate.startId}..${candidate.endId} did not resolve to finite timestamps`,
    )
  }

  const messagesInRange = messages.filter(
    (msg) =>
      Number.isFinite(msg?.timestamp) && msg.timestamp >= startTimestamp && msg.timestamp <= endTimestamp,
  )

  // Summary source selection. `summaryMode` distinguishes three cases so the
  // DCP debug log can tell a real model summary from a programmatic fallback
  // caused by summarizer failure:
  //   - "model": a configured model produced the summary.
  //   - "programmatic": no summarizer models configured (floor by design).
  //   - "programmatic_fallback": models were configured but all failed/empty.
  let summary = buildProgrammaticSummary(topic, candidate, messagesInRange)
  let summaryMode: "programmatic" | "model" | "programmatic_fallback" = "programmatic"
  let summarizerModelRef: string | undefined
  let summarizerAttempts: ModelSummaryAttempt[] | undefined

  // Normalise the configured list: tolerate a missing section, a scalar
  // string, and stray non-string / blank entries.
  const configuredRefs: unknown = settings?.summarizerModel
  const refList: unknown[] = Array.isArray(configuredRefs)
    ? configuredRefs
    : typeof configuredRefs === "string"
      ? [configuredRefs]
      : []
  const modelRefs = refList.filter(
    (ref): ref is string => typeof ref === "string" && ref.trim().length > 0,
  )
  const configuredTimeout: unknown = settings?.timeoutMs
  const timeoutMs =
    typeof configuredTimeout === "number" && Number.isFinite(configuredTimeout) ? configuredTimeout : 20000

  if (modelRefs.length > 0) {
    const modelResult = await generateModelSummary(
      modelRefs,
      modelRegistry,
      signal,
      topic,
      messagesInRange,
      timeoutMs,
    )
    summarizerAttempts = modelResult.attempts.length > 0 ? modelResult.attempts : undefined
    if (modelResult.text) {
      summary = modelResult.text
      summaryMode = "model"
      summarizerModelRef = modelResult.usedModelRef
    } else {
      // All configured models failed or returned empty — keep the
      // programmatic digest, but mark the mode distinctly so the fallback
      // is visible in DCP debug logs.
      summaryMode = "programmatic_fallback"
    }
  }

  const anchor = resolveAnchorBoundary(endTimestamp, state)
  const created = createRangeCompressionBlock({
    topic,
    summary,
    startTimestamp,
    endTimestamp,
    startMessageId: startMeta?.stableId,
    endMessageId: endMeta?.stableId,
    anchorTimestamp: anchor.timestamp,
    anchorMessageId: anchor.stableId,
    // No tool call initiated this block: DCP created it on its own.
    createdByToolCallId: undefined,
    state,
    config,
    mode: "range",
  })

  return {
    blockId: created.block.id,
    summaryMode,
    summaryTokens: created.summaryTokenEstimate,
    removedTokenEstimate: created.removedTokenEstimate,
    summarizerModelRef,
    summarizerAttempts,
  }
}
@@ -355,6 +355,9 @@ export function registerCompressTool(
355
355
  }
356
356
 
357
357
  const clearedNudgeAnchors = newBlockIds.length > 0 ? clearDcpNudgeAnchors(state) : 0
358
+ if (newBlockIds.length > 0) {
359
+ state.consecutiveIgnoredStrongNudges = 0
360
+ }
358
361
  if (clearedNudgeAnchors > 0) {
359
362
  try {
360
363
  pi.appendEntry("dcp-nudge", {
@@ -49,6 +49,23 @@ export interface DcpConfig {
49
49
  highTokens: number
50
50
  maxSuggestions: number
51
51
  }
52
+ /**
53
+ * Auto-compress fallback: when the model ignores repeated context-strong
54
+ * nudges above the emergency threshold, DCP creates a compression block
55
+ * itself (without waiting for the model). Lossy and irreversible within a
56
+ * session — disabled by default; opt in via config.
57
+ */
58
+ autoCompress: {
59
+ enabled: boolean
60
+ /** Number of context-strong nudges emitted (and ignored) before DCP
61
+ * auto-compresses. The model gets `patience` genuine strong chances. */
62
+ patience: number
63
+ /** Models to try, in order, when producing a model-generated summary.
64
+ * Empty array → deterministic programmatic digest (no model call). */
65
+ summarizerModel: string[]
66
+ /** Hard ceiling in ms for a single summarizer model call. */
67
+ timeoutMs: number
68
+ }
52
69
  }
53
70
  strategies: {
54
71
  deduplication: {
@@ -120,6 +137,12 @@ const DEFAULT_CONFIG: DcpConfig = {
120
137
  highTokens: 5000,
121
138
  maxSuggestions: 5,
122
139
  },
140
+ autoCompress: {
141
+ enabled: false,
142
+ patience: 2,
143
+ summarizerModel: [],
144
+ timeoutMs: 20000,
145
+ },
123
146
  },
124
147
  strategies: {
125
148
  deduplication: {
@@ -51,6 +51,7 @@ import {
51
51
  import { summarizeDcpState, writeDcpDebugLog } from "./debug-log.js"
52
52
  import type { DcpNudgeType } from "./pruner-types.js"
53
53
  import { registerCompressTool } from "./compress-tool.js"
54
+ import { decideAutoCompress, createAutoCompressionBlock } from "./auto-compress.js"
54
55
  import { DCP_STATS_MESSAGE_TYPE, registerCommands } from "./commands.js"
55
56
  import { normalizeDcpContextUsage } from "./ui.js"
56
57
  import { safeGetContextUsage } from "../context-usage.js"
@@ -417,6 +418,21 @@ export default async function dcpModule(pi: ExtensionAPI): Promise<void> {
417
418
  return finishContext("unknown-context-percent", prunedMessages, { clearedAnchors })
418
419
  }
419
420
 
421
+ // Record the observed context window on EVERY context event (before
422
+ // any early return) so a mid-session model/window downgrade is
423
+ // detectable even when earlier passes were below threshold. We
424
+ // snapshot the previous value first so the downgrade check below
425
+ // compares against the window the prior pass actually saw.
426
+ const currentContextWindow = usage.contextWindow
427
+ const previousContextWindow = state.lastContextWindow
428
+ if (
429
+ typeof currentContextWindow === "number" &&
430
+ Number.isFinite(currentContextWindow) &&
431
+ currentContextWindow > 0
432
+ ) {
433
+ state.lastContextWindow = currentContextWindow
434
+ }
435
+
420
436
  const ctxModel = (ctx as any).model
421
437
  const provider = ctxModel?.provider ?? ctxModel?.providerId ?? ctxModel?.providerID
422
438
  const model = ctxModel?.id ?? ctxModel?.model ?? ctxModel?.modelId ?? ctxModel?.modelID
@@ -448,13 +464,30 @@ export default async function dcpModule(pi: ExtensionAPI): Promise<void> {
448
464
  if (msg.role === "toolResult") toolCallsSinceLastUser++
449
465
  }
450
466
 
451
- const nudgeType = getNudgeType(
452
- contextPercent,
453
- state,
454
- effectiveConfig,
455
- toolCallsSinceLastUser,
456
- thresholds,
457
- )
467
+ // Switch-aware pre-emptive nudge: detect a mid-session context-window
468
+ // downgrade (e.g. model switch from a 1M window to a 275K window).
469
+ // Inherited tokens that were cheap on the larger window can suddenly
470
+ // sit above minContextPercent on the smaller one. When that happens,
471
+ // force a strong nudge on this pass so the model is told to compress
472
+ // before the smaller window fills, instead of waiting for cadence.
473
+ const windowDowngraded =
474
+ typeof previousContextWindow === "number" &&
475
+ Number.isFinite(previousContextWindow) &&
476
+ previousContextWindow > 0 &&
477
+ typeof currentContextWindow === "number" &&
478
+ Number.isFinite(currentContextWindow) &&
479
+ currentContextWindow < previousContextWindow * 0.9 &&
480
+ contextPercent > thresholds.minContextPercent
481
+
482
+ const nudgeType = windowDowngraded
483
+ ? "context-strong"
484
+ : getNudgeType(
485
+ contextPercent,
486
+ state,
487
+ effectiveConfig,
488
+ toolCallsSinceLastUser,
489
+ thresholds,
490
+ )
458
491
 
459
492
  const manualEmergencyOnly =
460
493
  state.manualMode &&
@@ -483,6 +516,78 @@ export default async function dcpModule(pi: ExtensionAPI): Promise<void> {
483
516
  }, ctx)
484
517
  }
485
518
 
519
// Track consecutive ignored context-strong nudges for the auto-compress
// fallback. A strong nudge counts as "ignored" when another strong nudge
// fires on a later context event without a successful compress in between
// (the compress tool resets the counter whenever it creates a block).
//
// Only "context-strong" may advance the counter. Any other outcome on this
// pass — a softer nudge, a different nudge type, or no nudge at all — breaks
// the streak and resets it, so soft nudges can never push the model past
// `patience` and trigger the lossy auto-compress.
if (nudgeType === "context-strong") {
  // A counter that was never initialised must start from 0 rather than
  // turning into NaN.
  const ignoredSoFar = Number.isFinite(state.consecutiveIgnoredStrongNudges)
    ? state.consecutiveIgnoredStrongNudges
    : 0
  state.consecutiveIgnoredStrongNudges = ignoredSoFar + 1
} else {
  state.consecutiveIgnoredStrongNudges = 0
}
531
+
532
+ // Auto-compress fallback: if the model has ignored enough strong
533
+ // nudges while above the emergency threshold, DCP creates a
534
+ // compression block itself instead of nudging again.
535
+ if (!manualEmergencyOnly) {
536
+ const autoDecision = decideAutoCompress(
537
+ state,
538
+ effectiveConfig,
539
+ contextPercent,
540
+ thresholds.maxContextPercent,
541
+ candidate,
542
+ )
543
+ if (autoDecision.shouldFire && candidate) {
544
+ try {
545
+ const autoResult = await createAutoCompressionBlock({
546
+ candidate,
547
+ topic: "Auto-compressed slice",
548
+ state,
549
+ config: effectiveConfig,
550
+ messages: prunedMessages,
551
+ modelRegistry: (ctx as any).modelRegistry,
552
+ signal: (ctx as any).signal,
553
+ })
554
+ // Re-apply pruning so the new block takes effect on this
555
+ // same context pass instead of the next one.
556
+ prunedMessages = applyPruning(prunedMessages, state, effectiveConfig)
557
+ const clearedAnchors = clearDcpNudgeAnchors(state)
558
+ state.consecutiveIgnoredStrongNudges = 0
559
+ await saveDcpState(ctx, state)
560
+ writeDcpDebugLog(effectiveConfig, "compress.auto", {
561
+ trigger: autoDecision.reason,
562
+ blockId: `b${autoResult.blockId}`,
563
+ summaryMode: autoResult.summaryMode,
564
+ summarizerModelRef: autoResult.summarizerModelRef,
565
+ summarizerAttempts: autoResult.summarizerAttempts,
566
+ summaryTokens: autoResult.summaryTokens,
567
+ removedTokenEstimate: autoResult.removedTokenEstimate,
568
+ candidate,
569
+ clearedAnchors,
570
+ state: summarizeDcpState(state),
571
+ }, ctx)
572
+ return finishContext("compress.auto", prunedMessages, {
573
+ candidate,
574
+ messageCandidates,
575
+ contextPercent,
576
+ thresholds,
577
+ clearedAnchors,
578
+ })
579
+ } catch (error) {
580
+ writeDcpDebugLog(effectiveConfig, "compress.auto_failed", {
581
+ trigger: autoDecision.reason,
582
+ error: error instanceof Error ? error.message : String(error),
583
+ candidate,
584
+ state: summarizeDcpState(state),
585
+ }, ctx)
586
+ // Fall through to normal nudge emission on failure.
587
+ }
588
+ }
589
+ }
590
+
486
591
  if (nudgeType && !manualEmergencyOnly) {
487
592
  const nudgeText = appendConcreteNudgeGuidance(
488
593
  baseNudgeText(nudgeType),
@@ -30,6 +30,8 @@ Use \`compress\` as context-pressure housekeeping, not as a reflex after every s
30
30
 
31
31
  A closed slice is any finished implementation, verification, config edit, answered exploration, dead-end debugging branch, or test/log inspection. Passing logs are summary-only: preserve command, pass/fail, key failures if any, and whether follow-up is needed; never keep a full passing log in live context. Treat large shell/read/repo/web outputs as disposable evidence once their facts are extracted.
32
32
 
33
+ Completed todo/task/subtask milestones are strong boundary signals, not automatic triggers. When context pressure makes compression useful, prefer the range covering a just-finished todo item if it is closed and non-trivial; do not compress merely because a todo was completed while context remains low.
34
+
33
35
  Before compressing while work is unfinished, ensure one \`todo in_progress\` captures the active objective and next step.
34
36
 
35
37
  When a \`<dcp-system-reminder>\` appears, treat it as a context-pressure signal. Follow critical/high-context reminders promptly. For routine reminders, compress only if a genuinely closed, useful-to-summarize slice exists; otherwise continue the next atomic step and re-check later.
@@ -183,6 +185,7 @@ You MUST use the \`compress\` tool now. Do not continue normal exploration until
183
185
 
184
186
  If you are in the middle of a critical atomic operation, finish that atomic step first, then compress immediately.
185
187
  If any closed slice exists (finished implementation, verification, config/doc edit, answered exploration, dead end, or test/log inspection), compress it before replying or starting another task. Passing logs should become command + pass/fail + follow-up status only.
188
+ Recently completed todo/task/subtask items are preferred boundaries when they form a high-yield closed slice.
186
189
 
187
190
  RANGE STRATEGY (MANDATORY)
188
191
  Prioritize one large, closed, high-yield compression range first.
@@ -210,6 +213,7 @@ ACTION REQUIRED: Context usage is high.
210
213
  Before doing more exploration, look for a high-yield closed range that no longer needs to stay raw. Compress it now if one is safe and useful.
211
214
 
212
215
  This is context-pressure guidance, not a request to compress tiny or still-needed slices. If completed research, implementation, verification, config/doc edit, CI-log inspection, or dead-end debugging is large enough to reduce signal, call the \`compress\` tool before continuing normal work.
216
+ Recently completed todo/task/subtask items are preferred candidates when they form a non-trivial closed slice; do not compress merely because a tiny todo was completed.
213
217
  High-priority stale shell/read/repo/web outputs should be compressed once no exact raw text is needed. Passing logs should not remain raw after they are understood.
214
218
 
215
219
  RANGE SELECTION
@@ -230,6 +234,8 @@ CONTEXT CHECK: Evaluate whether compression would materially improve the live co
230
234
 
231
235
  If a range is cleanly closed, non-trivial, and unlikely to be needed verbatim again, use the \`compress\` tool. If direction has shifted, consider whether earlier ranges are now less relevant.
232
236
 
237
+ If a todo/task/subtask was just completed, treat that completed work as a preferred compression boundary when it is large enough and no longer needed raw; completion alone is not a reason to compress while context is still low.
238
+
233
239
  Do not compress just because a small slice closed while context is still low. Prefer compression before another large batch of searches, reads, CI log fetches, or tests when a high-yield stale slice exists.
234
240
  High-priority stale shell/read/repo/web outputs and understood passing logs should be compressed once no exact raw text is needed.
235
241
 
@@ -247,6 +253,8 @@ CONTEXT CHECK: You've been iterating for a while after the last user message.
247
253
 
248
254
  Pause before the next large non-atomic tool batch. If there is a closed portion that is unlikely to be referenced immediately and is worth summarizing (for example, finished research before implementation, completed config edit, completed CI-log triage, a verified fix, or a dead-end investigation), use the \`compress\` tool on it.
249
255
 
256
+ If a todo/task/subtask was just completed, prefer that completed work as the compression boundary when it is non-trivial and safe to summarize; do not compress merely because the todo status changed.
257
+
250
258
  Avoid accumulating large tool outputs while a high-yield completed slice remains raw. If only small or still-needed ranges are closed, continue the next atomic step and re-check later.
251
259
 
252
260
  Prefer multiple short, closed ranges over one large range when several independent slices are ready.