pi-ui-extend 0.1.37 → 0.1.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/app.d.ts +0 -1
- package/dist/app/app.js +22 -21
- package/dist/app/input/input-controller.d.ts +1 -0
- package/dist/app/input/input-controller.js +40 -12
- package/dist/app/model/model-usage-status.js +4 -2
- package/dist/app/session/request-history.js +2 -0
- package/dist/app/session/session-event-controller.d.ts +13 -0
- package/dist/app/session/session-event-controller.js +27 -0
- package/dist/app/session/tabs-controller.d.ts +8 -0
- package/dist/app/session/tabs-controller.js +37 -6
- package/dist/app/workspace/workspace-actions-controller.d.ts +1 -0
- package/dist/app/workspace/workspace-actions-controller.js +2 -1
- package/dist/bundled-extensions/terminal-bell/index.js +1 -1
- package/dist/markdown-format.js +14 -25
- package/dist/terminal-width.d.ts +14 -0
- package/dist/terminal-width.js +31 -2
- package/dist/theme.js +2 -2
- package/external/pi-tools-suite/README.md +34 -9
- package/external/pi-tools-suite/package.json +3 -3
- package/external/pi-tools-suite/src/async-subagents/async-subagents.sample.jsonc +35 -21
- package/external/pi-tools-suite/src/async-subagents/commands.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/core/agent-strategy.ts +2 -2
- package/external/pi-tools-suite/src/async-subagents/core/config.ts +70 -12
- package/external/pi-tools-suite/src/async-subagents/core/routing.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/core/spawn.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/core/types.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/index.ts +6 -6
- package/external/pi-tools-suite/src/async-subagents/lib.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/tools/spawn.ts +4 -2
- package/external/pi-tools-suite/src/async-subagents/tools/subagents.ts +2 -2
- package/external/pi-tools-suite/src/{glm-coding-discipline → coding-discipline}/index.ts +17 -8
- package/external/pi-tools-suite/src/config.ts +1 -1
- package/external/pi-tools-suite/src/dcp/auto-compress.ts +368 -0
- package/external/pi-tools-suite/src/dcp/compress-tool.ts +3 -0
- package/external/pi-tools-suite/src/dcp/config.ts +23 -0
- package/external/pi-tools-suite/src/dcp/index.ts +112 -7
- package/external/pi-tools-suite/src/dcp/prompts.ts +8 -0
- package/external/pi-tools-suite/src/dcp/state.ts +41 -0
- package/external/pi-tools-suite/src/default-pi-tools-suite-config.ts +30 -22
- package/external/pi-tools-suite/src/index.ts +2 -1
- package/external/pi-tools-suite/src/session-name/index.ts +37 -0
- package/external/pi-tools-suite/src/tool-descriptions.ts +16 -4
- package/package.json +4 -4
|
@@ -15,7 +15,7 @@ export interface PiToolsSuiteConfig {
|
|
|
15
15
|
enabled: boolean;
|
|
16
16
|
disabledModules: string[];
|
|
17
17
|
todoThinking: boolean;
|
|
18
|
-
/** Vision-capable model used by the
|
|
18
|
+
/** Vision-capable model used by the coding-discipline lookup tool; unset disables lookup. */
|
|
19
19
|
lookupModel?: string;
|
|
20
20
|
telegramMirror?: TelegramMirrorConfig;
|
|
21
21
|
}
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Dynamic Context Pruning (DCP) — auto-compress fallback
|
|
3
|
+
//
|
|
4
|
+
// When a model ignores repeated context-strong nudges above the emergency
|
|
5
|
+
// threshold (observed with gpt-5.5 in session 019edfe3: 59 strong nudges,
|
|
6
|
+
// 0 compress calls), DCP creates a compression block itself instead of
|
|
7
|
+
// waiting for the model. This is the model-independent safety net.
|
|
8
|
+
//
|
|
9
|
+
// Lossy and irreversible within a session; disabled by default and gated by a
|
|
10
|
+
// patience counter + the emergency threshold. The summary can be produced
|
|
11
|
+
// either by a deterministic programmatic digest (default) or by a configured
|
|
12
|
+
// list of summarizer models (e.g. a cheap model like zai/glm-5.2), with
|
|
13
|
+
// automatic fallback to the programmatic digest on any failure/timeout.
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
import { complete } from "@earendil-works/pi-ai"
|
|
17
|
+
import type { Model, Api } from "@earendil-works/pi-ai"
|
|
18
|
+
import type { DcpState } from "./state.js"
|
|
19
|
+
import type { DcpConfig } from "./config.js"
|
|
20
|
+
import type { CompressionCandidate } from "./pruner-types.js"
|
|
21
|
+
import {
|
|
22
|
+
createRangeCompressionBlock,
|
|
23
|
+
resolveAnchorBoundary,
|
|
24
|
+
} from "./compression-blocks.js"
|
|
25
|
+
|
|
26
|
+
/**
 * Decide whether the auto-compress fallback should run on this context pass.
 *
 * Side-effect free. Gates are evaluated in order and the first one that fails
 * supplies `reason`:
 *  1. `"disabled"` — `config.compress.autoCompress` is missing or not enabled.
 *  2. `"below-patience"` — `state.consecutiveIgnoredStrongNudges` is not yet
 *     strictly greater than `patience`.
 *  3. `"below-emergency-threshold"` — `contextPercent` does not exceed
 *     `maxContextPercent` (a NaN percentage also ends up here).
 *  4. `"no-candidate"` — no compression candidate was supplied.
 *
 * @returns `{ shouldFire: true, reason: "ignored-strongs" }` when every gate
 *   passes, otherwise `shouldFire: false` with the failing gate's reason.
 */
export function decideAutoCompress(
  state: DcpState,
  config: DcpConfig,
  contextPercent: number,
  maxContextPercent: number,
  candidate: CompressionCandidate | null,
): { shouldFire: boolean; reason: string } {
  const autoCompress = config.compress.autoCompress
  const decline = (reason: string) => ({ shouldFire: false, reason })

  if (!autoCompress?.enabled) return decline("disabled")

  const ignoredStrongNudges = state.consecutiveIgnoredStrongNudges
  if (ignoredStrongNudges <= autoCompress.patience) return decline("below-patience")

  // Written as a positive comparison so NaN counts as "not above".
  const aboveEmergencyThreshold = contextPercent > maxContextPercent
  if (!aboveEmergencyThreshold) return decline("below-emergency-threshold")

  return candidate ? { shouldFire: true, reason: "ignored-strongs" } : decline("no-candidate")
}
|
|
55
|
+
|
|
56
|
+
/**
 * Render one message as plain text.
 *
 * String content is returned untouched. Array content is rendered block by
 * block, joined with newlines and trimmed: raw strings and `text` blocks
 * contribute their text, `toolCall` blocks become a `[tool call: <name>]`
 * marker, tool results contribute their `text`, and every other block
 * contributes an empty line. Any other content shape yields `""`.
 */
function messageToText(message: any): string {
  const body = message?.content
  if (typeof body === "string") return body
  if (!Array.isArray(body)) return ""

  const renderBlock = (part: any): string => {
    if (typeof part === "string") return part
    switch (part?.type) {
      case "text":
        return part.text ?? ""
      case "toolCall":
        return `[tool call: ${part.name ?? part.function?.name ?? "tool"}]`
      default:
        // Tool results may be tagged through either `type` or `role`.
        return part?.type === "toolResult" || part?.role === "toolResult" ? (part.text ?? "") : ""
    }
  }

  const rendered: string[] = []
  for (const part of body) rendered.push(renderBlock(part))
  return rendered.join("\n").trim()
}
|
|
77
|
+
|
|
78
|
+
/**
 * Summarize which tools were called in `messages`.
 *
 * Counts `toolCall` blocks that carry a string `name` across all messages
 * with array content and renders them as `name×count`, most-used first
 * (ties keep first-seen order). Returns `""` when no tool call is found.
 */
function toolUsageDigest(messages: any[]): string {
  const calledTools: string[] = messages.flatMap((entry) => {
    const parts = entry?.content
    if (!Array.isArray(parts)) return []
    return parts
      .filter((part: any) => part?.type === "toolCall" && typeof part.name === "string")
      .map((part: any) => part.name as string)
  })

  const tally = new Map<string, number>()
  for (const toolName of calledTools) {
    tally.set(toolName, (tally.get(toolName) ?? 0) + 1)
  }
  if (tally.size === 0) return ""

  return Array.from(tally)
    .sort(([, left], [, right]) => right - left)
    .map(([toolName, uses]) => `${toolName}×${uses}`)
    .join(", ")
}
|
|
94
|
+
|
|
95
|
+
/**
 * Build the model-free summary text for an auto-compressed range.
 *
 * The result is a short, newline-separated label: a fixed banner, the topic,
 * the candidate's id range with its message count and token estimate, an
 * optional tool-call tally for `messagesInRange`, and a closing note. It is
 * deterministic for identical inputs.
 *
 * @param topic Label written on the `Topic:` line.
 * @param candidate Range being compressed; its ids and estimates are echoed.
 * @param messagesInRange Messages scanned for the tool-call tally.
 */
export function buildProgrammaticSummary(
  topic: string,
  candidate: CompressionCandidate,
  messagesInRange: any[],
): string {
  const toolDigest = toolUsageDigest(messagesInRange)
  const banner = `[Auto-compressed by DCP — model did not compress after repeated context-strong nudges]`
  const rangeLine = `Range: ${candidate.startId}..${candidate.endId} (${candidate.messageCount} messages, ~${candidate.estimatedTokens} tokens)`
  const closingNote = `This slice was summarized automatically to protect the context window. Protected user messages and tool outputs are preserved below by the compression block.`

  // The tally line is omitted entirely when the range contains no tool calls.
  const toolLines = toolDigest ? [`Tool calls in range: ${toolDigest}`] : []

  return [banner, `Topic: ${topic}`, rangeLine, ...toolLines, closingNote].join("\n")
}
|
|
118
|
+
|
|
119
|
+
/** System prompt passed as `systemPrompt` on the summarizer completion request in `generateModelSummary`. */
const SUMMARIZER_SYSTEM_PROMPT = `You summarize a slice of a coding agent's conversation so it can replace the raw messages in context. Produce a dense, continuation-focused summary: preserve user intent, decisions made, files/symbols changed or inspected, exact errors still actionable, verification status, and next steps. Drop full logs, repeated output, and incidental detail. Be concise (roughly 4-10 bullets). Output ONLY the summary text, no preamble.`
|
|
120
|
+
|
|
121
|
+
/** Record of a single summarizer-model attempt, kept for DCP debug logs. */
export interface ModelSummaryAttempt {
  /** Model ref exactly as it was configured. */
  ref: string
  /**
   * How the attempt ended:
   * - `"ok"` — the model returned non-empty text.
   * - `"no-model"` — the ref could not be resolved to a registered model.
   * - `"no-auth"` — credentials could not be obtained for the model.
   * - `"empty"` — the call completed but yielded no text.
   * - `"error"` — the completion call threw.
   */
  outcome: "ok" | "no-model" | "no-auth" | "empty" | "error"
  /** Message of the underlying failure, when one was captured. */
  error?: string
}
|
|
127
|
+
|
|
128
|
+
/** What {@link generateModelSummary} resolves to: optional summary text plus the per-model attempt log. */
export interface ModelSummaryResult {
  /** Summary text; absent when no model produced one. */
  text?: string
  /** Ref of the model that produced {@link text}, if any. */
  usedModelRef?: string
  /** Attempts in the order they were made, for debug visibility. */
  attempts: ModelSummaryAttempt[]
}
|
|
136
|
+
|
|
137
|
+
/**
 * Ask each model in `modelRefs`, in order, to summarize `messagesInRange` and
 * return the first non-empty answer.
 *
 * Never throws: every per-model failure (unparsable ref, unknown model,
 * missing credentials, thrown/timed-out call, empty answer) is recorded in
 * `attempts` and the next model is tried. When no model succeeds the result
 * has no `text`, which tells the caller to use the programmatic digest.
 *
 * @param modelRefs Ordered `provider/model-id` refs to try.
 * @param modelRegistry Object exposing `find(provider, id)` and
 *   `getApiKeyAndHeaders(model)`; anything else yields an empty result.
 * @param signal Optional parent abort signal. It is chained into each call,
 *   and once it is aborted no further models are tried.
 * @param topic Topic label embedded in the user prompt.
 * @param messagesInRange Messages rendered into the transcript.
 * @param timeoutMs Ceiling for a single model call; values below 1000 ms or
 *   non-finite values are raised to 1000 ms.
 * @returns `{ text, usedModelRef, attempts }` on success, `{ attempts }` otherwise.
 */
export async function generateModelSummary(
  modelRefs: string[],
  modelRegistry: any,
  signal: AbortSignal | undefined,
  topic: string,
  messagesInRange: any[],
  timeoutMs: number,
): Promise<ModelSummaryResult> {
  const attempts: ModelSummaryAttempt[] = []
  if (!modelRefs || modelRefs.length === 0) return { attempts }
  if (!modelRegistry || typeof modelRegistry.find !== "function" || typeof modelRegistry.getApiKeyAndHeaders !== "function") {
    return { attempts }
  }

  const describeError = (error: unknown): string => (error instanceof Error ? error.message : String(error))

  // Flatten the range into a role-labelled transcript.
  // NOTE(review): no size cap is applied here; an oversized range relies on
  // the summarizer call failing and the caller falling back to the digest.
  const transcript = messagesInRange
    .map((msg, i) => {
      const role = msg?.role ?? "message"
      return `### ${role} #${i + 1}\n${messageToText(msg)}`
    })
    .join("\n\n")
  const userPrompt = `Summarize this conversation slice (topic: ${topic}).\n\nTranscript:\n${transcript}`

  // Loop-invariant. A non-finite timeout would otherwise turn into NaN and
  // abort the call almost immediately.
  const callTimeoutMs = Number.isFinite(timeoutMs) ? Math.max(1000, timeoutMs) : 1000

  for (const ref of modelRefs) {
    // The agent was cancelled: every remaining call would be aborted anyway.
    if (signal?.aborted) break

    // Resolve the ref. Resolution failures are recorded instead of thrown so
    // the "never throws" contract holds and the debug log shows every ref.
    let model: Model<Api> | undefined
    try {
      const parsed = parseModelRef(ref)
      if (!parsed) {
        attempts.push({ ref, outcome: "no-model", error: "invalid model ref (expected provider/model-id)" })
        continue
      }
      model = modelRegistry.find(parsed.provider, parsed.id)
    } catch (error) {
      attempts.push({ ref, outcome: "no-model", error: describeError(error) })
      continue
    }
    if (!model) {
      attempts.push({ ref, outcome: "no-model" })
      continue
    }

    let auth
    try {
      auth = await modelRegistry.getApiKeyAndHeaders(model)
    } catch (error) {
      attempts.push({ ref, outcome: "no-auth", error: describeError(error) })
      continue
    }
    if (!auth?.ok || !auth.apiKey) {
      attempts.push({ ref, outcome: "no-auth" })
      continue
    }

    // Combine the agent signal with a local timeout so a slow summarizer
    // cannot stall the context event indefinitely.
    const controller = new AbortController()
    const timer = setTimeout(() => controller.abort(), callTimeoutMs)
    const onParentAbort = () => controller.abort()
    if (signal) {
      if (signal.aborted) controller.abort()
      else signal.addEventListener("abort", onParentAbort, { once: true })
    }

    try {
      const result = await complete(
        model,
        { systemPrompt: SUMMARIZER_SYSTEM_PROMPT, messages: [{ role: "user", content: userPrompt, timestamp: Date.now() }] },
        {
          apiKey: auth.apiKey,
          headers: auth.headers,
          env: auth.env,
          signal: controller.signal,
          maxRetries: 0,
        } as any,
      )
      const text = extractAssistantText(result)
      if (text) {
        attempts.push({ ref, outcome: "ok" })
        return { text, usedModelRef: ref, attempts }
      }
      attempts.push({ ref, outcome: "empty" })
    } catch (error) {
      // Swallowed on purpose: recorded, then the next model in the list is tried.
      attempts.push({ ref, outcome: "error", error: describeError(error) })
    } finally {
      // Always release the timer and the parent listener, success or failure.
      clearTimeout(timer)
      if (signal) signal.removeEventListener("abort", onParentAbort)
    }
  }

  return { attempts }
}
|
|
237
|
+
|
|
238
|
+
/**
 * Pull the text out of a completion result.
 *
 * Joins the `text` of every `text` block in `result.content` with newlines
 * and trims the result. Returns `undefined` when `content` is not an array or
 * the joined text is empty.
 */
function extractAssistantText(result: any): string | undefined {
  const blocks = result?.content
  if (!Array.isArray(blocks)) return undefined

  const pieces: string[] = []
  for (const part of blocks) {
    if (part?.type === "text" && typeof part.text === "string") pieces.push(part.text)
  }

  const joined = pieces.join("\n").trim()
  return joined === "" ? undefined : joined
}
|
|
248
|
+
|
|
249
|
+
/**
 * Split a `provider/model-id` ref into its two parts.
 *
 * The split happens at the FIRST slash so that model ids which themselves
 * contain slashes (e.g. `openrouter/anthropic/claude-sonnet-4`) stay intact
 * and the provider is the leading segment only.
 *
 * @param ref Model ref; surrounding whitespace is ignored.
 * @returns The parsed parts, or `undefined` when there is no slash or either
 *   side of the first slash is empty.
 */
function parseModelRef(ref: string): { provider: string; id: string } | undefined {
  const trimmed = ref.trim()
  const slash = trimmed.indexOf("/")
  // Rejects "no-slash" (-1), "/id" (0) and "provider/" (slash is last char).
  if (slash <= 0 || slash === trimmed.length - 1) return undefined
  return { provider: trimmed.slice(0, slash), id: trimmed.slice(slash + 1) }
}
|
|
255
|
+
|
|
256
|
+
/** Inputs accepted by `createAutoCompressionBlock`. */
export interface CreateAutoCompressionBlockOptions {
  /** Range to compress; its start/end ids are resolved to timestamps through `state`. */
  candidate: CompressionCandidate
  /** Topic label used for the summary and the compression block. */
  topic: string
  /** DCP state providing the message snapshots and receiving the new block. */
  state: DcpState
  /** Effective DCP config; `compress.autoCompress` selects the summary source. */
  config: DcpConfig
  /** Current messages; those whose timestamp falls inside the range are summarized. */
  messages: any[]
  /** Registry handed to the summarizer to resolve models and credentials. */
  modelRegistry?: any
  /** Abort signal handed to the summarizer calls. */
  signal?: AbortSignal
}
|
|
265
|
+
|
|
266
|
+
/** Outcome reported by `createAutoCompressionBlock`. */
export interface AutoCompressionResult {
  /** Id of the compression block that was created. */
  blockId: number
  /**
   * Where the summary came from: `"model"` (a configured model produced it),
   * `"programmatic"` (no summarizer models configured) or
   * `"programmatic_fallback"` (models were configured but none produced text).
   */
  summaryMode: "programmatic" | "model" | "programmatic_fallback"
  /** Token estimate of the stored summary, as reported by block creation. */
  summaryTokens: number
  /** Token estimate of the content removed, as reported by block creation. */
  removedTokenEstimate: number
  /** Model ref that produced the summary; set only when `summaryMode === "model"`. */
  summarizerModelRef?: string
  /** Per-model attempts, surfaced for DCP debug visibility on fallback. */
  summarizerAttempts?: ModelSummaryAttempt[]
}
|
|
276
|
+
|
|
277
|
+
/**
 * Create the auto-compression block for `options.candidate`.
 *
 * Steps:
 *  1. Resolve the candidate's start/end message ids to timestamps through the
 *     state snapshots.
 *  2. Collect the messages whose timestamp lies inside that range.
 *  3. Pick the summary: the programmatic digest by default, or — when
 *     `config.compress.autoCompress.summarizerModel` lists models — a model
 *     summary, keeping the digest if no model produces text.
 *  4. Delegate block creation to the shared `createRangeCompressionBlock`
 *     path, the same one a model-initiated compress uses.
 *
 * `summarizerModel` is normalized defensively: a missing section means "no
 * models" and a single string is treated as a one-element list, so a
 * hand-written config cannot throw here or be iterated character by character.
 *
 * @throws Error when the candidate ids do not resolve to finite timestamps.
 */
export async function createAutoCompressionBlock(
  options: CreateAutoCompressionBlockOptions,
): Promise<AutoCompressionResult> {
  const { candidate, topic, state, config, messages, modelRegistry, signal } = options
  const settings = config.compress.autoCompress

  // Resolve candidate message IDs (mNNN) to timestamps via the snapshot.
  const startMeta = state.messageMetaSnapshot.get(candidate.startId)
  const endMeta = state.messageMetaSnapshot.get(candidate.endId)
  const rawStart = startMeta?.timestamp ?? state.messageIdSnapshot.get(candidate.startId)
  const rawEnd = endMeta?.timestamp ?? state.messageIdSnapshot.get(candidate.endId)

  if (!Number.isFinite(rawStart) || !Number.isFinite(rawEnd)) {
    throw new Error(
      `Auto-compress candidate ${candidate.startId}..${candidate.endId} did not resolve to finite timestamps`,
    )
  }
  const startTimestamp: number = rawStart as number
  const endTimestamp: number = rawEnd as number

  const messagesInRange = messages.filter(
    (msg) =>
      Number.isFinite(msg?.timestamp) && msg.timestamp >= startTimestamp && msg.timestamp <= endTimestamp,
  )

  // Summary source selection. `summaryMode` distinguishes three cases so the
  // DCP debug log can tell a real model summary from a programmatic fallback
  // caused by summarizer failure:
  //   - "model": a configured model produced the summary.
  //   - "programmatic": no summarizer models configured (floor by design).
  //   - "programmatic_fallback": models were configured but all failed/empty.
  let summary = buildProgrammaticSummary(topic, candidate, messagesInRange)
  let summaryMode: "programmatic" | "model" | "programmatic_fallback" = "programmatic"
  let summarizerModelRef: string | undefined
  let summarizerAttempts: ModelSummaryAttempt[] | undefined

  // Normalize the configured refs to a string[] before use.
  const configuredModels: unknown = settings?.summarizerModel
  let modelRefs: string[] = []
  if (Array.isArray(configuredModels)) {
    modelRefs = configuredModels.filter((ref): ref is string => typeof ref === "string")
  } else if (typeof configuredModels === "string") {
    modelRefs = [configuredModels]
  }

  if (modelRefs.length > 0) {
    const modelResult = await generateModelSummary(
      modelRefs,
      modelRegistry,
      signal,
      topic,
      messagesInRange,
      settings.timeoutMs,
    )
    summarizerAttempts = modelResult.attempts.length > 0 ? modelResult.attempts : undefined
    if (modelResult.text) {
      summary = modelResult.text
      summaryMode = "model"
      summarizerModelRef = modelResult.usedModelRef
    } else {
      // All configured models failed or returned empty — keep the
      // programmatic digest, but mark the mode distinctly so the fallback
      // is visible in DCP debug logs.
      summaryMode = "programmatic_fallback"
    }
  }

  const anchor = resolveAnchorBoundary(endTimestamp, state)
  const created = createRangeCompressionBlock({
    topic,
    summary,
    startTimestamp,
    endTimestamp,
    startMessageId: startMeta?.stableId,
    endMessageId: endMeta?.stableId,
    anchorTimestamp: anchor.timestamp,
    anchorMessageId: anchor.stableId,
    // No tool call exists: DCP, not the model, initiated this compression.
    createdByToolCallId: undefined,
    state,
    config,
    mode: "range",
  })

  return {
    blockId: created.block.id,
    summaryMode,
    summaryTokens: created.summaryTokenEstimate,
    removedTokenEstimate: created.removedTokenEstimate,
    summarizerModelRef,
    summarizerAttempts,
  }
}
|
|
@@ -355,6 +355,9 @@ export function registerCompressTool(
|
|
|
355
355
|
}
|
|
356
356
|
|
|
357
357
|
const clearedNudgeAnchors = newBlockIds.length > 0 ? clearDcpNudgeAnchors(state) : 0
|
|
358
|
+
if (newBlockIds.length > 0) {
|
|
359
|
+
state.consecutiveIgnoredStrongNudges = 0
|
|
360
|
+
}
|
|
358
361
|
if (clearedNudgeAnchors > 0) {
|
|
359
362
|
try {
|
|
360
363
|
pi.appendEntry("dcp-nudge", {
|
|
@@ -49,6 +49,23 @@ export interface DcpConfig {
|
|
|
49
49
|
highTokens: number
|
|
50
50
|
maxSuggestions: number
|
|
51
51
|
}
|
|
52
|
+
/**
|
|
53
|
+
* Auto-compress fallback: when the model ignores repeated context-strong
|
|
54
|
+
* nudges above the emergency threshold, DCP creates a compression block
|
|
55
|
+
* itself (without waiting for the model). Lossy and irreversible within a
|
|
56
|
+
* session — disabled by default; opt in via config.
|
|
57
|
+
*/
|
|
58
|
+
autoCompress: {
|
|
59
|
+
enabled: boolean
|
|
60
|
+
/** Number of context-strong nudges emitted (and ignored) before DCP
|
|
61
|
+
* auto-compresses. The model gets `patience` genuine strong chances. */
|
|
62
|
+
patience: number
|
|
63
|
+
/** Models to try, in order, when producing a model-generated summary.
|
|
64
|
+
* Empty array → deterministic programmatic digest (no model call). */
|
|
65
|
+
summarizerModel: string[]
|
|
66
|
+
/** Hard ceiling in ms for a single summarizer model call. */
|
|
67
|
+
timeoutMs: number
|
|
68
|
+
}
|
|
52
69
|
}
|
|
53
70
|
strategies: {
|
|
54
71
|
deduplication: {
|
|
@@ -120,6 +137,12 @@ const DEFAULT_CONFIG: DcpConfig = {
|
|
|
120
137
|
highTokens: 5000,
|
|
121
138
|
maxSuggestions: 5,
|
|
122
139
|
},
|
|
140
|
+
autoCompress: {
|
|
141
|
+
enabled: false,
|
|
142
|
+
patience: 2,
|
|
143
|
+
summarizerModel: [],
|
|
144
|
+
timeoutMs: 20000,
|
|
145
|
+
},
|
|
123
146
|
},
|
|
124
147
|
strategies: {
|
|
125
148
|
deduplication: {
|
|
@@ -51,6 +51,7 @@ import {
|
|
|
51
51
|
import { summarizeDcpState, writeDcpDebugLog } from "./debug-log.js"
|
|
52
52
|
import type { DcpNudgeType } from "./pruner-types.js"
|
|
53
53
|
import { registerCompressTool } from "./compress-tool.js"
|
|
54
|
+
import { decideAutoCompress, createAutoCompressionBlock } from "./auto-compress.js"
|
|
54
55
|
import { DCP_STATS_MESSAGE_TYPE, registerCommands } from "./commands.js"
|
|
55
56
|
import { normalizeDcpContextUsage } from "./ui.js"
|
|
56
57
|
import { safeGetContextUsage } from "../context-usage.js"
|
|
@@ -417,6 +418,21 @@ export default async function dcpModule(pi: ExtensionAPI): Promise<void> {
|
|
|
417
418
|
return finishContext("unknown-context-percent", prunedMessages, { clearedAnchors })
|
|
418
419
|
}
|
|
419
420
|
|
|
421
|
+
// Record the observed context window on EVERY context event (before
|
|
422
|
+
// any early return) so a mid-session model/window downgrade is
|
|
423
|
+
// detectable even when earlier passes were below threshold. We
|
|
424
|
+
// snapshot the previous value first so the downgrade check below
|
|
425
|
+
// compares against the window the prior pass actually saw.
|
|
426
|
+
const currentContextWindow = usage.contextWindow
|
|
427
|
+
const previousContextWindow = state.lastContextWindow
|
|
428
|
+
if (
|
|
429
|
+
typeof currentContextWindow === "number" &&
|
|
430
|
+
Number.isFinite(currentContextWindow) &&
|
|
431
|
+
currentContextWindow > 0
|
|
432
|
+
) {
|
|
433
|
+
state.lastContextWindow = currentContextWindow
|
|
434
|
+
}
|
|
435
|
+
|
|
420
436
|
const ctxModel = (ctx as any).model
|
|
421
437
|
const provider = ctxModel?.provider ?? ctxModel?.providerId ?? ctxModel?.providerID
|
|
422
438
|
const model = ctxModel?.id ?? ctxModel?.model ?? ctxModel?.modelId ?? ctxModel?.modelID
|
|
@@ -448,13 +464,30 @@ export default async function dcpModule(pi: ExtensionAPI): Promise<void> {
|
|
|
448
464
|
if (msg.role === "toolResult") toolCallsSinceLastUser++
|
|
449
465
|
}
|
|
450
466
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
467
|
+
// Switch-aware pre-emptive nudge: detect a mid-session context-window
|
|
468
|
+
// downgrade (e.g. model switch from a 1M window to a 275K window).
|
|
469
|
+
// Inherited tokens that were cheap on the larger window can suddenly
|
|
470
|
+
// sit above minContextPercent on the smaller one. When that happens,
|
|
471
|
+
// force a strong nudge on this pass so the model is told to compress
|
|
472
|
+
// before the smaller window fills, instead of waiting for cadence.
|
|
473
|
+
const windowDowngraded =
|
|
474
|
+
typeof previousContextWindow === "number" &&
|
|
475
|
+
Number.isFinite(previousContextWindow) &&
|
|
476
|
+
previousContextWindow > 0 &&
|
|
477
|
+
typeof currentContextWindow === "number" &&
|
|
478
|
+
Number.isFinite(currentContextWindow) &&
|
|
479
|
+
currentContextWindow < previousContextWindow * 0.9 &&
|
|
480
|
+
contextPercent > thresholds.minContextPercent
|
|
481
|
+
|
|
482
|
+
const nudgeType = windowDowngraded
|
|
483
|
+
? "context-strong"
|
|
484
|
+
: getNudgeType(
|
|
485
|
+
contextPercent,
|
|
486
|
+
state,
|
|
487
|
+
effectiveConfig,
|
|
488
|
+
toolCallsSinceLastUser,
|
|
489
|
+
thresholds,
|
|
490
|
+
)
|
|
458
491
|
|
|
459
492
|
const manualEmergencyOnly =
|
|
460
493
|
state.manualMode &&
|
|
@@ -483,6 +516,78 @@ export default async function dcpModule(pi: ExtensionAPI): Promise<void> {
|
|
|
483
516
|
}, ctx)
|
|
484
517
|
}
|
|
485
518
|
|
|
519
|
+
// Track consecutive ignored context-strong nudges for the
|
|
520
|
+
// auto-compress fallback. A strong nudge is "ignored" if it fires
|
|
521
|
+
// again on a later context event without a successful compress in
|
|
522
|
+
// between. Reset on non-strong nudges and when pressure drops below
|
|
523
|
+
// the emergency threshold (handled by the below-threshold early
|
|
524
|
+
// return above, which clears anchors; counter is also reset on any
|
|
525
|
+
// successful compress in the compress tool).
|
|
526
|
+
if (nudgeType === "context-strong" || nudgeType === "context-soft") {
|
|
527
|
+
state.consecutiveIgnoredStrongNudges += 1
|
|
528
|
+
} else if (nudgeType === null) {
|
|
529
|
+
state.consecutiveIgnoredStrongNudges = 0
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// Auto-compress fallback: if the model has ignored enough strong
|
|
533
|
+
// nudges while above the emergency threshold, DCP creates a
|
|
534
|
+
// compression block itself instead of nudging again.
|
|
535
|
+
if (!manualEmergencyOnly) {
|
|
536
|
+
const autoDecision = decideAutoCompress(
|
|
537
|
+
state,
|
|
538
|
+
effectiveConfig,
|
|
539
|
+
contextPercent,
|
|
540
|
+
thresholds.maxContextPercent,
|
|
541
|
+
candidate,
|
|
542
|
+
)
|
|
543
|
+
if (autoDecision.shouldFire && candidate) {
|
|
544
|
+
try {
|
|
545
|
+
const autoResult = await createAutoCompressionBlock({
|
|
546
|
+
candidate,
|
|
547
|
+
topic: "Auto-compressed slice",
|
|
548
|
+
state,
|
|
549
|
+
config: effectiveConfig,
|
|
550
|
+
messages: prunedMessages,
|
|
551
|
+
modelRegistry: (ctx as any).modelRegistry,
|
|
552
|
+
signal: (ctx as any).signal,
|
|
553
|
+
})
|
|
554
|
+
// Re-apply pruning so the new block takes effect on this
|
|
555
|
+
// same context pass instead of the next one.
|
|
556
|
+
prunedMessages = applyPruning(prunedMessages, state, effectiveConfig)
|
|
557
|
+
const clearedAnchors = clearDcpNudgeAnchors(state)
|
|
558
|
+
state.consecutiveIgnoredStrongNudges = 0
|
|
559
|
+
await saveDcpState(ctx, state)
|
|
560
|
+
writeDcpDebugLog(effectiveConfig, "compress.auto", {
|
|
561
|
+
trigger: autoDecision.reason,
|
|
562
|
+
blockId: `b${autoResult.blockId}`,
|
|
563
|
+
summaryMode: autoResult.summaryMode,
|
|
564
|
+
summarizerModelRef: autoResult.summarizerModelRef,
|
|
565
|
+
summarizerAttempts: autoResult.summarizerAttempts,
|
|
566
|
+
summaryTokens: autoResult.summaryTokens,
|
|
567
|
+
removedTokenEstimate: autoResult.removedTokenEstimate,
|
|
568
|
+
candidate,
|
|
569
|
+
clearedAnchors,
|
|
570
|
+
state: summarizeDcpState(state),
|
|
571
|
+
}, ctx)
|
|
572
|
+
return finishContext("compress.auto", prunedMessages, {
|
|
573
|
+
candidate,
|
|
574
|
+
messageCandidates,
|
|
575
|
+
contextPercent,
|
|
576
|
+
thresholds,
|
|
577
|
+
clearedAnchors,
|
|
578
|
+
})
|
|
579
|
+
} catch (error) {
|
|
580
|
+
writeDcpDebugLog(effectiveConfig, "compress.auto_failed", {
|
|
581
|
+
trigger: autoDecision.reason,
|
|
582
|
+
error: error instanceof Error ? error.message : String(error),
|
|
583
|
+
candidate,
|
|
584
|
+
state: summarizeDcpState(state),
|
|
585
|
+
}, ctx)
|
|
586
|
+
// Fall through to normal nudge emission on failure.
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
|
|
486
591
|
if (nudgeType && !manualEmergencyOnly) {
|
|
487
592
|
const nudgeText = appendConcreteNudgeGuidance(
|
|
488
593
|
baseNudgeText(nudgeType),
|
|
@@ -30,6 +30,8 @@ Use \`compress\` as context-pressure housekeeping, not as a reflex after every s
|
|
|
30
30
|
|
|
31
31
|
A closed slice is any finished implementation, verification, config edit, answered exploration, dead-end debugging branch, or test/log inspection. Passing logs are summary-only: preserve command, pass/fail, key failures if any, and whether follow-up is needed; never keep a full passing log in live context. Treat large shell/read/repo/web outputs as disposable evidence once their facts are extracted.
|
|
32
32
|
|
|
33
|
+
Completed todo/task/subtask milestones are strong boundary signals, not automatic triggers. When context pressure makes compression useful, prefer the range covering a just-finished todo item if it is closed and non-trivial; do not compress merely because a todo was completed while context remains low.
|
|
34
|
+
|
|
33
35
|
Before compressing while work is unfinished, ensure one \`todo in_progress\` captures the active objective and next step.
|
|
34
36
|
|
|
35
37
|
When a \`<dcp-system-reminder>\` appears, treat it as a context-pressure signal. Follow critical/high-context reminders promptly. For routine reminders, compress only if a genuinely closed, useful-to-summarize slice exists; otherwise continue the next atomic step and re-check later.
|
|
@@ -183,6 +185,7 @@ You MUST use the \`compress\` tool now. Do not continue normal exploration until
|
|
|
183
185
|
|
|
184
186
|
If you are in the middle of a critical atomic operation, finish that atomic step first, then compress immediately.
|
|
185
187
|
If any closed slice exists (finished implementation, verification, config/doc edit, answered exploration, dead end, or test/log inspection), compress it before replying or starting another task. Passing logs should become command + pass/fail + follow-up status only.
|
|
188
|
+
Recently completed todo/task/subtask items are preferred boundaries when they form a high-yield closed slice.
|
|
186
189
|
|
|
187
190
|
RANGE STRATEGY (MANDATORY)
|
|
188
191
|
Prioritize one large, closed, high-yield compression range first.
|
|
@@ -210,6 +213,7 @@ ACTION REQUIRED: Context usage is high.
|
|
|
210
213
|
Before doing more exploration, look for a high-yield closed range that no longer needs to stay raw. Compress it now if one is safe and useful.
|
|
211
214
|
|
|
212
215
|
This is context-pressure guidance, not a request to compress tiny or still-needed slices. If completed research, implementation, verification, config/doc edit, CI-log inspection, or dead-end debugging is large enough to reduce signal, call the \`compress\` tool before continuing normal work.
|
|
216
|
+
Recently completed todo/task/subtask items are preferred candidates when they form a non-trivial closed slice; do not compress merely because a tiny todo was completed.
|
|
213
217
|
High-priority stale shell/read/repo/web outputs should be compressed once no exact raw text is needed. Passing logs should not remain raw after they are understood.
|
|
214
218
|
|
|
215
219
|
RANGE SELECTION
|
|
@@ -230,6 +234,8 @@ CONTEXT CHECK: Evaluate whether compression would materially improve the live co
|
|
|
230
234
|
|
|
231
235
|
If a range is cleanly closed, non-trivial, and unlikely to be needed verbatim again, use the \`compress\` tool. If direction has shifted, consider whether earlier ranges are now less relevant.
|
|
232
236
|
|
|
237
|
+
If a todo/task/subtask was just completed, treat that completed work as a preferred compression boundary when it is large enough and no longer needed raw; completion alone is not a reason to compress while context is still low.
|
|
238
|
+
|
|
233
239
|
Do not compress just because a small slice closed while context is still low. Prefer compression before another large batch of searches, reads, CI log fetches, or tests when a high-yield stale slice exists.
|
|
234
240
|
High-priority stale shell/read/repo/web outputs and understood passing logs should be compressed once no exact raw text is needed.
|
|
235
241
|
|
|
@@ -247,6 +253,8 @@ CONTEXT CHECK: You've been iterating for a while after the last user message.
|
|
|
247
253
|
|
|
248
254
|
Pause before the next large non-atomic tool batch. If there is a closed portion that is unlikely to be referenced immediately and is worth summarizing (for example, finished research before implementation, completed config edit, completed CI-log triage, a verified fix, or a dead-end investigation), use the \`compress\` tool on it.
|
|
249
255
|
|
|
256
|
+
If a todo/task/subtask was just completed, prefer that completed work as the compression boundary when it is non-trivial and safe to summarize; do not compress merely because the todo status changed.
|
|
257
|
+
|
|
250
258
|
Avoid accumulating large tool outputs while a high-yield completed slice remains raw. If only small or still-needed ranges are closed, continue the next atomic step and re-check later.
|
|
251
259
|
|
|
252
260
|
Prefer multiple short, closed ranges over one large range when several independent slices are ready.
|