@kkelly-offical/kkcode 0.1.3 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +110 -172
  2. package/package.json +46 -46
  3. package/src/agent/agent.mjs +220 -170
  4. package/src/agent/prompt/bug-hunter.txt +90 -0
  5. package/src/agent/prompt/frontend-designer.txt +58 -0
  6. package/src/agent/prompt/longagent-blueprint-agent.txt +83 -0
  7. package/src/agent/prompt/longagent-coding-agent.txt +37 -0
  8. package/src/agent/prompt/longagent-debugging-agent.txt +46 -0
  9. package/src/agent/prompt/longagent-preview-agent.txt +63 -0
  10. package/src/config/defaults.mjs +260 -195
  11. package/src/config/schema.mjs +71 -6
  12. package/src/core/constants.mjs +91 -46
  13. package/src/index.mjs +1 -1
  14. package/src/knowledge/frontend-aesthetics.txt +39 -0
  15. package/src/knowledge/loader.mjs +2 -1
  16. package/src/knowledge/tailwind.txt +12 -3
  17. package/src/mcp/client-http.mjs +141 -157
  18. package/src/mcp/client-sse.mjs +288 -286
  19. package/src/mcp/client-stdio.mjs +533 -451
  20. package/src/mcp/constants.mjs +2 -0
  21. package/src/mcp/registry.mjs +479 -394
  22. package/src/mcp/stdio-framing.mjs +133 -127
  23. package/src/mcp/tool-result.mjs +24 -0
  24. package/src/observability/index.mjs +42 -0
  25. package/src/observability/metrics.mjs +137 -0
  26. package/src/observability/tracer.mjs +137 -0
  27. package/src/orchestration/background-manager.mjs +372 -358
  28. package/src/orchestration/background-worker.mjs +305 -245
  29. package/src/orchestration/longagent-manager.mjs +171 -116
  30. package/src/orchestration/stage-scheduler.mjs +728 -489
  31. package/src/permission/exec-policy.mjs +9 -11
  32. package/src/provider/anthropic.mjs +1 -0
  33. package/src/provider/openai.mjs +340 -339
  34. package/src/provider/retry-policy.mjs +68 -68
  35. package/src/provider/router.mjs +241 -228
  36. package/src/provider/sse.mjs +104 -91
  37. package/src/repl.mjs +59 -7
  38. package/src/session/checkpoint.mjs +66 -3
  39. package/src/session/compaction.mjs +298 -276
  40. package/src/session/engine.mjs +232 -225
  41. package/src/session/longagent-4stage.mjs +460 -0
  42. package/src/session/longagent-hybrid.mjs +1097 -0
  43. package/src/session/longagent-plan.mjs +365 -329
  44. package/src/session/longagent-project-memory.mjs +53 -0
  45. package/src/session/longagent-scaffold.mjs +291 -100
  46. package/src/session/longagent-task-bus.mjs +54 -0
  47. package/src/session/longagent-utils.mjs +472 -0
  48. package/src/session/longagent.mjs +900 -1462
  49. package/src/session/loop.mjs +65 -40
  50. package/src/session/project-context.mjs +30 -0
  51. package/src/session/prompt/agent.txt +25 -0
  52. package/src/session/prompt/plan.txt +31 -9
  53. package/src/session/rollback.mjs +196 -0
  54. package/src/session/store.mjs +519 -503
  55. package/src/session/system-prompt.mjs +273 -260
  56. package/src/session/task-validator.mjs +4 -3
  57. package/src/skill/builtin/design.mjs +76 -0
  58. package/src/skill/builtin/frontend.mjs +8 -0
  59. package/src/skill/registry.mjs +390 -336
  60. package/src/storage/ghost-commit-store.mjs +18 -8
  61. package/src/tool/executor.mjs +11 -0
  62. package/src/tool/git-auto.mjs +0 -19
  63. package/src/tool/question-prompt.mjs +93 -86
  64. package/src/tool/registry.mjs +71 -37
  65. package/src/ui/activity-renderer.mjs +664 -410
  66. package/src/util/git.mjs +23 -0
@@ -1,276 +1,298 @@
1
- import { requestProvider } from "../provider/router.mjs"
2
- import { getConversationHistory, replaceMessages } from "./store.mjs"
3
- import { HookBus } from "../plugin/hook-bus.mjs"
4
- import { saveCheckpoint } from "./checkpoint.mjs"
5
- import { recordTurn } from "../usage/usage-meter.mjs"
6
- import { loadPricing, calculateCost } from "../usage/pricing.mjs"
7
-
8
- const COMPACTION_SYSTEM = `You are a conversation summarizer. Create a structured summary preserving all critical information for continued work.
9
-
10
- ## Output Format
11
-
12
- <summary>
13
- <goal>The user's overall goal or current task</goal>
14
- <completed>
15
- - Completed task with specific details (file paths, function names, line numbers)
16
- </completed>
17
- <in_progress>Current work being done, if any</in_progress>
18
- <files_modified>
19
- - path/to/file: specific change description
20
- </files_modified>
21
- <key_decisions>
22
- - Decision and reasoning
23
- - User preferences or constraints
24
- </key_decisions>
25
- <errors_resolved>
26
- - Error description → fix applied
27
- </errors_resolved>
28
- <next_steps>
29
- - Specific next action items
30
- </next_steps>
31
- </summary>
32
-
33
- Rules:
34
- - Use the SAME LANGUAGE as the conversation
35
- - Preserve ALL file paths, function names, variable names, and technical identifiers exactly
36
- - Include specific code changes, not just "modified file X"
37
- - Omit tool call metadata and message formatting details
38
- - Be concise but never drop actionable information`
39
-
40
- const DEFAULT_THRESHOLD_MESSAGES = 50
41
- const DEFAULT_THRESHOLD_RATIO = 0.7
42
- const DEFAULT_KEEP_RECENT = 6
43
- const TOOL_RESULT_PREVIEW_LIMIT = 200
44
-
45
- // Estimate tokens from a string, accounting for CJK characters (~1.5 chars/token vs ~4 for Latin)
46
- export function estimateStringTokens(str) {
47
- if (!str) return 0
48
- let cjk = 0
49
- for (let i = 0; i < str.length; i++) {
50
- const code = str.charCodeAt(i)
51
- if ((code >= 0x4E00 && code <= 0x9FFF) || (code >= 0x3000 && code <= 0x30FF) ||
52
- (code >= 0xAC00 && code <= 0xD7AF)) cjk++
53
- }
54
- const latin = str.length - cjk
55
- return Math.ceil(latin / 4 + cjk / 1.5)
56
- }
57
-
58
- const MSG_OVERHEAD = 4 // ~4 tokens per message for role/metadata
59
-
60
- export function estimateTokenCount(messages) {
61
- let tokens = 0
62
- for (const msg of messages) {
63
- tokens += MSG_OVERHEAD
64
- const content = msg.content
65
- if (Array.isArray(content)) {
66
- for (const block of content) {
67
- if (block.type === "image") {
68
- tokens += 1600 // conservative estimate for a typical image
69
- } else if (block.type === "tool_use") {
70
- tokens += estimateStringTokens(block.name || "")
71
- tokens += estimateStringTokens(JSON.stringify(block.input || {}))
72
- } else if (block.type === "tool_result") {
73
- tokens += estimateStringTokens(String(block.content || ""))
74
- } else {
75
- tokens += estimateStringTokens(block.text || block.content || "")
76
- }
77
- }
78
- } else {
79
- tokens += estimateStringTokens(content || "")
80
- }
81
- }
82
- return tokens
83
- }
84
-
85
- /**
86
- * Pre-prune messages before LLM summarization.
87
- * - Truncate large tool_result content to a short preview
88
- * - Keep tool_use blocks intact (they show model intent)
89
- * - Truncate very long plain-text assistant/user messages
90
- */
91
- export function pruneForSummary(messages, previewLimit = TOOL_RESULT_PREVIEW_LIMIT) {
92
- return messages.map((msg) => {
93
- const content = msg.content
94
- if (Array.isArray(content)) {
95
- const pruned = content.map((block) => {
96
- if (block.type === "tool_result") {
97
- const raw = String(block.content || "")
98
- if (raw.length > previewLimit) {
99
- return {
100
- ...block,
101
- content: `${raw.slice(0, previewLimit)}... [truncated ${raw.length} chars]`
102
- }
103
- }
104
- }
105
- return block
106
- })
107
- return { ...msg, content: pruned }
108
- }
109
- // Truncate very long plain-text messages (e.g. large tool output pasted as text)
110
- if (typeof content === "string" && content.length > 2000) {
111
- return { ...msg, content: `${content.slice(0, 2000)}... [truncated ${content.length} chars]` }
112
- }
113
- return msg
114
- })
115
- }
116
-
117
- const BUILTIN_CONTEXT = {
118
- "gpt-5": 272000, "o3": 200000, "o1": 200000,
119
- "claude-opus-4": 200000, "claude-3-5": 200000, "claude-3.5": 200000, "claude": 200000,
120
- "gemini-2": 1048576, "gemini-1.5": 1048576, "gemini": 128000,
121
- "gpt-4o": 128000, "gpt-4": 128000, "gpt-3.5": 16000,
122
- "deepseek": 64000, "qwen": 128000
123
- }
124
-
125
- export function modelContextLimit(model, configState = null) {
126
- const m = String(model || "").toLowerCase()
127
- // 1) Check provider-level context_limit for the active provider
128
- const providerCfg = configState?.config?.provider
129
- if (providerCfg) {
130
- // Per-model override from provider.model_context map
131
- const mc = providerCfg.model_context
132
- if (mc) {
133
- if (mc[model]) return mc[model]
134
- for (const key of Object.keys(mc)) {
135
- if (m.startsWith(key.toLowerCase())) return mc[key]
136
- }
137
- }
138
- // Provider-level context_limit
139
- const active = providerCfg[providerCfg.default]
140
- if (active?.context_limit > 0) return active.context_limit
141
- }
142
- // 2) Builtin prefix match
143
- for (const [prefix, limit] of Object.entries(BUILTIN_CONTEXT)) {
144
- if (m.includes(prefix)) return limit
145
- }
146
- return 128000
147
- }
148
-
149
- export function contextUtilization(messages, model, configState = null) {
150
- const tokens = estimateTokenCount(messages)
151
- const limit = modelContextLimit(model, configState)
152
- const ratio = limit > 0 ? Math.min(1, tokens / limit) : 0
153
- return {
154
- tokens,
155
- limit,
156
- ratio,
157
- percent: Math.round(ratio * 100)
158
- }
159
- }
160
-
161
- export function supportsNativeCompaction(providerType, model) {
162
- if (providerType !== "anthropic") return false
163
- const m = String(model || "").toLowerCase()
164
- return m.includes("claude") && (m.includes("opus") || m.includes("sonnet"))
165
- }
166
-
167
- export function shouldCompact({ messages, model, thresholdMessages = DEFAULT_THRESHOLD_MESSAGES, thresholdRatio = DEFAULT_THRESHOLD_RATIO, configState = null, realTokenCount = null }) {
168
- if (messages.length >= thresholdMessages) return true
169
- const limit = modelContextLimit(model, configState)
170
- const tokens = realTokenCount != null ? realTokenCount : estimateTokenCount(messages)
171
- return tokens >= limit * thresholdRatio
172
- }
173
-
174
- export async function compactSession({
175
- sessionId,
176
- model,
177
- providerType,
178
- configState,
179
- keepRecent = DEFAULT_KEEP_RECENT,
180
- baseUrl = null,
181
- apiKeyEnv = null
182
- }) {
183
- const history = await getConversationHistory(sessionId, 9999)
184
- if (history.length <= keepRecent + 2) return { compacted: false, reason: "too few messages" }
185
-
186
- // Find split point that doesn't break tool_use/tool_result pairs
187
- let splitIdx = history.length - keepRecent
188
- while (splitIdx > 0 && splitIdx < history.length) {
189
- const msg = history[splitIdx]
190
- const content = msg.content
191
- if (Array.isArray(content) && content.some(b => b.type === "tool_result")) {
192
- splitIdx-- // include the paired assistant tool_use message
193
- continue
194
- }
195
- break
196
- }
197
- const toSummarize = history.slice(0, splitIdx)
198
- const kept = history.slice(splitIdx)
199
-
200
- // Layer 1: prune large tool outputs before sending to LLM
201
- const pruned = pruneForSummary(toSummarize)
202
- const summaryPrompt = pruned.map((m) => {
203
- const content = m.content
204
- if (Array.isArray(content)) {
205
- return `[${m.role}]: ${content.map((b) => {
206
- if (b.type === "text") return b.text || ""
207
- if (b.type === "tool_use") return `[tool_use:${b.name}(${JSON.stringify(b.input || {}).slice(0, 120)})]`
208
- if (b.type === "tool_result") return `[tool_result:${b.is_error ? "ERROR " : ""}${b.content || ""}]`
209
- return ""
210
- }).filter(Boolean).join("\n")}`
211
- }
212
- return `[${m.role}]: ${content}`
213
- }).join("\n\n")
214
-
215
- const hookPayload = await HookBus.sessionCompacting({
216
- sessionId,
217
- messageCount: history.length,
218
- summarizeCount: toSummarize.length,
219
- keepCount: kept.length
220
- })
221
- if (hookPayload?.skip) return { compacted: false, reason: "skipped by hook" }
222
-
223
- let summaryText
224
- let compactionUsage = null
225
- try {
226
- const response = await requestProvider({
227
- configState,
228
- providerType,
229
- model,
230
- system: COMPACTION_SYSTEM,
231
- messages: [{ role: "user", content: summaryPrompt }],
232
- tools: [],
233
- baseUrl,
234
- apiKeyEnv
235
- })
236
- summaryText = (response.text || "").trim()
237
- compactionUsage = response.usage || null
238
- } catch (error) {
239
- return { compacted: false, reason: `compaction LLM call failed: ${error.message}` }
240
- }
241
-
242
- if (!summaryText) return { compacted: false, reason: "empty summary from LLM" }
243
-
244
- // Replace all messages with: [summary] + [kept recent messages]
245
- const summaryMessage = {
246
- role: "user",
247
- content: `<compaction-summary>\n${summaryText}\n</compaction-summary>`
248
- }
249
- await replaceMessages(sessionId, [summaryMessage, ...kept])
250
-
251
- // Record compaction LLM usage so it's not "invisible"
252
- if (compactionUsage) {
253
- try {
254
- const { pricing } = await loadPricing(configState)
255
- const { amount } = calculateCost(pricing, model, compactionUsage)
256
- await recordTurn({ sessionId, usage: compactionUsage, cost: amount })
257
- } catch { /* best-effort */ }
258
- }
259
-
260
- await saveCheckpoint(sessionId, {
261
- kind: "compaction",
262
- iteration: 0,
263
- compactedAt: Date.now(),
264
- summarizeCount: toSummarize.length,
265
- keepCount: kept.length,
266
- summaryVersion: 1,
267
- summaryLength: summaryText.length
268
- })
269
-
270
- return {
271
- compacted: true,
272
- summarizedCount: toSummarize.length,
273
- keptCount: kept.length,
274
- summaryLength: summaryText.length
275
- }
276
- }
1
+ import { requestProvider } from "../provider/router.mjs"
2
+ import { getConversationHistory, replaceMessages } from "./store.mjs"
3
+ import { HookBus } from "../plugin/hook-bus.mjs"
4
+ import { saveCheckpoint } from "./checkpoint.mjs"
5
+ import { recordTurn } from "../usage/usage-meter.mjs"
6
+ import { loadPricing, calculateCost } from "../usage/pricing.mjs"
7
+
8
+ const COMPACTION_SYSTEM = `You are a conversation summarizer. Create a structured summary preserving all critical information for continued work.
9
+
10
+ ## Output Format
11
+
12
+ <summary>
13
+ <goal>The user's overall goal or current task</goal>
14
+ <completed>
15
+ - Completed task with specific details (file paths, function names, line numbers)
16
+ </completed>
17
+ <in_progress>Current work being done, if any</in_progress>
18
+ <files_modified>
19
+ - path/to/file: specific change description
20
+ </files_modified>
21
+ <key_decisions>
22
+ - Decision and reasoning
23
+ - User preferences or constraints
24
+ </key_decisions>
25
+ <errors_resolved>
26
+ - Error description → fix applied
27
+ </errors_resolved>
28
+ <next_steps>
29
+ - Specific next action items
30
+ </next_steps>
31
+ </summary>
32
+
33
+ Rules:
34
+ - Use the SAME LANGUAGE as the conversation
35
+ - Preserve ALL file paths, function names, variable names, and technical identifiers exactly
36
+ - Include specific code changes, not just "modified file X"
37
+ - Omit tool call metadata and message formatting details
38
+ - Be concise but never drop actionable information`
39
+
40
// Default tuning values for the compaction helpers in this module.

// shouldCompact(): compact once the history holds at least this many messages.
const DEFAULT_THRESHOLD_MESSAGES = 50
// shouldCompact(): compact once the token count reaches this fraction of the model's context limit.
const DEFAULT_THRESHOLD_RATIO = 0.7
// compactSession(): number of trailing messages kept verbatim when the message-count split is used.
const DEFAULT_KEEP_RECENT = 6
// compactSession(): number of trailing turns (distinct turnId values) kept verbatim.
const DEFAULT_KEEP_RECENT_TURNS = 3
// pruneForSummary(): default number of tool_result characters kept as a preview.
const TOOL_RESULT_PREVIEW_LIMIT = 200
45
+
46
/**
 * Coerce arbitrary message/block content to the text whose size is estimated.
 * Strings pass through, null/undefined become "", and structured values
 * (nested block arrays, plain objects) are serialized as JSON so their size is
 * counted instead of collapsing to "[object Object]".
 */
function contentToText(value) {
  if (typeof value === "string") return value
  if (value == null) return ""
  if (typeof value === "object") {
    try {
      return JSON.stringify(value) ?? ""
    } catch {
      // Circular or otherwise unserializable value: fall back to the default string form.
      return String(value)
    }
  }
  return String(value)
}

/**
 * Estimate the token count of a piece of text.
 * Characters in the CJK ideograph, CJK punctuation/kana and Hangul syllable
 * ranges are weighted at ~1.5 chars/token; everything else at ~4 chars/token.
 *
 * @param {*} str Text to measure. Falsy values count as 0; non-string values
 *   are converted to text first instead of throwing.
 * @returns {number} Estimated token count, rounded up.
 */
export function estimateStringTokens(str) {
  if (!str) return 0
  const text = contentToText(str)
  let cjk = 0
  for (let i = 0; i < text.length; i++) {
    const code = text.charCodeAt(i)
    if ((code >= 0x4E00 && code <= 0x9FFF) || (code >= 0x3000 && code <= 0x30FF) ||
      (code >= 0xAC00 && code <= 0xD7AF)) cjk++
  }
  const latin = text.length - cjk
  return Math.ceil(latin / 4 + cjk / 1.5)
}

const MSG_OVERHEAD = 4 // ~4 tokens per message for role/metadata
const IMAGE_TOKENS = 1600 // conservative estimate for a typical image

// Estimate block content that is either plain text or a nested array of blocks.
function estimateContentTokens(content) {
  if (Array.isArray(content)) {
    let total = 0
    for (const inner of content) total += estimateBlockTokens(inner)
    return total
  }
  return estimateStringTokens(content)
}

// Estimate a single content block (image, tool_use, tool_result, or text-like).
function estimateBlockTokens(block) {
  // Tolerate bare strings / null entries inside a content array.
  if (!block || typeof block !== "object") return estimateStringTokens(block)
  switch (block.type) {
    case "image":
      return IMAGE_TOKENS
    case "tool_use":
      return estimateStringTokens(block.name || "") +
        estimateStringTokens(JSON.stringify(block.input || {}))
    case "tool_result":
      // tool_result content may itself be an array of blocks; count each one.
      return estimateContentTokens(block.content)
    default:
      return estimateContentTokens(block.text || block.content)
  }
}

/**
 * Estimate the total token count of a message list.
 * Each message costs MSG_OVERHEAD plus the estimate of its content, which is
 * either a plain string or an array of content blocks.
 *
 * @param {Array<{content: *}>} messages Messages to measure.
 * @returns {number} Estimated token count.
 */
export function estimateTokenCount(messages) {
  let tokens = 0
  for (const msg of messages) {
    tokens += MSG_OVERHEAD
    const content = msg.content
    if (Array.isArray(content)) {
      for (const block of content) tokens += estimateBlockTokens(block)
    } else {
      tokens += estimateStringTokens(content || "")
    }
  }
  return tokens
}
85
+
86
// Flatten tool_result content to plain text so its length can be measured and truncated.
// Arrays of blocks contribute their `text` (or a JSON rendering for non-text parts).
function toolResultText(content) {
  if (typeof content === "string") return content
  if (!content) return ""
  const render = (value) => {
    try {
      return JSON.stringify(value) ?? ""
    } catch {
      return String(value)
    }
  }
  if (Array.isArray(content)) {
    return content.map((part) => {
      if (typeof part === "string") return part
      if (part && typeof part.text === "string") return part.text
      return render(part)
    }).join("\n")
  }
  return typeof content === "object" ? render(content) : String(content)
}

/**
 * Pre-prune messages before LLM summarization.
 * - Drop messages flagged `synthetic` (scaffolding such as continuation prompts)
 * - Truncate large tool_result content with aging: the further a message's
 *   `step` is behind the newest step, the shorter its preview (15 chars per step)
 * - Leave every other block (e.g. tool_use) untouched
 * - Truncate plain-text messages longer than 2000 chars
 *
 * @param {Array<object>} messages Messages to prune; not mutated.
 * @param {number} [previewLimit] Max tool_result preview length for the newest
 *   step. Aging never shrinks a preview below 50 chars, but a caller-supplied
 *   limit smaller than 50 is honoured as-is.
 * @returns {Array<object>} New array of (possibly copied) messages.
 */
export function pruneForSummary(messages, previewLimit = TOOL_RESULT_PREVIEW_LIMIT) {
  // Strip synthetic scaffolding messages (continuation prompts, fake tool_result errors)
  const real = messages.filter((msg) => !msg.synthetic)

  // Tool-result aging: find the newest step so each message's relative age can be computed
  const maxStep = real.reduce((max, msg) => Math.max(max, msg.step || 0), 0)

  // Aging floor; capped by previewLimit so the floor never exceeds what the caller asked for
  const floor = Math.max(0, Math.min(50, previewLimit))

  return real.map((msg) => {
    // Older tool_results get more aggressive truncation
    const age = maxStep - (msg.step || 0)
    const effectiveLimit = Math.max(floor, previewLimit - age * 15)

    const content = msg.content
    if (Array.isArray(content)) {
      const pruned = content.map((block) => {
        if (block.type !== "tool_result") return block
        const raw = toolResultText(block.content)
        if (raw.length <= effectiveLimit) return block
        return {
          ...block,
          content: `${raw.slice(0, effectiveLimit)}... [truncated ${raw.length} chars, age=${age}]`
        }
      })
      return { ...msg, content: pruned }
    }
    // Truncate very long plain-text messages (e.g. large tool output pasted as text)
    if (typeof content === "string" && content.length > 2000) {
      return { ...msg, content: `${content.slice(0, 2000)}... [truncated ${content.length} chars]` }
    }
    return msg
  })
}
128
+
129
// Built-in context window sizes keyed by a model-name fragment.
// Entries are checked in insertion order with a substring match, so a more
// specific fragment must appear before any generic fragment it contains.
const BUILTIN_CONTEXT = {
  "gpt-5": 272000, "o3": 200000, "o1": 200000,
  "claude-opus-4": 200000, "claude-3-5": 200000, "claude-3.5": 200000, "claude": 200000,
  "gemini-2": 1048576, "gemini-1.5": 1048576, "gemini": 128000,
  "gpt-4o": 128000, "gpt-4": 128000, "gpt-3.5": 16000,
  "deepseek": 64000, "qwen": 128000
}

/**
 * Resolve the context window size (in tokens) for a model.
 *
 * Resolution order:
 *   1. `provider.model_context[model]` (exact key, own properties only)
 *   2. the longest `provider.model_context` key that is a case-insensitive
 *      prefix of the model name, so "gpt-4o" wins over "gpt-4" for "gpt-4o-mini"
 *   3. `context_limit` of the provider named by `provider.default`
 *   4. the first BUILTIN_CONTEXT fragment contained in the model name
 *   5. 128000
 *
 * @param {string} model Model identifier.
 * @param {object|null} [configState] Object whose `config.provider` holds the provider settings.
 * @returns {number} Context limit in tokens.
 */
export function modelContextLimit(model, configState = null) {
  const m = String(model || "").toLowerCase()
  // 1) Check provider-level configuration for the active provider
  const providerCfg = configState?.config?.provider
  if (providerCfg) {
    // Per-model override from provider.model_context map
    const mc = providerCfg.model_context
    if (mc) {
      // Own-property check keeps names like "constructor" from resolving to inherited members
      if (Object.prototype.hasOwnProperty.call(mc, model) && mc[model]) return mc[model]
      let bestKey = null
      for (const key of Object.keys(mc)) {
        if (!mc[key] || !m.startsWith(key.toLowerCase())) continue
        if (bestKey === null || key.length > bestKey.length) bestKey = key
      }
      if (bestKey !== null) return mc[bestKey]
    }
    // Provider-level context_limit
    const active = providerCfg[providerCfg.default]
    if (active?.context_limit > 0) return active.context_limit
  }
  // 2) Builtin fragment match (substring, first hit wins)
  for (const [prefix, limit] of Object.entries(BUILTIN_CONTEXT)) {
    if (m.includes(prefix)) return limit
  }
  return 128000
}
160
+
161
/**
 * Report how much of a model's context window a message list occupies.
 *
 * @param {Array<object>} messages Messages to measure with estimateTokenCount().
 * @param {string} model Model identifier passed to modelContextLimit().
 * @param {object|null} [configState] Forwarded to modelContextLimit().
 * @returns {{tokens: number, limit: number, ratio: number, percent: number}}
 *   `ratio` is tokens/limit capped at 1 (0 when the limit is not positive);
 *   `percent` is that ratio as a rounded percentage.
 */
export function contextUtilization(messages, model, configState = null) {
  const tokens = estimateTokenCount(messages)
  const limit = modelContextLimit(model, configState)

  let ratio = 0
  if (limit > 0) {
    ratio = Math.min(1, tokens / limit)
  }
  const percent = Math.round(ratio * 100)

  return { tokens, limit, ratio, percent }
}
172
+
173
/**
 * Tell whether a provider/model pair is eligible for native compaction.
 * True only for the "anthropic" provider with a model name that contains
 * "claude" and either "opus" or "sonnet" (case-insensitive).
 *
 * @param {string} providerType Provider identifier.
 * @param {string} model Model identifier.
 * @returns {boolean}
 */
export function supportsNativeCompaction(providerType, model) {
  if (providerType !== "anthropic") return false
  const name = String(model || "").toLowerCase()
  if (!name.includes("claude")) return false
  return ["opus", "sonnet"].some((family) => name.includes(family))
}
178
+
179
/**
 * Decide whether a conversation should be compacted.
 * Returns true when the message count reaches `thresholdMessages`, or when the
 * token count (the supplied `realTokenCount`, else an estimate) reaches
 * `thresholdRatio` of the model's context limit.
 *
 * @param {object} options
 * @param {Array<object>} options.messages Conversation messages.
 * @param {string} options.model Model identifier.
 * @param {number} [options.thresholdMessages] Message-count trigger.
 * @param {number} [options.thresholdRatio] Fraction of the context limit that triggers compaction.
 * @param {object|null} [options.configState] Forwarded to modelContextLimit().
 * @param {number|null} [options.realTokenCount] Measured token count; used instead of the estimate when not null/undefined.
 * @returns {boolean}
 */
export function shouldCompact({ messages, model, thresholdMessages = DEFAULT_THRESHOLD_MESSAGES, thresholdRatio = DEFAULT_THRESHOLD_RATIO, configState = null, realTokenCount = null }) {
  const tooManyMessages = messages.length >= thresholdMessages
  if (tooManyMessages) return true

  const limit = modelContextLimit(model, configState)
  const used = realTokenCount ?? estimateTokenCount(messages)
  return used >= limit * thresholdRatio
}
185
+
186
/**
 * Choose the index that splits `history` into [summarized | kept verbatim].
 *
 * Preferred: keep the last `keepRecentTurns` turns, where a turn is the run of
 * messages sharing a `turnId`. When there is no usable turn metadata (or not
 * more turns than requested), keep the last `keepRecent` messages instead.
 * In both cases the split is moved back while the first kept message carries a
 * tool_result, so a tool_result is never kept without the message before it.
 */
function findCompactionSplit(history, keepRecent, keepRecentTurns) {
  let splitIdx = -1

  if (keepRecentTurns > 0) {
    const turnIds = []
    const seenTurns = new Set()
    for (const msg of history) {
      if (msg.turnId && !seenTurns.has(msg.turnId)) {
        seenTurns.add(msg.turnId)
        turnIds.push(msg.turnId)
      }
    }
    if (turnIds.length > keepRecentTurns) {
      const keepFromTurnId = turnIds[turnIds.length - keepRecentTurns]
      splitIdx = history.findIndex((msg) => msg.turnId === keepFromTurnId)
    }
  }

  // Fallback: no turn-based split available, use message count
  if (splitIdx < 0) splitIdx = history.length - keepRecent
  splitIdx = Math.min(Math.max(splitIdx, 0), history.length)

  // Don't break tool_use/tool_result pairs: pull the preceding message into the kept part
  while (splitIdx > 0 && splitIdx < history.length) {
    const content = history[splitIdx].content
    const startsWithToolResult = Array.isArray(content) && content.some((b) => b.type === "tool_result")
    if (!startsWithToolResult) break
    splitIdx--
  }
  return splitIdx
}

/**
 * Compact a session: summarize the older part of its history with an LLM call
 * and replace the stored messages with `[summary message, ...recent messages]`.
 *
 * Steps: load history, pick a split point, prune the older messages with
 * pruneForSummary(), give HookBus.sessionCompacting() a chance to skip, request
 * the summary, store the new message list, record usage (best-effort) and save
 * a "compaction" checkpoint.
 *
 * @param {object} options
 * @param {string} options.sessionId Session whose history is compacted.
 * @param {string} options.model Model used for the summary request.
 * @param {string} options.providerType Provider used for the summary request.
 * @param {object} options.configState Forwarded to requestProvider() and loadPricing().
 * @param {number} [options.keepRecent] Messages kept when the turn-based split is unavailable.
 * @param {number} [options.keepRecentTurns] Turns kept when messages carry `turnId`.
 * @param {string|null} [options.baseUrl] Forwarded to requestProvider().
 * @param {string|null} [options.apiKeyEnv] Forwarded to requestProvider().
 * @returns {Promise<object>} `{ compacted: false, reason }` when nothing was
 *   changed, otherwise `{ compacted: true, summarizedCount, keptCount, summaryLength }`.
 */
export async function compactSession({
  sessionId,
  model,
  providerType,
  configState,
  keepRecent = DEFAULT_KEEP_RECENT,
  keepRecentTurns = DEFAULT_KEEP_RECENT_TURNS,
  baseUrl = null,
  apiKeyEnv = null
}) {
  const history = await getConversationHistory(sessionId, 9999)
  if (history.length <= keepRecent + 2) return { compacted: false, reason: "too few messages" }

  const splitIdx = findCompactionSplit(history, keepRecent, keepRecentTurns)
  const toSummarize = history.slice(0, splitIdx)
  const kept = history.slice(splitIdx)
  // Avoid an LLM call (and a summary of nothing) when the split leaves no older messages
  if (toSummarize.length === 0) return { compacted: false, reason: "nothing to summarize" }

  // Layer 1: prune large tool outputs before sending to LLM
  const pruned = pruneForSummary(toSummarize)
  const summaryPrompt = pruned.map((m) => {
    const content = m.content
    if (Array.isArray(content)) {
      return `[${m.role}]: ${content.map((b) => {
        if (b.type === "text") return b.text || ""
        if (b.type === "tool_use") return `[tool_use:${b.name}(${JSON.stringify(b.input || {}).slice(0, 120)})]`
        if (b.type === "tool_result") return `[tool_result:${b.is_error ? "ERROR " : ""}${b.content || ""}]`
        return ""
      }).filter(Boolean).join("\n")}`
    }
    return `[${m.role}]: ${content}`
  }).join("\n\n")

  const hookPayload = await HookBus.sessionCompacting({
    sessionId,
    messageCount: history.length,
    summarizeCount: toSummarize.length,
    keepCount: kept.length
  })
  if (hookPayload?.skip) return { compacted: false, reason: "skipped by hook" }

  let summaryText
  let compactionUsage = null
  try {
    const response = await requestProvider({
      configState,
      providerType,
      model,
      system: COMPACTION_SYSTEM,
      messages: [{ role: "user", content: summaryPrompt }],
      tools: [],
      baseUrl,
      apiKeyEnv
    })
    summaryText = (response.text || "").trim()
    compactionUsage = response.usage || null
  } catch (error) {
    // A failed summary must leave the stored history untouched
    return { compacted: false, reason: `compaction LLM call failed: ${error.message}` }
  }

  if (!summaryText) return { compacted: false, reason: "empty summary from LLM" }

  // Replace all messages with: [summary] + [kept recent messages]
  const summaryMessage = {
    role: "user",
    content: `<compaction-summary>\n${summaryText}\n</compaction-summary>`
  }
  await replaceMessages(sessionId, [summaryMessage, ...kept])

  // Record compaction LLM usage so it's not "invisible"
  if (compactionUsage) {
    try {
      const { pricing } = await loadPricing(configState)
      const { amount } = calculateCost(pricing, model, compactionUsage)
      await recordTurn({ sessionId, usage: compactionUsage, cost: amount })
    } catch { /* best-effort: usage accounting must not undo a successful compaction */ }
  }

  await saveCheckpoint(sessionId, {
    kind: "compaction",
    iteration: 0,
    compactedAt: Date.now(),
    summarizeCount: toSummarize.length,
    keepCount: kept.length,
    summaryVersion: 1,
    summaryLength: summaryText.length
  })

  return {
    compacted: true,
    summarizedCount: toSummarize.length,
    keptCount: kept.length,
    summaryLength: summaryText.length
  }
}