openhermes 1.12.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/README.md +126 -207
  2. package/autorecall.mjs +79 -12
  3. package/bootstrap.mjs +123 -24
  4. package/curator.mjs +4 -40
  5. package/harness/commands/harness-audit.md +1 -1
  6. package/harness/commands/learn.md +2 -2
  7. package/harness/commands/memory-search.md +2 -2
  8. package/harness/commands/ohc.md +13 -0
  9. package/harness/constitution/soul.md +16 -4
  10. package/harness/instructions/RUNTIME.md +6 -3
  11. package/harness/prompts/architect.txt +14 -0
  12. package/harness/prompts/build-cpp.md +15 -1
  13. package/harness/prompts/build-error-resolver.md +15 -9
  14. package/harness/prompts/build-go.md +14 -0
  15. package/harness/prompts/build-java.md +15 -1
  16. package/harness/prompts/build-kotlin.md +15 -1
  17. package/harness/prompts/build-rust.md +14 -0
  18. package/harness/prompts/code-reviewer.md +15 -9
  19. package/harness/prompts/doc-updater.md +13 -0
  20. package/harness/prompts/docs-lookup.md +11 -0
  21. package/harness/prompts/e2e-runner.txt +12 -0
  22. package/harness/prompts/explore.md +16 -4
  23. package/harness/prompts/harness-optimizer.md +12 -0
  24. package/harness/prompts/loop-operator.md +11 -0
  25. package/harness/prompts/planner.md +15 -9
  26. package/harness/prompts/refactor-cleaner.md +14 -0
  27. package/harness/prompts/review-cpp.md +14 -1
  28. package/harness/prompts/review-database.md +13 -0
  29. package/harness/prompts/review-go.md +13 -0
  30. package/harness/prompts/review-java.md +14 -1
  31. package/harness/prompts/review-kotlin.md +13 -0
  32. package/harness/prompts/review-python.md +14 -1
  33. package/harness/prompts/review-rust.md +13 -0
  34. package/harness/prompts/security-reviewer.md +15 -9
  35. package/harness/prompts/tdd-guide.md +14 -0
  36. package/harness/rules/audit.md +2 -2
  37. package/harness/rules/delegation.md +0 -2
  38. package/harness/rules/handoff.md +267 -0
  39. package/harness/rules/memory-management.md +4 -4
  40. package/harness/rules/precedence.md +1 -1
  41. package/harness/rules/retrieval.md +5 -5
  42. package/harness/rules/runtime-guards.md +1 -1
  43. package/harness/rules/self-heal.md +1 -1
  44. package/harness/rules/session-start.md +5 -5
  45. package/harness/rules/skills-management.md +2 -2
  46. package/harness/rules/verification.md +4 -4
  47. package/harness/scripts/sync-commands.mjs +259 -0
  48. package/index.mjs +6 -2
  49. package/lib/ambient-memory.mjs +167 -0
  50. package/lib/handoff.mjs +176 -0
  51. package/lib/hardening.mjs +13 -8
  52. package/lib/memory-tools-plugin.mjs +107 -54
  53. package/lib/ohc/block-sync.mjs +69 -0
  54. package/lib/ohc/compress/search.mjs +152 -0
  55. package/lib/ohc/compress/state.mjs +76 -0
  56. package/lib/ohc/config.mjs +172 -16
  57. package/lib/ohc/message-ids.mjs +168 -0
  58. package/lib/ohc/notify.mjs +150 -0
  59. package/lib/ohc/protected-patterns.mjs +54 -0
  60. package/lib/ohc/prune-apply.mjs +134 -0
  61. package/lib/ohc/pruner.mjs +406 -55
  62. package/lib/ohc/reaper.mjs +12 -3
  63. package/lib/ohc/state.mjs +246 -15
  64. package/lib/ohc/strategies/deduplication.mjs +72 -0
  65. package/lib/ohc/strategies/index.mjs +2 -0
  66. package/lib/ohc/strategies/purge-errors.mjs +43 -0
  67. package/lib/ohc/token-utils.mjs +26 -0
  68. package/lib/ohc/updater.mjs +36 -13
  69. package/lib/paths.mjs +0 -3
  70. package/lib/search.mjs +48 -0
  71. package/package.json +6 -2
  72. package/schemas/audit.schema.json +22 -1
  73. package/schemas/backlog.schema.json +23 -2
  74. package/schemas/checkpoint.schema.json +23 -2
  75. package/schemas/constraint.schema.json +23 -2
  76. package/schemas/decision.schema.json +23 -2
  77. package/schemas/instinct.schema.json +23 -2
  78. package/schemas/mistake.schema.json +23 -2
  79. package/schemas/verification_receipt.schema.json +23 -2
  80. package/skill-builder.mjs +12 -23
@@ -1,7 +1,41 @@
1
1
  import { tool } from "@opencode-ai/plugin"
2
2
  import { loadConfig } from "./config.mjs"
3
3
  import { selectMessagesToReap, totalTokens } from "./reaper.mjs"
4
- import { loadOhcState, saveOhcState } from "./state.mjs"
4
+ import {
5
+ loadOhcState, saveOhcState, createSessionState,
6
+ serializeState, deserializeState,
7
+ buildToolIdList, syncToolCache, countTurns,
8
+ } from "./state.mjs"
9
+ import { sendCompressNotification } from "./notify.mjs"
10
+ import { deduplicate, purgeErrors } from "./strategies/index.mjs"
11
+ import { applyPruneTools, filterCompressedBlocks } from "./prune-apply.mjs"
12
+ import { assignMessageRefs, injectMessageIds } from "./message-ids.mjs"
13
+ import { syncCompressionBlocks } from "./block-sync.mjs"
14
+ import { buildSearchContext, resolveBoundaryIds, resolveSelection, resolveAnchorMessageId, validateNonOverlapping } from "./compress/search.mjs"
15
+ import { allocateBlockId, allocateRunId, wrapBlockSummary, applyCompressionState } from "./compress/state.mjs"
16
+
17
+ const AUTOPRUNE_COOLDOWN = 10000
18
+ const stateCache = new Map()
19
+
20
/**
 * Returns the in-memory state object for a session, creating it on first use.
 *
 * On a cache miss the persisted state is read with `loadOhcState`, passed
 * through `deserializeState`, tagged with the session id and stored in
 * `stateCache` so later calls return the same object.
 *
 * @param {string} sessionId - session identifier; any falsy value yields null
 * @returns {object|null} the cached (or newly loaded) state, or null without an id
 */
function getOrCreateState(sessionId) {
  if (!sessionId) return null

  const cached = stateCache.get(sessionId)
  if (cached) return cached

  const loaded = deserializeState(loadOhcState(sessionId))
  loaded.sessionId = sessionId
  stateCache.set(sessionId, loaded)
  return loaded
}
31
+
32
/**
 * Finds the session id carried by a list of messages.
 *
 * @param {Array<object>} messages - messages that may carry `info.sessionID`
 * @returns {string|null} the first truthy `info.sessionID` in list order, or
 *   null when the list is missing, empty, or no message has one
 */
function getSessionId(messages) {
  if (!messages?.length) return null
  const carrier = messages.find((msg) => msg.info?.sessionID)
  return carrier ? carrier.info.sessionID : null
}
5
39
 
6
40
  function buildNudge(pct, max) {
7
41
  if (pct > 0.95) return `[OHC] Context critically high (${Math.round(pct * 100)}% of ${max.toLocaleString()} token budget). Oldest messages will be pruned immediately if limit exceeded. Use the \`compress\` tool now.`
@@ -22,7 +56,13 @@ function createSummaryMessage(text) {
22
56
 
23
57
  async function applyCompress(ctx, sessionId, summary, max, min, targetTokens) {
24
58
  const ss = getOrCreateState(sessionId)
25
- ss.prunedIds.clear()
59
+ if (ss) {
60
+ ss.prunedIds.clear()
61
+ ss.summary = null
62
+ ss.anchorMessageId = null
63
+ ss._pruneCycleDone = false
64
+ ss.lastAutoPruneAt = null
65
+ }
26
66
 
27
67
  const res = await ctx.client.session.messages({ path: { id: sessionId } })
28
68
  const msgs = res?.data || res || []
@@ -31,38 +71,115 @@ async function applyCompress(ctx, sessionId, summary, max, min, targetTokens) {
31
71
  const selected = selectMessagesToReap(msgs, max, min, "compress", targetTokens)
32
72
  if (selected.length === 0) return { removed: 0, message: "already within target" }
33
73
 
34
- for (const r of selected) ss.prunedIds.add(r.id)
35
- ss.summary = summarizeRemoved(selected, summary)
36
- ss.anchorMessageId = selected[0].id
37
- saveOhcState(sessionId, {
38
- prunedMessageIds: [...ss.prunedIds],
39
- summary: ss.summary,
40
- anchorMessageId: ss.anchorMessageId,
41
- })
74
+ if (ss) {
75
+ for (const r of selected) ss.prunedIds.add(r.id)
76
+ ss.summary = summarizeRemoved(selected, summary)
77
+ ss.anchorMessageId = selected[0].id
78
+ saveOhcState(sessionId, serializeState(ss))
79
+ }
42
80
 
43
- return { removed: selected.length }
81
+ const tokensRemoved = selected.reduce((s, r) => s + r.tokens, 0)
82
+ const beforeTotal = totalTokens(msgs)
83
+ const afterTotal = beforeTotal - tokensRemoved
84
+ if (ss) {
85
+ ss.blockCount++
86
+ ss.totalTokensSaved += tokensRemoved
87
+ }
88
+ return { removed: selected.length, afterTotal, tokensRemoved, beforeTotal, beforeCount: msgs.length, afterCount: msgs.length - selected.length }
44
89
  }
45
90
 
46
- const stateCache = new Map()
91
/**
 * Shortens a string to at most `n` characters, appending "..." when cut.
 *
 * @param {string} s - text to shorten; falsy values become ""
 * @param {number} n - maximum number of characters kept before the ellipsis
 * @returns {string} `s` unchanged when it fits, otherwise its first `n`
 *   characters followed by "..."
 */
function truncateText(s, n) {
  if (!s) return ""
  return s.length <= n ? s : `${s.slice(0, n)}...`
}
47
95
 
48
- function getOrCreateState(sessionId) {
49
- if (!sessionId) return null
50
- let s = stateCache.get(sessionId)
51
- if (!s) {
52
- const persisted = loadOhcState(sessionId)
53
- s = {
54
- prunedIds: new Set(persisted?.prunedMessageIds || []),
55
- summary: persisted?.summary || null,
56
- anchorMessageId: persisted?.anchorMessageId || null,
57
- lastNudgePct: 0,
96
/**
 * Estimates how many tokens are taken up by summary text in system messages.
 *
 * Only `text` parts of messages whose role is "system" are counted, and only
 * when the text starts with "[Compressed:" or "[Auto-pruned:". Each matching
 * part contributes `ceil(length / 4)`.
 *
 * @param {Array<object>} messages - messages with `info.role` and `parts`
 * @returns {number} estimated token total; 0 when nothing matches
 */
function estimateSummaryTokens(messages) {
  let total = 0
  for (const m of messages) {
    if (m.info?.role !== "system" || !Array.isArray(m.parts)) continue
    for (const p of m.parts) {
      // The type check must apply to BOTH prefixes. Written as
      // `isText && a || b`, `&&` binds tighter than `||`, so a non-text part
      // with an "[Auto-pruned:" prefix would be counted as well.
      if (p.type !== "text" || typeof p.text !== "string") continue
      if (p.text.startsWith("[Compressed:") || p.text.startsWith("[Auto-pruned:")) {
        total += Math.ceil(p.text.length / 4)
      }
    }
  }
  return total
}
109
+
110
/**
 * Range-mode compression: replaces one or more message ranges with summaries.
 *
 * Steps, in order:
 *  1. Reset the session's auto-prune bookkeeping (pruned ids, summary, anchor,
 *     cycle flag, cooldown timestamp).
 *  2. Fetch the session's messages through `ctx.client.session.messages`.
 *  3. Resolve every `{startId, endId}` pair to a selection and anchor, then
 *     reject the batch via `validateNonOverlapping` before any state changes.
 *  4. For each range, allocate a block id, wrap the summary and record it with
 *     `applyCompressionState`.
 *
 * @param {object} ctx - plugin context exposing `client.session.messages`
 * @param {string} sessionId - session whose messages are compressed
 * @param {string|null} callId - id of the tool call, stored as `compressCallId`
 * @param {string} topic - label stored as both `topic` and `batchTopic`
 * @param {Array<{startId: string, endId: string, summary: string}>} content -
 *   ranges to compress
 * @returns {Promise<{messageIds: Array, compressedTokens: number, summaryRef: string, blockCount: number}>}
 *   ids of all selected messages, a `compressedTokens` field that is always 0
 *   here, the first range's summary (or `topic`), and the number of ranges
 * @throws {Error} "No session state" when no state exists for `sessionId`,
 *   "No messages" when the session has none; anything the resolve/validate
 *   helpers throw also propagates
 */
async function executeRangeCompress(ctx, sessionId, callId, topic, content) {
  const ss = getOrCreateState(sessionId)
  if (!ss) throw new Error("No session state")

  // Reset auto-prune bookkeeping before recording the new ranges.
  ss.prunedIds.clear()
  ss.summary = null
  ss.anchorMessageId = null
  ss._pruneCycleDone = false
  ss.lastAutoPruneAt = null

  const res = await ctx.client.session.messages({ path: { id: sessionId } })
  const rawMessages = res?.data || res || []
  if (!Array.isArray(rawMessages) || rawMessages.length === 0) throw new Error("No messages")

  const searchContext = buildSearchContext(ss, rawMessages)

  // Resolve every range up front so an invalid batch fails before any block
  // or run id has been allocated.
  const plans = content.map((entry, idx) => {
    const { startReference, endReference } = resolveBoundaryIds(searchContext, ss, entry.startId.trim(), entry.endId.trim())
    const selection = resolveSelection(searchContext, startReference, endReference)
    const anchorMessageId = resolveAnchorMessageId(startReference)
    return { index: idx, entry, selection, anchorMessageId }
  })

  validateNonOverlapping(plans)

  const runId = allocateRunId(ss)

  for (const plan of plans) {
    const blockId = allocateBlockId(ss)
    const storedSummary = wrapBlockSummary(blockId, plan.entry.summary)
    // Same chars/4 heuristic used for token estimates elsewhere in this file.
    const summaryTokens = Math.ceil(storedSummary.length / 4)

    applyCompressionState(
      ss,
      {
        topic,
        batchTopic: topic,
        startId: plan.entry.startId,
        endId: plan.entry.endId,
        mode: "range",
        runId,
        compressMessageId: plan.selection.messageIds[0],
        compressCallId: callId,
        summaryTokens,
      },
      plan.selection,
      plan.anchorMessageId,
      blockId,
      storedSummary,
      plan.selection.requiredBlockIds,
    )

    ss.blockCount++
    // NOTE(review): this adds the summary's estimated size, not the size of
    // the content it replaces, to the "saved" counter — confirm intent.
    ss.totalTokensSaved += summaryTokens
  }

  return {
    messageIds: plans.flatMap(p => p.selection.messageIds),
    // NOTE(review): hard-coded; the removed-token total is not computed here.
    compressedTokens: 0,
    summaryRef: content[0]?.summary || topic,
    blockCount: plans.length,
  }
}
63
180
 
64
181
  export const OhcPlugin = async (ctx) => {
65
- const config = loadConfig()
182
+ const config = loadConfig(ctx)
66
183
  if (!config.enabled) return {}
67
184
 
68
185
  const max = config.max
@@ -71,51 +188,114 @@ export const OhcPlugin = async (ctx) => {
71
188
 
72
189
  let systemInjected = false
73
190
 
191
/**
 * Formats compression timing as a status-line suffix.
 *
 * @param {object|null|undefined} ss - session state; reads
 *   `compressionTiming.lastDurationMs` and `compressionTiming.totalDurationMs`
 * @returns {string} "" when no total duration is recorded, otherwise
 *   ", last compress <x>s, total <y>s" with one decimal place each
 */
function buildTimingStr(ss) {
  const timing = ss?.compressionTiming
  if (!timing?.totalDurationMs) return ""

  const asSeconds = (ms) => ((ms || 0) / 1000).toFixed(1)
  return `, last compress ${asSeconds(timing.lastDurationMs)}s, total ${asSeconds(timing.totalDurationMs)}s`
}
199
+
200
/**
 * Groups messages by role and estimates each role's token usage.
 *
 * Messages without `info.role` are grouped under "unknown". Token estimates
 * use length / 4, rounded up per item so every counter stays an integer:
 *  - `text` parts: the text length;
 *  - `tool` parts: the JSON form of `state.input`, plus `state.output` (used
 *    as-is when it is a string, JSON-encoded otherwise).
 *
 * @param {Array<object>} messages - messages with optional `info.role` and `parts`
 * @returns {Object<string, {count: number, tokens: number}>} per-role totals
 */
function computeRoleBreakdown(messages) {
  // JSON.stringify returns undefined for some values; count those as empty.
  const estimate = (text) => Math.ceil((text ?? "").length / 4)

  const roles = {}
  for (const m of messages) {
    const role = m.info?.role || "unknown"
    if (!roles[role]) roles[role] = { count: 0, tokens: 0 }
    const bucket = roles[role]
    bucket.count++
    if (!Array.isArray(m.parts)) continue

    for (const p of m.parts) {
      if (p.type === "text") {
        bucket.tokens += estimate(p.text || "")
      } else if (p.type === "tool") {
        if (p.state?.input) bucket.tokens += estimate(JSON.stringify(p.state.input))
        if (p.state?.output) {
          const output = p.state.output
          bucket.tokens += estimate(typeof output === "string" ? output : JSON.stringify(output))
        }
      }
    }
  }
  return roles
}
219
+
220
/**
 * Counts the messages that follow the most recent user message.
 *
 * @param {Array<object>} messages - messages in order, each with optional `info.role`
 * @returns {number} number of messages after the last one whose role is
 *   "user"; 0 when there is no user message at all
 */
function countIterationsSinceLastUser(messages) {
  let trailing = 0
  // Walk backwards; the messages passed before reaching a user message are
  // exactly the ones that follow it.
  for (let idx = messages.length - 1; idx >= 0; idx--) {
    if (messages[idx].info?.role === "user") return trailing
    trailing++
  }
  return 0
}
231
+
74
232
  return {
75
233
  "experimental.chat.system.transform": async (_input, output) => {
76
234
  if (systemInjected || !output.system?.length) return
77
235
  systemInjected = true
78
- output.system[output.system.length - 1] += `\n\n## Context Management (OHC)\n- OHC manages all compression. Set \`compaction.auto: false\` in opencode.json to prevent double pruning.\n- Default soft budget: ${max.toLocaleString()} tokens. Soft floor: ${min.toLocaleString()}.\n- These are advisory — the agent can override by passing \`targetTokens\` to the \`compress\` tool.\n- Call \`compress\` with a summary to free context space. Optionally specify \`targetTokens\` (lower = more aggressive).`
236
+ const summaryBufNote = config.compress?.summaryBuffer ? ` Summary messages extend the budget.` : ``
237
+ const protectedToolsList = (config.compress?.protectedTools || []).length
238
+ ? ` Protected tools (${(config.compress.protectedTools).join(", ")}) are preserved.`
239
+ : ``
240
+ output.system[output.system.length - 1] += `\n\n## OHC Context Management\n- OHC manages all compression. Set \`compaction.auto: false\` in opencode.json.\n- Default budget: ${max.toLocaleString()} tokens. Floor: ${min.toLocaleString()}.${summaryBufNote}${protectedToolsList}\n\n### Compress Tool\nUse \`compress\` to free context space. Two modes:\n- **Legacy**: \`{ summary }\` — oldest messages first\n- **Range**: \`{ topic, content: [{startId, endId, summary}] }\` — target specific ranges\n - \`startId\` / \`endId\`: \`ohcNNNN\` (message) or \`bkNN\` (block)\n - Each message in context has an \`<ohc-ref>\` tag with its ID\n - Ranges must be non-overlapping in one call\n - Summary replaces the entire range`
79
241
  },
80
242
 
81
243
  "experimental.chat.messages.transform": async (_input, output) => {
82
244
  if (!output?.messages?.length) return
83
245
 
84
- const sessionId = output.messages.find(m => m.info?.sessionID)?.info?.sessionID
246
+ const sessionId = getSessionId(output.messages)
85
247
  if (!sessionId) return
86
248
 
87
249
  const ss = getOrCreateState(sessionId)
88
250
  if (!ss) return
89
251
 
90
- if (ss.prunedIds.size > 0) {
252
+ syncToolCache(ss, output.messages)
253
+ buildToolIdList(ss, output.messages)
254
+ ss.currentTurn = countTurns(ss, output.messages)
255
+
256
+ assignMessageRefs(ss, output.messages)
257
+ injectMessageIds(ss, output.messages)
258
+ syncCompressionBlocks(ss, output.messages)
259
+ filterCompressedBlocks(ss, output.messages)
260
+
261
+ deduplicate(ss, config, output.messages)
262
+ purgeErrors(ss, config, output.messages)
263
+ applyPruneTools(ss, output.messages)
264
+
265
+ const now = Date.now()
266
+ const recentlyPruned = ss.lastAutoPruneAt && (now - ss.lastAutoPruneAt) < AUTOPRUNE_COOLDOWN
267
+
268
+ if (ss.prunedIds.size > 0 && !recentlyPruned) {
91
269
  const currentIds = new Set(output.messages.map(m => m.info?.id).filter(Boolean))
92
270
  if ([...ss.prunedIds].every(id => !currentIds.has(id))) {
93
271
  ss.prunedIds.clear()
94
272
  ss.summary = null
95
273
  ss.anchorMessageId = null
96
- saveOhcState(sessionId, {
97
- prunedMessageIds: [],
98
- summary: null,
99
- anchorMessageId: null,
100
- })
274
+ ss._pruneCycleDone = false
275
+ saveOhcState(sessionId, serializeState(ss))
101
276
  }
102
277
  }
103
278
 
104
279
  const currentTotal = totalTokens(output.messages)
105
- if (currentTotal > max) {
280
+ if (currentTotal > max && !recentlyPruned && !ss._pruneCycleDone) {
106
281
  const selected = selectMessagesToReap(output.messages, max, min)
107
282
  if (selected.length > 0) {
108
283
  for (const r of selected) ss.prunedIds.add(r.id)
109
284
  if (!ss.summary) ss.summary = summarizeRemoved(selected, null)
110
285
  if (!ss.anchorMessageId) ss.anchorMessageId = selected[0].id
111
- saveOhcState(sessionId, {
112
- prunedMessageIds: [...ss.prunedIds],
113
- summary: ss.summary,
114
- anchorMessageId: ss.anchorMessageId,
115
- })
286
+ saveOhcState(sessionId, serializeState(ss))
287
+ const tokensRemoved = selected.reduce((s, r) => s + r.tokens, 0)
288
+ ss.blockCount++
289
+ ss.totalTokensSaved += tokensRemoved
290
+ ss.lastAutoPruneAt = now
291
+ ss._pruneCycleDone = true
116
292
  }
117
293
  }
118
294
 
295
+ if (ss._pruneCycleDone && ss.lastAutoPruneAt && (now - ss.lastAutoPruneAt) > AUTOPRUNE_COOLDOWN) {
296
+ ss._pruneCycleDone = false
297
+ }
298
+
119
299
  if (ss.prunedIds.size > 0) {
120
300
  const prunedIds = ss.prunedIds
121
301
  const summary = ss.summary
@@ -145,20 +325,77 @@ export const OhcPlugin = async (ctx) => {
145
325
  }
146
326
 
147
327
  const afterTotal = totalTokens(output.messages)
148
- const pct = afterTotal / max
149
- const nudge = buildNudge(pct, max)
328
+
329
+ const summaryBufferTotal = config.compress?.summaryBuffer
330
+ ? estimateSummaryTokens(output.messages)
331
+ : 0
332
+ const effectiveMax = max + summaryBufferTotal
333
+ const pct = afterTotal / effectiveMax
334
+ const nudge = buildNudge(pct, effectiveMax)
335
+
336
+ let nudgeText = null
150
337
  if (nudge && pct > ss.lastNudgePct + 0.05) {
338
+ nudgeText = nudge
151
339
  ss.lastNudgePct = pct
340
+ }
341
+
342
+ const iterationThreshold = config.compress?.iterationNudgeThreshold ?? 15
343
+ const iterCount = countIterationsSinceLastUser(output.messages)
344
+ if (iterCount >= iterationThreshold && iterCount % 5 === 0) {
345
+ const iterNudge = `[OHC] ${iterCount} AI iterations since your last message. Consider summarizing completed work with \`compress\`.`
346
+ if (nudgeText) nudgeText += "\n\n" + iterNudge
347
+ else nudgeText = iterNudge
348
+ }
349
+
350
+ const blockRefs = [...(ss.prune?.messages?.activeBlockIds || [])]
351
+ .filter(id => Number.isInteger(id) && id > 0)
352
+ .sort((a, b) => a - b)
353
+ .map(id => `bk${id}`)
354
+ let blockGuidance = null
355
+ if (blockRefs.length > 0) {
356
+ blockGuidance = `<ohc-reminder>\nActive compressed blocks: ${blockRefs.join(", ")}\nUse \`bkNN\` IDs as boundaries when compressing ranges that include previously compressed blocks.\n</ohc-reminder>`
357
+ }
358
+
359
+ if (nudgeText || blockGuidance) {
360
+ const appendText = [nudgeText, blockGuidance].filter(Boolean).join("\n\n")
152
361
  for (let i = output.messages.length - 1; i >= 0; i--) {
153
362
  const m = output.messages[i]
154
363
  if (m.info?.role === "assistant" && m.parts?.length) {
155
364
  const textPart = m.parts.find(p => p.type === "text")
156
- if (textPart) { textPart.text += "\n\n" + nudge; break }
365
+ if (textPart) { textPart.text += "\n\n" + appendText; break }
157
366
  }
158
367
  }
159
368
  }
160
369
  },
161
370
 
371
+ event: async (input) => {
372
+ if (input.event?.type !== "message.part.updated") return
373
+ const part = input.event.properties?.part
374
+ if (part?.type !== "tool" || part.tool !== "compress") return
375
+
376
+ const sessionId = input.event.properties?.sessionID
377
+ if (!sessionId) return
378
+ const ss = getOrCreateState(sessionId)
379
+ if (!ss) return
380
+
381
+ if (part.state?.status === "pending") {
382
+ if (typeof part.callID !== "string") return
383
+ ss.compressionTiming.starts.set(part.callID, Date.now())
384
+ return
385
+ }
386
+
387
+ if (part.state?.status === "completed") {
388
+ if (typeof part.callID !== "string") return
389
+ const start = ss.compressionTiming.starts.get(part.callID)
390
+ if (!start) return
391
+ ss.compressionTiming.starts.delete(part.callID)
392
+ const durationMs = Date.now() - start
393
+ ss.compressionTiming.lastDurationMs = durationMs
394
+ ss.compressionTiming.totalDurationMs = (ss.compressionTiming.totalDurationMs || 0) + durationMs
395
+ saveOhcState(sessionId, serializeState(ss))
396
+ }
397
+ },
398
+
162
399
  "command.execute.before": async (input, output) => {
163
400
  if (input.command !== "ohc") return
164
401
  const sub = (input.arguments || "").trim().toLowerCase()
@@ -175,7 +412,11 @@ export const OhcPlugin = async (ctx) => {
175
412
  } catch {}
176
413
  const ss = getOrCreateState(input.sessionID)
177
414
  const prunedCount = ss?.prunedIds.size || 0
178
- const text = `[OHC Status] ${msgs.length} messages visible (${prunedCount} pruned), ~${Math.round(t / 1000)}K / ${max.toLocaleString()} tokens (${Math.round((t / max) * 100)}%). Soft floor: ${min.toLocaleString()}.`
415
+ const strategyPruned = ss?.prune?.tools?.size || 0
416
+ const timing = buildTimingStr(ss)
417
+ const activeBlockIds = [...(ss?.prune?.messages?.activeBlockIds || [])].filter(id => Number.isInteger(id)).sort((a, b) => a - b)
418
+ const blockLine = activeBlockIds.length ? ` Blocks: bk${activeBlockIds.join(", bk")}.` : ""
419
+ const text = `[OHC Status] ${msgs.length} messages visible (${prunedCount} auto-pruned, ${strategyPruned} strategy-pruned)${blockLine}${timing}. ~${Math.round(t / 1000)}K / ${max.toLocaleString()} tokens (${Math.round((t / max) * 100)}%). Soft floor: ${min.toLocaleString()}.`
179
420
  await ctx.client.session.prompt({
180
421
  path: { id: input.sessionID },
181
422
  body: { noReply: true, parts: [{ type: "text", text, ignored: true }] },
@@ -183,6 +424,43 @@ export const OhcPlugin = async (ctx) => {
183
424
  throw new Error("__OHC_STATUS_HANDLED__")
184
425
  }
185
426
 
427
+ if (sub === "stats") {
428
+ const ss = getOrCreateState(input.sessionID)
429
+ const totalSaved = ss?.totalTokensSaved || 0
430
+ const blocks = ss?.blockCount || 0
431
+ const dedupPruned = ss?.prune?.tools?.size || 0
432
+ const autoPruned = ss?.prunedIds?.size || 0
433
+ const timing = buildTimingStr(ss)
434
+ const activeBlockIds = [...(ss?.prune?.messages?.activeBlockIds || [])].filter(id => Number.isInteger(id)).sort((a, b) => a - b)
435
+ const blockLine = activeBlockIds.length ? ` Active: bk${activeBlockIds.join(", bk")}.` : ""
436
+ const text = `[OHC Stats] ${blocks} total compression blocks${blockLine} ~${Math.round(totalSaved / 1000)}K tokens saved${timing}. Auto-pruned: ${autoPruned} messages. Strategy-pruned: ${dedupPruned} calls.`
437
+ await ctx.client.session.prompt({
438
+ path: { id: input.sessionID },
439
+ body: { noReply: true, parts: [{ type: "text", text, ignored: true }] },
440
+ })
441
+ throw new Error("__OHC_STATS_HANDLED__")
442
+ }
443
+
444
+ if (sub === "manual") {
445
+ const onOff = args.replace(/^manual\s*/i, "").trim().toLowerCase()
446
+ if (onOff === "on" || onOff === "1" || onOff === "true") {
447
+ const ss = getOrCreateState(input.sessionID)
448
+ if (ss) ss.manualMode = "active"
449
+ await ctx.client.session.prompt({
450
+ path: { id: input.sessionID },
451
+ body: { noReply: true, parts: [{ type: "text", text: "[OHC] Manual mode enabled. Agent will not autonomously compress.", ignored: true }] },
452
+ })
453
+ } else {
454
+ const ss = getOrCreateState(input.sessionID)
455
+ if (ss) ss.manualMode = false
456
+ await ctx.client.session.prompt({
457
+ path: { id: input.sessionID },
458
+ body: { noReply: true, parts: [{ type: "text", text: "[OHC] Manual mode disabled. Agent can compress autonomously.", ignored: true }] },
459
+ })
460
+ }
461
+ throw new Error("__OHC_MANUAL_HANDLED__")
462
+ }
463
+
186
464
  if (sub.startsWith("compress")) {
187
465
  const rest = args.replace(/^compress\s*/i, "").trim()
188
466
  const numMatch = rest.match(/^(\d+)\s*(.*)/)
@@ -190,12 +468,14 @@ export const OhcPlugin = async (ctx) => {
190
468
  let focus
191
469
  if (numMatch) {
192
470
  targetTokens = parseInt(numMatch[1], 10)
193
- focus = numMatch[2].trim() || "Manual compression by user"
471
+ focus = numMatch[2].trim() || "Manual compression"
194
472
  } else {
195
- focus = rest || "Manual compression by user"
473
+ focus = rest || "Manual compression"
196
474
  }
197
475
  try {
198
476
  const result = await applyCompress(ctx, input.sessionID, focus, max, min, targetTokens)
477
+ const cmdSs = getOrCreateState(input.sessionID)
478
+ await sendCompressNotification(ctx.client, input.sessionID, config, result.removed, focus, result.tokensRemoved, cmdSs?.totalTokensSaved || 0, cmdSs?.blockCount || 0, result.removed, result.afterCount)
199
479
  output.parts.length = 0
200
480
  output.parts.push({
201
481
  type: "text",
@@ -208,7 +488,64 @@ export const OhcPlugin = async (ctx) => {
208
488
  return
209
489
  }
210
490
 
211
- const text = "OHC commands: /ohc status — /ohc compress [targetTokens] [focus description]"
491
+ if (sub === "context") {
492
+ let msgs = [], t = 0
493
+ try {
494
+ if (ctx?.client?.session?.messages) {
495
+ const res = await ctx.client.session.messages({ path: { id: input.sessionID } })
496
+ msgs = res?.data || res || []
497
+ t = totalTokens(msgs)
498
+ }
499
+ } catch {}
500
+ const ss = getOrCreateState(input.sessionID)
501
+ const autoPruned = ss?.prunedIds?.size || 0
502
+ const stratPruned = ss?.prune?.tools?.size || 0
503
+ const totalSaved = ss?.totalTokensSaved || 0
504
+ const blocks = ss?.blockCount || 0
505
+ const visibleTokens = t
506
+ const totalTokensWithPruned = visibleTokens + totalSaved
507
+
508
+ const roles = computeRoleBreakdown(msgs)
509
+ const roleLines = []
510
+ for (const [role, info] of Object.entries(roles)) {
511
+ const pct = t > 0 ? Math.round((info.tokens / t) * 100) : 0
512
+ roleLines.push(`${role}: ${info.count} msgs, ~${Math.round(info.tokens / 1000)}K (${pct}%)`)
513
+ }
514
+
515
+ const activeBlockIds = [...(ss?.prune?.messages?.activeBlockIds || [])].filter(id => Number.isInteger(id)).sort((a, b) => a - b)
516
+ const blockLine = activeBlockIds.length ? ` Active blocks: bk${activeBlockIds.join(", bk")}.` : ""
517
+ const text = `[OHC Context] ${msgs.length} visible messages (${autoPruned + stratPruned} pruned). Tokens: ~${Math.round(visibleTokens / 1000)}K visible / ~${Math.round(totalTokensWithPruned / 1000)}K total. ${blocks} compression blocks, ~${Math.round(totalSaved / 1000)}K saved. ${blockLine}\n\nBreakdown:\n${roleLines.join("\n")}`
518
+ await ctx.client.session.prompt({
519
+ path: { id: input.sessionID },
520
+ body: { noReply: true, parts: [{ type: "text", text, ignored: true }] },
521
+ })
522
+ throw new Error("__OHC_CONTEXT_HANDLED__")
523
+ }
524
+
525
+ if (sub === "sweep") {
526
+ const rest = args.replace(/^sweep\s*/i, "").trim()
527
+ let count = rest ? parseInt(rest, 10) : 10
528
+ if (isNaN(count) || count < 1) count = 10
529
+ const ss = getOrCreateState(input.sessionID)
530
+ const allToolIds = ss?.toolIdList || []
531
+ const unpruned = allToolIds.filter(id => !ss.prune.tools.has(id))
532
+ const toSweep = unpruned.slice(-count)
533
+ let sweptCount = 0
534
+ for (const id of toSweep) {
535
+ const entry = ss.toolParameters.get(id)
536
+ if (entry) {
537
+ ss.prune.tools.set(id, entry.tokenCount || 0)
538
+ sweptCount++
539
+ }
540
+ }
541
+ applyPruneTools(ss, output.messages)
542
+ const text = `[OHC] Swept: ${sweptCount} tool calls pruned.`
543
+ output.parts.length = 0
544
+ output.parts.push({ type: "text", text })
545
+ return
546
+ }
547
+
548
+ const text = "OHC commands: /ohc status — /ohc stats — /ohc context — /ohc sweep [n] — /ohc manual [on|off] — /ohc compress [focus]"
212
549
  await ctx.client.session.prompt({
213
550
  path: { id: input.sessionID },
214
551
  body: { noReply: true, parts: [{ type: "text", text, ignored: true }] },
@@ -218,22 +555,36 @@ export const OhcPlugin = async (ctx) => {
218
555
 
219
556
  tool: {
220
557
  compress: tool({
221
- description: "Proactively compress old conversation content to free context space. Provide a technical summary of what was removed. Optionally specify targetTokens to control how much to keep (lower = more aggressive).",
558
+ description: "Compress conversation content to free context space. Supports two modes: range mode (specify content array with startId/endId/summary per entry) and legacy mode (specify summary with optional targetTokens). Use range mode for precise targeting; fall back to legacy for general oldest-first pruning. When using range mode, wrap each boundary pair: startId (ohcNNNN or bkNN) must appear before endId. Each entry's summary replaces that conversation range entirely. Provide a technical summary of what was removed, including file paths, function signatures, decisions, and constraints.",
222
559
  args: {
223
- summary: tool.schema.string().describe("Technical summary of the compressed content. Include what was removed and key decisions preserved."),
224
- targetTokens: tool.schema.number().optional().describe("Estimated target after compression (heuristic, not exact). Lower = more aggressive. Default uses soft config floor."),
560
+ topic: tool.schema.string().optional().describe("Range mode: Short label (3-5 words) for the overall batch e.g. 'Auth System Exploration'"),
561
+ content: tool.schema.array(tool.schema.object({
562
+ startId: tool.schema.string().describe("Boundary at range start: ohcNNNN (message) or bkNN (block)"),
563
+ endId: tool.schema.string().describe("Boundary at range end: ohcNNNN (message) or bkNN (block)"),
564
+ summary: tool.schema.string().describe("Complete technical summary replacing all content in this range. Include user intent, decisions, constraints, file paths, and function signatures."),
565
+ })).optional().describe("Range mode: One or more non-overlapping ranges to compress"),
566
+ summary: tool.schema.string().optional().describe("Legacy mode: Technical summary of compressed content. Use when not specifying content array."),
567
+ targetTokens: tool.schema.number().optional().describe("Legacy mode: Estimated target after compression. Lower = more aggressive. Default uses soft config floor."),
225
568
  },
226
569
  async execute(args, toolCtx) {
227
- const result = await applyCompress(ctx, toolCtx.sessionID, args.summary, max, min, args.targetTokens)
570
+ const callId = toolCtx.callID || null
571
+ const sessionId = toolCtx.sessionID
572
+
573
+ if (Array.isArray(args.content) && args.content.length > 0) {
574
+ const result = await executeRangeCompress(ctx, sessionId, callId, args.topic || "Compression", args.content)
575
+ toolCtx.metadata({ title: "Compress Range" })
576
+ const resultSs = getOrCreateState(sessionId)
577
+ await sendCompressNotification(ctx.client, sessionId, config, result.messageIds.length, result.summaryRef, result.compressedTokens, resultSs?.totalTokensSaved || 0, resultSs?.blockCount || 0, result.messageIds.length, 0)
578
+ return `Compressed ${result.messageIds.length} messages across ${args.content.length} range(s). Summary: "${truncateText(result.summaryRef, 200)}"`
579
+ }
580
+
581
+ const result = await applyCompress(ctx, sessionId, args.summary, max, min, args.targetTokens)
228
582
  toolCtx.metadata({ title: "Compress" })
583
+ const toolSs = getOrCreateState(sessionId)
584
+ await sendCompressNotification(ctx.client, sessionId, config, result.removed, truncateText(args.summary, 200), result.tokensRemoved, toolSs?.totalTokensSaved || 0, toolSs?.blockCount || 0, result.removed, result.afterCount)
229
585
  return `Compressed: ${result.removed} messages removed. Summary: "${truncateText(args.summary, 200)}"`
230
586
  },
231
587
  }),
232
588
  },
233
589
  }
234
590
  }
235
-
236
- function truncateText(s, n) {
237
- if (!s || s.length <= n) return s || ""
238
- return s.slice(0, n) + "..."
239
- }
@@ -28,18 +28,25 @@ export function totalTokens(messages) {
28
28
  *
29
29
  * targetOverride: when set, replaces floor/minFloor — agent explicit request
30
30
  * overrides the soft config defaults. Used when user asks "compress to X".
31
+ *
32
+ * turnProtectionTags: optional set of message IDs to protect from pruning.
33
+ * These are recent tool call message IDs that should be kept.
31
34
  */
32
- export function selectMessagesToReap(messages, maxLimit, minFloor, mode = "auto", targetOverride) {
35
+ export function selectMessagesToReap(messages, maxLimit, minFloor, mode = "auto", targetOverride, turnProtectionTags) {
33
36
  if (!messages?.length || messages.length < 3) return []
34
37
 
38
+ const protectSet = turnProtectionTags?.length ? new Set(turnProtectionTags) : null
39
+
35
40
  let total = totalTokens(messages)
36
41
  const selected = []
37
42
 
38
43
  if (mode === "compress") {
39
44
  const floor = targetOverride ?? minFloor
45
+ const tokenCache = messages.map((msg, i) => ({ idx: i, tokens: msgTokens(msg) }))
40
46
  let i = 1
41
47
  while (i < messages.length - 1) {
42
- const t = msgTokens(messages[i])
48
+ if (protectSet?.has(String(messages[i].info?.id ?? i))) { i++; continue }
49
+ const t = tokenCache[i].tokens
43
50
  if (total - t < floor) break
44
51
  total -= t
45
52
  selected.push({ id: String(messages[i].info?.id ?? i), msg: messages[i], tokens: t })
@@ -47,9 +54,11 @@ export function selectMessagesToReap(messages, maxLimit, minFloor, mode = "auto"
47
54
  }
48
55
  } else {
49
56
  const floor = targetOverride ?? minFloor
57
+ const tokenCache = messages.map((msg, i) => ({ idx: i, tokens: msgTokens(msg) }))
50
58
  let i = 1
51
59
  while (i < messages.length - 1 && total > maxLimit) {
52
- const t = msgTokens(messages[i])
60
+ if (protectSet?.has(String(messages[i].info?.id ?? i))) { i++; continue }
61
+ const t = tokenCache[i].tokens
53
62
  if (total - t < floor) break
54
63
  total -= t
55
64
  selected.push({ id: String(messages[i].info?.id ?? i), msg: messages[i], tokens: t })