@benkhz/context-manager 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -56,6 +56,7 @@ new AIContextManager(config)
56
56
  | `maxTokens` | `number` | — | Passed through to `formatRequest` |
57
57
  | `contextLimit` | `number` | `80_000` | Char count that triggers auto-compaction |
58
58
  | `compactKeepLast` | `number` | `6` | Messages preserved verbatim after compaction |
59
+ | `injectSummary` | `boolean` | `true` | Auto-prepend the latest summary as a `system` message on every LLM request. Set `false` to place it yourself via `context.summary` in `formatRequest`. |
59
60
  | `headers` | `object` | `{}` | Extra HTTP headers on every `fetch` call |
60
61
 
61
62
  ---
@@ -180,13 +181,19 @@ unsub()
180
181
  ## Introspection
181
182
 
182
183
  ```js
183
- mgr.getMessages() // → Message[] (shallow copy)
184
- mgr.getSummary() // → string | null
185
- mgr.getTools() // → [{ name, schema }]
186
- mgr.getContext() // → { messages, summary, tools }
187
- mgr.reset() // clear messages, summary, state — returns this
184
+ mgr.getMessages() // → Message[] full, never-pruned turn history
185
+ mgr.getActiveMessages() // → Message[] current LLM-facing window (post-compaction)
186
+ mgr.getSummary() // → string | null — latest summary
187
+ mgr.getSummaries() // → string[] every summary ever produced, oldest first
188
+ mgr.getTools() // → [{ name, schema }]
189
+ mgr.getContext() // → { messages, activeMessages, summary, summaries, tools }
190
+ mgr.reset() // clear all message/summary state — returns this
188
191
  ```
189
192
 
193
+ `getMessages()` always returns every turn ever sent or received, even after compaction has shrunk the
194
+ LLM-facing window — useful for rendering a full conversation transcript in a UI. `getActiveMessages()`
195
+ returns what's actually being sent to the model right now.
196
+
190
197
  ---
191
198
 
192
199
  ## Presets
@@ -212,15 +219,25 @@ const mgr = new AIContextManager({
212
219
 
213
220
  ## Context compaction
214
221
 
215
- When the total character count of `_messages` exceeds `contextLimit`:
222
+ The manager tracks two parallel message lists: the full **history** (everything ever sent or
223
+ received, exposed via `getMessages()`) and the **active window** (`getActiveMessages()`) — the
224
+ slice actually sent to the LLM, which compaction and truncation shrink. History is never pruned.
225
+
226
+ When the character count of the active window exceeds `contextLimit`:
216
227
 
217
228
  1. `onContextLimit` hook is called — returns `'compact'` (default), `'truncate'`, or `'error'`
218
229
  2. If `compact`: the overflow messages are sent to the LLM with a summarise prompt
219
- 3. The summary is stored in `_summary`; the last `compactKeepLast` messages are kept verbatim
220
- 4. `formatRequest` receives `context.summary` and can inline it however the API prefers
230
+ 3. The summary is stored (and appended to the summary history); the last `compactKeepLast`
231
+ active messages are kept verbatim
232
+ 4. On the next request, the latest summary is auto-prepended as a `system` message ahead of the
233
+ active window — unless `injectSummary: false`, in which case you place it yourself via
234
+ `context.summary` in `formatRequest`
221
235
 
222
236
  The `onCompact` hook can return a string to bypass the LLM call entirely.
223
237
 
238
+ Both compaction and truncation snap their cut point to avoid splitting a tool-call/tool-result
239
+ pair across the boundary — the kept window never starts with an orphaned `tool` message.
240
+
224
241
  ---
225
242
 
226
243
  ## Open decisions
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@benkhz/context-manager",
3
- "version": "1.0.0",
3
+ "version": "2.0.0",
4
4
  "description": "Provider-agnostic LLM context manager with tool execution, auto-compaction, reactive state, and an event bus.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -12,6 +12,9 @@ export class AIContextManager {
12
12
  * @param {number} [config.maxTokens] — forwarded to formatRequest
13
13
  * @param {number} [config.contextLimit] — char count before auto-compact (default 80 000)
14
14
  * @param {number} [config.compactKeepLast] — messages to keep verbatim after compact (default 6)
15
+ * @param {boolean} [config.injectSummary] — auto-prepend the latest summary as a system
16
+ * message on every LLM request (default true).
17
+ * Set false to manage summary placement yourself.
15
18
  * @param {object} [config.headers] — extra HTTP headers on every fetch
16
19
  * @param {object} config.hooks — see README for full hook reference
17
20
  */
@@ -22,6 +25,7 @@ export class AIContextManager {
22
25
  maxTokens,
23
26
  contextLimit = DEFAULT_CONTEXT_LIMIT,
24
27
  compactKeepLast = DEFAULT_COMPACT_KEEP_LAST,
28
+ injectSummary = true,
25
29
  headers = {},
26
30
  hooks = {},
27
31
  } = config
@@ -30,11 +34,16 @@ export class AIContextManager {
30
34
  if (!hooks.formatRequest) throw new Error('AIContextManager: hooks.formatRequest is required')
31
35
  if (!hooks.parseResponse) throw new Error('AIContextManager: hooks.parseResponse is required')
32
36
 
33
- this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, headers }
37
+ this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, injectSummary, headers }
34
38
  this._hooks = hooks
35
39
 
36
- this._messages = []
37
- this._summary = null
40
+ // Full, never-pruned turn log — what getMessages() exposes to consumers.
41
+ this._history = []
42
+ // LLM-facing window — subject to compact()/truncate(), this is what's actually sent.
43
+ this._activeMessages = []
44
+ // Latest summary (string) and the full chronological list of summaries ever produced.
45
+ this._summary = null
46
+ this._summaries = []
38
47
  this._tools = new Map() // name → { schema, handler }
39
48
  this._emitter = new EventEmitter()
40
49
  this._state = new Map() // reactive state
@@ -54,13 +63,13 @@ export class AIContextManager {
54
63
  */
55
64
  async send(content, opts = {}) {
56
65
  const userMsg = { role: 'user', content }
57
- this._messages.push(userMsg)
66
+ this._pushMessage(userMsg)
58
67
  this._emitter.emit('message:sent', { message: userMsg })
59
68
 
60
- // beforeSend hook — can return a transformed messages array (sync or async)
69
+ // beforeSend hook — can return a transformed active-window array (sync or async)
61
70
  if (this._hooks.beforeSend) {
62
- const next = await this._hooks.beforeSend([...this._messages])
63
- if (Array.isArray(next)) this._messages = next
71
+ const next = await this._hooks.beforeSend([...this._activeMessages])
72
+ if (Array.isArray(next)) this._activeMessages = next
64
73
  }
65
74
 
66
75
  // Auto-compact when approaching the context limit
@@ -74,7 +83,7 @@ export class AIContextManager {
74
83
 
75
84
  try {
76
85
  const assistantMsg = await this._runLoop(opts.system)
77
- this._messages.push(assistantMsg)
86
+ this._pushMessage(assistantMsg)
78
87
  this._emitter.emit('message:received', { message: assistantMsg })
79
88
  return assistantMsg
80
89
  } catch (err) {
@@ -90,8 +99,8 @@ export class AIContextManager {
90
99
  * or can be called manually at any time.
91
100
  */
92
101
  async compact() {
93
- const keep = this._config.compactKeepLast
94
- const overflow = this._messages.slice(0, -keep)
102
+ const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
103
+ const overflow = this._activeMessages.slice(0, cut)
95
104
  if (!overflow.length) return
96
105
 
97
106
  this._emitter.emit('context:compact', { messageCount: overflow.length })
@@ -101,7 +110,8 @@ export class AIContextManager {
101
110
  if (!summary) summary = await this._summarise(overflow)
102
111
 
103
112
  this._summary = summary
104
- this._messages = this._messages.slice(-keep)
113
+ this._summaries.push(summary)
114
+ this._activeMessages = this._activeMessages.slice(cut)
105
115
  this._emitter.emit('context:compacted', { summary })
106
116
  }
107
117
 
@@ -157,21 +167,34 @@ export class AIContextManager {
157
167
 
158
168
  // ── Introspection ─────────────────────────────────────────────────────────────
159
169
 
160
- getMessages() { return [...this._messages] }
161
- getSummary() { return this._summary }
162
- getTools() { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
170
+ /** Full, never-pruned turn history — every message ever sent or received. */
171
+ getMessages() { return [...this._history] }
172
+ /** The current LLM-facing window (post-compaction/truncation). */
173
+ getActiveMessages() { return [...this._activeMessages] }
174
+ getSummary() { return this._summary }
175
+ /** Every summary ever produced, oldest first. */
176
+ getSummaries() { return [...this._summaries] }
177
+ getTools() { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
163
178
 
164
179
  /** Full snapshot of context — useful for debugging or serialisation */
165
180
  getContext() {
166
- return { messages: this.getMessages(), summary: this._summary, tools: this.getTools() }
181
+ return {
182
+ messages: this.getMessages(),
183
+ activeMessages: this.getActiveMessages(),
184
+ summary: this._summary,
185
+ summaries: this.getSummaries(),
186
+ tools: this.getTools(),
187
+ }
167
188
  }
168
189
 
169
190
  /** Reset all conversation state. Does not touch config or registered tools. */
170
191
  reset() {
171
- this._messages = []
172
- this._summary = null
173
- this._state = new Map()
174
- this._subscribers = new Map()
192
+ this._history = []
193
+ this._activeMessages = []
194
+ this._summary = null
195
+ this._summaries = []
196
+ this._state = new Map()
197
+ this._subscribers = new Map()
175
198
  return this
176
199
  }
177
200
 
@@ -186,7 +209,7 @@ export class AIContextManager {
186
209
  throw new Error(`AIContextManager: tool loop exceeded ${MAX_TOOL_ITERATIONS} iterations`)
187
210
  }
188
211
 
189
- const context = this.getContext()
212
+ const context = this._buildRequestContext()
190
213
  const body = this._hooks.formatRequest(context, { ...this._config, system })
191
214
  const raw = await this._fetch(body)
192
215
  let parsed = this._hooks.parseResponse(raw)
@@ -206,7 +229,7 @@ export class AIContextManager {
206
229
  content: parsed.content ?? '',
207
230
  toolCalls: parsed.toolCalls,
208
231
  }
209
- this._messages.push(assistantMsg)
232
+ this._pushMessage(assistantMsg)
210
233
 
211
234
  for (const tc of parsed.toolCalls) {
212
235
  const toolDef = this._tools.get(tc.name)
@@ -230,12 +253,34 @@ export class AIContextManager {
230
253
  if (transformed !== undefined) result = transformed
231
254
 
232
255
  this._emitter.emit('tool:result', { name: tc.name, result })
233
- this._messages.push({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
256
+ this._pushMessage({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
234
257
  }
235
258
 
236
259
  return this._runLoop(system, depth + 1)
237
260
  }
238
261
 
262
+ /** Append a message to both the full history and the active LLM-facing window. */
263
+ _pushMessage(msg) {
264
+ this._history.push(msg)
265
+ this._activeMessages.push(msg)
266
+ }
267
+
268
+ /**
269
+ * Build the context actually handed to hooks.formatRequest.
270
+ * Auto-prepends the latest summary as a system message unless
271
+ * config.injectSummary is false — this is what makes compaction
272
+ * actually save tokens on subsequent turns without consumers having
273
+ * to wire the summary in themselves.
274
+ */
275
+ _buildRequestContext() {
276
+ const shouldInject = this._config.injectSummary && this._summary
277
+ const messages = shouldInject
278
+ ? [{ role: 'system', content: `Conversation summary (earlier turns):\n${this._summary}` }, ...this._activeMessages]
279
+ : [...this._activeMessages]
280
+
281
+ return { messages, summary: this._summary, tools: this.getTools() }
282
+ }
283
+
239
284
  /** POST overflow messages to the LLM with a summarise prompt */
240
285
  async _summarise(messages) {
241
286
  const summaryMessages = [
@@ -272,13 +317,26 @@ export class AIContextManager {
272
317
  return res.json()
273
318
  }
274
319
 
275
- /** Approximate context size in characters */
320
+ /** Approximate context size in characters, measured on the active LLM-facing window */
276
321
  _charCount() {
277
- return this._messages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
322
+ return this._activeMessages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
278
323
  }
279
324
 
280
- /** Hard truncation — drop oldest messages down to compactKeepLast */
325
+ /** Hard truncation — drop the oldest active messages, keeping roughly the last compactKeepLast (no new summary is generated) */
281
326
  _truncate() {
282
- this._messages = this._messages.slice(-this._config.compactKeepLast)
327
+ const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
328
+ this._activeMessages = this._activeMessages.slice(cut)
329
+ }
330
+
331
+ /**
332
+ * Find the slice index that keeps roughly the last `keep` messages without
333
+ * splitting a tool-call/tool-result pair — landing inside one would leave
334
+ * the kept window starting with an orphaned 'tool' message no provider accepts.
335
+ * Walks the boundary back to the start of the call/result group instead.
336
+ */
337
+ _compactionBoundary(messages, keep) {
338
+ let cut = Math.max(messages.length - keep, 0)
339
+ while (cut > 0 && messages[cut].role === 'tool') cut--
340
+ return cut
283
341
  }
284
342
  }