@benkhz/context-manager 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -56,6 +56,7 @@ new AIContextManager(config)
56
56
  | `maxTokens` | `number` | — | Passed through to `formatRequest` |
57
57
  | `contextLimit` | `number` | `80_000` | Char count that triggers auto-compaction |
58
58
  | `compactKeepLast` | `number` | `6` | Messages preserved verbatim after compaction |
59
+ | `injectSummary` | `boolean` | `true` | Auto-prepend the latest summary as a `system` message on every LLM request. Set `false` to place it yourself via `context.summary` in `formatRequest`. |
59
60
  | `headers` | `object` | `{}` | Extra HTTP headers on every `fetch` call |
60
61
 
61
62
  ---
@@ -180,13 +181,19 @@ unsub()
180
181
  ## Introspection
181
182
 
182
183
  ```js
183
- mgr.getMessages() // → Message[] (shallow copy)
184
- mgr.getSummary() // → string | null
185
- mgr.getTools() // → [{ name, schema }]
186
- mgr.getContext() // → { messages, summary, tools }
187
- mgr.reset() // clear messages, summary, state — returns this
184
+ mgr.getMessages() // → Message[] full, never-pruned turn history
185
+ mgr.getActiveMessages() // → Message[] current LLM-facing window (post-compaction)
186
+ mgr.getSummary() // → string | null — latest summary
187
+ mgr.getSummaries() // → string[] every summary ever produced, oldest first
188
+ mgr.getTools() // → [{ name, schema }]
189
+ mgr.getContext() // → { messages, activeMessages, summary, summaries, tools }
190
+ mgr.reset() // clear all message/summary state, plus reactive state and subscribers — returns this
188
191
  ```
189
192
 
193
+ `getMessages()` always returns every turn ever sent or received, even after compaction has shrunk the
194
+ LLM-facing window — useful for rendering a full conversation transcript in a UI. `getActiveMessages()`
195
+ returns what's actually being sent to the model right now.
196
+
190
197
  ---
191
198
 
192
199
  ## Presets
@@ -212,15 +219,30 @@ const mgr = new AIContextManager({
212
219
 
213
220
  ## Context compaction
214
221
 
215
- When the total character count of `_messages` exceeds `contextLimit`:
222
+ The manager tracks two parallel message lists: the full **history** (everything ever sent or
223
+ received, exposed via `getMessages()`) and the **active window** (`getActiveMessages()`) — the
224
+ slice actually sent to the LLM, which compaction and truncation shrink. History is never pruned.
225
+
226
+ The context-limit check runs at the start of every `send()` call, and also between tool-call iterations
227
+ *within* a single turn — a request that triggers several tool calls in a row can grow the active
228
+ window past `contextLimit` well before the turn finishes, so compaction can kick in mid-turn
229
+ rather than waiting for the next `send()`.
230
+
231
+ When the character count of the active window exceeds `contextLimit`:
216
232
 
217
233
  1. `onContextLimit` hook is called — returns `'compact'` (default), `'truncate'`, or `'error'`
218
234
  2. If `compact`: the overflow messages are sent to the LLM with a summarise prompt
219
- 3. The summary is stored in `_summary`; the last `compactKeepLast` messages are kept verbatim
220
- 4. `formatRequest` receives `context.summary` and can inline it however the API prefers
235
+ 3. The summary is stored (and appended to the summary history); the last `compactKeepLast`
236
+ active messages are kept verbatim
237
+ 4. On the next request, the latest summary is auto-prepended as a `system` message ahead of the
238
+ active window — unless `injectSummary: false`, in which case you place it yourself via
239
+ `context.summary` in `formatRequest`
221
240
 
222
241
  The `onCompact` hook can return a string to bypass the LLM call entirely.
223
242
 
243
+ Both compaction and truncation snap their cut point to avoid splitting a tool-call/tool-result
244
+ pair across the boundary — the kept window never starts with an orphaned `tool` message.
245
+
224
246
  ---
225
247
 
226
248
  ## Open decisions
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@benkhz/context-manager",
3
- "version": "1.0.0",
3
+ "version": "2.0.1",
4
4
  "description": "Provider-agnostic LLM context manager with tool execution, auto-compaction, reactive state, and an event bus.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -12,6 +12,9 @@ export class AIContextManager {
12
12
  * @param {number} [config.maxTokens] — forwarded to formatRequest
13
13
  * @param {number} [config.contextLimit] — char count before auto-compact (default 80 000)
14
14
  * @param {number} [config.compactKeepLast] — messages to keep verbatim after compact (default 6)
15
+ * @param {boolean} [config.injectSummary] — auto-prepend the latest summary as a system
16
+ * message on every LLM request (default true).
17
+ * Set false to manage summary placement yourself.
15
18
  * @param {object} [config.headers] — extra HTTP headers on every fetch
16
19
  * @param {object} config.hooks — see README for full hook reference
17
20
  */
@@ -22,6 +25,7 @@ export class AIContextManager {
22
25
  maxTokens,
23
26
  contextLimit = DEFAULT_CONTEXT_LIMIT,
24
27
  compactKeepLast = DEFAULT_COMPACT_KEEP_LAST,
28
+ injectSummary = true,
25
29
  headers = {},
26
30
  hooks = {},
27
31
  } = config
@@ -30,11 +34,16 @@ export class AIContextManager {
30
34
  if (!hooks.formatRequest) throw new Error('AIContextManager: hooks.formatRequest is required')
31
35
  if (!hooks.parseResponse) throw new Error('AIContextManager: hooks.parseResponse is required')
32
36
 
33
- this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, headers }
37
+ this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, injectSummary, headers }
34
38
  this._hooks = hooks
35
39
 
36
- this._messages = []
37
- this._summary = null
40
+ // Full, never-pruned turn log — what getMessages() exposes to consumers.
41
+ this._history = []
42
+ // LLM-facing window — subject to compact()/truncate(), this is what's actually sent.
43
+ this._activeMessages = []
44
+ // Latest summary (string) and the full chronological list of summaries ever produced.
45
+ this._summary = null
46
+ this._summaries = []
38
47
  this._tools = new Map() // name → { schema, handler }
39
48
  this._emitter = new EventEmitter()
40
49
  this._state = new Map() // reactive state
@@ -54,27 +63,21 @@ export class AIContextManager {
54
63
  */
55
64
  async send(content, opts = {}) {
56
65
  const userMsg = { role: 'user', content }
57
- this._messages.push(userMsg)
66
+ this._pushMessage(userMsg)
58
67
  this._emitter.emit('message:sent', { message: userMsg })
59
68
 
60
- // beforeSend hook — can return a transformed messages array (sync or async)
69
+ // beforeSend hook — can return a transformed active-window array (sync or async)
61
70
  if (this._hooks.beforeSend) {
62
- const next = await this._hooks.beforeSend([...this._messages])
63
- if (Array.isArray(next)) this._messages = next
71
+ const next = await this._hooks.beforeSend([...this._activeMessages])
72
+ if (Array.isArray(next)) this._activeMessages = next
64
73
  }
65
74
 
66
75
  // Auto-compact when approaching the context limit
67
- const chars = this._charCount()
68
- if (chars > this._config.contextLimit) {
69
- const policy = this._hooks.onContextLimit?.(chars, this._config.contextLimit) ?? 'compact'
70
- if (policy === 'compact') await this.compact()
71
- else if (policy === 'truncate') this._truncate()
72
- else throw new Error(`AIContextManager: context limit exceeded (${chars} chars)`)
73
- }
76
+ await this._enforceContextLimit()
74
77
 
75
78
  try {
76
79
  const assistantMsg = await this._runLoop(opts.system)
77
- this._messages.push(assistantMsg)
80
+ this._pushMessage(assistantMsg)
78
81
  this._emitter.emit('message:received', { message: assistantMsg })
79
82
  return assistantMsg
80
83
  } catch (err) {
@@ -90,8 +93,8 @@ export class AIContextManager {
90
93
  * or can be called manually at any time.
91
94
  */
92
95
  async compact() {
93
- const keep = this._config.compactKeepLast
94
- const overflow = this._messages.slice(0, -keep)
96
+ const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
97
+ const overflow = this._activeMessages.slice(0, cut)
95
98
  if (!overflow.length) return
96
99
 
97
100
  this._emitter.emit('context:compact', { messageCount: overflow.length })
@@ -101,7 +104,8 @@ export class AIContextManager {
101
104
  if (!summary) summary = await this._summarise(overflow)
102
105
 
103
106
  this._summary = summary
104
- this._messages = this._messages.slice(-keep)
107
+ this._summaries.push(summary)
108
+ this._activeMessages = this._activeMessages.slice(cut)
105
109
  this._emitter.emit('context:compacted', { summary })
106
110
  }
107
111
 
@@ -157,21 +161,34 @@ export class AIContextManager {
157
161
 
158
162
  // ── Introspection ─────────────────────────────────────────────────────────────
159
163
 
160
- getMessages() { return [...this._messages] }
161
- getSummary() { return this._summary }
162
- getTools() { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
164
+ /** Full, never-pruned turn history — every message ever sent or received. */
165
+ getMessages() { return [...this._history] }
166
+ /** The current LLM-facing window (post-compaction/truncation). */
167
+ getActiveMessages() { return [...this._activeMessages] }
168
+ getSummary() { return this._summary }
169
+ /** Every summary ever produced, oldest first. */
170
+ getSummaries() { return [...this._summaries] }
171
+ getTools() { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
163
172
 
164
173
  /** Full snapshot of context — useful for debugging or serialisation */
165
174
  getContext() {
166
- return { messages: this.getMessages(), summary: this._summary, tools: this.getTools() }
175
+ return {
176
+ messages: this.getMessages(),
177
+ activeMessages: this.getActiveMessages(),
178
+ summary: this._summary,
179
+ summaries: this.getSummaries(),
180
+ tools: this.getTools(),
181
+ }
167
182
  }
168
183
 
169
184
  /** Reset all conversation state (messages, summaries, reactive state, subscribers). Does not touch config or registered tools. */
170
185
  reset() {
171
- this._messages = []
172
- this._summary = null
173
- this._state = new Map()
174
- this._subscribers = new Map()
186
+ this._history = []
187
+ this._activeMessages = []
188
+ this._summary = null
189
+ this._summaries = []
190
+ this._state = new Map()
191
+ this._subscribers = new Map()
175
192
  return this
176
193
  }
177
194
 
@@ -186,7 +203,7 @@ export class AIContextManager {
186
203
  throw new Error(`AIContextManager: tool loop exceeded ${MAX_TOOL_ITERATIONS} iterations`)
187
204
  }
188
205
 
189
- const context = this.getContext()
206
+ const context = this._buildRequestContext()
190
207
  const body = this._hooks.formatRequest(context, { ...this._config, system })
191
208
  const raw = await this._fetch(body)
192
209
  let parsed = this._hooks.parseResponse(raw)
@@ -206,7 +223,7 @@ export class AIContextManager {
206
223
  content: parsed.content ?? '',
207
224
  toolCalls: parsed.toolCalls,
208
225
  }
209
- this._messages.push(assistantMsg)
226
+ this._pushMessage(assistantMsg)
210
227
 
211
228
  for (const tc of parsed.toolCalls) {
212
229
  const toolDef = this._tools.get(tc.name)
@@ -230,12 +247,49 @@ export class AIContextManager {
230
247
  if (transformed !== undefined) result = transformed
231
248
 
232
249
  this._emitter.emit('tool:result', { name: tc.name, result })
233
- this._messages.push({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
250
+ this._pushMessage({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
234
251
  }
235
252
 
253
+ // Re-check the context limit between tool-call iterations, not just at the
254
+ // top of send() — a single turn can run several iterations and blow past
255
+ // the limit long before the next send() call ever re-evaluates it.
256
+ await this._enforceContextLimit()
257
+
236
258
  return this._runLoop(system, depth + 1)
237
259
  }
238
260
 
261
+ /** Check contextLimit and apply the configured policy (compact/truncate/error). */
262
+ async _enforceContextLimit() {
263
+ const chars = this._charCount()
264
+ if (chars <= this._config.contextLimit) return
265
+ const policy = this._hooks.onContextLimit?.(chars, this._config.contextLimit) ?? 'compact'
266
+ if (policy === 'compact') await this.compact()
267
+ else if (policy === 'truncate') this._truncate()
268
+ else throw new Error(`AIContextManager: context limit exceeded (${chars} chars)`)
269
+ }
270
+
271
+ /** Append a message to both the full history and the active LLM-facing window. */
272
+ _pushMessage(msg) {
273
+ this._history.push(msg)
274
+ this._activeMessages.push(msg)
275
+ }
276
+
277
+ /**
278
+ * Build the context actually handed to hooks.formatRequest.
279
+ * Auto-prepends the latest summary as a system message unless
280
+ * config.injectSummary is false — this is what makes compaction
281
+ * actually save tokens on subsequent turns without consumers having
282
+ * to wire the summary in themselves.
283
+ */
284
+ _buildRequestContext() {
285
+ const shouldInject = this._config.injectSummary && this._summary
286
+ const messages = shouldInject
287
+ ? [{ role: 'system', content: `Conversation summary (earlier turns):\n${this._summary}` }, ...this._activeMessages]
288
+ : [...this._activeMessages]
289
+
290
+ return { messages, summary: this._summary, tools: this.getTools() }
291
+ }
292
+
239
293
  /** POST overflow messages to the LLM with a summarise prompt */
240
294
  async _summarise(messages) {
241
295
  const summaryMessages = [
@@ -272,13 +326,26 @@ export class AIContextManager {
272
326
  return res.json()
273
327
  }
274
328
 
275
- /** Approximate context size in characters */
329
+ /** Approximate context size in characters, measured on the active LLM-facing window */
276
330
  _charCount() {
277
- return this._messages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
331
+ return this._activeMessages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
278
332
  }
279
333
 
280
- /** Hard truncation — drop oldest messages down to compactKeepLast */
334
+ /** Hard truncation — drop the oldest active messages, keeping roughly the last compactKeepLast (dropped messages are not summarised) */
281
335
  _truncate() {
282
- this._messages = this._messages.slice(-this._config.compactKeepLast)
336
+ const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
337
+ this._activeMessages = this._activeMessages.slice(cut)
338
+ }
339
+
340
+ /**
341
+ * Find the slice index that keeps roughly the last `keep` messages without
342
+ * splitting a tool-call/tool-result pair — landing inside one would leave
343
+ * the kept window starting with an orphaned 'tool' message no provider accepts.
344
+ * Walks the boundary back to the start of the call/result group instead.
345
+ */
346
+ _compactionBoundary(messages, keep) {
347
+ let cut = Math.max(messages.length - keep, 0)
348
+ while (cut > 0 && messages[cut].role === 'tool') cut--
349
+ return cut
283
350
  }
284
351
  }