@benkhz/context-manager 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -8
- package/package.json +1 -1
- package/src/AIContextManager.js +100 -33
package/README.md
CHANGED
|
@@ -56,6 +56,7 @@ new AIContextManager(config)
|
|
|
56
56
|
| `maxTokens` | `number` | — | Passed through to `formatRequest` |
|
|
57
57
|
| `contextLimit` | `number` | `80_000` | Char count that triggers auto-compaction |
|
|
58
58
|
| `compactKeepLast` | `number` | `6` | Messages preserved verbatim after compaction |
|
|
59
|
+
| `injectSummary` | `boolean` | `true` | Auto-prepend the latest summary as a `system` message on every LLM request. Set `false` to place it yourself via `context.summary` in `formatRequest`. |
|
|
59
60
|
| `headers` | `object` | `{}` | Extra HTTP headers on every `fetch` call |
|
|
60
61
|
|
|
61
62
|
---
|
|
@@ -180,13 +181,19 @@ unsub()
|
|
|
180
181
|
## Introspection
|
|
181
182
|
|
|
182
183
|
```js
|
|
183
|
-
mgr.getMessages()
|
|
184
|
-
mgr.
|
|
185
|
-
mgr.
|
|
186
|
-
mgr.
|
|
187
|
-
mgr.
|
|
184
|
+
mgr.getMessages() // → Message[] — full, never-pruned turn history
|
|
185
|
+
mgr.getActiveMessages() // → Message[] — current LLM-facing window (post-compaction)
|
|
186
|
+
mgr.getSummary() // → string | null — latest summary
|
|
187
|
+
mgr.getSummaries() // → string[] — every summary ever produced, oldest first
|
|
188
|
+
mgr.getTools() // → [{ name, schema }]
|
|
189
|
+
mgr.getContext() // → { messages, activeMessages, summary, summaries, tools }
|
|
190
|
+
mgr.reset() // clear all message/summary state — returns this
|
|
188
191
|
```
|
|
189
192
|
|
|
193
|
+
`getMessages()` always returns every turn ever sent or received, even after compaction has shrunk the
|
|
194
|
+
LLM-facing window — useful for rendering a full conversation transcript in a UI. `getActiveMessages()`
|
|
195
|
+
returns what's actually being sent to the model right now.
|
|
196
|
+
|
|
190
197
|
---
|
|
191
198
|
|
|
192
199
|
## Presets
|
|
@@ -212,15 +219,30 @@ const mgr = new AIContextManager({
|
|
|
212
219
|
|
|
213
220
|
## Context compaction
|
|
214
221
|
|
|
215
|
-
|
|
222
|
+
The manager tracks two parallel message lists: the full **history** (everything ever sent or
|
|
223
|
+
received, exposed via `getMessages()`) and the **active window** (`getActiveMessages()`) — the
|
|
224
|
+
slice actually sent to the LLM, which compaction and truncation shrink. History is never pruned.
|
|
225
|
+
|
|
226
|
+
The context-limit check runs at the start of every `send()` call, and also between tool-call iterations
|
|
227
|
+
*within* a single turn — a request that triggers several tool calls in a row can grow the active
|
|
228
|
+
window past `contextLimit` well before the turn finishes, so compaction can kick in mid-turn
|
|
229
|
+
rather than waiting for the next `send()`.
|
|
230
|
+
|
|
231
|
+
When the character count of the active window exceeds `contextLimit`:
|
|
216
232
|
|
|
217
233
|
1. `onContextLimit` hook is called — returns `'compact'` (default), `'truncate'`, or `'error'`
|
|
218
234
|
2. If `compact`: the overflow messages are sent to the LLM with a summarise prompt
|
|
219
|
-
3. The summary is stored
|
|
220
|
-
|
|
235
|
+
3. The summary is stored (and appended to the summary history); the last `compactKeepLast`
|
|
236
|
+
active messages are kept verbatim
|
|
237
|
+
4. On every subsequent request, the latest summary is auto-prepended as a `system` message ahead of the
|
|
238
|
+
active window — unless `injectSummary: false`, in which case you place it yourself via
|
|
239
|
+
`context.summary` in `formatRequest`
|
|
221
240
|
|
|
222
241
|
The `onCompact` hook can return a string to bypass the LLM call entirely.
|
|
223
242
|
|
|
243
|
+
Both compaction and truncation snap their cut point to avoid splitting a tool-call/tool-result
|
|
244
|
+
pair across the boundary — the kept window never starts with an orphaned `tool` message.
|
|
245
|
+
|
|
224
246
|
---
|
|
225
247
|
|
|
226
248
|
## Open decisions
|
package/package.json
CHANGED
package/src/AIContextManager.js
CHANGED
|
@@ -12,6 +12,9 @@ export class AIContextManager {
|
|
|
12
12
|
* @param {number} [config.maxTokens] — forwarded to formatRequest
|
|
13
13
|
* @param {number} [config.contextLimit] — char count before auto-compact (default 80 000)
|
|
14
14
|
* @param {number} [config.compactKeepLast] — messages to keep verbatim after compact (default 6)
|
|
15
|
+
* @param {boolean} [config.injectSummary] — auto-prepend the latest summary as a system
|
|
16
|
+
* message on every LLM request (default true).
|
|
17
|
+
* Set false to manage summary placement yourself.
|
|
15
18
|
* @param {object} [config.headers] — extra HTTP headers on every fetch
|
|
16
19
|
* @param {object} config.hooks — see README for full hook reference
|
|
17
20
|
*/
|
|
@@ -22,6 +25,7 @@ export class AIContextManager {
|
|
|
22
25
|
maxTokens,
|
|
23
26
|
contextLimit = DEFAULT_CONTEXT_LIMIT,
|
|
24
27
|
compactKeepLast = DEFAULT_COMPACT_KEEP_LAST,
|
|
28
|
+
injectSummary = true,
|
|
25
29
|
headers = {},
|
|
26
30
|
hooks = {},
|
|
27
31
|
} = config
|
|
@@ -30,11 +34,16 @@ export class AIContextManager {
|
|
|
30
34
|
if (!hooks.formatRequest) throw new Error('AIContextManager: hooks.formatRequest is required')
|
|
31
35
|
if (!hooks.parseResponse) throw new Error('AIContextManager: hooks.parseResponse is required')
|
|
32
36
|
|
|
33
|
-
this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, headers }
|
|
37
|
+
this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, injectSummary, headers }
|
|
34
38
|
this._hooks = hooks
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
this.
|
|
40
|
+
// Full, never-pruned turn log — what getMessages() exposes to consumers.
|
|
41
|
+
this._history = []
|
|
42
|
+
// LLM-facing window — subject to compact()/truncate(), this is what's actually sent.
|
|
43
|
+
this._activeMessages = []
|
|
44
|
+
// Latest summary (string) and the full chronological list of summaries ever produced.
|
|
45
|
+
this._summary = null
|
|
46
|
+
this._summaries = []
|
|
38
47
|
this._tools = new Map() // name → { schema, handler }
|
|
39
48
|
this._emitter = new EventEmitter()
|
|
40
49
|
this._state = new Map() // reactive state
|
|
@@ -54,27 +63,21 @@ export class AIContextManager {
|
|
|
54
63
|
*/
|
|
55
64
|
async send(content, opts = {}) {
|
|
56
65
|
const userMsg = { role: 'user', content }
|
|
57
|
-
this.
|
|
66
|
+
this._pushMessage(userMsg)
|
|
58
67
|
this._emitter.emit('message:sent', { message: userMsg })
|
|
59
68
|
|
|
60
|
-
// beforeSend hook — can return a transformed
|
|
69
|
+
// beforeSend hook — can return a transformed active-window array (sync or async)
|
|
61
70
|
if (this._hooks.beforeSend) {
|
|
62
|
-
const next = await this._hooks.beforeSend([...this.
|
|
63
|
-
if (Array.isArray(next)) this.
|
|
71
|
+
const next = await this._hooks.beforeSend([...this._activeMessages])
|
|
72
|
+
if (Array.isArray(next)) this._activeMessages = next
|
|
64
73
|
}
|
|
65
74
|
|
|
66
75
|
// Auto-compact when approaching the context limit
|
|
67
|
-
|
|
68
|
-
if (chars > this._config.contextLimit) {
|
|
69
|
-
const policy = this._hooks.onContextLimit?.(chars, this._config.contextLimit) ?? 'compact'
|
|
70
|
-
if (policy === 'compact') await this.compact()
|
|
71
|
-
else if (policy === 'truncate') this._truncate()
|
|
72
|
-
else throw new Error(`AIContextManager: context limit exceeded (${chars} chars)`)
|
|
73
|
-
}
|
|
76
|
+
await this._enforceContextLimit()
|
|
74
77
|
|
|
75
78
|
try {
|
|
76
79
|
const assistantMsg = await this._runLoop(opts.system)
|
|
77
|
-
this.
|
|
80
|
+
this._pushMessage(assistantMsg)
|
|
78
81
|
this._emitter.emit('message:received', { message: assistantMsg })
|
|
79
82
|
return assistantMsg
|
|
80
83
|
} catch (err) {
|
|
@@ -90,8 +93,8 @@ export class AIContextManager {
|
|
|
90
93
|
* or can be called manually at any time.
|
|
91
94
|
*/
|
|
92
95
|
async compact() {
|
|
93
|
-
const
|
|
94
|
-
const overflow = this.
|
|
96
|
+
const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
|
|
97
|
+
const overflow = this._activeMessages.slice(0, cut)
|
|
95
98
|
if (!overflow.length) return
|
|
96
99
|
|
|
97
100
|
this._emitter.emit('context:compact', { messageCount: overflow.length })
|
|
@@ -101,7 +104,8 @@ export class AIContextManager {
|
|
|
101
104
|
if (!summary) summary = await this._summarise(overflow)
|
|
102
105
|
|
|
103
106
|
this._summary = summary
|
|
104
|
-
this.
|
|
107
|
+
this._summaries.push(summary)
|
|
108
|
+
this._activeMessages = this._activeMessages.slice(cut)
|
|
105
109
|
this._emitter.emit('context:compacted', { summary })
|
|
106
110
|
}
|
|
107
111
|
|
|
@@ -157,21 +161,34 @@ export class AIContextManager {
|
|
|
157
161
|
|
|
158
162
|
// ── Introspection ─────────────────────────────────────────────────────────────
|
|
159
163
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
164
|
+
/** Full, never-pruned turn history — every message ever sent or received. */
|
|
165
|
+
getMessages() { return [...this._history] }
|
|
166
|
+
/** The current LLM-facing window (post-compaction/truncation). */
|
|
167
|
+
getActiveMessages() { return [...this._activeMessages] }
|
|
168
|
+
getSummary() { return this._summary }
|
|
169
|
+
/** Every summary ever produced, oldest first. */
|
|
170
|
+
getSummaries() { return [...this._summaries] }
|
|
171
|
+
getTools() { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
|
|
163
172
|
|
|
164
173
|
/** Full snapshot of context — useful for debugging or serialisation */
|
|
165
174
|
getContext() {
|
|
166
|
-
return {
|
|
175
|
+
return {
|
|
176
|
+
messages: this.getMessages(),
|
|
177
|
+
activeMessages: this.getActiveMessages(),
|
|
178
|
+
summary: this._summary,
|
|
179
|
+
summaries: this.getSummaries(),
|
|
180
|
+
tools: this.getTools(),
|
|
181
|
+
}
|
|
167
182
|
}
|
|
168
183
|
|
|
169
184
|
/** Reset all conversation state. Does not touch config or registered tools. */
|
|
170
185
|
reset() {
|
|
171
|
-
this.
|
|
172
|
-
this.
|
|
173
|
-
this.
|
|
174
|
-
this.
|
|
186
|
+
this._history = []
|
|
187
|
+
this._activeMessages = []
|
|
188
|
+
this._summary = null
|
|
189
|
+
this._summaries = []
|
|
190
|
+
this._state = new Map()
|
|
191
|
+
this._subscribers = new Map()
|
|
175
192
|
return this
|
|
176
193
|
}
|
|
177
194
|
|
|
@@ -186,7 +203,7 @@ export class AIContextManager {
|
|
|
186
203
|
throw new Error(`AIContextManager: tool loop exceeded ${MAX_TOOL_ITERATIONS} iterations`)
|
|
187
204
|
}
|
|
188
205
|
|
|
189
|
-
const context = this.
|
|
206
|
+
const context = this._buildRequestContext()
|
|
190
207
|
const body = this._hooks.formatRequest(context, { ...this._config, system })
|
|
191
208
|
const raw = await this._fetch(body)
|
|
192
209
|
let parsed = this._hooks.parseResponse(raw)
|
|
@@ -206,7 +223,7 @@ export class AIContextManager {
|
|
|
206
223
|
content: parsed.content ?? '',
|
|
207
224
|
toolCalls: parsed.toolCalls,
|
|
208
225
|
}
|
|
209
|
-
this.
|
|
226
|
+
this._pushMessage(assistantMsg)
|
|
210
227
|
|
|
211
228
|
for (const tc of parsed.toolCalls) {
|
|
212
229
|
const toolDef = this._tools.get(tc.name)
|
|
@@ -230,12 +247,49 @@ export class AIContextManager {
|
|
|
230
247
|
if (transformed !== undefined) result = transformed
|
|
231
248
|
|
|
232
249
|
this._emitter.emit('tool:result', { name: tc.name, result })
|
|
233
|
-
this.
|
|
250
|
+
this._pushMessage({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
|
|
234
251
|
}
|
|
235
252
|
|
|
253
|
+
// Re-check the context limit between tool-call iterations, not just at the
|
|
254
|
+
// top of send() — a single turn can run several iterations and blow past
|
|
255
|
+
// the limit long before the next send() call ever re-evaluates it.
|
|
256
|
+
await this._enforceContextLimit()
|
|
257
|
+
|
|
236
258
|
return this._runLoop(system, depth + 1)
|
|
237
259
|
}
|
|
238
260
|
|
|
261
|
+
/** Check contextLimit and apply the configured policy (compact/truncate/error). */
|
|
262
|
+
async _enforceContextLimit() {
|
|
263
|
+
const chars = this._charCount()
|
|
264
|
+
if (chars <= this._config.contextLimit) return
|
|
265
|
+
const policy = this._hooks.onContextLimit?.(chars, this._config.contextLimit) ?? 'compact'
|
|
266
|
+
if (policy === 'compact') await this.compact()
|
|
267
|
+
else if (policy === 'truncate') this._truncate()
|
|
268
|
+
else throw new Error(`AIContextManager: context limit exceeded (${chars} chars)`)
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
/** Append a message to both the full history and the active LLM-facing window. */
|
|
272
|
+
_pushMessage(msg) {
|
|
273
|
+
this._history.push(msg)
|
|
274
|
+
this._activeMessages.push(msg)
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* Build the context actually handed to hooks.formatRequest.
|
|
279
|
+
* Auto-prepends the latest summary as a system message unless
|
|
280
|
+
* config.injectSummary is false — this is what makes compaction
|
|
281
|
+
* actually save tokens on subsequent turns without consumers having
|
|
282
|
+
* to wire the summary in themselves.
|
|
283
|
+
*/
|
|
284
|
+
_buildRequestContext() {
|
|
285
|
+
const shouldInject = this._config.injectSummary && this._summary
|
|
286
|
+
const messages = shouldInject
|
|
287
|
+
? [{ role: 'system', content: `Conversation summary (earlier turns):\n${this._summary}` }, ...this._activeMessages]
|
|
288
|
+
: [...this._activeMessages]
|
|
289
|
+
|
|
290
|
+
return { messages, summary: this._summary, tools: this.getTools() }
|
|
291
|
+
}
|
|
292
|
+
|
|
239
293
|
/** POST overflow messages to the LLM with a summarise prompt */
|
|
240
294
|
async _summarise(messages) {
|
|
241
295
|
const summaryMessages = [
|
|
@@ -272,13 +326,26 @@ export class AIContextManager {
|
|
|
272
326
|
return res.json()
|
|
273
327
|
}
|
|
274
328
|
|
|
275
|
-
/** Approximate context size in characters */
|
|
329
|
+
/** Approximate context size in characters, measured on the active LLM-facing window */
|
|
276
330
|
_charCount() {
|
|
277
|
-
return this.
|
|
331
|
+
return this._activeMessages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
|
|
278
332
|
}
|
|
279
333
|
|
|
280
|
-
/** Hard truncation — drop oldest messages down to compactKeepLast */
|
|
334
|
+
/** Hard truncation — drop the oldest active messages, keeping roughly the last compactKeepLast; dropped messages are not summarised */
|
|
281
335
|
_truncate() {
|
|
282
|
-
|
|
336
|
+
const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
|
|
337
|
+
this._activeMessages = this._activeMessages.slice(cut)
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
/**
|
|
341
|
+
* Find the slice index that keeps roughly the last `keep` messages without
|
|
342
|
+
* splitting a tool-call/tool-result pair — landing inside one would leave
|
|
343
|
+
* the kept window starting with an orphaned 'tool' message no provider accepts.
|
|
344
|
+
* Walks the boundary back to the start of the call/result group instead.
|
|
345
|
+
*/
|
|
346
|
+
_compactionBoundary(messages, keep) {
|
|
347
|
+
let cut = Math.max(messages.length - keep, 0)
|
|
348
|
+
while (cut > 0 && messages[cut].role === 'tool') cut--
|
|
349
|
+
return cut
|
|
283
350
|
}
|
|
284
351
|
}
|