@benkhz/context-manager 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -8
- package/package.json +1 -1
- package/src/AIContextManager.js +84 -26
package/README.md
CHANGED
|
@@ -56,6 +56,7 @@ new AIContextManager(config)
|
|
|
56
56
|
| `maxTokens` | `number` | — | Passed through to `formatRequest` |
|
|
57
57
|
| `contextLimit` | `number` | `80_000` | Char count that triggers auto-compaction |
|
|
58
58
|
| `compactKeepLast` | `number` | `6` | Messages preserved verbatim after compaction |
|
|
59
|
+
| `injectSummary` | `boolean` | `true` | Auto-prepend the latest summary as a `system` message on every LLM request. Set `false` to place it yourself via `context.summary` in `formatRequest`. |
|
|
59
60
|
| `headers` | `object` | `{}` | Extra HTTP headers on every `fetch` call |
|
|
60
61
|
|
|
61
62
|
---
|
|
@@ -180,13 +181,19 @@ unsub()
|
|
|
180
181
|
## Introspection
|
|
181
182
|
|
|
182
183
|
```js
|
|
183
|
-
mgr.getMessages()
|
|
184
|
-
mgr.
|
|
185
|
-
mgr.
|
|
186
|
-
mgr.
|
|
187
|
-
mgr.
|
|
184
|
+
mgr.getMessages() // → Message[] — full, never-pruned turn history
|
|
185
|
+
mgr.getActiveMessages() // → Message[] — current LLM-facing window (post-compaction)
|
|
186
|
+
mgr.getSummary() // → string | null — latest summary
|
|
187
|
+
mgr.getSummaries() // → string[] — every summary ever produced, oldest first
|
|
188
|
+
mgr.getTools() // → [{ name, schema }]
|
|
189
|
+
mgr.getContext() // → { messages, activeMessages, summary, summaries, tools }
|
|
190
|
+
mgr.reset() // clear all message/summary state plus reactive state and subscribers — returns this
|
|
188
191
|
```
|
|
189
192
|
|
|
193
|
+
`getMessages()` always returns every turn ever sent or received, even after compaction has shrunk the
|
|
194
|
+
LLM-facing window — useful for rendering a full conversation transcript in a UI. `getActiveMessages()`
|
|
195
|
+
returns what's actually being sent to the model right now.
|
|
196
|
+
|
|
190
197
|
---
|
|
191
198
|
|
|
192
199
|
## Presets
|
|
@@ -212,15 +219,25 @@ const mgr = new AIContextManager({
|
|
|
212
219
|
|
|
213
220
|
## Context compaction
|
|
214
221
|
|
|
215
|
-
|
|
222
|
+
The manager tracks two parallel message lists: the full **history** (everything ever sent or
|
|
223
|
+
received, exposed via `getMessages()`) and the **active window** (`getActiveMessages()`) — the
|
|
224
|
+
slice actually sent to the LLM, which compaction and truncation shrink. History is never pruned.
|
|
225
|
+
|
|
226
|
+
When the character count of the active window exceeds `contextLimit`:
|
|
216
227
|
|
|
217
228
|
1. `onContextLimit` hook is called — returns `'compact'` (default), `'truncate'`, or `'error'`
|
|
218
229
|
2. If `compact`: the overflow messages are sent to the LLM with a summarise prompt
|
|
219
|
-
3. The summary is stored
|
|
220
|
-
|
|
230
|
+
3. The summary is stored (and appended to the summary history); the last `compactKeepLast`
|
|
231
|
+
active messages are kept verbatim
|
|
232
|
+
4. On every subsequent request, the latest summary is auto-prepended as a `system` message ahead of the
|
|
233
|
+
active window — unless `injectSummary: false`, in which case you place it yourself via
|
|
234
|
+
`context.summary` in `formatRequest`
|
|
221
235
|
|
|
222
236
|
The `onCompact` hook can return a string to bypass the LLM call entirely.
|
|
223
237
|
|
|
238
|
+
Both compaction and truncation snap their cut point to avoid splitting a tool-call/tool-result
|
|
239
|
+
pair across the boundary — the kept window never starts with an orphaned `tool` message.
|
|
240
|
+
|
|
224
241
|
---
|
|
225
242
|
|
|
226
243
|
## Open decisions
|
package/package.json
CHANGED
package/src/AIContextManager.js
CHANGED
|
@@ -12,6 +12,9 @@ export class AIContextManager {
|
|
|
12
12
|
* @param {number} [config.maxTokens] — forwarded to formatRequest
|
|
13
13
|
* @param {number} [config.contextLimit] — char count before auto-compact (default 80 000)
|
|
14
14
|
* @param {number} [config.compactKeepLast] — messages to keep verbatim after compact (default 6)
|
|
15
|
+
* @param {boolean} [config.injectSummary] — auto-prepend the latest summary as a system
|
|
16
|
+
* message on every LLM request (default true).
|
|
17
|
+
* Set false to manage summary placement yourself.
|
|
15
18
|
* @param {object} [config.headers] — extra HTTP headers on every fetch
|
|
16
19
|
* @param {object} config.hooks — see README for full hook reference
|
|
17
20
|
*/
|
|
@@ -22,6 +25,7 @@ export class AIContextManager {
|
|
|
22
25
|
maxTokens,
|
|
23
26
|
contextLimit = DEFAULT_CONTEXT_LIMIT,
|
|
24
27
|
compactKeepLast = DEFAULT_COMPACT_KEEP_LAST,
|
|
28
|
+
injectSummary = true,
|
|
25
29
|
headers = {},
|
|
26
30
|
hooks = {},
|
|
27
31
|
} = config
|
|
@@ -30,11 +34,16 @@ export class AIContextManager {
|
|
|
30
34
|
if (!hooks.formatRequest) throw new Error('AIContextManager: hooks.formatRequest is required')
|
|
31
35
|
if (!hooks.parseResponse) throw new Error('AIContextManager: hooks.parseResponse is required')
|
|
32
36
|
|
|
33
|
-
this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, headers }
|
|
37
|
+
this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, injectSummary, headers }
|
|
34
38
|
this._hooks = hooks
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
this.
|
|
40
|
+
// Full, never-pruned turn log — what getMessages() exposes to consumers.
|
|
41
|
+
this._history = []
|
|
42
|
+
// LLM-facing window — subject to compact()/truncate(), this is what's actually sent.
|
|
43
|
+
this._activeMessages = []
|
|
44
|
+
// Latest summary (string) and the full chronological list of summaries ever produced.
|
|
45
|
+
this._summary = null
|
|
46
|
+
this._summaries = []
|
|
38
47
|
this._tools = new Map() // name → { schema, handler }
|
|
39
48
|
this._emitter = new EventEmitter()
|
|
40
49
|
this._state = new Map() // reactive state
|
|
@@ -54,13 +63,13 @@ export class AIContextManager {
|
|
|
54
63
|
*/
|
|
55
64
|
async send(content, opts = {}) {
|
|
56
65
|
const userMsg = { role: 'user', content }
|
|
57
|
-
this.
|
|
66
|
+
this._pushMessage(userMsg)
|
|
58
67
|
this._emitter.emit('message:sent', { message: userMsg })
|
|
59
68
|
|
|
60
|
-
// beforeSend hook — can return a transformed
|
|
69
|
+
// beforeSend hook — can return a transformed active-window array (sync or async)
|
|
61
70
|
if (this._hooks.beforeSend) {
|
|
62
|
-
const next = await this._hooks.beforeSend([...this.
|
|
63
|
-
if (Array.isArray(next)) this.
|
|
71
|
+
const next = await this._hooks.beforeSend([...this._activeMessages])
|
|
72
|
+
if (Array.isArray(next)) this._activeMessages = next
|
|
64
73
|
}
|
|
65
74
|
|
|
66
75
|
// Auto-compact when approaching the context limit
|
|
@@ -74,7 +83,7 @@ export class AIContextManager {
|
|
|
74
83
|
|
|
75
84
|
try {
|
|
76
85
|
const assistantMsg = await this._runLoop(opts.system)
|
|
77
|
-
this.
|
|
86
|
+
this._pushMessage(assistantMsg)
|
|
78
87
|
this._emitter.emit('message:received', { message: assistantMsg })
|
|
79
88
|
return assistantMsg
|
|
80
89
|
} catch (err) {
|
|
@@ -90,8 +99,8 @@ export class AIContextManager {
|
|
|
90
99
|
* or can be called manually at any time.
|
|
91
100
|
*/
|
|
92
101
|
async compact() {
|
|
93
|
-
const
|
|
94
|
-
const overflow = this.
|
|
102
|
+
const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
|
|
103
|
+
const overflow = this._activeMessages.slice(0, cut)
|
|
95
104
|
if (!overflow.length) return
|
|
96
105
|
|
|
97
106
|
this._emitter.emit('context:compact', { messageCount: overflow.length })
|
|
@@ -101,7 +110,8 @@ export class AIContextManager {
|
|
|
101
110
|
if (!summary) summary = await this._summarise(overflow)
|
|
102
111
|
|
|
103
112
|
this._summary = summary
|
|
104
|
-
this.
|
|
113
|
+
this._summaries.push(summary)
|
|
114
|
+
this._activeMessages = this._activeMessages.slice(cut)
|
|
105
115
|
this._emitter.emit('context:compacted', { summary })
|
|
106
116
|
}
|
|
107
117
|
|
|
@@ -157,21 +167,34 @@ export class AIContextManager {
|
|
|
157
167
|
|
|
158
168
|
// ── Introspection ─────────────────────────────────────────────────────────────
|
|
159
169
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
170
|
+
/** Full, never-pruned turn history — every message ever sent or received. */
|
|
171
|
+
getMessages() { return [...this._history] }
|
|
172
|
+
/** The current LLM-facing window (post-compaction/truncation). */
|
|
173
|
+
getActiveMessages() { return [...this._activeMessages] }
|
|
174
|
+
getSummary() { return this._summary }
|
|
175
|
+
/** Every summary ever produced, oldest first. */
|
|
176
|
+
getSummaries() { return [...this._summaries] }
|
|
177
|
+
getTools() { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
|
|
163
178
|
|
|
164
179
|
/** Full snapshot of context — useful for debugging or serialisation */
|
|
165
180
|
getContext() {
|
|
166
|
-
return {
|
|
181
|
+
return {
|
|
182
|
+
messages: this.getMessages(),
|
|
183
|
+
activeMessages: this.getActiveMessages(),
|
|
184
|
+
summary: this._summary,
|
|
185
|
+
summaries: this.getSummaries(),
|
|
186
|
+
tools: this.getTools(),
|
|
187
|
+
}
|
|
167
188
|
}
|
|
168
189
|
|
|
169
190
|
/** Reset all conversation state (history, active window, summaries) plus the reactive state and subscriber maps. Does not touch config or registered tools. */
|
|
170
191
|
reset() {
|
|
171
|
-
this.
|
|
172
|
-
this.
|
|
173
|
-
this.
|
|
174
|
-
this.
|
|
192
|
+
this._history = []
|
|
193
|
+
this._activeMessages = []
|
|
194
|
+
this._summary = null
|
|
195
|
+
this._summaries = []
|
|
196
|
+
this._state = new Map()
|
|
197
|
+
this._subscribers = new Map()
|
|
175
198
|
return this
|
|
176
199
|
}
|
|
177
200
|
|
|
@@ -186,7 +209,7 @@ export class AIContextManager {
|
|
|
186
209
|
throw new Error(`AIContextManager: tool loop exceeded ${MAX_TOOL_ITERATIONS} iterations`)
|
|
187
210
|
}
|
|
188
211
|
|
|
189
|
-
const context = this.
|
|
212
|
+
const context = this._buildRequestContext()
|
|
190
213
|
const body = this._hooks.formatRequest(context, { ...this._config, system })
|
|
191
214
|
const raw = await this._fetch(body)
|
|
192
215
|
let parsed = this._hooks.parseResponse(raw)
|
|
@@ -206,7 +229,7 @@ export class AIContextManager {
|
|
|
206
229
|
content: parsed.content ?? '',
|
|
207
230
|
toolCalls: parsed.toolCalls,
|
|
208
231
|
}
|
|
209
|
-
this.
|
|
232
|
+
this._pushMessage(assistantMsg)
|
|
210
233
|
|
|
211
234
|
for (const tc of parsed.toolCalls) {
|
|
212
235
|
const toolDef = this._tools.get(tc.name)
|
|
@@ -230,12 +253,34 @@ export class AIContextManager {
|
|
|
230
253
|
if (transformed !== undefined) result = transformed
|
|
231
254
|
|
|
232
255
|
this._emitter.emit('tool:result', { name: tc.name, result })
|
|
233
|
-
this.
|
|
256
|
+
this._pushMessage({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
|
|
234
257
|
}
|
|
235
258
|
|
|
236
259
|
return this._runLoop(system, depth + 1)
|
|
237
260
|
}
|
|
238
261
|
|
|
262
|
+
/** Append a message to both the full history and the active LLM-facing window. */
|
|
263
|
+
_pushMessage(msg) {
|
|
264
|
+
this._history.push(msg)
|
|
265
|
+
this._activeMessages.push(msg)
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Build the context actually handed to hooks.formatRequest.
|
|
270
|
+
* Auto-prepends the latest summary as a system message unless
|
|
271
|
+
* config.injectSummary is false — this is what makes compaction
|
|
272
|
+
* actually save tokens on subsequent turns without consumers having
|
|
273
|
+
* to wire the summary in themselves.
|
|
274
|
+
*/
|
|
275
|
+
_buildRequestContext() {
|
|
276
|
+
const shouldInject = this._config.injectSummary && this._summary
|
|
277
|
+
const messages = shouldInject
|
|
278
|
+
? [{ role: 'system', content: `Conversation summary (earlier turns):\n${this._summary}` }, ...this._activeMessages]
|
|
279
|
+
: [...this._activeMessages]
|
|
280
|
+
|
|
281
|
+
return { messages, summary: this._summary, tools: this.getTools() }
|
|
282
|
+
}
|
|
283
|
+
|
|
239
284
|
/** POST overflow messages to the LLM with a summarise prompt */
|
|
240
285
|
async _summarise(messages) {
|
|
241
286
|
const summaryMessages = [
|
|
@@ -272,13 +317,26 @@ export class AIContextManager {
|
|
|
272
317
|
return res.json()
|
|
273
318
|
}
|
|
274
319
|
|
|
275
|
-
/** Approximate context size in characters */
|
|
320
|
+
/** Approximate context size in characters, measured on the active LLM-facing window */
|
|
276
321
|
_charCount() {
|
|
277
|
-
return this.
|
|
322
|
+
return this._activeMessages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
|
|
278
323
|
}
|
|
279
324
|
|
|
280
|
-
/** Hard truncation — drop oldest messages down to compactKeepLast */
|
|
325
|
+
/** Hard truncation — drop the oldest active messages, keeping roughly the last compactKeepLast (no summary is generated for the dropped messages) */
|
|
281
326
|
_truncate() {
|
|
282
|
-
|
|
327
|
+
const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
|
|
328
|
+
this._activeMessages = this._activeMessages.slice(cut)
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* Find the slice index that keeps roughly the last `keep` messages without
|
|
333
|
+
* splitting a tool-call/tool-result pair — landing inside one would leave
|
|
334
|
+
* the kept window starting with an orphaned 'tool' message no provider accepts.
|
|
335
|
+
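* Walks the boundary back to the start of the call/result group instead. Assumes `keep` ≥ 1: with `keep` ≤ 0 the initial index is at or past `messages.length`, so the `role` lookup reads past the end of a non-empty array and throws.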
|
|
336
|
+
*/
|
|
337
|
+
_compactionBoundary(messages, keep) {
|
|
338
|
+
let cut = Math.max(messages.length - keep, 0)
|
|
339
|
+
while (cut > 0 && messages[cut].role === 'tool') cut--
|
|
340
|
+
return cut
|
|
283
341
|
}
|
|
284
342
|
}
|