npm - @benkhz/context-manager - Versions diffs - 1.0.0 → 2.0.1 - Mend

@benkhz/context-manager 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md +30 -8
package/package.json +1 -1
package/src/AIContextManager.js +100 -33

package/README.md CHANGED Viewed

@@ -56,6 +56,7 @@ new AIContextManager(config)
 | `maxTokens` | `number` | — | Passed through to `formatRequest` |
 | `contextLimit` | `number` | `80_000` | Char count that triggers auto-compaction |
 | `compactKeepLast` | `number` | `6` | Messages preserved verbatim after compaction |
+| `injectSummary` | `boolean` | `true` | Auto-prepend the latest summary as a `system` message on every LLM request. Set `false` to place it yourself via `context.summary` in `formatRequest`. |
 | `headers` | `object` | `{}` | Extra HTTP headers on every `fetch` call |
 ---
@@ -180,13 +181,19 @@ unsub()
 ## Introspection
 ```js
-mgr.getMessages()   // → Message[] (shallow copy)
-mgr.getSummary()    // → string | null
-mgr.getTools()      // → [{ name, schema }]
-mgr.getContext()    // → { messages, summary, tools }
-mgr.reset()         // clear messages, summary, state — returns this
+mgr.getMessages()        // → Message[] — full, never-pruned turn history
+mgr.getActiveMessages()  // → Message[] — current LLM-facing window (post-compaction)
+mgr.getSummary()         // → string | null — latest summary
+mgr.getSummaries()       // → string[] — every summary ever produced, oldest first
+mgr.getTools()           // → [{ name, schema }]
+mgr.getContext()         // → { messages, activeMessages, summary, summaries, tools }
+mgr.reset()              // clear messages, summaries, reactive state, and subscribers — returns this
 ```
+`getMessages()` always returns every turn ever sent or received, even after compaction has shrunk the
+LLM-facing window — useful for rendering a full conversation transcript in a UI. `getActiveMessages()`
+returns what's actually being sent to the model right now.
 ---
 ## Presets
@@ -212,15 +219,30 @@ const mgr = new AIContextManager({
 ## Context compaction
-When the total character count of `_messages` exceeds `contextLimit`:
+The manager tracks two parallel message lists: the full **history** (everything ever sent or
+received, exposed via `getMessages()`) and the **active window** (`getActiveMessages()`) — the
+slice actually sent to the LLM, which compaction and truncation shrink. History is never pruned.
+The `contextLimit` check runs at the start of every `send()` call, and also between tool-call iterations
+*within* a single turn — a request that triggers several tool calls in a row can grow the active
+window past `contextLimit` well before the turn finishes, so compaction can kick in mid-turn
+rather than waiting for the next `send()`.
+When the character count of the active window exceeds `contextLimit`:
 1. `onContextLimit` hook is called — returns `'compact'` (default), `'truncate'`, or `'error'`
 2. If `compact`: the overflow messages are sent to the LLM with a summarise prompt
-3. The summary is stored in `_summary`; the last `compactKeepLast` messages are kept verbatim
-4. `formatRequest` receives `context.summary` and can inline it however the API prefers
+3. The summary is stored (and appended to the summary history); the last `compactKeepLast`
+   active messages are kept verbatim
+4. On the next request, the latest summary is auto-prepended as a `system` message ahead of the
+   active window — unless `injectSummary: false`, in which case you place it yourself via
+   `context.summary` in `formatRequest`
 The `onCompact` hook can return a string to bypass the LLM call entirely.
+Both compaction and truncation snap their cut point to avoid splitting a tool-call/tool-result
+pair across the boundary — the kept window never starts with an orphaned `tool` message.
 ---
 ## Open decisions

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@benkhz/context-manager",
-  "version": "1.0.0",
+  "version": "2.0.1",
   "description": "Provider-agnostic LLM context manager with tool execution, auto-compaction, reactive state, and an event bus.",
   "license": "MIT",
   "type": "module",

package/src/AIContextManager.js CHANGED Viewed

@@ -12,6 +12,9 @@ export class AIContextManager {
    * @param {number}   [config.maxTokens]     — forwarded to formatRequest
    * @param {number}   [config.contextLimit]  — char count before auto-compact (default 80 000)
    * @param {number}   [config.compactKeepLast] — messages to keep verbatim after compact (default 6)
+   * @param {boolean}  [config.injectSummary] — auto-prepend the latest summary as a system
+   *                                            message on every LLM request (default true).
+   *                                            Set false to manage summary placement yourself.
    * @param {object}   [config.headers]       — extra HTTP headers on every fetch
    * @param {object}   config.hooks           — see README for full hook reference
    */
@@ -22,6 +25,7 @@ export class AIContextManager {
       maxTokens,
       contextLimit     = DEFAULT_CONTEXT_LIMIT,
       compactKeepLast  = DEFAULT_COMPACT_KEEP_LAST,
+      injectSummary    = true,
       headers          = {},
       hooks            = {},
     } = config
@@ -30,11 +34,16 @@ export class AIContextManager {
     if (!hooks.formatRequest) throw new Error('AIContextManager: hooks.formatRequest is required')
     if (!hooks.parseResponse) throw new Error('AIContextManager: hooks.parseResponse is required')
-    this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, headers }
+    this._config = { endpoint, model, maxTokens, contextLimit, compactKeepLast, injectSummary, headers }
     this._hooks  = hooks
-    this._messages    = []
-    this._summary     = null
+    // Full, never-pruned turn log — what getMessages()/getHistory() expose to consumers.
+    this._history = []
+    // LLM-facing window — subject to compact()/truncate(), this is what's actually sent.
+    this._activeMessages = []
+    // Latest summary (string) and the full chronological list of summaries ever produced.
+    this._summary   = null
+    this._summaries = []
     this._tools       = new Map()    // name → { schema, handler }
     this._emitter     = new EventEmitter()
     this._state       = new Map()    // reactive state
@@ -54,27 +63,21 @@ export class AIContextManager {
    */
   async send(content, opts = {}) {
     const userMsg = { role: 'user', content }
-    this._messages.push(userMsg)
+    this._pushMessage(userMsg)
     this._emitter.emit('message:sent', { message: userMsg })
-    // beforeSend hook — can return a transformed messages array (sync or async)
+    // beforeSend hook — can return a transformed active-window array (sync or async)
     if (this._hooks.beforeSend) {
-      const next = await this._hooks.beforeSend([...this._messages])
-      if (Array.isArray(next)) this._messages = next
+      const next = await this._hooks.beforeSend([...this._activeMessages])
+      if (Array.isArray(next)) this._activeMessages = next
     }
     // Auto-compact when approaching the context limit
-    const chars = this._charCount()
-    if (chars > this._config.contextLimit) {
-      const policy = this._hooks.onContextLimit?.(chars, this._config.contextLimit) ?? 'compact'
-      if      (policy === 'compact')   await this.compact()
-      else if (policy === 'truncate')  this._truncate()
-      else throw new Error(`AIContextManager: context limit exceeded (${chars} chars)`)
-    }
+    await this._enforceContextLimit()
     try {
       const assistantMsg = await this._runLoop(opts.system)
-      this._messages.push(assistantMsg)
+      this._pushMessage(assistantMsg)
       this._emitter.emit('message:received', { message: assistantMsg })
       return assistantMsg
     } catch (err) {
@@ -90,8 +93,8 @@ export class AIContextManager {
    * or can be called manually at any time.
    */
   async compact() {
-    const keep     = this._config.compactKeepLast
-    const overflow = this._messages.slice(0, -keep)
+    const cut      = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
+    const overflow = this._activeMessages.slice(0, cut)
     if (!overflow.length) return
     this._emitter.emit('context:compact', { messageCount: overflow.length })
@@ -101,7 +104,8 @@ export class AIContextManager {
     if (!summary) summary = await this._summarise(overflow)
     this._summary  = summary
-    this._messages = this._messages.slice(-keep)
+    this._summaries.push(summary)
+    this._activeMessages = this._activeMessages.slice(cut)
     this._emitter.emit('context:compacted', { summary })
   }
@@ -157,21 +161,34 @@ export class AIContextManager {
   // ── Introspection ─────────────────────────────────────────────────────────────
-  getMessages() { return [...this._messages] }
-  getSummary()  { return this._summary }
-  getTools()    { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
+  /** Full, never-pruned turn history — every message ever sent or received. */
+  getMessages()        { return [...this._history] }
+  /** The current LLM-facing window (post-compaction/truncation). */
+  getActiveMessages()  { return [...this._activeMessages] }
+  getSummary()         { return this._summary }
+  /** Every summary ever produced, oldest first. */
+  getSummaries()       { return [...this._summaries] }
+  getTools()           { return [...this._tools.entries()].map(([name, { schema }]) => ({ name, schema })) }
   /** Full snapshot of context — useful for debugging or serialisation */
   getContext() {
-    return { messages: this.getMessages(), summary: this._summary, tools: this.getTools() }
+    return {
+      messages:       this.getMessages(),
+      activeMessages: this.getActiveMessages(),
+      summary:        this._summary,
+      summaries:      this.getSummaries(),
+      tools:          this.getTools(),
+    }
   }
   /** Reset all conversation state. Does not touch config or registered tools. */
   reset() {
-    this._messages    = []
-    this._summary     = null
-    this._state       = new Map()
-    this._subscribers = new Map()
+    this._history        = []
+    this._activeMessages = []
+    this._summary        = null
+    this._summaries      = []
+    this._state          = new Map()
+    this._subscribers    = new Map()
     return this
   }
@@ -186,7 +203,7 @@ export class AIContextManager {
       throw new Error(`AIContextManager: tool loop exceeded ${MAX_TOOL_ITERATIONS} iterations`)
     }
-    const context = this.getContext()
+    const context = this._buildRequestContext()
     const body    = this._hooks.formatRequest(context, { ...this._config, system })
     const raw     = await this._fetch(body)
     let   parsed  = this._hooks.parseResponse(raw)
@@ -206,7 +223,7 @@ export class AIContextManager {
       content:   parsed.content ?? '',
       toolCalls: parsed.toolCalls,
     }
-    this._messages.push(assistantMsg)
+    this._pushMessage(assistantMsg)
     for (const tc of parsed.toolCalls) {
       const toolDef = this._tools.get(tc.name)
@@ -230,12 +247,49 @@ export class AIContextManager {
       if (transformed !== undefined) result = transformed
       this._emitter.emit('tool:result', { name: tc.name, result })
-      this._messages.push({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
+      this._pushMessage({ role: 'tool', content: JSON.stringify(result), toolCallId: tc.id })
     }
+    // Re-check the context limit between tool-call iterations, not just at the
+    // top of send() — a single turn can run several iterations and blow past
+    // the limit long before the next send() call ever re-evaluates it.
+    await this._enforceContextLimit()
     return this._runLoop(system, depth + 1)
   }
+  /** Check contextLimit and apply the configured policy (compact/truncate/error). */
+  async _enforceContextLimit() {
+    const chars = this._charCount()
+    if (chars <= this._config.contextLimit) return
+    const policy = this._hooks.onContextLimit?.(chars, this._config.contextLimit) ?? 'compact'
+    if      (policy === 'compact')   await this.compact()
+    else if (policy === 'truncate')  this._truncate()
+    else throw new Error(`AIContextManager: context limit exceeded (${chars} chars)`)
+  }
+  /** Append a message to both the full history and the active LLM-facing window. */
+  _pushMessage(msg) {
+    this._history.push(msg)
+    this._activeMessages.push(msg)
+  }
+  /**
+   * Build the context actually handed to hooks.formatRequest.
+   * Auto-prepends the latest summary as a system message unless
+   * config.injectSummary is false — this is what makes compaction
+   * actually save tokens on subsequent turns without consumers having
+   * to wire the summary in themselves.
+   */
+  _buildRequestContext() {
+    const shouldInject = this._config.injectSummary && this._summary
+    const messages = shouldInject
+      ? [{ role: 'system', content: `Conversation summary (earlier turns):\n${this._summary}` }, ...this._activeMessages]
+      : [...this._activeMessages]
+    return { messages, summary: this._summary, tools: this.getTools() }
+  }
   /** POST overflow messages to the LLM with a summarise prompt */
   async _summarise(messages) {
     const summaryMessages = [
@@ -272,13 +326,26 @@ export class AIContextManager {
     return res.json()
   }
-  /** Approximate context size in characters */
+  /** Approximate context size in characters, measured on the active LLM-facing window */
   _charCount() {
-    return this._messages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
+    return this._activeMessages.reduce((n, m) => n + (m.content?.length ?? 0), 0)
   }
-  /** Hard truncation — drop oldest messages down to compactKeepLast */
+  /** Hard truncation — drop oldest active messages down to compactKeepLast (no summary kept) */
   _truncate() {
-    this._messages = this._messages.slice(-this._config.compactKeepLast)
+    const cut = this._compactionBoundary(this._activeMessages, this._config.compactKeepLast)
+    this._activeMessages = this._activeMessages.slice(cut)
+  }
+  /**
+   * Find the slice index that keeps roughly the last `keep` messages without
+   * splitting a tool-call/tool-result pair — landing inside one would leave
+   * the kept window starting with an orphaned 'tool' message no provider accepts.
+   * Walks the boundary back to the start of the call/result group instead.
+   */
+  _compactionBoundary(messages, keep) {
+    let cut = Math.max(messages.length - keep, 0)
+    while (cut > 0 && messages[cut].role === 'tool') cut--
+    return cut
   }
 }