@context-chef/ai-sdk-middleware 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -0
- package/dist/index.cjs +371 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +92 -0
- package/dist/index.d.mts +92 -0
- package/dist/index.mjs +367 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +60 -0
package/README.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# @context-chef/ai-sdk-middleware
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@context-chef/ai-sdk-middleware)
|
|
4
|
+
|
|
5
|
+
[Vercel AI SDK](https://sdk.vercel.ai) middleware powered by [context-chef](https://github.com/MyPrototypeWhat/context-chef). Transparent history compression, tool result truncation, and token budget management — zero code changes required.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @context-chef/ai-sdk-middleware @context-chef/core ai
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { withContextChef } from '@context-chef/ai-sdk-middleware';
|
|
17
|
+
import { openai } from '@ai-sdk/openai';
|
|
18
|
+
import { generateText } from 'ai';
|
|
19
|
+
|
|
20
|
+
const model = withContextChef(openai('gpt-4o'), {
|
|
21
|
+
contextWindow: 128_000,
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
// Use exactly like normal — everything below is unchanged
|
|
25
|
+
const result = await generateText({
|
|
26
|
+
model,
|
|
27
|
+
messages: conversationHistory,
|
|
28
|
+
tools: myTools,
|
|
29
|
+
});
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
That's it. History compression and token budget tracking happen automatically behind the scenes.
|
|
33
|
+
|
|
34
|
+
## Features
|
|
35
|
+
|
|
36
|
+
### History Compression
|
|
37
|
+
|
|
38
|
+
When the conversation exceeds the token budget, the middleware compresses older messages to make room. Two modes:
|
|
39
|
+
|
|
40
|
+
**Without a compression model** (default) — old messages are discarded and only recent messages are kept:
|
|
41
|
+
|
|
42
|
+
```typescript
|
|
43
|
+
const model = withContextChef(openai('gpt-4o'), {
|
|
44
|
+
contextWindow: 128_000,
|
|
45
|
+
});
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**With a compression model** — old messages are summarized by a cheap model before being replaced:
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
const model = withContextChef(openai('gpt-4o'), {
|
|
52
|
+
contextWindow: 128_000,
|
|
53
|
+
compress: {
|
|
54
|
+
model: openai('gpt-4o-mini'), // cheap model for summarization
|
|
55
|
+
preserveRatio: 0.8, // keep 80% of context for recent messages
|
|
56
|
+
},
|
|
57
|
+
});
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Tool Result Truncation
|
|
61
|
+
|
|
62
|
+
Large tool outputs (terminal logs, API responses) are automatically truncated while preserving the head and tail:
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
const model = withContextChef(openai('gpt-4o'), {
|
|
66
|
+
contextWindow: 128_000,
|
|
67
|
+
truncate: {
|
|
68
|
+
threshold: 5000, // truncate tool results over 5000 chars
|
|
69
|
+
headChars: 500, // preserve first 500 chars
|
|
70
|
+
tailChars: 1000, // preserve last 1000 chars
|
|
71
|
+
},
|
|
72
|
+
});
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Token Budget Tracking
|
|
76
|
+
|
|
77
|
+
The middleware automatically extracts token usage from `generateText` and `streamText` responses and feeds it back to the compression engine. No manual `reportTokenUsage()` calls needed.
|
|
78
|
+
|
|
79
|
+
## API
|
|
80
|
+
|
|
81
|
+
### `withContextChef(model, options)`
|
|
82
|
+
|
|
83
|
+
Wraps an AI SDK language model with context-chef middleware.
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
import { withContextChef } from '@context-chef/ai-sdk-middleware';
|
|
87
|
+
|
|
88
|
+
const wrappedModel = withContextChef(model, options);
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Parameters:**
|
|
92
|
+
|
|
93
|
+
| Option | Type | Required | Description |
|
|
94
|
+
|---|---|---|---|
|
|
95
|
+
| `contextWindow` | `number` | Yes | Model's context window size in tokens |
|
|
96
|
+
| `compress` | `CompressOptions` | No | Enable LLM-based compression |
|
|
97
|
+
| `compress.model` | `LanguageModelV3` | Yes (if compress) | Cheap model for summarization |
|
|
98
|
+
| `compress.preserveRatio` | `number` | No | Ratio of context to preserve (default: `0.8`) |
|
|
99
|
+
| `truncate` | `TruncateOptions` | No | Enable tool result truncation |
|
|
100
|
+
| `truncate.threshold` | `number` | Yes (if truncate) | Character count to trigger truncation |
|
|
101
|
+
| `truncate.headChars` | `number` | No | Characters to preserve from start (default: `0`) |
|
|
102
|
+
| `truncate.tailChars` | `number` | No | Characters to preserve from end (default: `1000`) |
|
|
103
|
+
| `tokenizer` | `(msgs) => number` | No | Custom tokenizer for precise counting |
|
|
104
|
+
| `onCompress` | `(summary, count) => void` | No | Hook called after compression |
|
|
105
|
+
|
|
106
|
+
**Returns:** `LanguageModelV3` — a wrapped model that can be used anywhere the original model was used.
|
|
107
|
+
|
|
108
|
+
### `createMiddleware(options)`
|
|
109
|
+
|
|
110
|
+
Creates a raw `LanguageModelMiddleware` if you want to apply it yourself via `wrapLanguageModel`:
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
import { createMiddleware } from '@context-chef/ai-sdk-middleware';
|
|
114
|
+
import { wrapLanguageModel } from 'ai';
|
|
115
|
+
|
|
116
|
+
const middleware = createMiddleware({ contextWindow: 128_000 });
|
|
117
|
+
const model = wrapLanguageModel({ model: openai('gpt-4o'), middleware });
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### `fromAISDK(prompt)` / `toAISDK(messages)`
|
|
121
|
+
|
|
122
|
+
Low-level converters between AI SDK `LanguageModelV3Prompt` and context-chef `Message[]` IR. Useful if you want to use context-chef modules directly with AI SDK message formats.
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
import { fromAISDK, toAISDK } from '@context-chef/ai-sdk-middleware';
|
|
126
|
+
|
|
127
|
+
const irMessages = fromAISDK(aiSdkPrompt);
|
|
128
|
+
// ... process with context-chef modules ...
|
|
129
|
+
const processedPrompt = toAISDK(irMessages);
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## How It Works
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
generateText({ model: wrappedModel, messages })
|
|
136
|
+
|
|
|
137
|
+
v
|
|
138
|
+
transformParams (before LLM call)
|
|
139
|
+
1. Truncate large tool results (if configured)
|
|
140
|
+
2. Convert AI SDK messages -> context-chef IR
|
|
141
|
+
3. Run Janitor compression (if over token budget)
|
|
142
|
+
4. Convert back to AI SDK messages
|
|
143
|
+
|
|
|
144
|
+
v
|
|
145
|
+
LLM call executes normally
|
|
146
|
+
|
|
|
147
|
+
v
|
|
148
|
+
wrapGenerate / wrapStream (after LLM call)
|
|
149
|
+
5. Extract token usage from response
|
|
150
|
+
6. Feed back to Janitor for next call's budget check
|
|
151
|
+
|
|
|
152
|
+
v
|
|
153
|
+
Result returned unchanged
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The middleware is **stateful** — it tracks token usage across calls to know when compression is needed. Create one wrapped model per conversation/session.
|
|
157
|
+
|
|
158
|
+
## Need More Control?
|
|
159
|
+
|
|
160
|
+
The middleware covers the most common use case: transparent compression and truncation. For advanced features like dynamic state injection, tool namespaces, memory, or snapshot/restore, use [`@context-chef/core`](../core) directly.
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
ISC
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
let ai = require("ai");
|
|
3
|
+
let _context_chef_core = require("@context-chef/core");
|
|
4
|
+
|
|
5
|
+
//#region src/adapter.ts
|
|
6
|
+
/**
 * Converts an AI SDK V3 prompt to context-chef IR messages.
 *
 * Bookkeeping fields attached to the IR messages:
 * - `_originalContent`: the original AI SDK content parts, kept for lossless round-trip.
 * - `_originalText`: the text extracted at conversion time, so `toAISDK` can detect
 *   whether `content` was modified afterwards.
 * - `_providerOptions`: message-level provider options, preserved for every role
 *   (including tool messages).
 * - `_toolName`: the tool name of a tool result.
 *
 * Conversion rules:
 * - system: `content` is copied as-is.
 * - user: text parts are joined with "\n"; non-text parts only live in `_originalContent`.
 * - assistant: text parts are joined with "\n", tool-call parts become `tool_calls`
 *   (non-string inputs are JSON-stringified), and all reasoning parts are joined
 *   with "\n" into `thinking` so that no reasoning text is dropped.
 * - tool: every `tool-result` part becomes its own IR message; other part types are skipped.
 *
 * @param {Array<object>} prompt AI SDK V3 prompt messages.
 * @returns {Array<object>} context-chef IR messages.
 */
function fromAISDK(prompt) {
  const messages = [];
  for (const msg of prompt) {
    // Spread into every IR message so provider options survive for all roles.
    const providerMeta = msg.providerOptions ? { _providerOptions: msg.providerOptions } : {};
    if (msg.role === "system") {
      messages.push({
        role: "system",
        content: msg.content,
        ...providerMeta
      });
      continue;
    }
    if (msg.role === "user") {
      const text = msg.content.filter((p) => p.type === "text").map((p) => p.text).join("\n");
      messages.push({
        role: "user",
        content: text,
        _originalContent: msg.content,
        _originalText: text,
        ...providerMeta
      });
      continue;
    }
    if (msg.role === "assistant") {
      const text = [];
      const reasoning = [];
      const toolCalls = [];
      for (const part of msg.content) {
        if (part.type === "text") text.push(part.text);
        else if (part.type === "tool-call") toolCalls.push({
          id: part.toolCallId,
          type: "function",
          function: {
            name: part.toolName,
            arguments: typeof part.input === "string" ? part.input : JSON.stringify(part.input)
          }
        });
        else if (part.type === "reasoning") reasoning.push(part.text);
      }
      const joinedText = text.join("\n");
      const m = {
        role: "assistant",
        content: joinedText,
        _originalContent: msg.content,
        _originalText: joinedText,
        ...providerMeta
      };
      if (toolCalls.length > 0) m.tool_calls = toolCalls;
      // Keep every reasoning part, not just the last one encountered.
      if (reasoning.length > 0) m.thinking = { thinking: reasoning.join("\n") };
      messages.push(m);
      continue;
    }
    if (msg.role === "tool") {
      for (const part of msg.content) {
        if (part.type !== "tool-result") continue;
        const text = stringifyToolOutput(part.output);
        messages.push({
          role: "tool",
          content: text,
          tool_call_id: part.toolCallId,
          _originalContent: [part],
          _originalText: text,
          _toolName: part.toolName,
          ...providerMeta
        });
      }
    }
  }
  return messages;
}
|
|
78
|
+
/**
 * Reports whether a message's original AI SDK parts can be reused verbatim:
 * they must be an array, and `content` must still equal the text cached in
 * `_originalText` (or no text was cached at all).
 */
function canReuseOriginalContent(msg) {
  const modified = msg._originalText !== void 0 && msg._originalText !== msg.content;
  return !modified && Array.isArray(msg._originalContent);
}
/**
 * Parses stringified tool-call arguments back into a value; falls back to the
 * raw string when it is not valid JSON.
 */
function parseToolCallArguments(args) {
  if (typeof args !== "string") return args;
  try {
    return JSON.parse(args);
  } catch {
    return args;
  }
}
/**
 * Rebuilds assistant content parts from IR fields: a text part followed by one
 * `tool-call` part per entry in `tool_calls`. The text part is omitted only when
 * it would be empty and tool calls are present.
 */
function rebuildAssistantContent(msg) {
  const toolCalls = msg.tool_calls ?? [];
  const parts = [];
  if (msg.content || toolCalls.length === 0) parts.push({
    type: "text",
    text: msg.content
  });
  for (const call of toolCalls) parts.push({
    type: "tool-call",
    toolCallId: call.id,
    toolName: call.function.name,
    input: parseToolCallArguments(call.function.arguments)
  });
  return parts;
}
/**
 * Converts context-chef IR messages back to AI SDK V3 prompt format.
 *
 * Uses `_originalContent` when content is unmodified (detected via `_originalText`).
 * Otherwise the message is reconstructed from IR fields:
 * - user: a single text part.
 * - assistant: a text part plus `tool-call` parts rebuilt from `tool_calls`, so
 *   tool results that follow keep a matching call.
 * - tool: a text `tool-result` using `tool_call_id` / `_toolName`
 *   (defaulting to "" / "unknown").
 *
 * Consecutive tool messages are merged into one AI SDK tool message; the first
 * `_providerOptions` found in the run is attached to it. Messages with any other
 * role are skipped.
 *
 * @param {Array<object>} messages context-chef IR messages.
 * @returns {Array<object>} AI SDK V3 prompt messages.
 */
function toAISDK(messages) {
  const prompt = [];
  let i = 0;
  while (i < messages.length) {
    const msg = messages[i];
    const providerMeta = msg._providerOptions ? { providerOptions: msg._providerOptions } : {};
    if (msg.role === "system") {
      prompt.push({
        role: "system",
        content: msg.content,
        ...providerMeta
      });
      i++;
      continue;
    }
    if (msg.role === "user") {
      const content = canReuseOriginalContent(msg) ? msg._originalContent : [{
        type: "text",
        text: msg.content
      }];
      prompt.push({
        role: "user",
        content,
        ...providerMeta
      });
      i++;
      continue;
    }
    if (msg.role === "assistant") {
      const content = canReuseOriginalContent(msg) ? msg._originalContent : rebuildAssistantContent(msg);
      prompt.push({
        role: "assistant",
        content,
        ...providerMeta
      });
      i++;
      continue;
    }
    if (msg.role === "tool") {
      const toolResults = [];
      let toolProviderOptions;
      // Fold the whole run of adjacent tool messages into one prompt entry.
      while (i < messages.length && messages[i].role === "tool") {
        const toolMsg = messages[i];
        if (toolProviderOptions === void 0) toolProviderOptions = toolMsg._providerOptions;
        if (canReuseOriginalContent(toolMsg)) toolResults.push(...toolMsg._originalContent);
        else toolResults.push({
          type: "tool-result",
          toolCallId: toolMsg.tool_call_id ?? "",
          toolName: toolMsg._toolName ?? "unknown",
          output: {
            type: "text",
            value: toolMsg.content
          }
        });
        i++;
      }
      prompt.push({
        role: "tool",
        content: toolResults,
        ...toolProviderOptions ? { providerOptions: toolProviderOptions } : {}
      });
      continue;
    }
    i++;
  }
  return prompt;
}
|
|
153
|
+
/**
 * Flattens an AI SDK tool-result output into a plain string.
 *
 * - `text` / `error-text`: the value itself.
 * - `json` / `error-json`: the JSON-serialized value.
 * - `content`: the non-empty text items joined with "\n"; other item types are ignored.
 * - anything else: the JSON-serialized output object.
 *
 * `JSON.stringify` yields `undefined` for non-serializable input (for example an
 * `undefined` value); that case is mapped to "" so the result is always a string.
 *
 * @param {object} output Tool-result output object with a `type` discriminator.
 * @returns {string} Text representation of the output.
 */
function stringifyToolOutput(output) {
  switch (output?.type) {
    case "text":
    case "error-text":
      return output.value;
    case "json":
    case "error-json":
      return JSON.stringify(output.value) ?? "";
    case "content":
      return output.value.filter((item) => item.type === "text" && item.text).map((item) => item.text).join("\n");
    default:
      return JSON.stringify(output) ?? "";
  }
}
|
|
163
|
+
|
|
164
|
+
//#endregion
|
|
165
|
+
//#region src/truncator.ts
|
|
166
|
+
/**
 * Truncates tool-result content within an AI SDK prompt when it exceeds the configured threshold.
 *
 * Only `tool-result` parts of `tool` messages are considered; everything else is passed
 * through by reference. A result is left untouched when its text is no longer than
 * `threshold`, or when `headChars + tailChars` already covers the whole text.
 *
 * When a storage adapter is provided, truncation is delegated to an `Offloader` and its
 * returned `content` is used (per the original note, the original content is persisted and
 * the output references it — NOTE(review): confirm against `@context-chef/core`). If that
 * call throws, a warning is logged and simple head/tail truncation is used instead.
 *
 * Truncated output is always text, but the error flag is preserved: `error-text` and
 * `error-json` results become `error-text`, all others become `text`.
 *
 * @param {Array<object>} prompt AI SDK V3 prompt messages.
 * @param {object} options
 * @param {number} options.threshold Character count above which a result is truncated.
 * @param {number} [options.headChars=0] Characters to keep from the start.
 * @param {number} [options.tailChars=1000] Characters to keep from the end.
 * @param {object} [options.storage] Optional storage adapter handed to the Offloader.
 * @returns {Promise<Array<object>>} A new prompt array with oversized tool results truncated.
 */
async function truncateToolResults(prompt, options) {
  const { threshold, headChars = 0, tailChars = 1e3, storage } = options;
  const offloader = storage ? new _context_chef_core.Offloader({
    threshold,
    adapter: storage,
    storageDir: ""
  }) : null;
  const result = [];
  for (const msg of prompt) {
    if (msg.role !== "tool") {
      result.push(msg);
      continue;
    }
    const newContent = [];
    for (const part of msg.content) {
      if (part.type !== "tool-result") {
        newContent.push(part);
        continue;
      }
      const text = extractText(part.output);
      if (text.length <= threshold || headChars + tailChars >= text.length) {
        newContent.push(part);
        continue;
      }
      // Truncated JSON is no longer valid JSON, so both error kinds collapse to error-text.
      const isError = part.output.type === "error-text" || part.output.type === "error-json";
      const outputType = isError ? "error-text" : "text";
      if (offloader) {
        try {
          const vfsResult = await offloader.offloadAsync(text, {
            threshold,
            headChars,
            tailChars
          });
          newContent.push({
            ...part,
            output: {
              type: outputType,
              value: vfsResult.content
            }
          });
          continue;
        } catch (error) {
          // Best effort: storage failures must not break the model call.
          console.warn(`[context-chef] Storage adapter write failed for tool result (${part.toolCallId}). Falling back to simple truncation. Error: ${error instanceof Error ? error.message : String(error)}`);
        }
      }
      newContent.push({
        ...part,
        output: {
          type: outputType,
          value: buildTruncatedText(text, headChars, tailChars)
        }
      });
    }
    result.push({
      ...msg,
      content: newContent
    });
  }
  return result;
}
/**
 * Builds "<head> --- truncated (N lines, M chars total) --- <tail>" from the first
 * `headChars` and last `tailChars` characters of `text`, then trims the result.
 */
function buildTruncatedText(text, headChars, tailChars) {
  const head = text.slice(0, headChars);
  const tail = text.slice(text.length - tailChars);
  return [
    head,
    `\n--- truncated (${text.split("\n").length} lines, ${text.length} chars total) ---\n`,
    tail
  ].filter(Boolean).join("").trim();
}
/**
 * Extracts the measurable text of a tool-result output.
 *
 * Returns the value for `text` / `error-text`, the JSON-serialized value for
 * `json` / `error-json`, the joined non-empty text items for `content`, and ""
 * for unknown or missing outputs (which therefore are never truncated).
 * Always returns a string, even when `JSON.stringify` has nothing to serialize.
 */
function extractText(output) {
  switch (output?.type) {
    case "text":
    case "error-text":
      return output.value;
    case "json":
    case "error-json":
      return JSON.stringify(output.value) ?? "";
    case "content":
      return output.value.map((v) => v.type === "text" ? v.text ?? "" : "").filter(Boolean).join("\n");
    default:
      return "";
  }
}
|
|
243
|
+
|
|
244
|
+
//#endregion
|
|
245
|
+
//#region src/middleware.ts
|
|
246
|
+
/**
 * Builds a v3 language-model middleware that applies context-chef
 * truncation and compression to every call made through the wrapped model.
 *
 * One `Janitor` instance is created per middleware and shared by all hooks:
 * - `transformParams` optionally truncates tool results, converts the prompt to
 *   IR, runs `janitor.compress`, and converts back only when compress returned
 *   a different array.
 * - `wrapGenerate` / `wrapStream` read `usage.inputTokens.total` from the
 *   response (or from the stream's `finish` chunk) and pass it to
 *   `janitor.feedTokenUsage`. When the total is missing and no custom tokenizer
 *   was configured, a warning is logged once per middleware instance.
 *
 * @param {object} options Middleware options (`contextWindow`, `compress`,
 *   `truncate`, `tokenizer`, `onCompress`).
 * @returns {object} Middleware with `specificationVersion`, `transformParams`,
 *   `wrapGenerate` and `wrapStream`.
 */
function createMiddleware(options) {
  let usageWarned = false;
  const tokenizer = options.tokenizer ? (msgs) => options.tokenizer?.(msgs) ?? 0 : void 0;
  const compressionModel = options.compress?.model ? createCompressionAdapter(options.compress.model) : void 0;
  const onCompress = options.onCompress ? (summary, count) => options.onCompress?.(summary.content, count) : void 0;
  const janitor = new _context_chef_core.Janitor({
    contextWindow: options.contextWindow,
    tokenizer,
    preserveRatio: options.compress?.preserveRatio ?? .8,
    compressionModel,
    onCompress
  });
  // Shared by both response hooks: feed the reported input-token total to the
  // Janitor, or emit the given warning once when it is unavailable.
  const recordInputTokens = (usage, missingUsageWarning) => {
    const total = usage?.inputTokens?.total;
    if (total != null) {
      janitor.feedTokenUsage(total);
      return;
    }
    if (usageWarned || options.tokenizer) return;
    usageWarned = true;
    console.warn(missingUsageWarning);
  };
  const transformParams = async ({ params }) => {
    const truncatedPrompt = options.truncate ? await truncateToolResults(params.prompt, options.truncate) : params.prompt;
    const irMessages = fromAISDK(truncatedPrompt);
    const compressed = await janitor.compress(irMessages);
    // Same array back means nothing was compressed; keep the prompt as it is.
    const prompt = compressed === irMessages ? truncatedPrompt : toAISDK(compressed);
    return {
      ...params,
      prompt
    };
  };
  const wrapGenerate = async ({ doGenerate }) => {
    const result = await doGenerate();
    recordInputTokens(result.usage, "[context-chef] Model response did not include usage.inputTokens.total. Token-based compression may not trigger accurately. Consider providing a tokenizer for precise token counting.");
    return result;
  };
  const wrapStream = async ({ doStream }) => {
    const { stream, ...rest } = await doStream();
    const usageTap = new TransformStream({
      transform(chunk, controller) {
        if (chunk.type === "finish") recordInputTokens(chunk.usage, "[context-chef] Stream finish did not include usage.inputTokens.total. Token-based compression may not trigger accurately. Consider providing a tokenizer for precise token counting.");
        // Every chunk is forwarded unchanged.
        controller.enqueue(chunk);
      }
    });
    return {
      ...rest,
      stream: stream.pipeThrough(usageTap)
    };
  };
  return {
    specificationVersion: "v3",
    transformParams,
    wrapGenerate,
    wrapStream
  };
}
|
|
303
|
+
/**
 * Wraps an AI SDK language model as the `compressionModel` callback handed to
 * the Janitor: `(messages) => Promise<string>`.
 *
 * Before calling `generateText`, IR messages are flattened to plain
 * role/content pairs:
 * - tool messages become user messages of the form `[Tool result (<id>): <content>]`
 *   (the id is omitted when absent);
 * - assistant messages with tool calls get one `[Called tool: name(args)]` line per
 *   call appended to (or replacing empty) content;
 * - every other message keeps its role and content.
 *
 * The call is capped at 2048 output tokens, and an empty result is replaced by
 * a fixed placeholder string.
 *
 * @param {object} model AI SDK language model used for summarization.
 * @returns {(messages: Array<object>) => Promise<string>} Compression callback.
 */
function createCompressionAdapter(model) {
  const describeToolCall = (tc) => `[Called tool: ${tc.function.name}(${tc.function.arguments})]`;
  const toPlainMessage = (m) => {
    if (m.role === "tool") {
      const idSuffix = m.tool_call_id ? ` (${m.tool_call_id})` : "";
      return {
        role: "user",
        content: `[Tool result${idSuffix}: ${m.content}]`
      };
    }
    if (m.role !== "assistant" || !m.tool_calls?.length) return {
      role: m.role,
      content: m.content
    };
    const toolCallsDesc = m.tool_calls.map(describeToolCall).join("\n");
    return {
      role: "assistant",
      content: m.content ? `${m.content}\n${toolCallsDesc}` : toolCallsDesc
    };
  };
  return async (messages) => {
    const response = await (0, ai.generateText)({
      model,
      messages: messages.map((m) => toPlainMessage(m)),
      maxOutputTokens: 2048
    });
    return response.text || "[Compression produced no output]";
  };
}
|
|
336
|
+
|
|
337
|
+
//#endregion
|
|
338
|
+
//#region src/index.ts
|
|
339
|
+
/**
 * Convenience wrapper: creates the context-chef middleware from `options` and
 * applies it to `model` through the AI SDK's `wrapLanguageModel`.
 *
 * @example
 * ```typescript
 * import { withContextChef } from '@context-chef/ai-sdk-middleware';
 * import { openai } from '@ai-sdk/openai';
 * import { generateText } from 'ai';
 *
 * const model = withContextChef(openai('gpt-4o'), {
 *   contextWindow: 128_000,
 *   compress: { model: openai('gpt-4o-mini') },
 *   truncate: { threshold: 5000, headChars: 500, tailChars: 1000 },
 * });
 *
 * // The wrapped model is passed wherever the original model was used.
 * const result = await generateText({ model, messages, tools });
 * ```
 *
 * @param {object} model AI SDK language model to wrap.
 * @param {object} options Options forwarded unchanged to `createMiddleware`.
 * @returns {object} The value returned by `wrapLanguageModel`.
 */
function withContextChef(model, options) {
  const middleware = createMiddleware(options);
  return (0, ai.wrapLanguageModel)({
    model,
    middleware
  });
}
|
|
365
|
+
|
|
366
|
+
//#endregion
|
|
367
|
+
exports.createMiddleware = createMiddleware;
|
|
368
|
+
exports.fromAISDK = fromAISDK;
|
|
369
|
+
exports.toAISDK = toAISDK;
|
|
370
|
+
exports.withContextChef = withContextChef;
|
|
371
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.cjs","names":["Offloader","Janitor"],"sources":["../src/adapter.ts","../src/truncator.ts","../src/middleware.ts","../src/index.ts"],"sourcesContent":["import type {\n LanguageModelV3Prompt,\n LanguageModelV3ToolResultOutput,\n LanguageModelV3ToolResultPart,\n SharedV3ProviderOptions,\n} from '@ai-sdk/provider';\nimport type { Message, ToolCall } from '@context-chef/core';\n\n/**\n * Converts an AI SDK V3 prompt to context-chef IR messages.\n *\n * Original AI SDK content is stored in `_originalContent` for lossless round-trip.\n * `_originalText` caches the extracted text so `toAISDK` can detect Janitor modifications.\n * `_providerOptions` preserves message-level provider options (e.g. Anthropic cache control).\n */\nexport function fromAISDK(prompt: LanguageModelV3Prompt): Message[] {\n const messages: Message[] = [];\n\n for (const msg of prompt) {\n if (msg.role === 'system') {\n messages.push({\n role: 'system',\n content: msg.content,\n ...(msg.providerOptions ? { _providerOptions: msg.providerOptions } : {}),\n });\n continue;\n }\n\n if (msg.role === 'user') {\n const text = msg.content\n .filter((p) => p.type === 'text')\n .map((p) => p.text)\n .join('\\n');\n messages.push({\n role: 'user',\n content: text,\n _originalContent: msg.content,\n _originalText: text,\n ...(msg.providerOptions ? { _providerOptions: msg.providerOptions } : {}),\n });\n continue;\n }\n\n if (msg.role === 'assistant') {\n const text: string[] = [];\n const toolCalls: ToolCall[] = [];\n let thinking: { thinking: string } | undefined;\n\n for (const part of msg.content) {\n if (part.type === 'text') text.push(part.text);\n else if (part.type === 'tool-call') {\n toolCalls.push({\n id: part.toolCallId,\n type: 'function',\n function: {\n name: part.toolName,\n arguments: typeof part.input === 'string' ? 
part.input : JSON.stringify(part.input),\n },\n });\n } else if (part.type === 'reasoning') {\n thinking = { thinking: part.text };\n }\n }\n\n const joinedText = text.join('\\n');\n const m: Message = {\n role: 'assistant',\n content: joinedText,\n _originalContent: msg.content,\n _originalText: joinedText,\n ...(msg.providerOptions ? { _providerOptions: msg.providerOptions } : {}),\n };\n if (toolCalls.length > 0) m.tool_calls = toolCalls;\n if (thinking) m.thinking = thinking;\n messages.push(m);\n continue;\n }\n\n if (msg.role === 'tool') {\n for (const part of msg.content) {\n if (part.type === 'tool-result') {\n const text = stringifyToolOutput(part.output);\n messages.push({\n role: 'tool',\n content: text,\n tool_call_id: part.toolCallId,\n _originalContent: [part],\n _originalText: text,\n _toolName: part.toolName,\n });\n }\n }\n }\n }\n\n return messages;\n}\n\n/**\n * Converts context-chef IR messages back to AI SDK V3 prompt format.\n *\n * Uses `_originalContent` when content is unmodified (detected via `_originalText`).\n * Falls back to constructing from IR fields when content was modified by Janitor\n * (e.g. compact() cleared tool results) or for new messages (e.g. compression summaries).\n */\nexport function toAISDK(messages: Message[]): LanguageModelV3Prompt {\n const prompt: LanguageModelV3Prompt = [];\n\n let i = 0;\n while (i < messages.length) {\n const msg = messages[i];\n const providerOptions = msg._providerOptions as SharedV3ProviderOptions | undefined;\n const contentModified = msg._originalText !== undefined && msg._originalText !== msg.content;\n\n if (msg.role === 'system') {\n prompt.push({\n role: 'system',\n content: msg.content,\n ...(providerOptions ? { providerOptions } : {}),\n });\n i++;\n continue;\n }\n\n if (msg.role === 'user') {\n const content =\n !contentModified && Array.isArray(msg._originalContent)\n ? 
(msg._originalContent as any)\n : [{ type: 'text' as const, text: msg.content }];\n prompt.push({\n role: 'user',\n content,\n ...(providerOptions ? { providerOptions } : {}),\n });\n i++;\n continue;\n }\n\n if (msg.role === 'assistant') {\n const content =\n !contentModified && Array.isArray(msg._originalContent)\n ? (msg._originalContent as any)\n : [{ type: 'text' as const, text: msg.content }];\n prompt.push({\n role: 'assistant',\n content,\n ...(providerOptions ? { providerOptions } : {}),\n });\n i++;\n continue;\n }\n\n if (msg.role === 'tool') {\n const toolResults: LanguageModelV3ToolResultPart[] = [];\n while (i < messages.length && messages[i].role === 'tool') {\n const toolMsg = messages[i];\n const toolModified =\n toolMsg._originalText !== undefined && toolMsg._originalText !== toolMsg.content;\n\n if (!toolModified && toolMsg._originalContent) {\n toolResults.push(...(toolMsg._originalContent as LanguageModelV3ToolResultPart[]));\n } else {\n toolResults.push({\n type: 'tool-result',\n toolCallId: toolMsg.tool_call_id ?? '',\n toolName: (toolMsg._toolName as string) ?? 'unknown',\n output: { type: 'text', value: toolMsg.content },\n });\n }\n i++;\n }\n prompt.push({ role: 'tool', content: toolResults });\n continue;\n }\n\n i++;\n }\n\n return prompt;\n}\n\nfunction stringifyToolOutput(output: LanguageModelV3ToolResultOutput): string {\n switch (output.type) {\n case 'text':\n case 'error-text':\n return output.value;\n case 'json':\n case 'error-json':\n return JSON.stringify(output.value);\n case 'content':\n return output.value\n .map((v: { type: string; text?: string }) => (v.type === 'text' ? (v.text ?? 
'') : ''))\n .filter(Boolean)\n .join('\\n');\n default:\n return JSON.stringify(output);\n }\n}\n","import type {\n LanguageModelV3Prompt,\n LanguageModelV3ToolResultOutput,\n LanguageModelV3ToolResultPart,\n} from '@ai-sdk/provider';\nimport { Offloader } from '@context-chef/core';\nimport type { TruncateOptions } from './types';\n\n/**\n * Truncates tool-result content within an AI SDK prompt when it exceeds the configured threshold.\n * When a storage adapter is provided, original content is persisted and a URI is included in the output.\n */\nexport async function truncateToolResults(\n prompt: LanguageModelV3Prompt,\n options: TruncateOptions,\n): Promise<LanguageModelV3Prompt> {\n const { threshold, headChars = 0, tailChars = 1000, storage } = options;\n\n const offloader = storage\n ? new Offloader({ threshold, adapter: storage, storageDir: '' })\n : null;\n\n const result: LanguageModelV3Prompt = [];\n\n for (const msg of prompt) {\n if (msg.role !== 'tool') {\n result.push(msg);\n continue;\n }\n\n const newContent: typeof msg.content = [];\n\n for (const part of msg.content) {\n if (part.type !== 'tool-result') {\n newContent.push(part);\n continue;\n }\n\n const text = extractText(part.output);\n if (text.length <= threshold || headChars + tailChars >= text.length) {\n newContent.push(part);\n continue;\n }\n\n // With storage: use Offloader to persist original and get a URI-annotated truncation\n if (offloader) {\n try {\n const vfsResult = await offloader.offloadAsync(text, { threshold, headChars, tailChars });\n newContent.push({\n ...part,\n output: { type: 'text', value: vfsResult.content } satisfies LanguageModelV3ToolResultOutput,\n } satisfies LanguageModelV3ToolResultPart);\n continue;\n } catch (error) {\n console.warn(\n `[context-chef] Storage adapter write failed for tool result (${part.toolCallId}). ` +\n `Falling back to simple truncation. Error: ${error instanceof Error ? 
error.message : String(error)}`,\n );\n // Fall through to simple truncation below\n }\n }\n\n // Without storage: simple truncation, original is discarded\n const head = text.slice(0, headChars);\n const tail = text.slice(text.length - tailChars);\n const totalLines = text.split('\\n').length;\n\n const truncated = [\n head,\n `\\n--- truncated (${totalLines} lines, ${text.length} chars total) ---\\n`,\n tail,\n ]\n .filter(Boolean)\n .join('')\n .trim();\n\n newContent.push({\n ...part,\n output: { type: 'text', value: truncated } satisfies LanguageModelV3ToolResultOutput,\n } satisfies LanguageModelV3ToolResultPart);\n }\n\n result.push({ ...msg, content: newContent });\n }\n\n return result;\n}\n\nfunction extractText(output: LanguageModelV3ToolResultOutput): string {\n switch (output.type) {\n case 'text':\n case 'error-text':\n return output.value;\n case 'json':\n case 'error-json':\n return JSON.stringify(output.value);\n case 'content':\n return output.value\n .map((v: { type: string; text?: string }) => (v.type === 'text' ? (v.text ?? 
'') : ''))\n .filter(Boolean)\n .join('\\n');\n default:\n return '';\n }\n}\n","import type { LanguageModelV3, LanguageModelV3StreamPart } from '@ai-sdk/provider';\nimport { generateText, type LanguageModelMiddleware } from 'ai';\nimport { Janitor, type Message } from '@context-chef/core';\n\nimport { fromAISDK, toAISDK } from './adapter';\nimport { truncateToolResults } from './truncator';\nimport type { ContextChefOptions } from './types';\n\n/**\n * Creates a LanguageModelMiddleware that transparently applies\n * context-chef compression and truncation to AI SDK model calls.\n *\n * The middleware holds a stateful Janitor instance that tracks\n * token usage across calls for compression decisions.\n */\nexport function createMiddleware(options: ContextChefOptions): LanguageModelMiddleware {\n let usageWarned = false;\n\n const janitor = new Janitor({\n contextWindow: options.contextWindow,\n tokenizer: options.tokenizer ? (msgs: Message[]) => options.tokenizer?.(msgs) ?? 0 : undefined,\n preserveRatio: options.compress?.preserveRatio ?? 0.8,\n compressionModel: options.compress?.model\n ? createCompressionAdapter(options.compress.model)\n : undefined,\n onCompress: options.onCompress\n ? (summary, count) => options.onCompress?.(summary.content, count)\n : undefined,\n });\n\n return {\n specificationVersion: 'v3',\n\n transformParams: async ({ params }) => {\n let { prompt } = params;\n\n // 1. Truncate large tool results\n if (options.truncate) {\n prompt = await truncateToolResults(prompt, options.truncate);\n }\n\n // 2. 
Compress history if over token budget\n const irMessages = fromAISDK(prompt);\n const compressed = await janitor.compress(irMessages);\n\n // Only convert back if compression actually changed something\n if (compressed !== irMessages) {\n prompt = toAISDK(compressed);\n }\n\n return { ...params, prompt };\n },\n\n wrapGenerate: async ({ doGenerate }) => {\n const result = await doGenerate();\n\n if (result.usage?.inputTokens?.total != null) {\n janitor.feedTokenUsage(result.usage.inputTokens.total);\n } else if (!usageWarned && !options.tokenizer) {\n usageWarned = true;\n console.warn(\n '[context-chef] Model response did not include usage.inputTokens.total. ' +\n 'Token-based compression may not trigger accurately. ' +\n 'Consider providing a tokenizer for precise token counting.',\n );\n }\n\n return result;\n },\n\n wrapStream: async ({ doStream }) => {\n const { stream, ...rest } = await doStream();\n\n const transform = new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({\n transform(chunk, controller) {\n if (chunk.type === 'finish') {\n if (chunk.usage?.inputTokens?.total != null) {\n janitor.feedTokenUsage(chunk.usage.inputTokens.total);\n } else if (!usageWarned && !options.tokenizer) {\n usageWarned = true;\n console.warn(\n '[context-chef] Stream finish did not include usage.inputTokens.total. ' +\n 'Token-based compression may not trigger accurately. 
' +\n 'Consider providing a tokenizer for precise token counting.',\n );\n }\n }\n controller.enqueue(chunk);\n },\n });\n\n return { ...rest, stream: stream.pipeThrough(transform) };\n },\n };\n}\n\n/**\n * Adapts an AI SDK LanguageModelV3 into the compressionModel callback\n * that Janitor expects: (messages: Message[]) => Promise<string>\n *\n * Tool messages are converted to user messages describing the tool interaction,\n * since generateText only accepts system/user/assistant roles.\n */\nfunction createCompressionAdapter(model: LanguageModelV3): (messages: Message[]) => Promise<string> {\n return async (messages: Message[]): Promise<string> => {\n const formatted = messages.map((m) => {\n if (m.role === 'tool') {\n return {\n role: 'user' as const,\n content: `[Tool result${m.tool_call_id ? ` (${m.tool_call_id})` : ''}: ${m.content}]`,\n };\n }\n // assistant messages with tool_calls: include tool call info in content\n if (m.role === 'assistant' && m.tool_calls?.length) {\n const toolCallsDesc = m.tool_calls\n .map((tc) => `[Called tool: ${tc.function.name}(${tc.function.arguments})]`)\n .join('\\n');\n return {\n role: 'assistant' as const,\n content: m.content ? 
`${m.content}\\n${toolCallsDesc}` : toolCallsDesc,\n };\n }\n return {\n role: m.role as 'system' | 'user' | 'assistant',\n content: m.content,\n };\n });\n\n const { text } = await generateText({\n model,\n messages: formatted,\n maxOutputTokens: 2048,\n });\n\n return text || '[Compression produced no output]';\n };\n}\n","import type { LanguageModelV3 } from '@ai-sdk/provider';\nimport { wrapLanguageModel } from 'ai';\n\nimport { createMiddleware } from './middleware';\nimport type { ContextChefOptions } from './types';\n\nexport { fromAISDK, toAISDK } from './adapter';\nexport { createMiddleware } from './middleware';\nexport type { CompressOptions, ContextChefOptions, TruncateOptions } from './types';\n\n/**\n * Wraps an AI SDK language model with context-chef middleware for\n * transparent history compression, tool result truncation, and token budget management.\n *\n * @example\n * ```typescript\n * import { withContextChef } from '@context-chef/ai-sdk-middleware';\n * import { openai } from '@ai-sdk/openai';\n * import { generateText } from 'ai';\n *\n * const model = withContextChef(openai('gpt-4o'), {\n * contextWindow: 128_000,\n * compress: { model: openai('gpt-4o-mini') },\n * truncate: { threshold: 5000, headChars: 500, tailChars: 1000 },\n * });\n *\n * // Use exactly like normal — zero other code changes\n * const result = await generateText({ model, messages, tools });\n * ```\n */\nexport function withContextChef(model: LanguageModelV3, options: ContextChefOptions): LanguageModelV3 {\n const middleware = createMiddleware(options);\n return wrapLanguageModel({ model, middleware 
});\n}\n"],"mappings":";;;;;;;;;;;;AAeA,SAAgB,UAAU,QAA0C;CAClE,MAAM,WAAsB,EAAE;AAE9B,MAAK,MAAM,OAAO,QAAQ;AACxB,MAAI,IAAI,SAAS,UAAU;AACzB,YAAS,KAAK;IACZ,MAAM;IACN,SAAS,IAAI;IACb,GAAI,IAAI,kBAAkB,EAAE,kBAAkB,IAAI,iBAAiB,GAAG,EAAE;IACzE,CAAC;AACF;;AAGF,MAAI,IAAI,SAAS,QAAQ;GACvB,MAAM,OAAO,IAAI,QACd,QAAQ,MAAM,EAAE,SAAS,OAAO,CAChC,KAAK,MAAM,EAAE,KAAK,CAClB,KAAK,KAAK;AACb,YAAS,KAAK;IACZ,MAAM;IACN,SAAS;IACT,kBAAkB,IAAI;IACtB,eAAe;IACf,GAAI,IAAI,kBAAkB,EAAE,kBAAkB,IAAI,iBAAiB,GAAG,EAAE;IACzE,CAAC;AACF;;AAGF,MAAI,IAAI,SAAS,aAAa;GAC5B,MAAM,OAAiB,EAAE;GACzB,MAAM,YAAwB,EAAE;GAChC,IAAI;AAEJ,QAAK,MAAM,QAAQ,IAAI,QACrB,KAAI,KAAK,SAAS,OAAQ,MAAK,KAAK,KAAK,KAAK;YACrC,KAAK,SAAS,YACrB,WAAU,KAAK;IACb,IAAI,KAAK;IACT,MAAM;IACN,UAAU;KACR,MAAM,KAAK;KACX,WAAW,OAAO,KAAK,UAAU,WAAW,KAAK,QAAQ,KAAK,UAAU,KAAK,MAAM;KACpF;IACF,CAAC;YACO,KAAK,SAAS,YACvB,YAAW,EAAE,UAAU,KAAK,MAAM;GAItC,MAAM,aAAa,KAAK,KAAK,KAAK;GAClC,MAAM,IAAa;IACjB,MAAM;IACN,SAAS;IACT,kBAAkB,IAAI;IACtB,eAAe;IACf,GAAI,IAAI,kBAAkB,EAAE,kBAAkB,IAAI,iBAAiB,GAAG,EAAE;IACzE;AACD,OAAI,UAAU,SAAS,EAAG,GAAE,aAAa;AACzC,OAAI,SAAU,GAAE,WAAW;AAC3B,YAAS,KAAK,EAAE;AAChB;;AAGF,MAAI,IAAI,SAAS,QACf;QAAK,MAAM,QAAQ,IAAI,QACrB,KAAI,KAAK,SAAS,eAAe;IAC/B,MAAM,OAAO,oBAAoB,KAAK,OAAO;AAC7C,aAAS,KAAK;KACZ,MAAM;KACN,SAAS;KACT,cAAc,KAAK;KACnB,kBAAkB,CAAC,KAAK;KACxB,eAAe;KACf,WAAW,KAAK;KACjB,CAAC;;;;AAMV,QAAO;;;;;;;;;AAUT,SAAgB,QAAQ,UAA4C;CAClE,MAAM,SAAgC,EAAE;CAExC,IAAI,IAAI;AACR,QAAO,IAAI,SAAS,QAAQ;EAC1B,MAAM,MAAM,SAAS;EACrB,MAAM,kBAAkB,IAAI;EAC5B,MAAM,kBAAkB,IAAI,kBAAkB,UAAa,IAAI,kBAAkB,IAAI;AAErF,MAAI,IAAI,SAAS,UAAU;AACzB,UAAO,KAAK;IACV,MAAM;IACN,SAAS,IAAI;IACb,GAAI,kBAAkB,EAAE,iBAAiB,GAAG,EAAE;IAC/C,CAAC;AACF;AACA;;AAGF,MAAI,IAAI,SAAS,QAAQ;GACvB,MAAM,UACJ,CAAC,mBAAmB,MAAM,QAAQ,IAAI,iBAAiB,GAClD,IAAI,mBACL,CAAC;IAAE,MAAM;IAAiB,MAAM,IAAI;IAAS,CAAC;AACpD,UAAO,KAAK;IACV,MAAM;IACN;IACA,GAAI,kBAAkB,EAAE,iBAAiB,GAAG,EAAE;IAC/C,CAAC;AACF;AACA;;AAGF,MAAI,IAAI,SAAS,aAAa;GAC5B,MAAM,UACJ,CAAC,mBAAmB,MAAM,QAAQ,IAAI,iBAAiB,GAClD,IAAI,mBACL,CAAC;IAAE,MAAM;IAAiB,MAAM,IAAI;IAAS,C
AAC;AACpD,UAAO,KAAK;IACV,MAAM;IACN;IACA,GAAI,kBAAkB,EAAE,iBAAiB,GAAG,EAAE;IAC/C,CAAC;AACF;AACA;;AAGF,MAAI,IAAI,SAAS,QAAQ;GACvB,MAAM,cAA+C,EAAE;AACvD,UAAO,IAAI,SAAS,UAAU,SAAS,GAAG,SAAS,QAAQ;IACzD,MAAM,UAAU,SAAS;AAIzB,QAAI,EAFF,QAAQ,kBAAkB,UAAa,QAAQ,kBAAkB,QAAQ,YAEtD,QAAQ,iBAC3B,aAAY,KAAK,GAAI,QAAQ,iBAAqD;QAElF,aAAY,KAAK;KACf,MAAM;KACN,YAAY,QAAQ,gBAAgB;KACpC,UAAW,QAAQ,aAAwB;KAC3C,QAAQ;MAAE,MAAM;MAAQ,OAAO,QAAQ;MAAS;KACjD,CAAC;AAEJ;;AAEF,UAAO,KAAK;IAAE,MAAM;IAAQ,SAAS;IAAa,CAAC;AACnD;;AAGF;;AAGF,QAAO;;AAGT,SAAS,oBAAoB,QAAiD;AAC5E,SAAQ,OAAO,MAAf;EACE,KAAK;EACL,KAAK,aACH,QAAO,OAAO;EAChB,KAAK;EACL,KAAK,aACH,QAAO,KAAK,UAAU,OAAO,MAAM;EACrC,KAAK,UACH,QAAO,OAAO,MACX,KAAK,MAAwC,EAAE,SAAS,SAAU,EAAE,QAAQ,KAAM,GAAI,CACtF,OAAO,QAAQ,CACf,KAAK,KAAK;EACf,QACE,QAAO,KAAK,UAAU,OAAO;;;;;;;;;;ACvLnC,eAAsB,oBACpB,QACA,SACgC;CAChC,MAAM,EAAE,WAAW,YAAY,GAAG,YAAY,KAAM,YAAY;CAEhE,MAAM,YAAY,UACd,IAAIA,6BAAU;EAAE;EAAW,SAAS;EAAS,YAAY;EAAI,CAAC,GAC9D;CAEJ,MAAM,SAAgC,EAAE;AAExC,MAAK,MAAM,OAAO,QAAQ;AACxB,MAAI,IAAI,SAAS,QAAQ;AACvB,UAAO,KAAK,IAAI;AAChB;;EAGF,MAAM,aAAiC,EAAE;AAEzC,OAAK,MAAM,QAAQ,IAAI,SAAS;AAC9B,OAAI,KAAK,SAAS,eAAe;AAC/B,eAAW,KAAK,KAAK;AACrB;;GAGF,MAAM,OAAO,YAAY,KAAK,OAAO;AACrC,OAAI,KAAK,UAAU,aAAa,YAAY,aAAa,KAAK,QAAQ;AACpE,eAAW,KAAK,KAAK;AACrB;;AAIF,OAAI,UACF,KAAI;IACF,MAAM,YAAY,MAAM,UAAU,aAAa,MAAM;KAAE;KAAW;KAAW;KAAW,CAAC;AACzF,eAAW,KAAK;KACd,GAAG;KACH,QAAQ;MAAE,MAAM;MAAQ,OAAO,UAAU;MAAS;KACnD,CAAyC;AAC1C;YACO,OAAO;AACd,YAAQ,KACN,gEAAgE,KAAK,WAAW,+CACjC,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM,GACtG;;GAML,MAAM,OAAO,KAAK,MAAM,GAAG,UAAU;GACrC,MAAM,OAAO,KAAK,MAAM,KAAK,SAAS,UAAU;GAGhD,MAAM,YAAY;IAChB;IACA,oBAJiB,KAAK,MAAM,KAAK,CAAC,OAIH,UAAU,KAAK,OAAO;IACrD;IACD,CACE,OAAO,QAAQ,CACf,KAAK,GAAG,CACR,MAAM;AAET,cAAW,KAAK;IACd,GAAG;IACH,QAAQ;KAAE,MAAM;KAAQ,OAAO;KAAW;IAC3C,CAAyC;;AAG5C,SAAO,KAAK;GAAE,GAAG;GAAK,SAAS;GAAY,CAAC;;AAG9C,QAAO;;AAGT,SAAS,YAAY,QAAiD;AACpE,SAAQ,OAAO,MAAf;EACE,KAAK;EACL,KAAK,aACH,QAAO,OAAO;EAChB,KAAK;EACL,KAAK,aACH,QAAO,KAAK,UAAU,OAAO,MAAM;EACrC,KAAK,UACH,QAAO,OAAO,MACX,KAAK,MAAwC,EAAE
,SAAS,SAAU,EAAE,QAAQ,KAAM,GAAI,CACtF,OAAO,QAAQ,CACf,KAAK,KAAK;EACf,QACE,QAAO;;;;;;;;;;;;;ACvFb,SAAgB,iBAAiB,SAAsD;CACrF,IAAI,cAAc;CAElB,MAAM,UAAU,IAAIC,2BAAQ;EAC1B,eAAe,QAAQ;EACvB,WAAW,QAAQ,aAAa,SAAoB,QAAQ,YAAY,KAAK,IAAI,IAAI;EACrF,eAAe,QAAQ,UAAU,iBAAiB;EAClD,kBAAkB,QAAQ,UAAU,QAChC,yBAAyB,QAAQ,SAAS,MAAM,GAChD;EACJ,YAAY,QAAQ,cACf,SAAS,UAAU,QAAQ,aAAa,QAAQ,SAAS,MAAM,GAChE;EACL,CAAC;AAEF,QAAO;EACL,sBAAsB;EAEtB,iBAAiB,OAAO,EAAE,aAAa;GACrC,IAAI,EAAE,WAAW;AAGjB,OAAI,QAAQ,SACV,UAAS,MAAM,oBAAoB,QAAQ,QAAQ,SAAS;GAI9D,MAAM,aAAa,UAAU,OAAO;GACpC,MAAM,aAAa,MAAM,QAAQ,SAAS,WAAW;AAGrD,OAAI,eAAe,WACjB,UAAS,QAAQ,WAAW;AAG9B,UAAO;IAAE,GAAG;IAAQ;IAAQ;;EAG9B,cAAc,OAAO,EAAE,iBAAiB;GACtC,MAAM,SAAS,MAAM,YAAY;AAEjC,OAAI,OAAO,OAAO,aAAa,SAAS,KACtC,SAAQ,eAAe,OAAO,MAAM,YAAY,MAAM;YAC7C,CAAC,eAAe,CAAC,QAAQ,WAAW;AAC7C,kBAAc;AACd,YAAQ,KACN,wLAGD;;AAGH,UAAO;;EAGT,YAAY,OAAO,EAAE,eAAe;GAClC,MAAM,EAAE,QAAQ,GAAG,SAAS,MAAM,UAAU;GAE5C,MAAM,YAAY,IAAI,gBAAsE,EAC1F,UAAU,OAAO,YAAY;AAC3B,QAAI,MAAM,SAAS,UACjB;SAAI,MAAM,OAAO,aAAa,SAAS,KACrC,SAAQ,eAAe,MAAM,MAAM,YAAY,MAAM;cAC5C,CAAC,eAAe,CAAC,QAAQ,WAAW;AAC7C,oBAAc;AACd,cAAQ,KACN,uLAGD;;;AAGL,eAAW,QAAQ,MAAM;MAE5B,CAAC;AAEF,UAAO;IAAE,GAAG;IAAM,QAAQ,OAAO,YAAY,UAAU;IAAE;;EAE5D;;;;;;;;;AAUH,SAAS,yBAAyB,OAAkE;AAClG,QAAO,OAAO,aAAyC;EAwBrD,MAAM,EAAE,SAAS,2BAAmB;GAClC;GACA,UAzBgB,SAAS,KAAK,MAAM;AACpC,QAAI,EAAE,SAAS,OACb,QAAO;KACL,MAAM;KACN,SAAS,eAAe,EAAE,eAAe,KAAK,EAAE,aAAa,KAAK,GAAG,IAAI,EAAE,QAAQ;KACpF;AAGH,QAAI,EAAE,SAAS,eAAe,EAAE,YAAY,QAAQ;KAClD,MAAM,gBAAgB,EAAE,WACrB,KAAK,OAAO,iBAAiB,GAAG,SAAS,KAAK,GAAG,GAAG,SAAS,UAAU,IAAI,CAC3E,KAAK,KAAK;AACb,YAAO;MACL,MAAM;MACN,SAAS,EAAE,UAAU,GAAG,EAAE,QAAQ,IAAI,kBAAkB;MACzD;;AAEH,WAAO;KACL,MAAM,EAAE;KACR,SAAS,EAAE;KACZ;KACD;GAKA,iBAAiB;GAClB,CAAC;AAEF,SAAO,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;ACxGnB,SAAgB,gBAAgB,OAAwB,SAA8C;AAEpG,kCAAyB;EAAE;EAAO,YADf,iBAAiB,QAAQ;EACE,CAAC"}
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { LanguageModelV3, LanguageModelV3Prompt } from "@ai-sdk/provider";
|
|
2
|
+
import { Message, VFSStorageAdapter } from "@context-chef/core";
|
|
3
|
+
import { LanguageModelMiddleware } from "ai";
|
|
4
|
+
|
|
5
|
+
//#region src/types.d.ts
|
|
6
|
+
/**
 * Settings for truncating oversized tool results.
 */
interface TruncateOptions {
  /**
   * Character count at which truncation is triggered.
   */
  threshold: number;
  /**
   * Number of characters to keep from the start.
   * @default 0
   */
  headChars?: number;
  /**
   * Number of characters to keep from the end.
   * @default 1000
   */
  tailChars?: number;
  /**
   * Adapter that persists the original content before it is truncated.
   * It may be a FileSystemAdapter, a database adapter, or any custom implementation.
   *
   * With an adapter, the truncated output includes a `context://vfs/` URI for retrieval.
   * Without one, the original content is discarded after truncation.
   */
  storage?: VFSStorageAdapter;
}
|
|
21
|
+
/**
 * Settings for history compression.
 */
interface CompressOptions {
  /**
   * Inexpensive model used for summarization, e.g. `openai('gpt-4o-mini')`.
   */
  model: LanguageModelV3;
  /**
   * Ratio of the context window to preserve for recent messages.
   * @default 0.8
   */
  preserveRatio?: number;
}
|
|
27
|
+
/**
 * Top-level configuration accepted by the middleware.
 */
interface ContextChefOptions {
  /**
   * Size of the model's context window, in tokens.
   */
  contextWindow: number;
  /**
   * Turns on history compression; leave out to disable it.
   */
  compress?: CompressOptions;
  /**
   * Turns on tool result truncation; leave out to disable it.
   */
  truncate?: TruncateOptions;
  /**
   * Optional tokenizer for precise token counting of messages.
   */
  tokenizer?: (messages: unknown[]) => number;
  /**
   * Hook invoked after a compression has taken place.
   */
  onCompress?: (summary: string, truncatedCount: number) => void;
}
|
|
39
|
+
//#endregion
|
|
40
|
+
//#region src/adapter.d.ts
|
|
41
|
+
/**
 * Translates an AI SDK V3 prompt into context-chef IR messages.
 *
 * - `_originalContent` holds the original AI SDK content for a lossless round-trip.
 * - `_originalText` caches the extracted text so `toAISDK` can detect Janitor modifications.
 * - `_providerOptions` keeps message-level provider options (e.g. Anthropic cache control).
 *
 * @param prompt The AI SDK V3 prompt to convert.
 * @returns The equivalent IR messages.
 */
declare function fromAISDK(prompt: LanguageModelV3Prompt): Message[];
|
|
49
|
+
/**
 * Translates context-chef IR messages back into the AI SDK V3 prompt format.
 *
 * `_originalContent` is reused while the content is unmodified (detected via `_originalText`).
 * Otherwise the prompt message is constructed from the IR fields: this covers content
 * modified by Janitor (e.g. compact() cleared tool results) and new messages
 * (e.g. compression summaries).
 *
 * @param messages The IR messages to convert.
 * @returns The equivalent AI SDK V3 prompt.
 */
declare function toAISDK(messages: Message[]): LanguageModelV3Prompt;
|
|
57
|
+
//#endregion
|
|
58
|
+
//#region src/middleware.d.ts
|
|
59
|
+
/**
 * Builds a LanguageModelMiddleware that transparently applies context-chef
 * compression and truncation to AI SDK model calls.
 *
 * The middleware owns a stateful Janitor instance which tracks token usage
 * across calls to drive compression decisions.
 *
 * @param options Middleware configuration.
 * @returns The configured middleware.
 */
declare function createMiddleware(options: ContextChefOptions): LanguageModelMiddleware;
|
|
67
|
+
//#endregion
|
|
68
|
+
//#region src/index.d.ts
|
|
69
|
+
/**
 * Wraps an AI SDK language model with the context-chef middleware, giving it
 * transparent history compression, tool result truncation, and token budget management.
 *
 * @param model The language model to wrap.
 * @param options Middleware configuration.
 * @returns A language model that can be used in place of `model`.
 *
 * @example
 * ```typescript
 * import { withContextChef } from '@context-chef/ai-sdk-middleware';
 * import { openai } from '@ai-sdk/openai';
 * import { generateText } from 'ai';
 *
 * const model = withContextChef(openai('gpt-4o'), {
 *   contextWindow: 128_000,
 *   compress: { model: openai('gpt-4o-mini') },
 *   truncate: { threshold: 5000, headChars: 500, tailChars: 1000 },
 * });
 *
 * // Use exactly like normal — zero other code changes
 * const result = await generateText({ model, messages, tools });
 * ```
 */
declare function withContextChef(model: LanguageModelV3, options: ContextChefOptions): LanguageModelV3;
|
|
90
|
+
//#endregion
|
|
91
|
+
export { type CompressOptions, type ContextChefOptions, type TruncateOptions, createMiddleware, fromAISDK, toAISDK, withContextChef };
|
|
92
|
+
//# sourceMappingURL=index.d.cts.map
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { LanguageModelMiddleware } from "ai";
|
|
2
|
+
import { Message, VFSStorageAdapter } from "@context-chef/core";
|
|
3
|
+
import { LanguageModelV3, LanguageModelV3Prompt } from "@ai-sdk/provider";
|
|
4
|
+
|
|
5
|
+
//#region src/types.d.ts
|
|
6
|
+
/**
 * Settings for truncating oversized tool results.
 */
interface TruncateOptions {
  /**
   * Character count at which truncation is triggered.
   */
  threshold: number;
  /**
   * Number of characters to keep from the start.
   * @default 0
   */
  headChars?: number;
  /**
   * Number of characters to keep from the end.
   * @default 1000
   */
  tailChars?: number;
  /**
   * Adapter that persists the original content before it is truncated.
   * It may be a FileSystemAdapter, a database adapter, or any custom implementation.
   *
   * With an adapter, the truncated output includes a `context://vfs/` URI for retrieval.
   * Without one, the original content is discarded after truncation.
   */
  storage?: VFSStorageAdapter;
}
|
|
21
|
+
/**
 * Settings for history compression.
 */
interface CompressOptions {
  /**
   * Inexpensive model used for summarization, e.g. `openai('gpt-4o-mini')`.
   */
  model: LanguageModelV3;
  /**
   * Ratio of the context window to preserve for recent messages.
   * @default 0.8
   */
  preserveRatio?: number;
}
|
|
27
|
+
/**
 * Top-level configuration accepted by the middleware.
 */
interface ContextChefOptions {
  /**
   * Size of the model's context window, in tokens.
   */
  contextWindow: number;
  /**
   * Turns on history compression; leave out to disable it.
   */
  compress?: CompressOptions;
  /**
   * Turns on tool result truncation; leave out to disable it.
   */
  truncate?: TruncateOptions;
  /**
   * Optional tokenizer for precise token counting of messages.
   */
  tokenizer?: (messages: unknown[]) => number;
  /**
   * Hook invoked after a compression has taken place.
   */
  onCompress?: (summary: string, truncatedCount: number) => void;
}
|
|
39
|
+
//#endregion
|
|
40
|
+
//#region src/adapter.d.ts
|
|
41
|
+
/**
 * Translates an AI SDK V3 prompt into context-chef IR messages.
 *
 * - `_originalContent` holds the original AI SDK content for a lossless round-trip.
 * - `_originalText` caches the extracted text so `toAISDK` can detect Janitor modifications.
 * - `_providerOptions` keeps message-level provider options (e.g. Anthropic cache control).
 *
 * @param prompt The AI SDK V3 prompt to convert.
 * @returns The equivalent IR messages.
 */
declare function fromAISDK(prompt: LanguageModelV3Prompt): Message[];
|
|
49
|
+
/**
 * Translates context-chef IR messages back into the AI SDK V3 prompt format.
 *
 * `_originalContent` is reused while the content is unmodified (detected via `_originalText`).
 * Otherwise the prompt message is constructed from the IR fields: this covers content
 * modified by Janitor (e.g. compact() cleared tool results) and new messages
 * (e.g. compression summaries).
 *
 * @param messages The IR messages to convert.
 * @returns The equivalent AI SDK V3 prompt.
 */
declare function toAISDK(messages: Message[]): LanguageModelV3Prompt;
|
|
57
|
+
//#endregion
|
|
58
|
+
//#region src/middleware.d.ts
|
|
59
|
+
/**
 * Builds a LanguageModelMiddleware that transparently applies context-chef
 * compression and truncation to AI SDK model calls.
 *
 * The middleware owns a stateful Janitor instance which tracks token usage
 * across calls to drive compression decisions.
 *
 * @param options Middleware configuration.
 * @returns The configured middleware.
 */
declare function createMiddleware(options: ContextChefOptions): LanguageModelMiddleware;
|
|
67
|
+
//#endregion
|
|
68
|
+
//#region src/index.d.ts
|
|
69
|
+
/**
 * Wraps an AI SDK language model with the context-chef middleware, giving it
 * transparent history compression, tool result truncation, and token budget management.
 *
 * @param model The language model to wrap.
 * @param options Middleware configuration.
 * @returns A language model that can be used in place of `model`.
 *
 * @example
 * ```typescript
 * import { withContextChef } from '@context-chef/ai-sdk-middleware';
 * import { openai } from '@ai-sdk/openai';
 * import { generateText } from 'ai';
 *
 * const model = withContextChef(openai('gpt-4o'), {
 *   contextWindow: 128_000,
 *   compress: { model: openai('gpt-4o-mini') },
 *   truncate: { threshold: 5000, headChars: 500, tailChars: 1000 },
 * });
 *
 * // Use exactly like normal — zero other code changes
 * const result = await generateText({ model, messages, tools });
 * ```
 */
declare function withContextChef(model: LanguageModelV3, options: ContextChefOptions): LanguageModelV3;
|
|
90
|
+
//#endregion
|
|
91
|
+
export { type CompressOptions, type ContextChefOptions, type TruncateOptions, createMiddleware, fromAISDK, toAISDK, withContextChef };
|
|
92
|
+
//# sourceMappingURL=index.d.mts.map
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
import { generateText, wrapLanguageModel } from "ai";
|
|
2
|
+
import { Janitor, Offloader } from "@context-chef/core";
|
|
3
|
+
|
|
4
|
+
//#region src/adapter.ts
|
|
5
|
+
/**
 * Converts an AI SDK V3 prompt to context-chef IR messages.
 *
 * Bookkeeping fields attached to the IR messages:
 * - `_originalContent`: the untouched AI SDK content parts, kept for a lossless round-trip.
 * - `_originalText`: the text extracted at conversion time, so a later pass can tell
 *   whether `content` was rewritten.
 * - `_providerOptions`: message-level provider options (e.g. Anthropic cache control),
 *   recorded for every role, tool messages included.
 *
 * A tool message yields one IR message per `tool-result` part; any other part type
 * inside a tool message has no IR representation and is skipped.
 *
 * @param prompt AI SDK V3 prompt (array of role-tagged messages).
 * @returns IR messages, in prompt order.
 */
function fromAISDK(prompt) {
  const messages = [];
  for (const msg of prompt) {
    // Spread helper: the key is only present when the source message carried options.
    const providerOptions = msg.providerOptions ? { _providerOptions: msg.providerOptions } : {};

    if (msg.role === "system") {
      messages.push({
        role: "system",
        content: msg.content,
        ...providerOptions,
      });
      continue;
    }

    if (msg.role === "user") {
      // Non-text parts are not part of the IR text; they survive only via _originalContent.
      const text = msg.content
        .filter((p) => p.type === "text")
        .map((p) => p.text)
        .join("\n");
      messages.push({
        role: "user",
        content: text,
        _originalContent: msg.content,
        _originalText: text,
        ...providerOptions,
      });
      continue;
    }

    if (msg.role === "assistant") {
      const textParts = [];
      const toolCalls = [];
      let thinking;
      for (const part of msg.content) {
        if (part.type === "text") {
          textParts.push(part.text);
        } else if (part.type === "tool-call") {
          toolCalls.push({
            id: part.toolCallId,
            type: "function",
            function: {
              name: part.toolName,
              // The IR stores arguments as a string; already-serialized input is kept verbatim.
              arguments: typeof part.input === "string" ? part.input : JSON.stringify(part.input),
            },
          });
        } else if (part.type === "reasoning") {
          // When several reasoning parts exist, the last one wins.
          thinking = { thinking: part.text };
        }
      }
      const joinedText = textParts.join("\n");
      const irMessage = {
        role: "assistant",
        content: joinedText,
        _originalContent: msg.content,
        _originalText: joinedText,
        ...providerOptions,
      };
      if (toolCalls.length > 0) irMessage.tool_calls = toolCalls;
      if (thinking) irMessage.thinking = thinking;
      messages.push(irMessage);
      continue;
    }

    if (msg.role === "tool") {
      for (const part of msg.content) {
        if (part.type !== "tool-result") continue;
        const text = stringifyToolOutput(part.output);
        messages.push({
          role: "tool",
          content: text,
          tool_call_id: part.toolCallId,
          _originalContent: [part],
          _originalText: text,
          _toolName: part.toolName,
          // Previously dropped for tool messages, unlike every other role.
          ...providerOptions,
        });
      }
    }
  }
  return messages;
}
|
|
77
|
+
/**
 * Converts context-chef IR messages back to AI SDK V3 prompt format.
 *
 * A message whose `content` still equals its `_originalText` is restored from
 * `_originalContent`. Otherwise (content was rewritten, or the message is new, e.g. a
 * compression summary) the prompt message is rebuilt from the IR fields:
 * - user: a single text part;
 * - assistant: a text part plus one `tool-call` part per entry in `tool_calls`, so the
 *   tool results that follow keep a matching call;
 * - tool: a text `tool-result` part.
 *
 * Consecutive tool IR messages that share the same `_providerOptions` reference are merged
 * into one tool prompt message, which receives those provider options.
 * Messages with an unrecognised role are skipped.
 *
 * @param messages IR messages.
 * @returns AI SDK V3 prompt.
 */
function toAISDK(messages) {
  const prompt = [];
  let i = 0;
  while (i < messages.length) {
    const msg = messages[i];
    const providerOptions = msg._providerOptions;
    const extra = providerOptions ? { providerOptions } : {};
    const contentModified = msg._originalText !== undefined && msg._originalText !== msg.content;
    const reuseOriginal = !contentModified && Array.isArray(msg._originalContent);

    switch (msg.role) {
      case "system":
        prompt.push({ role: "system", content: msg.content, ...extra });
        i++;
        break;

      case "user":
        prompt.push({
          role: "user",
          content: reuseOriginal ? msg._originalContent : [{ type: "text", text: msg.content }],
          ...extra,
        });
        i++;
        break;

      case "assistant":
        prompt.push({
          role: "assistant",
          content: reuseOriginal ? msg._originalContent : rebuildAssistantContent(msg),
          ...extra,
        });
        i++;
        break;

      case "tool": {
        const toolResults = [];
        // The first iteration always matches `msg` itself, so `i` is guaranteed to advance.
        while (
          i < messages.length &&
          messages[i].role === "tool" &&
          messages[i]._providerOptions === providerOptions
        ) {
          const toolMsg = messages[i];
          const toolModified =
            toolMsg._originalText !== undefined && toolMsg._originalText !== toolMsg.content;
          if (!toolModified && Array.isArray(toolMsg._originalContent)) {
            toolResults.push(...toolMsg._originalContent);
          } else {
            toolResults.push({
              type: "tool-result",
              toolCallId: toolMsg.tool_call_id ?? "",
              toolName: toolMsg._toolName ?? "unknown",
              output: { type: "text", value: toolMsg.content },
            });
          }
          i++;
        }
        prompt.push({ role: "tool", content: toolResults, ...extra });
        break;
      }

      default:
        i++;
    }
  }
  return prompt;
}

/** Builds assistant content parts from IR fields when the original parts cannot be reused. */
function rebuildAssistantContent(msg) {
  const toolCalls = Array.isArray(msg.tool_calls) ? msg.tool_calls : [];
  const parts = [];
  // Skip an empty text part when tool calls already give the message content.
  if (msg.content || toolCalls.length === 0) {
    parts.push({ type: "text", text: msg.content });
  }
  for (const call of toolCalls) {
    parts.push({
      type: "tool-call",
      toolCallId: call.id,
      toolName: call.function.name,
      input: parseToolCallArguments(call.function.arguments),
    });
  }
  return parts;
}

/** Parses serialized tool-call arguments; returns the raw value when it is not valid JSON. */
function parseToolCallArguments(args) {
  if (typeof args !== "string") return args;
  try {
    return JSON.parse(args);
  } catch {
    return args;
  }
}
|
|
152
|
+
/**
 * Flattens a tool-result output into the plain text stored on an IR message.
 *
 * - `text` / `error-text`: the value as-is.
 * - `json` / `error-json`: the JSON serialization of the value.
 * - `content`: the text of every text entry, joined with newlines; other entries are ignored.
 * - anything else: the JSON serialization of the whole output object.
 *
 * @param output Tool-result output object (discriminated by `type`).
 * @returns The text form; always a string.
 */
function stringifyToolOutput(output) {
  switch (output.type) {
    case "text":
    case "error-text":
      return output.value;
    case "json":
    case "error-json":
      // JSON.stringify yields undefined for values it cannot serialize (e.g. undefined);
      // fall back to an empty string so the result is always a string.
      return JSON.stringify(output.value) ?? "";
    case "content":
      return output.value
        .map((entry) => (entry.type === "text" ? entry.text ?? "" : ""))
        .filter(Boolean)
        .join("\n");
    default:
      return JSON.stringify(output);
  }
}
|
|
162
|
+
|
|
163
|
+
//#endregion
|
|
164
|
+
//#region src/truncator.ts
|
|
165
|
+
/**
 * Truncates tool-result content within an AI SDK prompt when it exceeds the configured threshold.
 *
 * A tool result is left untouched when its extracted text is no longer than `threshold`,
 * or when `headChars + tailChars` already covers the whole text. Otherwise its output is
 * replaced by a textual output:
 * - with a storage adapter, the text is handed to an Offloader and the content it
 *   returns is used; if that fails, a warning is logged and simple truncation is used;
 * - without one, the head and tail are kept around a `--- truncated ... ---` marker and the
 *   original is discarded.
 *
 * Outputs of type `error-text` / `error-json` are emitted as `error-text` so the result
 * is still marked as an error after truncation; everything else becomes `text`.
 * Non-tool messages and non-`tool-result` parts are passed through by reference.
 *
 * @param prompt AI SDK prompt.
 * @param options `{ threshold, headChars = 0, tailChars = 1000, storage }`.
 * @returns A new prompt array; tool messages are shallow copies with new content arrays.
 */
async function truncateToolResults(prompt, options) {
  const { threshold, headChars = 0, tailChars = 1e3, storage } = options;
  const offloader = storage
    ? new Offloader({
        threshold,
        adapter: storage,
        storageDir: "",
      })
    : null;

  const result = [];
  for (const msg of prompt) {
    if (msg.role !== "tool") {
      result.push(msg);
      continue;
    }

    const newContent = [];
    for (const part of msg.content) {
      if (part.type !== "tool-result") {
        newContent.push(part);
        continue;
      }

      const text = extractText(part.output);
      if (text.length <= threshold || headChars + tailChars >= text.length) {
        newContent.push(part);
        continue;
      }

      // Keep the error marker: rewriting an error output as plain text would make a
      // failed tool call look successful.
      const isError = part.output.type === "error-text" || part.output.type === "error-json";
      const outputType = isError ? "error-text" : "text";

      if (offloader) {
        try {
          const vfsResult = await offloader.offloadAsync(text, {
            threshold,
            headChars,
            tailChars,
          });
          newContent.push({
            ...part,
            output: { type: outputType, value: vfsResult.content },
          });
          continue;
        } catch (error) {
          console.warn(`[context-chef] Storage adapter write failed for tool result (${part.toolCallId}). Falling back to simple truncation. Error: ${error instanceof Error ? error.message : String(error)}`);
          // Fall through to simple truncation below.
        }
      }

      // Simple truncation: the original text is discarded.
      const head = text.slice(0, headChars);
      const tail = text.slice(text.length - tailChars);
      const totalLines = text.split("\n").length;
      const truncated = [
        head,
        `\n--- truncated (${totalLines} lines, ${text.length} chars total) ---\n`,
        tail,
      ]
        .filter(Boolean)
        .join("")
        .trim();

      newContent.push({
        ...part,
        output: { type: outputType, value: truncated },
      });
    }

    result.push({ ...msg, content: newContent });
  }
  return result;
}
|
|
232
|
+
/**
 * Extracts the text of a tool-result output so its length can be measured.
 *
 * - `text` / `error-text`: the value as-is.
 * - `json` / `error-json`: the JSON serialization of the value.
 * - `content`: the text of every text entry, joined with newlines; other entries are ignored.
 * - anything else: an empty string.
 *
 * @param output Tool-result output object (discriminated by `type`).
 * @returns The extracted text; always a string.
 */
function extractText(output) {
  switch (output.type) {
    case "text":
    case "error-text":
      return output.value;
    case "json":
    case "error-json":
      // JSON.stringify yields undefined for values it cannot serialize (e.g. undefined);
      // fall back to an empty string so callers can safely read `.length`.
      return JSON.stringify(output.value) ?? "";
    case "content":
      return output.value
        .map((entry) => (entry.type === "text" ? entry.text ?? "" : ""))
        .filter(Boolean)
        .join("\n");
    default:
      return "";
  }
}
|
|
242
|
+
|
|
243
|
+
//#endregion
|
|
244
|
+
//#region src/middleware.ts
|
|
245
|
+
/**
 * Builds the language-model middleware that applies context-chef truncation and
 * compression to every call made through the wrapped model.
 *
 * One Janitor instance is created per middleware and shared by all calls; the input-token
 * totals reported by generate and stream calls are fed back into it.
 *
 * @param options Middleware configuration (context window, compress/truncate settings,
 *   optional tokenizer and onCompress hook).
 * @returns A v3 middleware with `transformParams`, `wrapGenerate` and `wrapStream`.
 */
function createMiddleware(options) {
  // Ensures the "usage missing" warning is printed at most once per middleware.
  let usageWarned = false;

  const janitor = new Janitor({
    contextWindow: options.contextWindow,
    tokenizer: options.tokenizer ? (msgs) => options.tokenizer?.(msgs) ?? 0 : void 0,
    preserveRatio: options.compress?.preserveRatio ?? .8,
    compressionModel: options.compress?.model ? createCompressionAdapter(options.compress.model) : void 0,
    onCompress: options.onCompress ? (summary, count) => options.onCompress?.(summary.content, count) : void 0
  });

  // Feeds a reported input-token total to the Janitor, or warns once when the total is
  // absent and no tokenizer was configured.
  const recordInputTokens = (total, missingUsageWarning) => {
    if (total != null) {
      janitor.feedTokenUsage(total);
      return;
    }
    if (usageWarned || options.tokenizer) return;
    usageWarned = true;
    console.warn(missingUsageWarning);
  };

  return {
    specificationVersion: "v3",

    transformParams: async ({ params }) => {
      // Step 1: shorten oversized tool results, if truncation is configured.
      const truncated = options.truncate
        ? await truncateToolResults(params.prompt, options.truncate)
        : params.prompt;

      // Step 2: let the Janitor compress the history.
      const irMessages = fromAISDK(truncated);
      const compressed = await janitor.compress(irMessages);

      // Convert back only when the Janitor returned a different array.
      const prompt = compressed === irMessages ? truncated : toAISDK(compressed);
      return {
        ...params,
        prompt
      };
    },

    wrapGenerate: async ({ doGenerate }) => {
      const result = await doGenerate();
      recordInputTokens(
        result.usage?.inputTokens?.total,
        "[context-chef] Model response did not include usage.inputTokens.total. Token-based compression may not trigger accurately. Consider providing a tokenizer for precise token counting."
      );
      return result;
    },

    wrapStream: async ({ doStream }) => {
      const { stream, ...rest } = await doStream();
      // Pass-through transform that only inspects the final `finish` chunk.
      const usageTap = new TransformStream({
        transform(chunk, controller) {
          if (chunk.type === "finish") {
            recordInputTokens(
              chunk.usage?.inputTokens?.total,
              "[context-chef] Stream finish did not include usage.inputTokens.total. Token-based compression may not trigger accurately. Consider providing a tokenizer for precise token counting."
            );
          }
          controller.enqueue(chunk);
        }
      });
      return {
        ...rest,
        stream: stream.pipeThrough(usageTap)
      };
    }
  };
}
|
|
302
|
+
/**
 * Turns an AI SDK language model into the `compressionModel` callback handed to the
 * Janitor: `(messages) => Promise<string>`.
 *
 * Before calling `generateText`, IR messages are flattened to plain role/content pairs:
 * tool results become user messages and assistant tool calls are rendered as text.
 *
 * @param model Language model used to produce the summary.
 * @returns Async callback resolving to the generated text, or a placeholder string when
 *   the model produced no text.
 */
function createCompressionAdapter(model) {
  const describeToolCall = (tc) => `[Called tool: ${tc.function.name}(${tc.function.arguments})]`;

  const flatten = (m) => {
    if (m.role === "tool") {
      const idSuffix = m.tool_call_id ? ` (${m.tool_call_id})` : "";
      return {
        role: "user",
        content: `[Tool result${idSuffix}: ${m.content}]`
      };
    }

    if (m.role === "assistant" && m.tool_calls?.length) {
      // Append the tool calls to whatever text the assistant produced.
      const toolCallsDesc = m.tool_calls.map((tc) => describeToolCall(tc)).join("\n");
      const content = m.content ? `${m.content}\n${toolCallsDesc}` : toolCallsDesc;
      return { role: "assistant", content };
    }

    return {
      role: m.role,
      content: m.content
    };
  };

  return async (messages) => {
    const formatted = messages.map((m) => flatten(m));
    const { text } = await generateText({
      model,
      messages: formatted,
      maxOutputTokens: 2048
    });
    return text || "[Compression produced no output]";
  };
}
|
|
335
|
+
|
|
336
|
+
//#endregion
|
|
337
|
+
//#region src/index.ts
|
|
338
|
+
/**
 * Returns `model` wrapped (via `wrapLanguageModel`) with the middleware that
 * `createMiddleware(options)` builds, so history compression, tool result
 * truncation, and token budget management are applied to calls made through
 * the returned model.
 *
 * @example
 * ```typescript
 * import { withContextChef } from '@context-chef/ai-sdk-middleware';
 * import { openai } from '@ai-sdk/openai';
 * import { generateText } from 'ai';
 *
 * const model = withContextChef(openai('gpt-4o'), {
 *   contextWindow: 128_000,
 *   compress: { model: openai('gpt-4o-mini') },
 *   truncate: { threshold: 5000, headChars: 500, tailChars: 1000 },
 * });
 *
 * // The wrapped model is passed to generateText like any other model.
 * const result = await generateText({ model, messages, tools });
 * ```
 *
 * @param model - The AI SDK language model to wrap.
 * @param options - Options forwarded unchanged to `createMiddleware`.
 * @returns The value returned by `wrapLanguageModel`.
 */
function withContextChef(model, options) {
  const middleware = createMiddleware(options);
  return wrapLanguageModel({ model, middleware });
}
|
|
364
|
+
|
|
365
|
+
//#endregion
|
|
366
|
+
export { createMiddleware, fromAISDK, toAISDK, withContextChef };
|
|
367
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../src/adapter.ts","../src/truncator.ts","../src/middleware.ts","../src/index.ts"],"sourcesContent":["import type {\n LanguageModelV3Prompt,\n LanguageModelV3ToolResultOutput,\n LanguageModelV3ToolResultPart,\n SharedV3ProviderOptions,\n} from '@ai-sdk/provider';\nimport type { Message, ToolCall } from '@context-chef/core';\n\n/**\n * Converts an AI SDK V3 prompt to context-chef IR messages.\n *\n * Original AI SDK content is stored in `_originalContent` for lossless round-trip.\n * `_originalText` caches the extracted text so `toAISDK` can detect Janitor modifications.\n * `_providerOptions` preserves message-level provider options (e.g. Anthropic cache control).\n */\nexport function fromAISDK(prompt: LanguageModelV3Prompt): Message[] {\n const messages: Message[] = [];\n\n for (const msg of prompt) {\n if (msg.role === 'system') {\n messages.push({\n role: 'system',\n content: msg.content,\n ...(msg.providerOptions ? { _providerOptions: msg.providerOptions } : {}),\n });\n continue;\n }\n\n if (msg.role === 'user') {\n const text = msg.content\n .filter((p) => p.type === 'text')\n .map((p) => p.text)\n .join('\\n');\n messages.push({\n role: 'user',\n content: text,\n _originalContent: msg.content,\n _originalText: text,\n ...(msg.providerOptions ? { _providerOptions: msg.providerOptions } : {}),\n });\n continue;\n }\n\n if (msg.role === 'assistant') {\n const text: string[] = [];\n const toolCalls: ToolCall[] = [];\n let thinking: { thinking: string } | undefined;\n\n for (const part of msg.content) {\n if (part.type === 'text') text.push(part.text);\n else if (part.type === 'tool-call') {\n toolCalls.push({\n id: part.toolCallId,\n type: 'function',\n function: {\n name: part.toolName,\n arguments: typeof part.input === 'string' ? 
part.input : JSON.stringify(part.input),\n },\n });\n } else if (part.type === 'reasoning') {\n thinking = { thinking: part.text };\n }\n }\n\n const joinedText = text.join('\\n');\n const m: Message = {\n role: 'assistant',\n content: joinedText,\n _originalContent: msg.content,\n _originalText: joinedText,\n ...(msg.providerOptions ? { _providerOptions: msg.providerOptions } : {}),\n };\n if (toolCalls.length > 0) m.tool_calls = toolCalls;\n if (thinking) m.thinking = thinking;\n messages.push(m);\n continue;\n }\n\n if (msg.role === 'tool') {\n for (const part of msg.content) {\n if (part.type === 'tool-result') {\n const text = stringifyToolOutput(part.output);\n messages.push({\n role: 'tool',\n content: text,\n tool_call_id: part.toolCallId,\n _originalContent: [part],\n _originalText: text,\n _toolName: part.toolName,\n });\n }\n }\n }\n }\n\n return messages;\n}\n\n/**\n * Converts context-chef IR messages back to AI SDK V3 prompt format.\n *\n * Uses `_originalContent` when content is unmodified (detected via `_originalText`).\n * Falls back to constructing from IR fields when content was modified by Janitor\n * (e.g. compact() cleared tool results) or for new messages (e.g. compression summaries).\n */\nexport function toAISDK(messages: Message[]): LanguageModelV3Prompt {\n const prompt: LanguageModelV3Prompt = [];\n\n let i = 0;\n while (i < messages.length) {\n const msg = messages[i];\n const providerOptions = msg._providerOptions as SharedV3ProviderOptions | undefined;\n const contentModified = msg._originalText !== undefined && msg._originalText !== msg.content;\n\n if (msg.role === 'system') {\n prompt.push({\n role: 'system',\n content: msg.content,\n ...(providerOptions ? { providerOptions } : {}),\n });\n i++;\n continue;\n }\n\n if (msg.role === 'user') {\n const content =\n !contentModified && Array.isArray(msg._originalContent)\n ? 
(msg._originalContent as any)\n : [{ type: 'text' as const, text: msg.content }];\n prompt.push({\n role: 'user',\n content,\n ...(providerOptions ? { providerOptions } : {}),\n });\n i++;\n continue;\n }\n\n if (msg.role === 'assistant') {\n const content =\n !contentModified && Array.isArray(msg._originalContent)\n ? (msg._originalContent as any)\n : [{ type: 'text' as const, text: msg.content }];\n prompt.push({\n role: 'assistant',\n content,\n ...(providerOptions ? { providerOptions } : {}),\n });\n i++;\n continue;\n }\n\n if (msg.role === 'tool') {\n const toolResults: LanguageModelV3ToolResultPart[] = [];\n while (i < messages.length && messages[i].role === 'tool') {\n const toolMsg = messages[i];\n const toolModified =\n toolMsg._originalText !== undefined && toolMsg._originalText !== toolMsg.content;\n\n if (!toolModified && toolMsg._originalContent) {\n toolResults.push(...(toolMsg._originalContent as LanguageModelV3ToolResultPart[]));\n } else {\n toolResults.push({\n type: 'tool-result',\n toolCallId: toolMsg.tool_call_id ?? '',\n toolName: (toolMsg._toolName as string) ?? 'unknown',\n output: { type: 'text', value: toolMsg.content },\n });\n }\n i++;\n }\n prompt.push({ role: 'tool', content: toolResults });\n continue;\n }\n\n i++;\n }\n\n return prompt;\n}\n\nfunction stringifyToolOutput(output: LanguageModelV3ToolResultOutput): string {\n switch (output.type) {\n case 'text':\n case 'error-text':\n return output.value;\n case 'json':\n case 'error-json':\n return JSON.stringify(output.value);\n case 'content':\n return output.value\n .map((v: { type: string; text?: string }) => (v.type === 'text' ? (v.text ?? 
'') : ''))\n .filter(Boolean)\n .join('\\n');\n default:\n return JSON.stringify(output);\n }\n}\n","import type {\n LanguageModelV3Prompt,\n LanguageModelV3ToolResultOutput,\n LanguageModelV3ToolResultPart,\n} from '@ai-sdk/provider';\nimport { Offloader } from '@context-chef/core';\nimport type { TruncateOptions } from './types';\n\n/**\n * Truncates tool-result content within an AI SDK prompt when it exceeds the configured threshold.\n * When a storage adapter is provided, original content is persisted and a URI is included in the output.\n */\nexport async function truncateToolResults(\n prompt: LanguageModelV3Prompt,\n options: TruncateOptions,\n): Promise<LanguageModelV3Prompt> {\n const { threshold, headChars = 0, tailChars = 1000, storage } = options;\n\n const offloader = storage\n ? new Offloader({ threshold, adapter: storage, storageDir: '' })\n : null;\n\n const result: LanguageModelV3Prompt = [];\n\n for (const msg of prompt) {\n if (msg.role !== 'tool') {\n result.push(msg);\n continue;\n }\n\n const newContent: typeof msg.content = [];\n\n for (const part of msg.content) {\n if (part.type !== 'tool-result') {\n newContent.push(part);\n continue;\n }\n\n const text = extractText(part.output);\n if (text.length <= threshold || headChars + tailChars >= text.length) {\n newContent.push(part);\n continue;\n }\n\n // With storage: use Offloader to persist original and get a URI-annotated truncation\n if (offloader) {\n try {\n const vfsResult = await offloader.offloadAsync(text, { threshold, headChars, tailChars });\n newContent.push({\n ...part,\n output: { type: 'text', value: vfsResult.content } satisfies LanguageModelV3ToolResultOutput,\n } satisfies LanguageModelV3ToolResultPart);\n continue;\n } catch (error) {\n console.warn(\n `[context-chef] Storage adapter write failed for tool result (${part.toolCallId}). ` +\n `Falling back to simple truncation. Error: ${error instanceof Error ? 
error.message : String(error)}`,\n );\n // Fall through to simple truncation below\n }\n }\n\n // Without storage: simple truncation, original is discarded\n const head = text.slice(0, headChars);\n const tail = text.slice(text.length - tailChars);\n const totalLines = text.split('\\n').length;\n\n const truncated = [\n head,\n `\\n--- truncated (${totalLines} lines, ${text.length} chars total) ---\\n`,\n tail,\n ]\n .filter(Boolean)\n .join('')\n .trim();\n\n newContent.push({\n ...part,\n output: { type: 'text', value: truncated } satisfies LanguageModelV3ToolResultOutput,\n } satisfies LanguageModelV3ToolResultPart);\n }\n\n result.push({ ...msg, content: newContent });\n }\n\n return result;\n}\n\nfunction extractText(output: LanguageModelV3ToolResultOutput): string {\n switch (output.type) {\n case 'text':\n case 'error-text':\n return output.value;\n case 'json':\n case 'error-json':\n return JSON.stringify(output.value);\n case 'content':\n return output.value\n .map((v: { type: string; text?: string }) => (v.type === 'text' ? (v.text ?? 
'') : ''))\n .filter(Boolean)\n .join('\\n');\n default:\n return '';\n }\n}\n","import type { LanguageModelV3, LanguageModelV3StreamPart } from '@ai-sdk/provider';\nimport { generateText, type LanguageModelMiddleware } from 'ai';\nimport { Janitor, type Message } from '@context-chef/core';\n\nimport { fromAISDK, toAISDK } from './adapter';\nimport { truncateToolResults } from './truncator';\nimport type { ContextChefOptions } from './types';\n\n/**\n * Creates a LanguageModelMiddleware that transparently applies\n * context-chef compression and truncation to AI SDK model calls.\n *\n * The middleware holds a stateful Janitor instance that tracks\n * token usage across calls for compression decisions.\n */\nexport function createMiddleware(options: ContextChefOptions): LanguageModelMiddleware {\n let usageWarned = false;\n\n const janitor = new Janitor({\n contextWindow: options.contextWindow,\n tokenizer: options.tokenizer ? (msgs: Message[]) => options.tokenizer?.(msgs) ?? 0 : undefined,\n preserveRatio: options.compress?.preserveRatio ?? 0.8,\n compressionModel: options.compress?.model\n ? createCompressionAdapter(options.compress.model)\n : undefined,\n onCompress: options.onCompress\n ? (summary, count) => options.onCompress?.(summary.content, count)\n : undefined,\n });\n\n return {\n specificationVersion: 'v3',\n\n transformParams: async ({ params }) => {\n let { prompt } = params;\n\n // 1. Truncate large tool results\n if (options.truncate) {\n prompt = await truncateToolResults(prompt, options.truncate);\n }\n\n // 2. 
Compress history if over token budget\n const irMessages = fromAISDK(prompt);\n const compressed = await janitor.compress(irMessages);\n\n // Only convert back if compression actually changed something\n if (compressed !== irMessages) {\n prompt = toAISDK(compressed);\n }\n\n return { ...params, prompt };\n },\n\n wrapGenerate: async ({ doGenerate }) => {\n const result = await doGenerate();\n\n if (result.usage?.inputTokens?.total != null) {\n janitor.feedTokenUsage(result.usage.inputTokens.total);\n } else if (!usageWarned && !options.tokenizer) {\n usageWarned = true;\n console.warn(\n '[context-chef] Model response did not include usage.inputTokens.total. ' +\n 'Token-based compression may not trigger accurately. ' +\n 'Consider providing a tokenizer for precise token counting.',\n );\n }\n\n return result;\n },\n\n wrapStream: async ({ doStream }) => {\n const { stream, ...rest } = await doStream();\n\n const transform = new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({\n transform(chunk, controller) {\n if (chunk.type === 'finish') {\n if (chunk.usage?.inputTokens?.total != null) {\n janitor.feedTokenUsage(chunk.usage.inputTokens.total);\n } else if (!usageWarned && !options.tokenizer) {\n usageWarned = true;\n console.warn(\n '[context-chef] Stream finish did not include usage.inputTokens.total. ' +\n 'Token-based compression may not trigger accurately. 
' +\n 'Consider providing a tokenizer for precise token counting.',\n );\n }\n }\n controller.enqueue(chunk);\n },\n });\n\n return { ...rest, stream: stream.pipeThrough(transform) };\n },\n };\n}\n\n/**\n * Adapts an AI SDK LanguageModelV3 into the compressionModel callback\n * that Janitor expects: (messages: Message[]) => Promise<string>\n *\n * Tool messages are converted to user messages describing the tool interaction,\n * since generateText only accepts system/user/assistant roles.\n */\nfunction createCompressionAdapter(model: LanguageModelV3): (messages: Message[]) => Promise<string> {\n return async (messages: Message[]): Promise<string> => {\n const formatted = messages.map((m) => {\n if (m.role === 'tool') {\n return {\n role: 'user' as const,\n content: `[Tool result${m.tool_call_id ? ` (${m.tool_call_id})` : ''}: ${m.content}]`,\n };\n }\n // assistant messages with tool_calls: include tool call info in content\n if (m.role === 'assistant' && m.tool_calls?.length) {\n const toolCallsDesc = m.tool_calls\n .map((tc) => `[Called tool: ${tc.function.name}(${tc.function.arguments})]`)\n .join('\\n');\n return {\n role: 'assistant' as const,\n content: m.content ? 
`${m.content}\\n${toolCallsDesc}` : toolCallsDesc,\n };\n }\n return {\n role: m.role as 'system' | 'user' | 'assistant',\n content: m.content,\n };\n });\n\n const { text } = await generateText({\n model,\n messages: formatted,\n maxOutputTokens: 2048,\n });\n\n return text || '[Compression produced no output]';\n };\n}\n","import type { LanguageModelV3 } from '@ai-sdk/provider';\nimport { wrapLanguageModel } from 'ai';\n\nimport { createMiddleware } from './middleware';\nimport type { ContextChefOptions } from './types';\n\nexport { fromAISDK, toAISDK } from './adapter';\nexport { createMiddleware } from './middleware';\nexport type { CompressOptions, ContextChefOptions, TruncateOptions } from './types';\n\n/**\n * Wraps an AI SDK language model with context-chef middleware for\n * transparent history compression, tool result truncation, and token budget management.\n *\n * @example\n * ```typescript\n * import { withContextChef } from '@context-chef/ai-sdk-middleware';\n * import { openai } from '@ai-sdk/openai';\n * import { generateText } from 'ai';\n *\n * const model = withContextChef(openai('gpt-4o'), {\n * contextWindow: 128_000,\n * compress: { model: openai('gpt-4o-mini') },\n * truncate: { threshold: 5000, headChars: 500, tailChars: 1000 },\n * });\n *\n * // Use exactly like normal — zero other code changes\n * const result = await generateText({ model, messages, tools });\n * ```\n */\nexport function withContextChef(model: LanguageModelV3, options: ContextChefOptions): LanguageModelV3 {\n const middleware = createMiddleware(options);\n return wrapLanguageModel({ model, middleware 
});\n}\n"],"mappings":";;;;;;;;;;;AAeA,SAAgB,UAAU,QAA0C;CAClE,MAAM,WAAsB,EAAE;AAE9B,MAAK,MAAM,OAAO,QAAQ;AACxB,MAAI,IAAI,SAAS,UAAU;AACzB,YAAS,KAAK;IACZ,MAAM;IACN,SAAS,IAAI;IACb,GAAI,IAAI,kBAAkB,EAAE,kBAAkB,IAAI,iBAAiB,GAAG,EAAE;IACzE,CAAC;AACF;;AAGF,MAAI,IAAI,SAAS,QAAQ;GACvB,MAAM,OAAO,IAAI,QACd,QAAQ,MAAM,EAAE,SAAS,OAAO,CAChC,KAAK,MAAM,EAAE,KAAK,CAClB,KAAK,KAAK;AACb,YAAS,KAAK;IACZ,MAAM;IACN,SAAS;IACT,kBAAkB,IAAI;IACtB,eAAe;IACf,GAAI,IAAI,kBAAkB,EAAE,kBAAkB,IAAI,iBAAiB,GAAG,EAAE;IACzE,CAAC;AACF;;AAGF,MAAI,IAAI,SAAS,aAAa;GAC5B,MAAM,OAAiB,EAAE;GACzB,MAAM,YAAwB,EAAE;GAChC,IAAI;AAEJ,QAAK,MAAM,QAAQ,IAAI,QACrB,KAAI,KAAK,SAAS,OAAQ,MAAK,KAAK,KAAK,KAAK;YACrC,KAAK,SAAS,YACrB,WAAU,KAAK;IACb,IAAI,KAAK;IACT,MAAM;IACN,UAAU;KACR,MAAM,KAAK;KACX,WAAW,OAAO,KAAK,UAAU,WAAW,KAAK,QAAQ,KAAK,UAAU,KAAK,MAAM;KACpF;IACF,CAAC;YACO,KAAK,SAAS,YACvB,YAAW,EAAE,UAAU,KAAK,MAAM;GAItC,MAAM,aAAa,KAAK,KAAK,KAAK;GAClC,MAAM,IAAa;IACjB,MAAM;IACN,SAAS;IACT,kBAAkB,IAAI;IACtB,eAAe;IACf,GAAI,IAAI,kBAAkB,EAAE,kBAAkB,IAAI,iBAAiB,GAAG,EAAE;IACzE;AACD,OAAI,UAAU,SAAS,EAAG,GAAE,aAAa;AACzC,OAAI,SAAU,GAAE,WAAW;AAC3B,YAAS,KAAK,EAAE;AAChB;;AAGF,MAAI,IAAI,SAAS,QACf;QAAK,MAAM,QAAQ,IAAI,QACrB,KAAI,KAAK,SAAS,eAAe;IAC/B,MAAM,OAAO,oBAAoB,KAAK,OAAO;AAC7C,aAAS,KAAK;KACZ,MAAM;KACN,SAAS;KACT,cAAc,KAAK;KACnB,kBAAkB,CAAC,KAAK;KACxB,eAAe;KACf,WAAW,KAAK;KACjB,CAAC;;;;AAMV,QAAO;;;;;;;;;AAUT,SAAgB,QAAQ,UAA4C;CAClE,MAAM,SAAgC,EAAE;CAExC,IAAI,IAAI;AACR,QAAO,IAAI,SAAS,QAAQ;EAC1B,MAAM,MAAM,SAAS;EACrB,MAAM,kBAAkB,IAAI;EAC5B,MAAM,kBAAkB,IAAI,kBAAkB,UAAa,IAAI,kBAAkB,IAAI;AAErF,MAAI,IAAI,SAAS,UAAU;AACzB,UAAO,KAAK;IACV,MAAM;IACN,SAAS,IAAI;IACb,GAAI,kBAAkB,EAAE,iBAAiB,GAAG,EAAE;IAC/C,CAAC;AACF;AACA;;AAGF,MAAI,IAAI,SAAS,QAAQ;GACvB,MAAM,UACJ,CAAC,mBAAmB,MAAM,QAAQ,IAAI,iBAAiB,GAClD,IAAI,mBACL,CAAC;IAAE,MAAM;IAAiB,MAAM,IAAI;IAAS,CAAC;AACpD,UAAO,KAAK;IACV,MAAM;IACN;IACA,GAAI,kBAAkB,EAAE,iBAAiB,GAAG,EAAE;IAC/C,CAAC;AACF;AACA;;AAGF,MAAI,IAAI,SAAS,aAAa;GAC5B,MAAM,UACJ,CAAC,mBAAmB,MAAM,QAAQ,IAAI,iBAAiB,GAClD,IAAI,mBACL,CAAC;IAAE,MAAM;IAAiB,MAAM,IAAI;IAAS,CA
AC;AACpD,UAAO,KAAK;IACV,MAAM;IACN;IACA,GAAI,kBAAkB,EAAE,iBAAiB,GAAG,EAAE;IAC/C,CAAC;AACF;AACA;;AAGF,MAAI,IAAI,SAAS,QAAQ;GACvB,MAAM,cAA+C,EAAE;AACvD,UAAO,IAAI,SAAS,UAAU,SAAS,GAAG,SAAS,QAAQ;IACzD,MAAM,UAAU,SAAS;AAIzB,QAAI,EAFF,QAAQ,kBAAkB,UAAa,QAAQ,kBAAkB,QAAQ,YAEtD,QAAQ,iBAC3B,aAAY,KAAK,GAAI,QAAQ,iBAAqD;QAElF,aAAY,KAAK;KACf,MAAM;KACN,YAAY,QAAQ,gBAAgB;KACpC,UAAW,QAAQ,aAAwB;KAC3C,QAAQ;MAAE,MAAM;MAAQ,OAAO,QAAQ;MAAS;KACjD,CAAC;AAEJ;;AAEF,UAAO,KAAK;IAAE,MAAM;IAAQ,SAAS;IAAa,CAAC;AACnD;;AAGF;;AAGF,QAAO;;AAGT,SAAS,oBAAoB,QAAiD;AAC5E,SAAQ,OAAO,MAAf;EACE,KAAK;EACL,KAAK,aACH,QAAO,OAAO;EAChB,KAAK;EACL,KAAK,aACH,QAAO,KAAK,UAAU,OAAO,MAAM;EACrC,KAAK,UACH,QAAO,OAAO,MACX,KAAK,MAAwC,EAAE,SAAS,SAAU,EAAE,QAAQ,KAAM,GAAI,CACtF,OAAO,QAAQ,CACf,KAAK,KAAK;EACf,QACE,QAAO,KAAK,UAAU,OAAO;;;;;;;;;;ACvLnC,eAAsB,oBACpB,QACA,SACgC;CAChC,MAAM,EAAE,WAAW,YAAY,GAAG,YAAY,KAAM,YAAY;CAEhE,MAAM,YAAY,UACd,IAAI,UAAU;EAAE;EAAW,SAAS;EAAS,YAAY;EAAI,CAAC,GAC9D;CAEJ,MAAM,SAAgC,EAAE;AAExC,MAAK,MAAM,OAAO,QAAQ;AACxB,MAAI,IAAI,SAAS,QAAQ;AACvB,UAAO,KAAK,IAAI;AAChB;;EAGF,MAAM,aAAiC,EAAE;AAEzC,OAAK,MAAM,QAAQ,IAAI,SAAS;AAC9B,OAAI,KAAK,SAAS,eAAe;AAC/B,eAAW,KAAK,KAAK;AACrB;;GAGF,MAAM,OAAO,YAAY,KAAK,OAAO;AACrC,OAAI,KAAK,UAAU,aAAa,YAAY,aAAa,KAAK,QAAQ;AACpE,eAAW,KAAK,KAAK;AACrB;;AAIF,OAAI,UACF,KAAI;IACF,MAAM,YAAY,MAAM,UAAU,aAAa,MAAM;KAAE;KAAW;KAAW;KAAW,CAAC;AACzF,eAAW,KAAK;KACd,GAAG;KACH,QAAQ;MAAE,MAAM;MAAQ,OAAO,UAAU;MAAS;KACnD,CAAyC;AAC1C;YACO,OAAO;AACd,YAAQ,KACN,gEAAgE,KAAK,WAAW,+CACjC,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM,GACtG;;GAML,MAAM,OAAO,KAAK,MAAM,GAAG,UAAU;GACrC,MAAM,OAAO,KAAK,MAAM,KAAK,SAAS,UAAU;GAGhD,MAAM,YAAY;IAChB;IACA,oBAJiB,KAAK,MAAM,KAAK,CAAC,OAIH,UAAU,KAAK,OAAO;IACrD;IACD,CACE,OAAO,QAAQ,CACf,KAAK,GAAG,CACR,MAAM;AAET,cAAW,KAAK;IACd,GAAG;IACH,QAAQ;KAAE,MAAM;KAAQ,OAAO;KAAW;IAC3C,CAAyC;;AAG5C,SAAO,KAAK;GAAE,GAAG;GAAK,SAAS;GAAY,CAAC;;AAG9C,QAAO;;AAGT,SAAS,YAAY,QAAiD;AACpE,SAAQ,OAAO,MAAf;EACE,KAAK;EACL,KAAK,aACH,QAAO,OAAO;EAChB,KAAK;EACL,KAAK,aACH,QAAO,KAAK,UAAU,OAAO,MAAM;EACrC,KAAK,UACH,QAAO,OAAO,MACX,KAAK,MAAwC,EAAE,SA
AS,SAAU,EAAE,QAAQ,KAAM,GAAI,CACtF,OAAO,QAAQ,CACf,KAAK,KAAK;EACf,QACE,QAAO;;;;;;;;;;;;;ACvFb,SAAgB,iBAAiB,SAAsD;CACrF,IAAI,cAAc;CAElB,MAAM,UAAU,IAAI,QAAQ;EAC1B,eAAe,QAAQ;EACvB,WAAW,QAAQ,aAAa,SAAoB,QAAQ,YAAY,KAAK,IAAI,IAAI;EACrF,eAAe,QAAQ,UAAU,iBAAiB;EAClD,kBAAkB,QAAQ,UAAU,QAChC,yBAAyB,QAAQ,SAAS,MAAM,GAChD;EACJ,YAAY,QAAQ,cACf,SAAS,UAAU,QAAQ,aAAa,QAAQ,SAAS,MAAM,GAChE;EACL,CAAC;AAEF,QAAO;EACL,sBAAsB;EAEtB,iBAAiB,OAAO,EAAE,aAAa;GACrC,IAAI,EAAE,WAAW;AAGjB,OAAI,QAAQ,SACV,UAAS,MAAM,oBAAoB,QAAQ,QAAQ,SAAS;GAI9D,MAAM,aAAa,UAAU,OAAO;GACpC,MAAM,aAAa,MAAM,QAAQ,SAAS,WAAW;AAGrD,OAAI,eAAe,WACjB,UAAS,QAAQ,WAAW;AAG9B,UAAO;IAAE,GAAG;IAAQ;IAAQ;;EAG9B,cAAc,OAAO,EAAE,iBAAiB;GACtC,MAAM,SAAS,MAAM,YAAY;AAEjC,OAAI,OAAO,OAAO,aAAa,SAAS,KACtC,SAAQ,eAAe,OAAO,MAAM,YAAY,MAAM;YAC7C,CAAC,eAAe,CAAC,QAAQ,WAAW;AAC7C,kBAAc;AACd,YAAQ,KACN,wLAGD;;AAGH,UAAO;;EAGT,YAAY,OAAO,EAAE,eAAe;GAClC,MAAM,EAAE,QAAQ,GAAG,SAAS,MAAM,UAAU;GAE5C,MAAM,YAAY,IAAI,gBAAsE,EAC1F,UAAU,OAAO,YAAY;AAC3B,QAAI,MAAM,SAAS,UACjB;SAAI,MAAM,OAAO,aAAa,SAAS,KACrC,SAAQ,eAAe,MAAM,MAAM,YAAY,MAAM;cAC5C,CAAC,eAAe,CAAC,QAAQ,WAAW;AAC7C,oBAAc;AACd,cAAQ,KACN,uLAGD;;;AAGL,eAAW,QAAQ,MAAM;MAE5B,CAAC;AAEF,UAAO;IAAE,GAAG;IAAM,QAAQ,OAAO,YAAY,UAAU;IAAE;;EAE5D;;;;;;;;;AAUH,SAAS,yBAAyB,OAAkE;AAClG,QAAO,OAAO,aAAyC;EAwBrD,MAAM,EAAE,SAAS,MAAM,aAAa;GAClC;GACA,UAzBgB,SAAS,KAAK,MAAM;AACpC,QAAI,EAAE,SAAS,OACb,QAAO;KACL,MAAM;KACN,SAAS,eAAe,EAAE,eAAe,KAAK,EAAE,aAAa,KAAK,GAAG,IAAI,EAAE,QAAQ;KACpF;AAGH,QAAI,EAAE,SAAS,eAAe,EAAE,YAAY,QAAQ;KAClD,MAAM,gBAAgB,EAAE,WACrB,KAAK,OAAO,iBAAiB,GAAG,SAAS,KAAK,GAAG,GAAG,SAAS,UAAU,IAAI,CAC3E,KAAK,KAAK;AACb,YAAO;MACL,MAAM;MACN,SAAS,EAAE,UAAU,GAAG,EAAE,QAAQ,IAAI,kBAAkB;MACzD;;AAEH,WAAO;KACL,MAAM,EAAE;KACR,SAAS,EAAE;KACZ;KACD;GAKA,iBAAiB;GAClB,CAAC;AAEF,SAAO,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;ACxGnB,SAAgB,gBAAgB,OAAwB,SAA8C;AAEpG,QAAO,kBAAkB;EAAE;EAAO,YADf,iBAAiB,QAAQ;EACE,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@context-chef/ai-sdk-middleware",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"main": "./dist/index.cjs",
|
|
6
|
+
"module": "./dist/index.mjs",
|
|
7
|
+
"types": "./dist/index.d.mts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.mts",
|
|
11
|
+
"import": "./dist/index.mjs",
|
|
12
|
+
"require": "./dist/index.cjs"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist"
|
|
17
|
+
],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "tsdown",
|
|
20
|
+
"test": "vitest run",
|
|
21
|
+
"typecheck": "tsc -p tsconfig.build.json --noEmit"
|
|
22
|
+
},
|
|
23
|
+
"keywords": [
|
|
24
|
+
"ai-sdk",
|
|
25
|
+
"vercel-ai",
|
|
26
|
+
"context-chef",
|
|
27
|
+
"middleware",
|
|
28
|
+
"context-engineering",
|
|
29
|
+
"llm",
|
|
30
|
+
"compression",
|
|
31
|
+
"token-management"
|
|
32
|
+
],
|
|
33
|
+
"author": "MyPrototypeWhat",
|
|
34
|
+
"license": "ISC",
|
|
35
|
+
"description": "AI SDK middleware for context-chef. Transparent history compression, tool result truncation, and token budget management.",
|
|
36
|
+
"repository": {
|
|
37
|
+
"type": "git",
|
|
38
|
+
"url": "https://github.com/MyPrototypeWhat/context-chef.git",
|
|
39
|
+
"directory": "packages/ai-sdk-middleware"
|
|
40
|
+
},
|
|
41
|
+
"homepage": "https://github.com/MyPrototypeWhat/context-chef#readme",
|
|
42
|
+
"bugs": {
|
|
43
|
+
"url": "https://github.com/MyPrototypeWhat/context-chef/issues"
|
|
44
|
+
},
|
|
45
|
+
"dependencies": {
|
|
46
|
+
"@context-chef/core": "workspace:*"
|
|
47
|
+
},
|
|
48
|
+
"peerDependencies": {
|
|
49
|
+
"@ai-sdk/provider": ">=3",
|
|
50
|
+
"ai": ">=6"
|
|
51
|
+
},
|
|
52
|
+
"devDependencies": {
|
|
53
|
+
"@ai-sdk/provider": "^3.0.8",
|
|
54
|
+
"@types/node": "^25.3.0",
|
|
55
|
+
"ai": "^6.0.140",
|
|
56
|
+
"tsdown": "^0.20.3",
|
|
57
|
+
"typescript": "^5.9.3",
|
|
58
|
+
"vitest": "^4.0.18"
|
|
59
|
+
}
|
|
60
|
+
}
|