@gram-ai/elements 1.28.0 → 1.29.0
- package/dist/elements.cjs +1 -1
- package/dist/elements.js +1 -1
- package/dist/{index-CtZz13Cf.js → index-BzA55RRF.js} +11741 -11557
- package/dist/index-BzA55RRF.js.map +1 -0
- package/dist/{index-BmTGnEaV.cjs → index-CgO7wXs-.cjs} +52 -48
- package/dist/index-CgO7wXs-.cjs.map +1 -0
- package/dist/lib/contextCompaction.d.ts +58 -0
- package/dist/lib/contextCompaction.test.d.ts +1 -0
- package/dist/lib/errorTracking.config.d.ts +2 -0
- package/dist/lib/tools.byte-cap.test.d.ts +1 -0
- package/dist/lib/tools.d.ts +19 -0
- package/dist/lib/tools.test.d.ts +1 -0
- package/dist/{profiler-Ccma0l1p.js → profiler-BPCxiY-X.js} +2 -2
- package/dist/{profiler-Ccma0l1p.js.map → profiler-BPCxiY-X.js.map} +1 -1
- package/dist/{profiler-CjNa3A1d.cjs → profiler-BmAwBXpj.cjs} +2 -2
- package/dist/{profiler-CjNa3A1d.cjs.map → profiler-BmAwBXpj.cjs.map} +1 -1
- package/dist/{startRecording-jSovclaq.cjs → startRecording-B0Xe2DOI.cjs} +2 -2
- package/dist/{startRecording-jSovclaq.cjs.map → startRecording-B0Xe2DOI.cjs.map} +1 -1
- package/dist/{startRecording-DAURU74n.js → startRecording-DXGt4fON.js} +2 -2
- package/dist/{startRecording-DAURU74n.js.map → startRecording-DXGt4fON.js.map} +1 -1
- package/dist/types/index.d.ts +49 -0
- package/package.json +1 -1
- package/src/contexts/ElementsProvider.tsx +50 -5
- package/src/lib/contextCompaction.test.ts +201 -0
- package/src/lib/contextCompaction.ts +211 -0
- package/src/lib/errorTracking.config.ts +2 -0
- package/src/lib/errorTracking.ts +1 -1
- package/src/lib/tools.byte-cap.test.ts +132 -0
- package/src/lib/tools.test.ts +259 -0
- package/src/lib/tools.ts +122 -0
- package/src/types/index.ts +55 -0
- package/dist/index-BmTGnEaV.cjs.map +0 -1
- package/dist/index-CtZz13Cf.js.map +0 -1
package/src/contexts/ElementsProvider.tsx
CHANGED

@@ -15,9 +15,11 @@ import {
   setFrontendToolApprovalConfig,
   toAISDKTools,
   wrapToolsWithApproval,
+  wrapToolsWithByteCap,
   type ApprovalHelpers,
   type FrontendTool,
 } from "@/lib/tools";
+import { compactForModel } from "@/lib/contextCompaction";
 import { cn } from "@/lib/utils";
 import { recommended } from "@/plugins";
 import { ElementsConfig, Model } from "@/types";
@@ -37,6 +39,7 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
 import {
   convertToModelMessages,
   createUIMessageStream,
+  lastAssistantMessageIsCompleteWithToolCalls,
   LanguageModel,
   smoothStream,
   stepCountIs,
@@ -366,12 +369,19 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
   } as ToolSet;
 
   // Wrap tools that require approval
-  const tools = wrapToolsWithApproval(
+  const approvedTools = wrapToolsWithApproval(
     combinedTools,
     config.tools?.toolsRequiringApproval,
     getApprovalHelpers(),
   );
 
+  // Cap oversized tool results so one greedy tool call (e.g. a wide log
+  // search) can't fill the context window in a single step.
+  const tools = wrapToolsWithByteCap(
+    approvedTools,
+    config.tools?.maxOutputBytes,
+  );
+
   // Stream the response
   const modelToUse = config.languageModel
     ? config.languageModel
@@ -387,7 +397,29 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
   const nonSystemMessages = cleanedMessages.filter(
     (m) => m.role !== "system",
   );
-  const modelMessages = convertToModelMessages(nonSystemMessages);
+  const rawModelMessages = convertToModelMessages(nonSystemMessages);
+
+  // Auto-compact older turns if the estimated input is approaching
+  // the model's context window. System prompt + last few turns are
+  // always preserved. No-op when the conversation is small.
+  const compaction = config.contextCompaction?.disabled
+    ? {
+        messages: rawModelMessages,
+        droppedCount: 0,
+        estimatedTokensBefore: 0,
+        estimatedTokensAfter: 0,
+      }
+    : compactForModel(rawModelMessages, model, {
+        maxTokens: config.contextCompaction?.maxTokens,
+        compactAtFraction: config.contextCompaction?.compactAtFraction,
+        keepRecent: config.contextCompaction?.keepRecent,
+      });
+  if (compaction.droppedCount > 0) {
+    console.warn(
+      `[elements] compacted ${compaction.droppedCount} older turn(s) from ${compaction.estimatedTokensBefore} → ${compaction.estimatedTokensAfter} est. tokens (model ${model})`,
+    );
+  }
+  const modelMessages = compaction.messages;
 
   const result = streamText({
     system: systemPrompt,
@@ -456,6 +488,11 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
     [
       config.languageModel,
       config.tools?.toolsRequiringApproval,
+      config.tools?.maxOutputBytes,
+      config.contextCompaction?.disabled,
+      config.contextCompaction?.maxTokens,
+      config.contextCompaction?.compactAtFraction,
+      config.contextCompaction?.keepRecent,
       model,
       systemPrompt,
       mcpTools,
@@ -606,9 +643,14 @@ const ElementsProviderWithHistory = ({
   });
   const initialThreadId = contextValue?.config.history?.initialThreadId;
 
-  //
+  // Without `sendAutomaticallyWhen`, client-side frontend tools leave the turn
+  // half-finished: the tool-result is patched in but the agent never resumes,
+  // so the next user message lands on top of an unresolved tool-call sequence.
   const useChatRuntimeHook = useCallback(() => {
-    return useChatRuntime({ transport });
+    return useChatRuntime({
+      transport,
+      sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
+    });
   }, [transport]);
 
   const runtime = useRemoteThreadListRuntime({
@@ -691,7 +733,10 @@ const ElementsProviderWithoutHistory = ({
   executableTools,
   currentChatId,
 }: ElementsProviderWithoutHistoryProps) => {
-  const runtime = useChatRuntime({ transport });
+  const runtime = useChatRuntime({
+    transport,
+    sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
+  });
 
   // Populate runtimeRef so transport can access thread context
   useEffect(() => {
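Taken together, the provider changes hang off two new optional config blocks. A minimal sketch of opting in, assuming the usual ElementsProvider JSX shape and using illustrative values (option names follow the diff above):

    // Sketch under assumptions: the provider usage shape and all values here
    // are illustrative, not prescribed by this release.
    <ElementsProvider
      config={{
        tools: {
          // cap each tool result at ~64 KB before it reaches the model
          maxOutputBytes: 64_000,
        },
        contextCompaction: {
          // start compacting at 70% of the model's nominal ceiling
          compactAtFraction: 0.7,
          // always keep the last 4 messages verbatim
          keepRecent: 4,
        },
      }}
    >
      {children}
    </ElementsProvider>

Per the code above, omitting contextCompaction still compacts with the defaults, contextCompaction.disabled: true skips it entirely, and leaving maxOutputBytes unset (or 0) leaves tool results uncapped.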
package/src/lib/contextCompaction.test.ts
ADDED

@@ -0,0 +1,201 @@
+import { describe, expect, it } from "vitest";
+import type { ModelMessage } from "ai";
+import {
+  compactBySlidingWindow,
+  compactForModel,
+  DEFAULT_CONTEXT_LIMIT,
+  estimateTokens,
+  getModelContextLimit,
+} from "./contextCompaction";
+
+function msg(
+  role: "system" | "user" | "assistant" | "tool",
+  content: string,
+): ModelMessage {
+  return { role, content } as ModelMessage;
+}
+
+describe("estimateTokens", () => {
+  it("returns roughly chars/4", () => {
+    const messages = [msg("user", "a".repeat(400))];
+    const n = estimateTokens(messages);
+    // Actual output is JSON-wrapped so it's slightly larger than 100
+    expect(n).toBeGreaterThan(100);
+    expect(n).toBeLessThan(200);
+  });
+
+  it("grows with message count", () => {
+    const one = estimateTokens([msg("user", "hello")]);
+    const many = estimateTokens(
+      Array.from({ length: 100 }, () => msg("user", "hello")),
+    );
+    expect(many).toBeGreaterThan(one * 50);
+  });
+});
+
+describe("getModelContextLimit", () => {
+  it("returns known mapping for Sonnet 4.6", () => {
+    expect(getModelContextLimit("anthropic/claude-sonnet-4.6")).toBe(1_000_000);
+  });
+
+  it("returns known mapping for Claude 4 (non-1M)", () => {
+    expect(getModelContextLimit("anthropic/claude-sonnet-4")).toBe(200_000);
+  });
+
+  it("returns DEFAULT_CONTEXT_LIMIT for unknown models", () => {
+    expect(getModelContextLimit("acme/very-new-model")).toBe(
+      DEFAULT_CONTEXT_LIMIT,
+    );
+  });
+});
+
+describe("compactBySlidingWindow", () => {
+  it("no-ops when under the limit", () => {
+    const messages = [msg("user", "hi"), msg("assistant", "hello")];
+    const result = compactBySlidingWindow(messages, 1_000_000);
+    expect(result.droppedCount).toBe(0);
+    expect(result.messages).toBe(messages);
+  });
+
+  it("drops oldest non-system turns to fit", () => {
+    // 10 bulky messages, tiny limit → forces dropping
+    const messages: ModelMessage[] = [];
+    for (let i = 0; i < 10; i++) {
+      messages.push(msg("user", `query-${i} ` + "x".repeat(400)));
+      messages.push(msg("assistant", `reply-${i} ` + "y".repeat(400)));
+    }
+    const maxTokens = 500;
+    const result = compactBySlidingWindow(messages, maxTokens, 4);
+    expect(result.droppedCount).toBeGreaterThan(0);
+    expect(result.estimatedTokensAfter).toBeLessThanOrEqual(
+      result.estimatedTokensBefore,
+    );
+    // Last 4 are preserved verbatim
+    const tail = result.messages.slice(-4);
+    expect(tail[tail.length - 1]).toEqual(messages[messages.length - 1]);
+    // Marker prepended
+    const markerPresent = result.messages.some(
+      (m) => typeof m.content === "string" && m.content.includes("omitted"),
+    );
+    expect(markerPresent).toBe(true);
+  });
+
+  it("always preserves system messages", () => {
+    const messages: ModelMessage[] = [
+      msg("system", "sys " + "s".repeat(1000)),
+      ...Array.from({ length: 20 }, (_, i) =>
+        msg("user", `q-${i} ` + "x".repeat(500)),
+      ),
+    ];
+    const result = compactBySlidingWindow(messages, 300, 2);
+    expect(result.droppedCount).toBeGreaterThan(0);
+    expect(result.messages[0]!.role).toBe("system");
+  });
+
+  it("preserves at least keepRecent messages even if over limit", () => {
+    const messages = Array.from({ length: 10 }, (_, i) =>
+      msg("user", "x".repeat(1000) + `-${i}`),
+    );
+    const result = compactBySlidingWindow(messages, 10, 3);
+    // keepRecent preserved even though we can't get under the limit
+    expect(result.messages.length).toBeGreaterThanOrEqual(3);
+    // Last 3 are intact
+    const tail = result.messages.slice(-3);
+    expect(tail).toEqual(messages.slice(-3));
+  });
+});
+
+describe("compactBySlidingWindow — tool message pairing", () => {
+  it("never leaves a tool message at the head of the retained window", () => {
+    // Scenario from Devin: dropping oldest-first could split an
+    // assistant(tool_calls) → tool pair, leaving an orphan tool at the
+    // head of the retained set. Providers reject this with a 400.
+    const messages: ModelMessage[] = [
+      msg("user", "q1 " + "x".repeat(400)),
+      msg("assistant", "a1-with-tool-call " + "x".repeat(400)),
+      msg("tool", "t1-result " + "x".repeat(400)),
+      msg("assistant", "a1-final " + "x".repeat(400)),
+      msg("user", "q2 " + "x".repeat(400)),
+      msg("assistant", "a2-with-tool-call " + "x".repeat(400)),
+      msg("tool", "t2-result " + "x".repeat(400)),
+      msg("assistant", "a2-final " + "x".repeat(400)),
+    ];
+
+    const result = compactBySlidingWindow(messages, 400, 4);
+    expect(result.droppedCount).toBeGreaterThan(0);
+
+    // The retained non-system messages should never start with a tool.
+    const nonSystem = result.messages.filter((m) => m.role !== "system");
+    // Skip the synthetic assistant marker if present.
+    const firstReal = nonSystem.find(
+      (m) =>
+        !(
+          m.role === "assistant" &&
+          typeof m.content === "string" &&
+          m.content.includes("omitted")
+        ),
+    );
+    expect(firstReal?.role).not.toBe("tool");
+  });
+
+  it("drops an assistant+tool pair atomically (not one without the other)", () => {
+    const messages: ModelMessage[] = [
+      msg("user", "old"),
+      msg("assistant", "calling tool"),
+      msg("tool", "result " + "x".repeat(2000)),
+      msg("user", "recent " + "x".repeat(200)),
+      msg("assistant", "recent reply " + "x".repeat(200)),
+    ];
+    const result = compactBySlidingWindow(messages, 300, 2);
+    // If the group was dropped atomically, both the assistant and its tool
+    // are gone together. If the bug was still present, we'd see the tool
+    // message lingering alone.
+    const nonSystem = result.messages.filter((m) => m.role !== "system");
+    const hasLoneTool = nonSystem.some(
+      (m, i) =>
+        m.role === "tool" && (i === 0 || nonSystem[i - 1]!.role === "user"),
+    );
+    expect(hasLoneTool).toBe(false);
+  });
+
+  it("does not split a tool group when aligning the recent window", () => {
+    // keepRecent=3 would cut mid-group with naive slicing. Grouping should
+    // expand the recent window to keep the assistant+tools together.
+    const messages: ModelMessage[] = [
+      msg("user", "old " + "x".repeat(1000)),
+      msg("assistant", "calling 2 tools"),
+      msg("tool", "result1"),
+      msg("tool", "result2"),
+      msg("assistant", "final"),
+    ];
+    const result = compactBySlidingWindow(messages, 200, 3);
+    // If the first tool was the "recent" cut-off, we'd see a tool at
+    // the head of retained — but grouping should have pulled the
+    // assistant with it.
+    const kept = result.messages.filter((m) => m.role !== "system");
+    const firstTool = kept.findIndex((m) => m.role === "tool");
+    if (firstTool !== -1) {
+      expect(kept[firstTool - 1]?.role).toMatch(/assistant|tool/);
+    }
+  });
+});
+
+describe("compactForModel", () => {
+  it("uses 70% of the nominal ceiling by default", () => {
+    const small = [msg("user", "hi")];
+    const result = compactForModel(small, "anthropic/claude-sonnet-4.6");
+    expect(result.droppedCount).toBe(0);
+    expect(result.messages).toBe(small);
+  });
+
+  it("honors explicit maxTokens override", () => {
+    const messages = Array.from({ length: 30 }, (_, i) =>
+      msg("user", "x".repeat(500) + `-${i}`),
+    );
+    const result = compactForModel(messages, "anthropic/claude-sonnet-4.6", {
+      maxTokens: 2000,
+      keepRecent: 2,
+    });
+    expect(result.droppedCount).toBeGreaterThan(0);
+  });
+});
package/src/lib/contextCompaction.ts
ADDED

@@ -0,0 +1,211 @@
+import type { ModelMessage } from "ai";
+import { MODELS } from "./models";
+
+type KnownModelId = (typeof MODELS)[number];
+
+/**
+ * Fraction-of-limit at which compaction kicks in. Below this, messages pass
+ * through untouched; above this, oldest non-system turns are dropped until
+ * the estimated token count is back under the threshold.
+ */
+export const DEFAULT_COMPACTION_FRACTION = 0.7;
+
+/**
+ * Number of most-recent messages preserved verbatim, even if the conversation
+ * is already over the limit. Ensures the assistant always has the latest turn
+ * and its immediate predecessor.
+ */
+export const DEFAULT_KEEP_RECENT = 4;
+
+/**
+ * Conservative fallback when we encounter a model we haven't mapped — big
+ * enough to be useful for unknown models, small enough to still trigger
+ * compaction before hitting upstream 400s.
+ */
+export const DEFAULT_CONTEXT_LIMIT = 200_000;
+
+/**
+ * Known input-token ceilings per model (nominal upstream maximum). Keyed by
+ * MODELS so TypeScript catches drift — adding a model id here that isn't in
+ * MODELS, or misspelling an id, is a compile error. Coverage is intentionally
+ * partial: models without an explicit entry fall back to DEFAULT_CONTEXT_LIMIT.
+ */
+const MODEL_CONTEXT_LIMITS: Partial<Record<KnownModelId, number>> = {
+  // Anthropic (1M tier where available, else 200K)
+  "anthropic/claude-opus-4.6": 1_000_000,
+  "anthropic/claude-opus-4.5": 1_000_000,
+  "anthropic/claude-opus-4.1": 200_000,
+  "anthropic/claude-sonnet-4.6": 1_000_000,
+  "anthropic/claude-sonnet-4.5": 1_000_000,
+  "anthropic/claude-sonnet-4": 200_000,
+  "anthropic/claude-haiku-4.5": 200_000,
+
+  // OpenAI
+  "openai/gpt-5.4": 400_000,
+  "openai/gpt-5.4-mini": 400_000,
+  "openai/gpt-5.1": 400_000,
+  "openai/gpt-5.1-codex": 400_000,
+  "openai/gpt-5": 400_000,
+  "openai/gpt-4.1": 1_000_000,
+  "openai/o4-mini": 200_000,
+  "openai/o3": 200_000,
+
+  // Google
+  "google/gemini-3.1-pro-preview": 1_000_000,
+  "google/gemini-2.5-pro": 1_000_000,
+  "google/gemini-2.5-flash": 1_000_000,
+
+  // Others
+  "deepseek/deepseek-r1": 128_000,
+  "deepseek/deepseek-v3.2": 128_000,
+  "meta-llama/llama-4-maverick": 1_000_000,
+  "x-ai/grok-4": 256_000,
+  "qwen/qwen3-coder": 256_000,
+  "moonshotai/kimi-k2.5": 128_000,
+  "mistralai/mistral-medium-3.1": 128_000,
+  "mistralai/codestral-2508": 256_000,
+  "mistralai/devstral-small": 128_000,
+};
+
+/**
+ * Returns the input-token ceiling for a given OpenRouter model id, or
+ * DEFAULT_CONTEXT_LIMIT if unknown.
+ */
+export function getModelContextLimit(modelId: string): number {
+  return MODEL_CONTEXT_LIMITS[modelId as KnownModelId] ?? DEFAULT_CONTEXT_LIMIT;
+}
+
+/**
+ * Rough input-token estimate using a chars/4 heuristic on the JSON serialized
+ * conversation. Tokens-per-char varies by model and content, but a chars/4
+ * heuristic matches OpenAI's rule-of-thumb within ~15% for English prose and
+ * is deterministic + zero-cost — good enough to trigger compaction.
+ */
+export function estimateTokens(messages: ModelMessage[]): number {
+  const serialized = JSON.stringify(messages);
+  return Math.ceil(serialized.length / 4);
+}
+
+export interface CompactionResult {
+  messages: ModelMessage[];
+  droppedCount: number;
+  estimatedTokensBefore: number;
+  estimatedTokensAfter: number;
+}
+
+/**
+ * Drops oldest non-system messages until the estimated token count is under
+ * maxTokens. Always preserves the last `keepRecent` messages and any system
+ * role messages. When any messages are dropped, prepends a synthetic assistant
+ * note so the model knows earlier context was elided.
+ */
+export function compactBySlidingWindow(
+  messages: ModelMessage[],
+  maxTokens: number,
+  keepRecent: number = DEFAULT_KEEP_RECENT,
+): CompactionResult {
+  const estimatedTokensBefore = estimateTokens(messages);
+
+  if (estimatedTokensBefore <= maxTokens || messages.length <= keepRecent) {
+    return {
+      messages,
+      droppedCount: 0,
+      estimatedTokensBefore,
+      estimatedTokensAfter: estimatedTokensBefore,
+    };
+  }
+
+  const systemMessages = messages.filter((m) => m.role === "system");
+  const nonSystem = messages.filter((m) => m.role !== "system");
+
+  // Group consecutive `tool` messages with the assistant message that
+  // precedes them. OpenAI-compatible providers require every tool-result
+  // message to be immediately preceded by the assistant message holding its
+  // tool_calls — splitting these produces an invalid conversation that
+  // providers reject with a 400. Grouping ensures we drop or keep the
+  // full assistant+tools unit atomically.
+  const groups: ModelMessage[][] = [];
+  for (const m of nonSystem) {
+    if (m.role === "tool" && groups.length > 0) {
+      groups[groups.length - 1]!.push(m);
+    } else {
+      groups.push([m]);
+    }
+  }
+
+  // Reserve the trailing groups that together contain at least `keepRecent`
+  // messages. Using groups (not raw messages) keeps assistant+tool pairs
+  // intact at the boundary between retained and dropped.
+  let recentMsgCount = 0;
+  let recentStart = groups.length;
+  while (recentStart > 0 && recentMsgCount < keepRecent) {
+    recentStart -= 1;
+    recentMsgCount += groups[recentStart]!.length;
+  }
+  const recentGroups = groups.slice(recentStart);
+  const droppableGroups = groups.slice(0, recentStart);
+
+  let droppedCount = 0;
+  let working = [
+    ...systemMessages,
+    ...droppableGroups.flat(),
+    ...recentGroups.flat(),
+  ];
+
+  while (droppableGroups.length > 0 && estimateTokens(working) > maxTokens) {
+    const droppedGroup = droppableGroups.shift()!;
+    droppedCount += droppedGroup.length;
+    working = [
+      ...systemMessages,
+      ...droppableGroups.flat(),
+      ...recentGroups.flat(),
+    ];
+  }
+
+  if (droppedCount > 0) {
+    const marker: ModelMessage = {
+      role: "assistant",
+      content: `[${droppedCount} earlier message${
+        droppedCount === 1 ? "" : "s"
+      } omitted to stay under context length. If the user asks about them, say you no longer have that context and suggest they restate the relevant details.]`,
+    };
+    working = [
+      ...systemMessages,
+      marker,
+      ...droppableGroups.flat(),
+      ...recentGroups.flat(),
+    ];
+  }
+
+  return {
+    messages: working,
+    droppedCount,
+    estimatedTokensBefore,
+    estimatedTokensAfter: estimateTokens(working),
+  };
+}
+
+export interface CompactionOptions {
+  /** Override the model's nominal input ceiling. */
+  maxTokens?: number;
+  /** Fraction of maxTokens at which compaction kicks in. */
+  compactAtFraction?: number;
+  /** Most-recent messages preserved verbatim. */
+  keepRecent?: number;
+}
+
+/**
+ * Convenience wrapper that picks the model ceiling, applies compactAtFraction,
+ * and runs compactBySlidingWindow. Returns the (possibly unchanged) messages
+ * plus diagnostics.
+ */
+export function compactForModel(
+  messages: ModelMessage[],
+  modelId: string,
+  opts: CompactionOptions = {},
+): CompactionResult {
+  const ceiling = opts.maxTokens ?? getModelContextLimit(modelId);
+  const fraction = opts.compactAtFraction ?? DEFAULT_COMPACTION_FRACTION;
+  const limit = Math.floor(ceiling * fraction);
+  return compactBySlidingWindow(messages, limit, opts.keepRecent);
+}
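For a sense of the numbers, a small sketch of this exported API in isolation (longConversation stands in for any ModelMessage[]; behavior per the implementation above):

    import { compactForModel } from "./contextCompaction";

    // "anthropic/claude-sonnet-4" maps to a 200_000-token ceiling above, so with
    // the default compactAtFraction of 0.7 the effective limit is 140_000 tokens.
    const result = compactForModel(longConversation, "anthropic/claude-sonnet-4", {
      keepRecent: 4,
    });
    if (result.droppedCount > 0) {
      console.log(
        `dropped ${result.droppedCount}, now ~${result.estimatedTokensAfter} est. tokens`,
      );
    }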
package/src/lib/errorTracking.config.ts
CHANGED

@@ -7,10 +7,12 @@
  * - VITE_DATADOG_APPLICATION_ID
  * - VITE_DATADOG_CLIENT_TOKEN
  * - VITE_DATADOG_SITE (optional, defaults to datadoghq.com)
+ * - VITE_DATADOG_ENV (optional, defaults to prod)
  */
 export const DATADOG_CONFIG = {
   applicationId: import.meta.env.VITE_DATADOG_APPLICATION_ID ?? "",
   clientToken: import.meta.env.VITE_DATADOG_CLIENT_TOKEN ?? "",
   site: import.meta.env.VITE_DATADOG_SITE ?? "datadoghq.com",
+  env: import.meta.env.VITE_DATADOG_ENV ?? "prod",
   service: "gram-elements",
 } as const;
package/src/lib/errorTracking.ts
CHANGED
@@ -44,7 +44,7 @@ export function initErrorTracking(config: ErrorTrackingConfig = {}): void {
     clientToken: DATADOG_CONFIG.clientToken,
     site: DATADOG_CONFIG.site,
     service: DATADOG_CONFIG.service,
-    env:
+    env: DATADOG_CONFIG.env,
     sessionSampleRate: 100,
     sessionReplaySampleRate: 100,
     trackUserInteractions: true,
package/src/lib/tools.byte-cap.test.ts
ADDED

@@ -0,0 +1,132 @@
+import { describe, expect, it, vi } from "vitest";
+import {
+  capToolResultBytes,
+  truncateTextToByteCap,
+  wrapToolsWithByteCap,
+} from "./tools";
+import type { ToolSet } from "ai";
+
+describe("truncateTextToByteCap", () => {
+  it("returns original when under cap", () => {
+    expect(truncateTextToByteCap("hello world", 100)).toBe("hello world");
+  });
+
+  it("truncates with head + tail + notice when over cap", () => {
+    const text = "a".repeat(1000) + "-MIDDLE-" + "b".repeat(1000);
+    const out = truncateTextToByteCap(text, 200);
+    expect(out.length).toBeLessThan(text.length);
+    expect(out).toContain("tool output truncated");
+    expect(out.startsWith("a")).toBe(true);
+    expect(out.endsWith("b")).toBe(true);
+    expect(out).not.toContain("MIDDLE");
+  });
+
+  it("output always stays at or under maxBytes (notice included in budget)", () => {
+    // Regression test — earlier version appended the notice *without*
+    // reserving budget for it, so the output overshot maxBytes by ~100.
+    for (const maxBytes of [256, 512, 1024, 4096]) {
+      const text = "x".repeat(50_000);
+      const out = truncateTextToByteCap(text, maxBytes);
+      const outBytes = new TextEncoder().encode(out).byteLength;
+      expect(outBytes).toBeLessThanOrEqual(maxBytes);
+    }
+  });
+
+  it("passes through when maxBytes <= 0 (disabled)", () => {
+    const text = "x".repeat(10_000);
+    expect(truncateTextToByteCap(text, 0)).toBe(text);
+    expect(truncateTextToByteCap(text, -1)).toBe(text);
+  });
+
+  it("handles multibyte UTF-8 without crashing", () => {
+    const text = "🎉".repeat(500);
+    const out = truncateTextToByteCap(text, 200);
+    expect(out).toContain("tool output truncated");
+    expect(new TextEncoder().encode(out).byteLength).toBeGreaterThan(0);
+  });
+});
+
+describe("capToolResultBytes", () => {
+  it("truncates plain string results", () => {
+    const out = capToolResultBytes("x".repeat(5_000), 100);
+    expect(typeof out).toBe("string");
+    expect(out).not.toBe("x".repeat(5_000));
+    expect(out).toContain("tool output truncated");
+  });
+
+  it("truncates text chunks inside MCP-shaped results", () => {
+    const result = {
+      content: [
+        { type: "text", text: "short" },
+        { type: "text", text: "big".repeat(5_000) },
+      ],
+      isError: false,
+    };
+    const out = capToolResultBytes(result, 100) as typeof result;
+    expect(out.content[0]).toEqual({ type: "text", text: "short" });
+    expect((out.content[1] as { text: string }).text).toContain(
+      "tool output truncated",
+    );
+    expect(out.isError).toBe(false);
+  });
+
+  it("leaves non-text chunks alone", () => {
+    const result = {
+      content: [
+        { type: "image", data: "x".repeat(5_000), mimeType: "image/png" },
+      ],
+    };
+    const out = capToolResultBytes(result, 100) as typeof result;
+    expect(out.content[0]).toEqual(result.content[0]);
+  });
+
+  it("preserves isError flag", () => {
+    const result = {
+      content: [{ type: "text", text: "tool blew up: " + "x".repeat(5_000) }],
+      isError: true,
+    };
+    const out = capToolResultBytes(result, 100) as typeof result;
+    expect(out.isError).toBe(true);
+  });
+
+  it("passes unknown shapes through", () => {
+    expect(capToolResultBytes(42, 100)).toBe(42);
+    expect(capToolResultBytes(null, 100)).toBe(null);
+    expect(capToolResultBytes({ foo: "bar" }, 100)).toEqual({ foo: "bar" });
+  });
+});
+
+describe("wrapToolsWithByteCap", () => {
+  it("is a no-op when maxBytes is undefined/0", () => {
+    const execute = vi.fn().mockResolvedValue("anything");
+    const tools: ToolSet = {
+      t: { description: "", inputSchema: { type: "object" }, execute } as never,
+    };
+    expect(wrapToolsWithByteCap(tools, undefined)).toBe(tools);
+    expect(wrapToolsWithByteCap(tools, 0)).toBe(tools);
+  });
+
+  it("wraps execute and truncates oversized result", async () => {
+    const execute = vi.fn().mockResolvedValue({
+      content: [{ type: "text", text: "z".repeat(10_000) }],
+    });
+    const tools: ToolSet = {
+      t: { description: "", inputSchema: { type: "object" }, execute } as never,
+    };
+    const wrapped = wrapToolsWithByteCap(tools, 256);
+    const wrappedExecute = wrapped.t.execute!;
+    const out = (await wrappedExecute({}, { toolCallId: "id" } as never)) as {
+      content: Array<{ text: string }>;
+    };
+    expect(out.content[0]!.text).toContain("tool output truncated");
+    expect(execute).toHaveBeenCalledOnce();
+  });
+
+  it("leaves tools without execute alone", () => {
+    const tools: ToolSet = {
+      t: { description: "", inputSchema: { type: "object" } } as never,
+    };
+    const wrapped = wrapToolsWithByteCap(tools, 256);
+    expect(wrapped.t).toBe(tools.t);
+  });
+});
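And a short sketch of the wrapper these tests exercise, with a hypothetical tool set (cap semantics per the tests above; the 64_000 value is illustrative):

    import { wrapToolsWithByteCap } from "./tools";

    // `tools` is any AI SDK ToolSet; 64_000 is an illustrative cap.
    const capped = wrapToolsWithByteCap(tools, 64_000);
    // Each wrapped execute() now returns at most 64_000 bytes: oversized plain
    // strings and MCP-style text chunks are cut to head + tail with a truncation
    // notice counted inside the budget; non-text chunks pass through unchanged.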