@gram-ai/elements 1.28.0 → 1.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/components/MessageContent.d.ts +20 -0
- package/dist/components/MessageContent.parser.d.ts +12 -0
- package/dist/components/MessageContent.test.d.ts +1 -0
- package/dist/elements.cjs +1 -1
- package/dist/elements.css +1 -1
- package/dist/elements.js +14 -13
- package/dist/{index-C4bFBGfl.cjs → index-COzPF-WM.cjs} +45 -45
- package/dist/index-COzPF-WM.cjs.map +1 -0
- package/dist/{index-D93pV0_o.js → index-CRhpKl-G.js} +5218 -5201
- package/dist/index-CRhpKl-G.js.map +1 -0
- package/dist/{index-CtZz13Cf.js → index-QUz5guSg.js} +11835 -11604
- package/dist/index-QUz5guSg.js.map +1 -0
- package/dist/index-fVcTljYT.cjs +194 -0
- package/dist/index-fVcTljYT.cjs.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/lib/contextCompaction.d.ts +58 -0
- package/dist/lib/contextCompaction.test.d.ts +1 -0
- package/dist/lib/errorTracking.config.d.ts +2 -0
- package/dist/lib/tools.byte-cap.test.d.ts +1 -0
- package/dist/lib/tools.d.ts +19 -0
- package/dist/lib/tools.test.d.ts +1 -0
- package/dist/plugins/index.d.ts +4 -1
- package/dist/plugins/index.test.d.ts +1 -0
- package/dist/plugins.cjs +1 -1
- package/dist/plugins.js +1 -1
- package/dist/{profiler-Ccma0l1p.js → profiler-DifNjGGB.js} +2 -2
- package/dist/{profiler-Ccma0l1p.js.map → profiler-DifNjGGB.js.map} +1 -1
- package/dist/{profiler-CjNa3A1d.cjs → profiler-KLtVMM14.cjs} +2 -2
- package/dist/{profiler-CjNa3A1d.cjs.map → profiler-KLtVMM14.cjs.map} +1 -1
- package/dist/{startRecording-DAURU74n.js → startRecording-C6xu9UA9.js} +2 -2
- package/dist/{startRecording-DAURU74n.js.map → startRecording-C6xu9UA9.js.map} +1 -1
- package/dist/{startRecording-jSovclaq.cjs → startRecording-YENzw_0G.cjs} +2 -2
- package/dist/{startRecording-jSovclaq.cjs.map → startRecording-YENzw_0G.cjs.map} +1 -1
- package/dist/types/index.d.ts +49 -0
- package/dist/types/plugins.d.ts +5 -0
- package/package.json +2 -2
- package/src/components/MessageContent.parser.ts +39 -0
- package/src/components/MessageContent.test.ts +110 -0
- package/src/components/MessageContent.tsx +82 -0
- package/src/contexts/ElementsProvider.tsx +57 -7
- package/src/index.ts +2 -0
- package/src/lib/contextCompaction.test.ts +201 -0
- package/src/lib/contextCompaction.ts +211 -0
- package/src/lib/errorTracking.config.ts +2 -0
- package/src/lib/errorTracking.ts +1 -1
- package/src/lib/tools.byte-cap.test.ts +132 -0
- package/src/lib/tools.test.ts +259 -0
- package/src/lib/tools.ts +122 -0
- package/src/plugins/chart/index.ts +1 -0
- package/src/plugins/chart/ui/bar-chart.tsx +9 -1
- package/src/plugins/generative-ui/index.ts +1 -0
- package/src/plugins/index.test.ts +62 -0
- package/src/plugins/index.ts +14 -1
- package/src/types/index.ts +55 -0
- package/src/types/plugins.ts +6 -0
- package/dist/index-BmTGnEaV.cjs +0 -190
- package/dist/index-BmTGnEaV.cjs.map +0 -1
- package/dist/index-C4bFBGfl.cjs.map +0 -1
- package/dist/index-CtZz13Cf.js.map +0 -1
- package/dist/index-D93pV0_o.js.map +0 -1
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"use client";
|
|
2
|
+
|
|
3
|
+
import { FC, useMemo } from "react";
|
|
4
|
+
import { ElementsContext } from "@/contexts/contexts";
|
|
5
|
+
import { ToolExecutionProvider } from "@/contexts/ToolExecutionContext";
|
|
6
|
+
import type { ElementsContextType, Model } from "@/types";
|
|
7
|
+
import { recommended } from "@/plugins";
|
|
8
|
+
import { chart } from "@/plugins/chart";
|
|
9
|
+
import { generativeUI } from "@/plugins/generative-ui";
|
|
10
|
+
import { parseSegments } from "./MessageContent.parser";
|
|
11
|
+
|
|
12
|
+
/** Props every fenced-block widget is rendered with: the raw body of the block. */
type FencedBlockRenderer = FC<{ code: string }>;

/**
 * Fenced code block language tag → widget used to render a block with that
 * tag. The widgets come from the chart and generative-ui plugins.
 */
const SUPPORTED_LANGUAGES: Record<string, FencedBlockRenderer> = {
  chart: chart.Component as FencedBlockRenderer,
  ui: generativeUI.Component as FencedBlockRenderer,
};
|
|
16
|
+
|
|
17
|
+
// Stand-in context value for static rendering. It carries just the fields
// that useDensity()/useElements() read inside the chart and ui renderers;
// there is no auth, no MCP and no runtime behind it, so every setter is a
// no-op and the chat is reported as closed and collapsed.
const STUB_CONTEXT: ElementsContextType = {
  config: { projectSlug: "" },
  setModel: () => {},
  model: "" as Model,
  isExpanded: false,
  setIsExpanded: () => {},
  isOpen: false,
  setIsOpen: () => {},
  plugins: recommended,
  mcpTools: undefined,
};
|
|
30
|
+
|
|
31
|
+
/** Props accepted by {@link MessageContent}. */
export interface MessageContentProps {
  /**
   * The assistant message exactly as stored: markdown text that may contain
   * ```chart and ```ui fenced code blocks.
   */
  content: string;
  /** Optional class name placed on the root container element. */
  className?: string;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Standalone renderer for stored chat message content. Recognises the same
 * `chart` and `ui` fenced code blocks that the live `<Chat />` component
 * renders as widgets, but works without an `ElementsProvider`, MCP client,
 * auth session, or assistant-ui runtime.
 *
 * Use in static viewers (agent session detail panel, replay, share) so a
 * stored bar chart appears as a chart instead of as raw JSON. Plain markdown
 * formatting is intentionally not applied — text segments render as
 * preformatted text.
 *
 * Rendering rules:
 * - whitespace-only text segments are skipped;
 * - code segments whose language has no entry in `SUPPORTED_LANGUAGES` are
 *   skipped;
 * - everything else is rendered in document order.
 */
export const MessageContent: FC<MessageContentProps> = ({
  content,
  className,
}) => {
  // Parsing depends only on the content string, so re-parse only when it changes.
  const segments = useMemo(() => parseSegments(content), [content]);

  return (
    <ElementsContext.Provider value={STUB_CONTEXT}>
      {/* Empty tools so generative-ui's <ActionButton> renders disabled. */}
      <ToolExecutionProvider tools={{}}>
        <div className={className}>
          {segments.map((seg, i) => {
            if (seg.type === "text") {
              if (seg.text.trim() === "") return null;
              return (
                <div key={i} className="whitespace-pre-wrap">
                  {seg.text}
                </div>
              );
            }

            // The language tag originates from stored message text. Look it
            // up as an OWN property only: a bare `SUPPORTED_LANGUAGES[tag]`
            // would resolve tags such as "constructor" or "toString" to
            // members inherited from Object.prototype and try to render them
            // as components.
            // NOTE(review): if parseSegments already restricts `lang` to
            // known tags this guard is redundant but harmless — confirm.
            const Component = Object.prototype.hasOwnProperty.call(
              SUPPORTED_LANGUAGES,
              seg.lang,
            )
              ? SUPPORTED_LANGUAGES[seg.lang]
              : undefined;
            if (!Component) return null;

            return (
              <div key={i} className="my-2">
                <Component code={seg.code} />
              </div>
            );
          })}
        </div>
      </ToolExecutionProvider>
    </ElementsContext.Provider>
  );
};
|
|
@@ -15,9 +15,11 @@ import {
|
|
|
15
15
|
setFrontendToolApprovalConfig,
|
|
16
16
|
toAISDKTools,
|
|
17
17
|
wrapToolsWithApproval,
|
|
18
|
+
wrapToolsWithByteCap,
|
|
18
19
|
type ApprovalHelpers,
|
|
19
20
|
type FrontendTool,
|
|
20
21
|
} from "@/lib/tools";
|
|
22
|
+
import { compactForModel } from "@/lib/contextCompaction";
|
|
21
23
|
import { cn } from "@/lib/utils";
|
|
22
24
|
import { recommended } from "@/plugins";
|
|
23
25
|
import { ElementsConfig, Model } from "@/types";
|
|
@@ -37,6 +39,7 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
|
|
37
39
|
import {
|
|
38
40
|
convertToModelMessages,
|
|
39
41
|
createUIMessageStream,
|
|
42
|
+
lastAssistantMessageIsCompleteWithToolCalls,
|
|
40
43
|
LanguageModel,
|
|
41
44
|
smoothStream,
|
|
42
45
|
stepCountIs,
|
|
@@ -188,6 +191,12 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
|
|
|
188
191
|
toolsWithCustomComponents,
|
|
189
192
|
);
|
|
190
193
|
|
|
194
|
+
// Read inside `sendMessages` via ref so prompt changes don't churn the
|
|
195
|
+
// transport useMemo identity. Same pattern as ensureValidHeadersRef /
|
|
196
|
+
// approvalHelpersRef below.
|
|
197
|
+
const systemPromptRef = useRef(systemPrompt);
|
|
198
|
+
systemPromptRef.current = systemPrompt;
|
|
199
|
+
|
|
191
200
|
// Initialize error tracking on mount
|
|
192
201
|
useEffect(() => {
|
|
193
202
|
initErrorTracking({
|
|
@@ -366,12 +375,19 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
|
|
|
366
375
|
} as ToolSet;
|
|
367
376
|
|
|
368
377
|
// Wrap tools that require approval
|
|
369
|
-
const
|
|
378
|
+
const approvedTools = wrapToolsWithApproval(
|
|
370
379
|
combinedTools,
|
|
371
380
|
config.tools?.toolsRequiringApproval,
|
|
372
381
|
getApprovalHelpers(),
|
|
373
382
|
);
|
|
374
383
|
|
|
384
|
+
// Cap oversized tool results so one greedy tool call (e.g. a wide log
|
|
385
|
+
// search) can't fill the context window in a single step.
|
|
386
|
+
const tools = wrapToolsWithByteCap(
|
|
387
|
+
approvedTools,
|
|
388
|
+
config.tools?.maxOutputBytes,
|
|
389
|
+
);
|
|
390
|
+
|
|
375
391
|
// Stream the response
|
|
376
392
|
const modelToUse = config.languageModel
|
|
377
393
|
? config.languageModel
|
|
@@ -387,10 +403,32 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
|
|
|
387
403
|
const nonSystemMessages = cleanedMessages.filter(
|
|
388
404
|
(m) => m.role !== "system",
|
|
389
405
|
);
|
|
390
|
-
const
|
|
406
|
+
const rawModelMessages = convertToModelMessages(nonSystemMessages);
|
|
407
|
+
|
|
408
|
+
// Auto-compact older turns if the estimated input is approaching
|
|
409
|
+
// the model's context window. System prompt + last few turns are
|
|
410
|
+
// always preserved. No-op when the conversation is small.
|
|
411
|
+
const compaction = config.contextCompaction?.disabled
|
|
412
|
+
? {
|
|
413
|
+
messages: rawModelMessages,
|
|
414
|
+
droppedCount: 0,
|
|
415
|
+
estimatedTokensBefore: 0,
|
|
416
|
+
estimatedTokensAfter: 0,
|
|
417
|
+
}
|
|
418
|
+
: compactForModel(rawModelMessages, model, {
|
|
419
|
+
maxTokens: config.contextCompaction?.maxTokens,
|
|
420
|
+
compactAtFraction: config.contextCompaction?.compactAtFraction,
|
|
421
|
+
keepRecent: config.contextCompaction?.keepRecent,
|
|
422
|
+
});
|
|
423
|
+
if (compaction.droppedCount > 0) {
|
|
424
|
+
console.warn(
|
|
425
|
+
`[elements] compacted ${compaction.droppedCount} older turn(s) from ${compaction.estimatedTokensBefore} → ${compaction.estimatedTokensAfter} est. tokens (model ${model})`,
|
|
426
|
+
);
|
|
427
|
+
}
|
|
428
|
+
const modelMessages = compaction.messages;
|
|
391
429
|
|
|
392
430
|
const result = streamText({
|
|
393
|
-
system:
|
|
431
|
+
system: systemPromptRef.current,
|
|
394
432
|
model: modelToUse,
|
|
395
433
|
messages: modelMessages,
|
|
396
434
|
tools,
|
|
@@ -456,8 +494,12 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
|
|
|
456
494
|
[
|
|
457
495
|
config.languageModel,
|
|
458
496
|
config.tools?.toolsRequiringApproval,
|
|
497
|
+
config.tools?.maxOutputBytes,
|
|
498
|
+
config.contextCompaction?.disabled,
|
|
499
|
+
config.contextCompaction?.maxTokens,
|
|
500
|
+
config.contextCompaction?.compactAtFraction,
|
|
501
|
+
config.contextCompaction?.keepRecent,
|
|
459
502
|
model,
|
|
460
|
-
systemPrompt,
|
|
461
503
|
mcpTools,
|
|
462
504
|
getApprovalHelpers,
|
|
463
505
|
apiUrl,
|
|
@@ -606,9 +648,14 @@ const ElementsProviderWithHistory = ({
|
|
|
606
648
|
});
|
|
607
649
|
const initialThreadId = contextValue?.config.history?.initialThreadId;
|
|
608
650
|
|
|
609
|
-
//
|
|
651
|
+
// Without `sendAutomaticallyWhen`, client-side frontend tools leave the turn
|
|
652
|
+
// half-finished: the tool-result is patched in but the agent never resumes,
|
|
653
|
+
// so the next user message lands on top of an unresolved tool-call sequence.
|
|
610
654
|
const useChatRuntimeHook = useCallback(() => {
|
|
611
|
-
return useChatRuntime({
|
|
655
|
+
return useChatRuntime({
|
|
656
|
+
transport,
|
|
657
|
+
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
|
|
658
|
+
});
|
|
612
659
|
}, [transport]);
|
|
613
660
|
|
|
614
661
|
const runtime = useRemoteThreadListRuntime({
|
|
@@ -691,7 +738,10 @@ const ElementsProviderWithoutHistory = ({
|
|
|
691
738
|
executableTools,
|
|
692
739
|
currentChatId,
|
|
693
740
|
}: ElementsProviderWithoutHistoryProps) => {
|
|
694
|
-
const runtime = useChatRuntime({
|
|
741
|
+
const runtime = useChatRuntime({
|
|
742
|
+
transport,
|
|
743
|
+
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
|
|
744
|
+
});
|
|
695
745
|
|
|
696
746
|
// Populate runtimeRef so transport can access thread context
|
|
697
747
|
useEffect(() => {
|
package/src/index.ts
CHANGED
|
@@ -18,6 +18,8 @@ export { ChatHistory } from "@/components/ChatHistory";
|
|
|
18
18
|
export { ShareButton } from "@/components/ShareButton";
|
|
19
19
|
export type { ShareButtonProps } from "@/components/ShareButton";
|
|
20
20
|
export { ToolFallback } from "@/components/assistant-ui/tool-fallback";
|
|
21
|
+
export { MessageContent } from "@/components/MessageContent";
|
|
22
|
+
export type { MessageContentProps } from "@/components/MessageContent";
|
|
21
23
|
|
|
22
24
|
// Replay
|
|
23
25
|
export { Replay } from "@/components/Replay";
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import type { ModelMessage } from "ai";
|
|
3
|
+
import {
|
|
4
|
+
compactBySlidingWindow,
|
|
5
|
+
compactForModel,
|
|
6
|
+
DEFAULT_CONTEXT_LIMIT,
|
|
7
|
+
estimateTokens,
|
|
8
|
+
getModelContextLimit,
|
|
9
|
+
} from "./contextCompaction";
|
|
10
|
+
|
|
11
|
+
/**
 * Test fixture factory: builds a message with the given role and plain string
 * content. The cast is needed because the object is assembled from a role
 * union rather than written as one specific message variant.
 */
function msg(
  role: "system" | "user" | "assistant" | "tool",
  content: string,
): ModelMessage {
  const fixture = { role, content };
  return fixture as ModelMessage;
}
|
|
17
|
+
|
|
18
|
+
describe("estimateTokens", () => {
  it("returns roughly chars/4", () => {
    // 400 characters of content is about 100 tokens; the JSON envelope
    // around the message pushes the estimate slightly above that.
    const estimate = estimateTokens([msg("user", "a".repeat(400))]);

    expect(estimate).toBeGreaterThan(100);
    expect(estimate).toBeLessThan(200);
  });

  it("grows with message count", () => {
    const single = estimateTokens([msg("user", "hello")]);

    const hundredMessages: ModelMessage[] = [];
    for (let i = 0; i < 100; i++) {
      hundredMessages.push(msg("user", "hello"));
    }
    const hundred = estimateTokens(hundredMessages);

    expect(hundred).toBeGreaterThan(single * 50);
  });
});
|
|
35
|
+
|
|
36
|
+
describe("getModelContextLimit", () => {
  it("returns known mapping for Sonnet 4.6", () => {
    const limit = getModelContextLimit("anthropic/claude-sonnet-4.6");
    expect(limit).toBe(1_000_000);
  });

  it("returns known mapping for Claude 4 (non-1M)", () => {
    const limit = getModelContextLimit("anthropic/claude-sonnet-4");
    expect(limit).toBe(200_000);
  });

  it("returns DEFAULT_CONTEXT_LIMIT for unknown models", () => {
    // An id that has no entry in the limits table.
    const limit = getModelContextLimit("acme/very-new-model");
    expect(limit).toBe(DEFAULT_CONTEXT_LIMIT);
  });
});
|
|
51
|
+
|
|
52
|
+
describe("compactBySlidingWindow", () => {
  it("no-ops when under the limit", () => {
    const messages = [msg("user", "hi"), msg("assistant", "hello")];
    const result = compactBySlidingWindow(messages, 1_000_000);
    expect(result.droppedCount).toBe(0);
    // Same array instance: nothing was copied or reordered.
    expect(result.messages).toBe(messages);
  });

  it("drops oldest non-system turns to fit", () => {
    // 10 bulky user/assistant exchanges, tiny limit → forces dropping
    const messages: ModelMessage[] = [];
    for (let i = 0; i < 10; i++) {
      messages.push(msg("user", `query-${i} ` + "x".repeat(400)));
      messages.push(msg("assistant", `reply-${i} ` + "y".repeat(400)));
    }
    const maxTokens = 500;
    const result = compactBySlidingWindow(messages, maxTokens, 4);
    expect(result.droppedCount).toBeGreaterThan(0);
    expect(result.estimatedTokensAfter).toBeLessThanOrEqual(
      result.estimatedTokensBefore,
    );

    // Last 4 are preserved verbatim — compare the whole tail, not only the
    // final message, so a corrupted or reordered window is caught.
    expect(result.messages.slice(-4)).toEqual(messages.slice(-4));

    // Marker prepended: with no system messages in play it must be the very
    // first message, not merely present somewhere.
    const head = result.messages[0];
    expect(head?.role).toBe("assistant");
    expect(typeof head?.content === "string" && head.content).toContain(
      "omitted",
    );
  });

  it("always preserves system messages", () => {
    const messages: ModelMessage[] = [
      msg("system", "sys " + "s".repeat(1000)),
      ...Array.from({ length: 20 }, (_, i) =>
        msg("user", `q-${i} ` + "x".repeat(500)),
      ),
    ];
    const result = compactBySlidingWindow(messages, 300, 2);
    expect(result.droppedCount).toBeGreaterThan(0);
    expect(result.messages[0]!.role).toBe("system");
  });

  it("preserves at least keepRecent messages even if over limit", () => {
    const messages = Array.from({ length: 10 }, (_, i) =>
      msg("user", "x".repeat(1000) + `-${i}`),
    );
    const result = compactBySlidingWindow(messages, 10, 3);
    // keepRecent preserved even though we can't get under the limit
    expect(result.messages.length).toBeGreaterThanOrEqual(3);
    // Last 3 are intact
    const tail = result.messages.slice(-3);
    expect(tail).toEqual(messages.slice(-3));
  });
});
|
|
107
|
+
|
|
108
|
+
describe("compactBySlidingWindow — tool message pairing", () => {
  // The compactor inserts a synthetic assistant note when it drops messages.
  // That note must be excluded before checking tool/assistant adjacency:
  // because it has the assistant role, an orphaned tool result sitting right
  // after it would otherwise look correctly paired.
  const isOmissionMarker = (m: ModelMessage): boolean =>
    m.role === "assistant" &&
    typeof m.content === "string" &&
    m.content.includes("omitted");

  /** Retained conversation messages: no system messages, no synthetic marker. */
  const retainedConversation = (all: ModelMessage[]): ModelMessage[] =>
    all.filter((m) => m.role !== "system" && !isOmissionMarker(m));

  it("never leaves a tool message at the head of the retained window", () => {
    // Scenario from Devin: dropping oldest-first could split an
    // assistant(tool_calls) → tool pair, leaving an orphan tool at the
    // head of the retained set. Providers reject this with a 400.
    const messages: ModelMessage[] = [
      msg("user", "q1 " + "x".repeat(400)),
      msg("assistant", "a1-with-tool-call " + "x".repeat(400)),
      msg("tool", "t1-result " + "x".repeat(400)),
      msg("assistant", "a1-final " + "x".repeat(400)),
      msg("user", "q2 " + "x".repeat(400)),
      msg("assistant", "a2-with-tool-call " + "x".repeat(400)),
      msg("tool", "t2-result " + "x".repeat(400)),
      msg("assistant", "a2-final " + "x".repeat(400)),
    ];

    const result = compactBySlidingWindow(messages, 400, 4);
    expect(result.droppedCount).toBeGreaterThan(0);

    // The retained conversation should never start with a tool.
    const firstReal = retainedConversation(result.messages)[0];
    expect(firstReal).toBeDefined();
    expect(firstReal?.role).not.toBe("tool");
  });

  it("drops an assistant+tool pair atomically (not one without the other)", () => {
    const messages: ModelMessage[] = [
      msg("user", "old"),
      msg("assistant", "calling tool"),
      msg("tool", "result " + "x".repeat(2000)),
      msg("user", "recent " + "x".repeat(200)),
      msg("assistant", "recent reply " + "x".repeat(200)),
    ];
    const result = compactBySlidingWindow(messages, 300, 2);
    expect(result.droppedCount).toBeGreaterThan(0);

    // If the group was dropped atomically, both the assistant and its tool
    // are gone together. If the bug was still present, we'd see the tool
    // message lingering alone.
    const kept = retainedConversation(result.messages);
    const hasLoneTool = kept.some(
      (m, i) => m.role === "tool" && (i === 0 || kept[i - 1]!.role === "user"),
    );
    expect(hasLoneTool).toBe(false);

    // The calling assistant and its tool result share one fate.
    const callerKept = kept.some((m) => m.content === "calling tool");
    const toolKept = kept.some((m) => m.role === "tool");
    expect(toolKept).toBe(callerKept);
  });

  it("does not split a tool group when aligning the recent window", () => {
    // keepRecent=3 would cut mid-group with naive slicing. Grouping should
    // expand the recent window to keep the assistant+tools together.
    const messages: ModelMessage[] = [
      msg("user", "old " + "x".repeat(1000)),
      msg("assistant", "calling 2 tools"),
      msg("tool", "result1"),
      msg("tool", "result2"),
      msg("assistant", "final"),
    ];
    const result = compactBySlidingWindow(messages, 200, 3);
    expect(result.droppedCount).toBeGreaterThan(0);

    // Both tool results are among the last 3 messages, so they must be
    // retained — and the assistant that issued the calls must sit directly
    // in front of the first one.
    const kept = retainedConversation(result.messages);
    const firstTool = kept.findIndex((m) => m.role === "tool");
    expect(firstTool).toBeGreaterThan(0);
    expect(kept[firstTool - 1]).toEqual(messages[1]);
  });
});
|
|
182
|
+
|
|
183
|
+
describe("compactForModel", () => {
  it("passes small conversations through untouched", () => {
    const small = [msg("user", "hi")];
    const result = compactForModel(small, "anthropic/claude-sonnet-4.6");
    expect(result.droppedCount).toBe(0);
    expect(result.messages).toBe(small);
  });

  it("uses 70% of the nominal ceiling by default", () => {
    // Sized to land between 70% and 100% of a 1000-token ceiling, so the
    // outcome depends on which fraction is applied.
    const messages = Array.from({ length: 8 }, (_, i) =>
      msg("user", "x".repeat(374) + `-${i}`),
    );
    const estimate = estimateTokens(messages);
    expect(estimate).toBeGreaterThan(700);
    expect(estimate).toBeLessThan(1000);

    // Default fraction: threshold is 700, so older messages are dropped.
    const byDefault = compactForModel(messages, "anthropic/claude-sonnet-4.6", {
      maxTokens: 1000,
      keepRecent: 2,
    });
    expect(byDefault.droppedCount).toBeGreaterThan(0);

    // Full ceiling: the same conversation fits and is left alone.
    const atFullCeiling = compactForModel(
      messages,
      "anthropic/claude-sonnet-4.6",
      { maxTokens: 1000, compactAtFraction: 1, keepRecent: 2 },
    );
    expect(atFullCeiling.droppedCount).toBe(0);
    expect(atFullCeiling.messages).toBe(messages);
  });

  it("honors explicit maxTokens override", () => {
    const messages = Array.from({ length: 30 }, (_, i) =>
      msg("user", "x".repeat(500) + `-${i}`),
    );
    const result = compactForModel(messages, "anthropic/claude-sonnet-4.6", {
      maxTokens: 2000,
      keepRecent: 2,
    });
    expect(result.droppedCount).toBeGreaterThan(0);
  });
});
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import type { ModelMessage } from "ai";
|
|
2
|
+
import { MODELS } from "./models";
|
|
3
|
+
|
|
4
|
+
/** Union of the model ids listed in MODELS. */
type KnownModelId = (typeof MODELS)[number];

/**
 * Fraction-of-limit at which compaction kicks in. Below this, messages pass
 * through untouched; above this, oldest non-system turns are dropped until
 * the estimated token count is back under the threshold.
 */
export const DEFAULT_COMPACTION_FRACTION = 0.7;

/**
 * Number of most-recent messages preserved verbatim, even if the conversation
 * is already over the limit. Ensures the assistant always has the latest turn
 * and its immediate predecessor.
 */
export const DEFAULT_KEEP_RECENT = 4;

/**
 * Conservative fallback when we encounter a model we haven't mapped — big
 * enough to be useful for unknown models, small enough to still trigger
 * compaction before hitting upstream 400s.
 */
export const DEFAULT_CONTEXT_LIMIT = 200_000;

/**
 * Known input-token ceilings per model (nominal upstream maximum). Keyed by
 * MODELS so TypeScript catches drift — adding a model id here that isn't in
 * MODELS, or misspelling an id, is a compile error. Coverage is intentionally
 * partial: models without an explicit entry fall back to DEFAULT_CONTEXT_LIMIT.
 */
const MODEL_CONTEXT_LIMITS: Partial<Record<KnownModelId, number>> = {
  // Anthropic (1M tier where available, else 200K)
  "anthropic/claude-opus-4.6": 1_000_000,
  "anthropic/claude-opus-4.5": 1_000_000,
  "anthropic/claude-opus-4.1": 200_000,
  "anthropic/claude-sonnet-4.6": 1_000_000,
  "anthropic/claude-sonnet-4.5": 1_000_000,
  "anthropic/claude-sonnet-4": 200_000,
  "anthropic/claude-haiku-4.5": 200_000,

  // OpenAI
  "openai/gpt-5.4": 400_000,
  "openai/gpt-5.4-mini": 400_000,
  "openai/gpt-5.1": 400_000,
  "openai/gpt-5.1-codex": 400_000,
  "openai/gpt-5": 400_000,
  "openai/gpt-4.1": 1_000_000,
  "openai/o4-mini": 200_000,
  "openai/o3": 200_000,

  // Google
  "google/gemini-3.1-pro-preview": 1_000_000,
  "google/gemini-2.5-pro": 1_000_000,
  "google/gemini-2.5-flash": 1_000_000,

  // Others
  "deepseek/deepseek-r1": 128_000,
  "deepseek/deepseek-v3.2": 128_000,
  "meta-llama/llama-4-maverick": 1_000_000,
  "x-ai/grok-4": 256_000,
  "qwen/qwen3-coder": 256_000,
  "moonshotai/kimi-k2.5": 128_000,
  "mistralai/mistral-medium-3.1": 128_000,
  "mistralai/codestral-2508": 256_000,
  "mistralai/devstral-small": 128_000,
};

/**
 * Returns the input-token ceiling for a given OpenRouter model id, or
 * DEFAULT_CONTEXT_LIMIT if unknown.
 *
 * @param modelId - Any string; it does not have to be one of the known ids.
 * @returns The mapped ceiling, or DEFAULT_CONTEXT_LIMIT when the id has no
 *   entry of its own in the table.
 */
export function getModelContextLimit(modelId: string): number {
  // `modelId` is an arbitrary string, so only the table's OWN keys count.
  // A bare index lookup would also find members inherited from
  // Object.prototype ("constructor", "toString", …) and return a function
  // where callers expect a number.
  if (!Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_LIMITS, modelId)) {
    return DEFAULT_CONTEXT_LIMIT;
  }
  return MODEL_CONTEXT_LIMITS[modelId as KnownModelId] ?? DEFAULT_CONTEXT_LIMIT;
}
|
|
77
|
+
|
|
78
|
+
/** Characters of serialized JSON assumed to correspond to one token. */
const CHARS_PER_TOKEN = 4;

/**
 * Length of one message's JSON form — exactly what that message contributes
 * to `JSON.stringify(messages)`, not counting the surrounding brackets and
 * the commas between elements.
 */
function serializedLength(message: ModelMessage): number {
  // JSON.stringify yields undefined for a value it cannot represent; inside
  // an array such an element is written as `null`, so mirror that here.
  return (JSON.stringify(message) ?? "null").length;
}

/**
 * Token estimate for an array of `count` messages whose elements serialize to
 * `bodyChars` characters in total. Produces the same number as
 * `estimateTokens` would for that array, without serializing it again.
 */
function estimateFromLengths(bodyChars: number, count: number): number {
  // "[" + elements joined by "," + "]"
  const commas = count > 1 ? count - 1 : 0;
  return Math.ceil((2 + bodyChars + commas) / CHARS_PER_TOKEN);
}

/**
 * Rough input-token estimate using a chars/4 heuristic on the JSON serialized
 * conversation. Tokens-per-char varies by model and content, but a chars/4
 * heuristic matches OpenAI's rule-of-thumb within ~15% for English prose and
 * is deterministic + zero-cost — good enough to trigger compaction.
 *
 * @param messages - Conversation to measure.
 * @returns Serialized length divided by four, rounded up.
 */
export function estimateTokens(messages: ModelMessage[]): number {
  const serialized = JSON.stringify(messages);
  return Math.ceil(serialized.length / CHARS_PER_TOKEN);
}

/** Outcome of a compaction pass, including diagnostics for logging. */
export interface CompactionResult {
  /** Messages to send; the input array itself when nothing was changed by the early exit. */
  messages: ModelMessage[];
  /** Number of original messages removed (the synthetic marker is not counted). */
  droppedCount: number;
  /** Estimate for the input conversation. */
  estimatedTokensBefore: number;
  /** Estimate for the returned conversation, marker included. */
  estimatedTokensAfter: number;
}

/** An assistant/user message plus the `tool` messages that directly follow it. */
interface MessageGroup {
  members: ModelMessage[];
  /** Sum of `serializedLength` over `members`. */
  chars: number;
}

/**
 * Drops oldest non-system messages until the estimated token count is under
 * maxTokens. Always preserves the last `keepRecent` messages and any system
 * role messages. When any messages are dropped, prepends a synthetic assistant
 * note so the model knows earlier context was elided.
 *
 * Details worth knowing:
 * - Consecutive `tool` messages are grouped with the message that precedes
 *   them, and groups are dropped or kept as a unit, so a tool result is never
 *   separated from the assistant message that requested it.
 * - The recent window is widened to whole groups, so it may hold more than
 *   `keepRecent` messages.
 * - Once the early exit is passed, system messages are moved to the front of
 *   the returned array.
 * - The synthetic note is added after the budget loop, so
 *   `estimatedTokensAfter` can sit slightly above `maxTokens`.
 *
 * Each message is serialized once; the running estimate is then adjusted
 * arithmetically as groups are dropped instead of re-serializing the whole
 * retained conversation on every iteration.
 *
 * @param messages - Full conversation, oldest first.
 * @param maxTokens - Estimated-token budget to get under.
 * @param keepRecent - Minimum number of trailing non-system messages to keep.
 * @returns The compacted conversation plus diagnostics.
 */
export function compactBySlidingWindow(
  messages: ModelMessage[],
  maxTokens: number,
  keepRecent: number = DEFAULT_KEEP_RECENT,
): CompactionResult {
  const estimatedTokensBefore = estimateTokens(messages);

  if (estimatedTokensBefore <= maxTokens || messages.length <= keepRecent) {
    return {
      messages,
      droppedCount: 0,
      estimatedTokensBefore,
      estimatedTokensAfter: estimatedTokensBefore,
    };
  }

  const systemMessages = messages.filter((m) => m.role === "system");
  const nonSystem = messages.filter((m) => m.role !== "system");

  // Group consecutive `tool` messages with the message that precedes them.
  // OpenAI-compatible providers require every tool-result message to be
  // immediately preceded by the assistant message holding its tool_calls —
  // splitting these produces an invalid conversation that providers reject
  // with a 400.
  const groups: MessageGroup[] = [];
  for (const m of nonSystem) {
    const chars = serializedLength(m);
    const current = groups[groups.length - 1];
    if (m.role === "tool" && current) {
      current.members.push(m);
      current.chars += chars;
    } else {
      groups.push({ members: [m], chars });
    }
  }

  // Reserve the trailing groups that together contain at least `keepRecent`
  // messages; everything before `recentStart` is eligible for dropping.
  let recentMsgCount = 0;
  let recentStart = groups.length;
  while (recentStart > 0 && recentMsgCount < keepRecent) {
    recentStart -= 1;
    recentMsgCount += groups[recentStart]?.members.length ?? 0;
  }

  // Running totals for "system messages + every group not yet dropped".
  let retainedChars =
    systemMessages.reduce((sum, m) => sum + serializedLength(m), 0) +
    groups.reduce((sum, g) => sum + g.chars, 0);
  let retainedCount = messages.length;

  let firstKept = 0;
  let droppedCount = 0;
  for (const group of groups.slice(0, recentStart)) {
    // Written as a negated `>` so a NaN budget stops the loop, exactly as a
    // `while (estimate > maxTokens)` condition would.
    if (!(estimateFromLengths(retainedChars, retainedCount) > maxTokens)) {
      break;
    }
    retainedChars -= group.chars;
    retainedCount -= group.members.length;
    droppedCount += group.members.length;
    firstKept += 1;
  }

  const kept = groups.slice(firstKept).flatMap((g) => g.members);

  let working: ModelMessage[];
  if (droppedCount > 0) {
    const marker: ModelMessage = {
      role: "assistant",
      content: `[${droppedCount} earlier message${
        droppedCount === 1 ? "" : "s"
      } omitted to stay under context length. If the user asks about them, say you no longer have that context and suggest they restate the relevant details.]`,
    };
    working = [...systemMessages, marker, ...kept];
  } else {
    working = [...systemMessages, ...kept];
  }

  return {
    messages: working,
    droppedCount,
    estimatedTokensBefore,
    estimatedTokensAfter: estimateTokens(working),
  };
}
|
|
187
|
+
|
|
188
|
+
/** Optional tuning knobs for {@link compactForModel}. */
export interface CompactionOptions {
  /** Replaces the model's nominal input-token ceiling. */
  maxTokens?: number;
  /** Share of the ceiling at which compaction starts. */
  compactAtFraction?: number;
  /** How many of the most recent messages are kept verbatim. */
  keepRecent?: number;
}

/**
 * One-call entry point for compaction: resolves the token ceiling (explicit
 * override first, otherwise the per-model lookup), scales it by the
 * compaction fraction, rounds down, and hands the resulting threshold to
 * compactBySlidingWindow.
 *
 * @param messages - Conversation to compact.
 * @param modelId - Model id used for the ceiling lookup when no override is given.
 * @param opts - Optional overrides; each unset field falls back to its default.
 * @returns The (possibly unchanged) messages plus diagnostics.
 */
export function compactForModel(
  messages: ModelMessage[],
  modelId: string,
  opts: CompactionOptions = {},
): CompactionResult {
  const { maxTokens, compactAtFraction, keepRecent } = opts;

  const ceiling = maxTokens ?? getModelContextLimit(modelId);
  const threshold = Math.floor(
    ceiling * (compactAtFraction ?? DEFAULT_COMPACTION_FRACTION),
  );

  return compactBySlidingWindow(messages, threshold, keepRecent);
}
|
|
@@ -7,10 +7,12 @@
|
|
|
7
7
|
* - VITE_DATADOG_APPLICATION_ID
|
|
8
8
|
* - VITE_DATADOG_CLIENT_TOKEN
|
|
9
9
|
* - VITE_DATADOG_SITE (optional, defaults to datadoghq.com)
|
|
10
|
+
* - VITE_DATADOG_ENV (optional, defaults to prod)
|
|
10
11
|
*/
|
|
11
12
|
export const DATADOG_CONFIG = {
|
|
12
13
|
applicationId: import.meta.env.VITE_DATADOG_APPLICATION_ID ?? "",
|
|
13
14
|
clientToken: import.meta.env.VITE_DATADOG_CLIENT_TOKEN ?? "",
|
|
14
15
|
site: import.meta.env.VITE_DATADOG_SITE ?? "datadoghq.com",
|
|
16
|
+
env: import.meta.env.VITE_DATADOG_ENV ?? "prod",
|
|
15
17
|
service: "gram-elements",
|
|
16
18
|
} as const;
|
package/src/lib/errorTracking.ts
CHANGED
|
@@ -44,7 +44,7 @@ export function initErrorTracking(config: ErrorTrackingConfig = {}): void {
|
|
|
44
44
|
clientToken: DATADOG_CONFIG.clientToken,
|
|
45
45
|
site: DATADOG_CONFIG.site,
|
|
46
46
|
service: DATADOG_CONFIG.service,
|
|
47
|
-
env:
|
|
47
|
+
env: DATADOG_CONFIG.env,
|
|
48
48
|
sessionSampleRate: 100,
|
|
49
49
|
sessionReplaySampleRate: 100,
|
|
50
50
|
trackUserInteractions: true,
|