@gram-ai/elements 1.28.0 → 1.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/components/MessageContent.d.ts +20 -0
  2. package/dist/components/MessageContent.parser.d.ts +12 -0
  3. package/dist/components/MessageContent.test.d.ts +1 -0
  4. package/dist/elements.cjs +1 -1
  5. package/dist/elements.css +1 -1
  6. package/dist/elements.js +14 -13
  7. package/dist/{index-C4bFBGfl.cjs → index-COzPF-WM.cjs} +45 -45
  8. package/dist/index-COzPF-WM.cjs.map +1 -0
  9. package/dist/{index-D93pV0_o.js → index-CRhpKl-G.js} +5218 -5201
  10. package/dist/index-CRhpKl-G.js.map +1 -0
  11. package/dist/{index-CtZz13Cf.js → index-QUz5guSg.js} +11835 -11604
  12. package/dist/index-QUz5guSg.js.map +1 -0
  13. package/dist/index-fVcTljYT.cjs +194 -0
  14. package/dist/index-fVcTljYT.cjs.map +1 -0
  15. package/dist/index.d.ts +2 -0
  16. package/dist/lib/contextCompaction.d.ts +58 -0
  17. package/dist/lib/contextCompaction.test.d.ts +1 -0
  18. package/dist/lib/errorTracking.config.d.ts +2 -0
  19. package/dist/lib/tools.byte-cap.test.d.ts +1 -0
  20. package/dist/lib/tools.d.ts +19 -0
  21. package/dist/lib/tools.test.d.ts +1 -0
  22. package/dist/plugins/index.d.ts +4 -1
  23. package/dist/plugins/index.test.d.ts +1 -0
  24. package/dist/plugins.cjs +1 -1
  25. package/dist/plugins.js +1 -1
  26. package/dist/{profiler-Ccma0l1p.js → profiler-DifNjGGB.js} +2 -2
  27. package/dist/{profiler-Ccma0l1p.js.map → profiler-DifNjGGB.js.map} +1 -1
  28. package/dist/{profiler-CjNa3A1d.cjs → profiler-KLtVMM14.cjs} +2 -2
  29. package/dist/{profiler-CjNa3A1d.cjs.map → profiler-KLtVMM14.cjs.map} +1 -1
  30. package/dist/{startRecording-DAURU74n.js → startRecording-C6xu9UA9.js} +2 -2
  31. package/dist/{startRecording-DAURU74n.js.map → startRecording-C6xu9UA9.js.map} +1 -1
  32. package/dist/{startRecording-jSovclaq.cjs → startRecording-YENzw_0G.cjs} +2 -2
  33. package/dist/{startRecording-jSovclaq.cjs.map → startRecording-YENzw_0G.cjs.map} +1 -1
  34. package/dist/types/index.d.ts +49 -0
  35. package/dist/types/plugins.d.ts +5 -0
  36. package/package.json +2 -2
  37. package/src/components/MessageContent.parser.ts +39 -0
  38. package/src/components/MessageContent.test.ts +110 -0
  39. package/src/components/MessageContent.tsx +82 -0
  40. package/src/contexts/ElementsProvider.tsx +57 -7
  41. package/src/index.ts +2 -0
  42. package/src/lib/contextCompaction.test.ts +201 -0
  43. package/src/lib/contextCompaction.ts +211 -0
  44. package/src/lib/errorTracking.config.ts +2 -0
  45. package/src/lib/errorTracking.ts +1 -1
  46. package/src/lib/tools.byte-cap.test.ts +132 -0
  47. package/src/lib/tools.test.ts +259 -0
  48. package/src/lib/tools.ts +122 -0
  49. package/src/plugins/chart/index.ts +1 -0
  50. package/src/plugins/chart/ui/bar-chart.tsx +9 -1
  51. package/src/plugins/generative-ui/index.ts +1 -0
  52. package/src/plugins/index.test.ts +62 -0
  53. package/src/plugins/index.ts +14 -1
  54. package/src/types/index.ts +55 -0
  55. package/src/types/plugins.ts +6 -0
  56. package/dist/index-BmTGnEaV.cjs +0 -190
  57. package/dist/index-BmTGnEaV.cjs.map +0 -1
  58. package/dist/index-C4bFBGfl.cjs.map +0 -1
  59. package/dist/index-CtZz13Cf.js.map +0 -1
  60. package/dist/index-D93pV0_o.js.map +0 -1
package/src/components/MessageContent.tsx ADDED
@@ -0,0 +1,82 @@
+ "use client";
+
+ import { FC, useMemo } from "react";
+ import { ElementsContext } from "@/contexts/contexts";
+ import { ToolExecutionProvider } from "@/contexts/ToolExecutionContext";
+ import type { ElementsContextType, Model } from "@/types";
+ import { recommended } from "@/plugins";
+ import { chart } from "@/plugins/chart";
+ import { generativeUI } from "@/plugins/generative-ui";
+ import { parseSegments } from "./MessageContent.parser";
+
+ const SUPPORTED_LANGUAGES: Record<string, FC<{ code: string }>> = {
+   chart: chart.Component as FC<{ code: string }>,
+   ui: generativeUI.Component as FC<{ code: string }>,
+ };
+
+ // Provides only what useDensity()/useElements() read inside the chart and ui
+ // renderers — no auth, no MCP, no runtime.
+ const STUB_CONTEXT: ElementsContextType = {
+   config: { projectSlug: "" },
+   setModel: () => {},
+   model: "" as Model,
+   isExpanded: false,
+   setIsExpanded: () => {},
+   isOpen: false,
+   setIsOpen: () => {},
+   plugins: recommended,
+   mcpTools: undefined,
+ };
+
+ export interface MessageContentProps {
+   /** Raw assistant message content (markdown text optionally containing
+    * ```chart and ```ui fenced code blocks). */
+   content: string;
+   /** Optional className applied to the root container. */
+   className?: string;
+ }
+
+ /**
+  * Standalone renderer for stored chat message content. Recognises the same
+  * `chart` and `ui` fenced code blocks that the live `<Chat />` component
+  * renders as widgets, but works without an `ElementsProvider`, MCP client,
+  * auth session, or assistant-ui runtime.
+  *
+  * Use in static viewers (agent session detail panel, replay, share) so a
+  * stored bar chart appears as a chart instead of as raw JSON. Plain markdown
+  * formatting is intentionally not applied — text segments render as
+  * preformatted text.
+  */
+ export const MessageContent: FC<MessageContentProps> = ({
+   content,
+   className,
+ }) => {
+   const segments = useMemo(() => parseSegments(content), [content]);
+
+   return (
+     <ElementsContext.Provider value={STUB_CONTEXT}>
+       {/* Empty tools so generative-ui's <ActionButton> renders disabled. */}
+       <ToolExecutionProvider tools={{}}>
+         <div className={className}>
+           {segments.map((seg, i) => {
+             if (seg.type === "text") {
+               if (seg.text.trim() === "") return null;
+               return (
+                 <div key={i} className="whitespace-pre-wrap">
+                   {seg.text}
+                 </div>
+               );
+             }
+             const Component = SUPPORTED_LANGUAGES[seg.lang];
+             if (!Component) return null;
+             return (
+               <div key={i} className="my-2">
+                 <Component code={seg.code} />
+               </div>
+             );
+           })}
+         </div>
+       </ToolExecutionProvider>
+     </ElementsContext.Provider>
+   );
+ };
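Reviewer note: a minimal sketch of how a host app might consume this new export in a static viewer. `SessionTranscript` and `StoredMessage` are hypothetical names for illustration, not part of the package; the component only needs the raw content string.

```tsx
import { MessageContent } from "@gram-ai/elements";

// Hypothetical host-side shape; only the raw string matters to the package.
interface StoredMessage {
  role: "user" | "assistant";
  content: string;
}

// No ElementsProvider, MCP client, or auth session required: MessageContent
// ships its own stub context. chart/ui fences render as widgets; everything
// else stays preformatted text.
export function SessionTranscript({ messages }: { messages: StoredMessage[] }) {
  return (
    <div>
      {messages.map((m, i) => (
        <MessageContent key={i} content={m.content} className="mb-4" />
      ))}
    </div>
  );
}
```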
package/src/contexts/ElementsProvider.tsx CHANGED
@@ -15,9 +15,11 @@ import {
    setFrontendToolApprovalConfig,
    toAISDKTools,
    wrapToolsWithApproval,
+   wrapToolsWithByteCap,
    type ApprovalHelpers,
    type FrontendTool,
  } from "@/lib/tools";
+ import { compactForModel } from "@/lib/contextCompaction";
  import { cn } from "@/lib/utils";
  import { recommended } from "@/plugins";
  import { ElementsConfig, Model } from "@/types";
@@ -37,6 +39,7 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
  import {
    convertToModelMessages,
    createUIMessageStream,
+   lastAssistantMessageIsCompleteWithToolCalls,
    LanguageModel,
    smoothStream,
    stepCountIs,
@@ -188,6 +191,12 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
      toolsWithCustomComponents,
    );

+   // Read inside `sendMessages` via ref so prompt changes don't churn the
+   // transport useMemo identity. Same pattern as ensureValidHeadersRef /
+   // approvalHelpersRef below.
+   const systemPromptRef = useRef(systemPrompt);
+   systemPromptRef.current = systemPrompt;
+
    // Initialize error tracking on mount
    useEffect(() => {
      initErrorTracking({
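Reviewer note: the ref pattern that comment describes is easy to get wrong, so here is a minimal, generic sketch of it. Names are illustrative, not from this package.

```ts
import { useMemo, useRef } from "react";

// Generic sketch: keep a frequently-changing value out of a useMemo
// dependency list by reading it through a ref at call time.
function useStableSender(systemPrompt: string) {
  const systemPromptRef = useRef(systemPrompt);
  systemPromptRef.current = systemPrompt; // refreshed every render, no memo churn

  // Identity is stable across prompt edits, yet the latest prompt is still
  // observed whenever the returned function actually runs.
  return useMemo(
    () => ({
      send: () => ({ system: systemPromptRef.current }),
    }),
    [], // deliberately omits systemPrompt
  );
}
```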
@@ -366,12 +375,19 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
        } as ToolSet;

        // Wrap tools that require approval
-       const tools = wrapToolsWithApproval(
+       const approvedTools = wrapToolsWithApproval(
          combinedTools,
          config.tools?.toolsRequiringApproval,
          getApprovalHelpers(),
        );

+       // Cap oversized tool results so one greedy tool call (e.g. a wide log
+       // search) can't fill the context window in a single step.
+       const tools = wrapToolsWithByteCap(
+         approvedTools,
+         config.tools?.maxOutputBytes,
+       );
+
        // Stream the response
        const modelToUse = config.languageModel
          ? config.languageModel
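Reviewer note: `wrapToolsWithByteCap` lives in package/src/lib/tools.ts, which this diff view doesn't expand, so the following is only a guessed sketch of the technique the comment describes: truncating each tool result to an output budget. The function name suffix, default budget, and truncation format are all assumptions, not the package's actual implementation.

```ts
import type { Tool, ToolSet } from "ai";

// Hypothetical sketch of a byte-cap wrapper. Uses serialized string length
// as a cheap proxy for bytes.
export function wrapToolsWithByteCapSketch(
  tools: ToolSet,
  maxOutputBytes = 50_000, // assumed default
): ToolSet {
  const wrapped: ToolSet = {};
  for (const [name, tool] of Object.entries(tools)) {
    wrapped[name] = {
      ...tool,
      execute: async (args, options) => {
        const result = await tool.execute?.(args, options);
        const serialized = JSON.stringify(result) ?? "";
        if (serialized.length <= maxOutputBytes) return result;
        // Return a truncated string plus a note so the model knows data was cut.
        return (
          serialized.slice(0, maxOutputBytes) +
          `\n[output truncated at ${maxOutputBytes} bytes]`
        );
      },
    } as Tool;
  }
  return wrapped;
}
```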
@@ -387,10 +403,32 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
        const nonSystemMessages = cleanedMessages.filter(
          (m) => m.role !== "system",
        );
-       const modelMessages = convertToModelMessages(nonSystemMessages);
+       const rawModelMessages = convertToModelMessages(nonSystemMessages);
+
+       // Auto-compact older turns if the estimated input is approaching
+       // the model's context window. System prompt + last few turns are
+       // always preserved. No-op when the conversation is small.
+       const compaction = config.contextCompaction?.disabled
+         ? {
+             messages: rawModelMessages,
+             droppedCount: 0,
+             estimatedTokensBefore: 0,
+             estimatedTokensAfter: 0,
+           }
+         : compactForModel(rawModelMessages, model, {
+             maxTokens: config.contextCompaction?.maxTokens,
+             compactAtFraction: config.contextCompaction?.compactAtFraction,
+             keepRecent: config.contextCompaction?.keepRecent,
+           });
+       if (compaction.droppedCount > 0) {
+         console.warn(
+           `[elements] compacted ${compaction.droppedCount} older turn(s) from ${compaction.estimatedTokensBefore} → ${compaction.estimatedTokensAfter} est. tokens (model ${model})`,
+         );
+       }
+       const modelMessages = compaction.messages;

        const result = streamText({
-         system: systemPrompt,
+         system: systemPromptRef.current,
          model: modelToUse,
          messages: modelMessages,
          tools,
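Reviewer note: taken together, the new `config.contextCompaction` surface looks like this from the host side. A sketch assuming `ElementsConfig` is exported from the package root; values are illustrative, and omitting the block keeps the defaults.

```ts
import type { ElementsConfig } from "@gram-ai/elements";

// Illustrative values only; every field is optional.
const config: Partial<ElementsConfig> = {
  contextCompaction: {
    // disabled: true,      // opt out of compaction entirely
    maxTokens: 150_000,     // override the model's nominal input ceiling
    compactAtFraction: 0.7, // start compacting at 70% of maxTokens
    keepRecent: 4,          // always keep the last 4 messages verbatim
  },
};
```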
@@ -456,8 +494,12 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
      [
        config.languageModel,
        config.tools?.toolsRequiringApproval,
+       config.tools?.maxOutputBytes,
+       config.contextCompaction?.disabled,
+       config.contextCompaction?.maxTokens,
+       config.contextCompaction?.compactAtFraction,
+       config.contextCompaction?.keepRecent,
        model,
-       systemPrompt,
        mcpTools,
        getApprovalHelpers,
        apiUrl,
@@ -606,9 +648,14 @@ const ElementsProviderWithHistory = ({
    });
    const initialThreadId = contextValue?.config.history?.initialThreadId;

-   // Hook factory for creating the base chat runtime
+   // Without `sendAutomaticallyWhen`, client-side frontend tools leave the turn
+   // half-finished: the tool-result is patched in but the agent never resumes,
+   // so the next user message lands on top of an unresolved tool-call sequence.
    const useChatRuntimeHook = useCallback(() => {
-     return useChatRuntime({ transport });
+     return useChatRuntime({
+       transport,
+       sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
+     });
    }, [transport]);

    const runtime = useRemoteThreadListRuntime({
@@ -691,7 +738,10 @@ const ElementsProviderWithoutHistory = ({
    executableTools,
    currentChatId,
  }: ElementsProviderWithoutHistoryProps) => {
-   const runtime = useChatRuntime({ transport });
+   const runtime = useChatRuntime({
+     transport,
+     sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
+   });

    // Populate runtimeRef so transport can access thread context
    useEffect(() => {
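Reviewer note: `lastAssistantMessageIsCompleteWithToolCalls` ships with the `ai` package (it is imported in the hunk above). For context, a hedged sketch of the kind of client-side tool this change protects; `copyToClipboard` is a hypothetical example and the field names assume AI SDK v5's tool shape.

```ts
import { z } from "zod";

// Hypothetical frontend tool: runs in the browser, so its result only reaches
// the model if the runtime resubmits the conversation after execution.
const copyToClipboard = {
  description: "Copy text to the user's clipboard",
  inputSchema: z.object({ text: z.string() }), // field name per AI SDK v5
  execute: async ({ text }: { text: string }) => {
    await navigator.clipboard.writeText(text);
    return "copied";
  },
};

// With `sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls`
// (as wired above), the runtime resubmits once every tool call in the last
// assistant message has a result, so the agent finishes its turn instead of
// stalling on a patched-in tool result.
```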
package/src/index.ts CHANGED
@@ -18,6 +18,8 @@ export { ChatHistory } from "@/components/ChatHistory";
  export { ShareButton } from "@/components/ShareButton";
  export type { ShareButtonProps } from "@/components/ShareButton";
  export { ToolFallback } from "@/components/assistant-ui/tool-fallback";
+ export { MessageContent } from "@/components/MessageContent";
+ export type { MessageContentProps } from "@/components/MessageContent";

  // Replay
  export { Replay } from "@/components/Replay";
package/src/lib/contextCompaction.test.ts ADDED
@@ -0,0 +1,201 @@
+ import { describe, expect, it } from "vitest";
+ import type { ModelMessage } from "ai";
+ import {
+   compactBySlidingWindow,
+   compactForModel,
+   DEFAULT_CONTEXT_LIMIT,
+   estimateTokens,
+   getModelContextLimit,
+ } from "./contextCompaction";
+
+ function msg(
+   role: "system" | "user" | "assistant" | "tool",
+   content: string,
+ ): ModelMessage {
+   return { role, content } as ModelMessage;
+ }
+
+ describe("estimateTokens", () => {
+   it("returns roughly chars/4", () => {
+     const messages = [msg("user", "a".repeat(400))];
+     const n = estimateTokens(messages);
+     // Actual output is JSON-wrapped so it's slightly larger than 100
+     expect(n).toBeGreaterThan(100);
+     expect(n).toBeLessThan(200);
+   });
+
+   it("grows with message count", () => {
+     const one = estimateTokens([msg("user", "hello")]);
+     const many = estimateTokens(
+       Array.from({ length: 100 }, () => msg("user", "hello")),
+     );
+     expect(many).toBeGreaterThan(one * 50);
+   });
+ });
+
+ describe("getModelContextLimit", () => {
+   it("returns known mapping for Sonnet 4.6", () => {
+     expect(getModelContextLimit("anthropic/claude-sonnet-4.6")).toBe(1_000_000);
+   });
+
+   it("returns known mapping for Claude 4 (non-1M)", () => {
+     expect(getModelContextLimit("anthropic/claude-sonnet-4")).toBe(200_000);
+   });
+
+   it("returns DEFAULT_CONTEXT_LIMIT for unknown models", () => {
+     expect(getModelContextLimit("acme/very-new-model")).toBe(
+       DEFAULT_CONTEXT_LIMIT,
+     );
+   });
+ });
+
+ describe("compactBySlidingWindow", () => {
+   it("no-ops when under the limit", () => {
+     const messages = [msg("user", "hi"), msg("assistant", "hello")];
+     const result = compactBySlidingWindow(messages, 1_000_000);
+     expect(result.droppedCount).toBe(0);
+     expect(result.messages).toBe(messages);
+   });
+
+   it("drops oldest non-system turns to fit", () => {
+     // 10 bulky messages, tiny limit → forces dropping
+     const messages: ModelMessage[] = [];
+     for (let i = 0; i < 10; i++) {
+       messages.push(msg("user", `query-${i} ` + "x".repeat(400)));
+       messages.push(msg("assistant", `reply-${i} ` + "y".repeat(400)));
+     }
+     const maxTokens = 500;
+     const result = compactBySlidingWindow(messages, maxTokens, 4);
+     expect(result.droppedCount).toBeGreaterThan(0);
+     expect(result.estimatedTokensAfter).toBeLessThanOrEqual(
+       result.estimatedTokensBefore,
+     );
+     // Last 4 are preserved verbatim
+     const tail = result.messages.slice(-4);
+     expect(tail[tail.length - 1]).toEqual(messages[messages.length - 1]);
+     // Marker prepended
+     const markerPresent = result.messages.some(
+       (m) => typeof m.content === "string" && m.content.includes("omitted"),
+     );
+     expect(markerPresent).toBe(true);
+   });
+
+   it("always preserves system messages", () => {
+     const messages: ModelMessage[] = [
+       msg("system", "sys " + "s".repeat(1000)),
+       ...Array.from({ length: 20 }, (_, i) =>
+         msg("user", `q-${i} ` + "x".repeat(500)),
+       ),
+     ];
+     const result = compactBySlidingWindow(messages, 300, 2);
+     expect(result.droppedCount).toBeGreaterThan(0);
+     expect(result.messages[0]!.role).toBe("system");
+   });
+
+   it("preserves at least keepRecent messages even if over limit", () => {
+     const messages = Array.from({ length: 10 }, (_, i) =>
+       msg("user", "x".repeat(1000) + `-${i}`),
+     );
+     const result = compactBySlidingWindow(messages, 10, 3);
+     // keepRecent preserved even though we can't get under the limit
+     expect(result.messages.length).toBeGreaterThanOrEqual(3);
+     // Last 3 are intact
+     const tail = result.messages.slice(-3);
+     expect(tail).toEqual(messages.slice(-3));
+   });
+ });
+
+ describe("compactBySlidingWindow — tool message pairing", () => {
+   it("never leaves a tool message at the head of the retained window", () => {
+     // Scenario from Devin: dropping oldest-first could split an
+     // assistant(tool_calls) → tool pair, leaving an orphan tool at the
+     // head of the retained set. Providers reject this with a 400.
+     const messages: ModelMessage[] = [
+       msg("user", "q1 " + "x".repeat(400)),
+       msg("assistant", "a1-with-tool-call " + "x".repeat(400)),
+       msg("tool", "t1-result " + "x".repeat(400)),
+       msg("assistant", "a1-final " + "x".repeat(400)),
+       msg("user", "q2 " + "x".repeat(400)),
+       msg("assistant", "a2-with-tool-call " + "x".repeat(400)),
+       msg("tool", "t2-result " + "x".repeat(400)),
+       msg("assistant", "a2-final " + "x".repeat(400)),
+     ];
+
+     const result = compactBySlidingWindow(messages, 400, 4);
+     expect(result.droppedCount).toBeGreaterThan(0);
+
+     // The retained non-system messages should never start with a tool.
+     const nonSystem = result.messages.filter((m) => m.role !== "system");
+     // Skip the synthetic assistant marker if present.
+     const firstReal = nonSystem.find(
+       (m) =>
+         !(
+           m.role === "assistant" &&
+           typeof m.content === "string" &&
+           m.content.includes("omitted")
+         ),
+     );
+     expect(firstReal?.role).not.toBe("tool");
+   });
+
+   it("drops an assistant+tool pair atomically (not one without the other)", () => {
+     const messages: ModelMessage[] = [
+       msg("user", "old"),
+       msg("assistant", "calling tool"),
+       msg("tool", "result " + "x".repeat(2000)),
+       msg("user", "recent " + "x".repeat(200)),
+       msg("assistant", "recent reply " + "x".repeat(200)),
+     ];
+     const result = compactBySlidingWindow(messages, 300, 2);
+     // If the group was dropped atomically, both the assistant and its tool
+     // are gone together. If the bug was still present, we'd see the tool
+     // message lingering alone.
+     const nonSystem = result.messages.filter((m) => m.role !== "system");
+     const hasLoneTool = nonSystem.some(
+       (m, i) =>
+         m.role === "tool" && (i === 0 || nonSystem[i - 1]!.role === "user"),
+     );
+     expect(hasLoneTool).toBe(false);
+   });
+
+   it("does not split a tool group when aligning the recent window", () => {
+     // keepRecent=3 would cut mid-group with naive slicing. Grouping should
+     // expand the recent window to keep the assistant+tools together.
+     const messages: ModelMessage[] = [
+       msg("user", "old " + "x".repeat(1000)),
+       msg("assistant", "calling 2 tools"),
+       msg("tool", "result1"),
+       msg("tool", "result2"),
+       msg("assistant", "final"),
+     ];
+     const result = compactBySlidingWindow(messages, 200, 3);
+     // If the first tool was the "recent" cut-off, we'd see a tool at
+     // the head of retained — but grouping should have pulled the
+     // assistant with it.
+     const kept = result.messages.filter((m) => m.role !== "system");
+     const firstTool = kept.findIndex((m) => m.role === "tool");
+     if (firstTool !== -1) {
+       expect(kept[firstTool - 1]?.role).toMatch(/assistant|tool/);
+     }
+   });
+ });
+
+ describe("compactForModel", () => {
+   it("uses 70% of the nominal ceiling by default", () => {
+     const small = [msg("user", "hi")];
+     const result = compactForModel(small, "anthropic/claude-sonnet-4.6");
+     expect(result.droppedCount).toBe(0);
+     expect(result.messages).toBe(small);
+   });
+
+   it("honors explicit maxTokens override", () => {
+     const messages = Array.from({ length: 30 }, (_, i) =>
+       msg("user", "x".repeat(500) + `-${i}`),
+     );
+     const result = compactForModel(messages, "anthropic/claude-sonnet-4.6", {
+       maxTokens: 2000,
+       keepRecent: 2,
+     });
+     expect(result.droppedCount).toBeGreaterThan(0);
+   });
+ });
package/src/lib/contextCompaction.ts ADDED
@@ -0,0 +1,211 @@
+ import type { ModelMessage } from "ai";
+ import { MODELS } from "./models";
+
+ type KnownModelId = (typeof MODELS)[number];
+
+ /**
+  * Fraction-of-limit at which compaction kicks in. Below this, messages pass
+  * through untouched; above this, oldest non-system turns are dropped until
+  * the estimated token count is back under the threshold.
+  */
+ export const DEFAULT_COMPACTION_FRACTION = 0.7;
+
+ /**
+  * Number of most-recent messages preserved verbatim, even if the conversation
+  * is already over the limit. Ensures the assistant always has the latest turn
+  * and its immediate predecessor.
+  */
+ export const DEFAULT_KEEP_RECENT = 4;
+
+ /**
+  * Conservative fallback when we encounter a model we haven't mapped — big
+  * enough to be useful for unknown models, small enough to still trigger
+  * compaction before hitting upstream 400s.
+  */
+ export const DEFAULT_CONTEXT_LIMIT = 200_000;
+
+ /**
+  * Known input-token ceilings per model (nominal upstream maximum). Keyed by
+  * MODELS so TypeScript catches drift — adding a model id here that isn't in
+  * MODELS, or misspelling an id, is a compile error. Coverage is intentionally
+  * partial: models without an explicit entry fall back to DEFAULT_CONTEXT_LIMIT.
+  */
+ const MODEL_CONTEXT_LIMITS: Partial<Record<KnownModelId, number>> = {
+   // Anthropic (1M tier where available, else 200K)
+   "anthropic/claude-opus-4.6": 1_000_000,
+   "anthropic/claude-opus-4.5": 1_000_000,
+   "anthropic/claude-opus-4.1": 200_000,
+   "anthropic/claude-sonnet-4.6": 1_000_000,
+   "anthropic/claude-sonnet-4.5": 1_000_000,
+   "anthropic/claude-sonnet-4": 200_000,
+   "anthropic/claude-haiku-4.5": 200_000,
+
+   // OpenAI
+   "openai/gpt-5.4": 400_000,
+   "openai/gpt-5.4-mini": 400_000,
+   "openai/gpt-5.1": 400_000,
+   "openai/gpt-5.1-codex": 400_000,
+   "openai/gpt-5": 400_000,
+   "openai/gpt-4.1": 1_000_000,
+   "openai/o4-mini": 200_000,
+   "openai/o3": 200_000,
+
+   // Google
+   "google/gemini-3.1-pro-preview": 1_000_000,
+   "google/gemini-2.5-pro": 1_000_000,
+   "google/gemini-2.5-flash": 1_000_000,
+
+   // Others
+   "deepseek/deepseek-r1": 128_000,
+   "deepseek/deepseek-v3.2": 128_000,
+   "meta-llama/llama-4-maverick": 1_000_000,
+   "x-ai/grok-4": 256_000,
+   "qwen/qwen3-coder": 256_000,
+   "moonshotai/kimi-k2.5": 128_000,
+   "mistralai/mistral-medium-3.1": 128_000,
+   "mistralai/codestral-2508": 256_000,
+   "mistralai/devstral-small": 128_000,
+ };
+
+ /**
+  * Returns the input-token ceiling for a given OpenRouter model id, or
+  * DEFAULT_CONTEXT_LIMIT if unknown.
+  */
+ export function getModelContextLimit(modelId: string): number {
+   return MODEL_CONTEXT_LIMITS[modelId as KnownModelId] ?? DEFAULT_CONTEXT_LIMIT;
+ }
+
+ /**
+  * Rough input-token estimate using a chars/4 heuristic on the JSON serialized
+  * conversation. Tokens-per-char varies by model and content, but a chars/4
+  * heuristic matches OpenAI's rule-of-thumb within ~15% for English prose and
+  * is deterministic + zero-cost — good enough to trigger compaction.
+  */
+ export function estimateTokens(messages: ModelMessage[]): number {
+   const serialized = JSON.stringify(messages);
+   return Math.ceil(serialized.length / 4);
+ }
+
+ export interface CompactionResult {
+   messages: ModelMessage[];
+   droppedCount: number;
+   estimatedTokensBefore: number;
+   estimatedTokensAfter: number;
+ }
+
+ /**
+  * Drops oldest non-system messages until the estimated token count is under
+  * maxTokens. Always preserves the last `keepRecent` messages and any system
+  * role messages. When any messages are dropped, prepends a synthetic assistant
+  * note so the model knows earlier context was elided.
+  */
+ export function compactBySlidingWindow(
+   messages: ModelMessage[],
+   maxTokens: number,
+   keepRecent: number = DEFAULT_KEEP_RECENT,
+ ): CompactionResult {
+   const estimatedTokensBefore = estimateTokens(messages);
+
+   if (estimatedTokensBefore <= maxTokens || messages.length <= keepRecent) {
+     return {
+       messages,
+       droppedCount: 0,
+       estimatedTokensBefore,
+       estimatedTokensAfter: estimatedTokensBefore,
+     };
+   }
+
+   const systemMessages = messages.filter((m) => m.role === "system");
+   const nonSystem = messages.filter((m) => m.role !== "system");
+
+   // Group consecutive `tool` messages with the assistant message that
+   // precedes them. OpenAI-compatible providers require every tool-result
+   // message to be immediately preceded by the assistant message holding its
+   // tool_calls — splitting these produces an invalid conversation that
+   // providers reject with a 400. Grouping ensures we drop or keep the
+   // full assistant+tools unit atomically.
+   const groups: ModelMessage[][] = [];
+   for (const m of nonSystem) {
+     if (m.role === "tool" && groups.length > 0) {
+       groups[groups.length - 1]!.push(m);
+     } else {
+       groups.push([m]);
+     }
+   }
+
+   // Reserve the trailing groups that together contain at least `keepRecent`
+   // messages. Using groups (not raw messages) keeps assistant+tool pairs
+   // intact at the boundary between retained and dropped.
+   let recentMsgCount = 0;
+   let recentStart = groups.length;
+   while (recentStart > 0 && recentMsgCount < keepRecent) {
+     recentStart -= 1;
+     recentMsgCount += groups[recentStart]!.length;
+   }
+   const recentGroups = groups.slice(recentStart);
+   const droppableGroups = groups.slice(0, recentStart);
+
+   let droppedCount = 0;
+   let working = [
+     ...systemMessages,
+     ...droppableGroups.flat(),
+     ...recentGroups.flat(),
+   ];
+
+   while (droppableGroups.length > 0 && estimateTokens(working) > maxTokens) {
+     const droppedGroup = droppableGroups.shift()!;
+     droppedCount += droppedGroup.length;
+     working = [
+       ...systemMessages,
+       ...droppableGroups.flat(),
+       ...recentGroups.flat(),
+     ];
+   }
+
+   if (droppedCount > 0) {
+     const marker: ModelMessage = {
+       role: "assistant",
+       content: `[${droppedCount} earlier message${
+         droppedCount === 1 ? "" : "s"
+       } omitted to stay under context length. If the user asks about them, say you no longer have that context and suggest they restate the relevant details.]`,
+     };
+     working = [
+       ...systemMessages,
+       marker,
+       ...droppableGroups.flat(),
+       ...recentGroups.flat(),
+     ];
+   }
+
+   return {
+     messages: working,
+     droppedCount,
+     estimatedTokensBefore,
+     estimatedTokensAfter: estimateTokens(working),
+   };
+ }
+
+ export interface CompactionOptions {
+   /** Override the model's nominal input ceiling. */
+   maxTokens?: number;
+   /** Fraction of maxTokens at which compaction kicks in. */
+   compactAtFraction?: number;
+   /** Most-recent messages preserved verbatim. */
+   keepRecent?: number;
+ }
+
+ /**
+  * Convenience wrapper that picks the model ceiling, applies compactAtFraction,
+  * and runs compactBySlidingWindow. Returns the (possibly unchanged) messages
+  * plus diagnostics.
+  */
+ export function compactForModel(
+   messages: ModelMessage[],
+   modelId: string,
+   opts: CompactionOptions = {},
+ ): CompactionResult {
+   const ceiling = opts.maxTokens ?? getModelContextLimit(modelId);
+   const fraction = opts.compactAtFraction ?? DEFAULT_COMPACTION_FRACTION;
+   const limit = Math.floor(ceiling * fraction);
+   return compactBySlidingWindow(messages, limit, opts.keepRecent);
+ }
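Reviewer note: a quick usage sketch of the exported API above. Values are chosen to force compaction visibly; in the provider, the ceiling comes from the model mapping instead.

```ts
import type { ModelMessage } from "ai";
import { compactForModel } from "./contextCompaction";

// 50 bulky turns: ~100K chars of JSON, roughly 25K estimated tokens.
const history: ModelMessage[] = Array.from({ length: 50 }, (_, i) =>
  i % 2 === 0
    ? ({ role: "user", content: `turn-${i} ` + "x".repeat(2_000) } as ModelMessage)
    : ({ role: "assistant", content: `turn-${i} ` + "x".repeat(2_000) } as ModelMessage),
);

const result = compactForModel(history, "anthropic/claude-sonnet-4", {
  maxTokens: 5_000, // effective threshold: floor(5_000 * 0.7) = 3_500
  keepRecent: 4,
});

console.log(result.droppedCount); // > 0: oldest turns were dropped
console.log(result.estimatedTokensBefore, "→", result.estimatedTokensAfter);
// With no system messages, result.messages[0] is the synthetic assistant
// marker noting how many earlier messages were omitted.
```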
package/src/lib/errorTracking.config.ts CHANGED
@@ -7,10 +7,12 @@
   * - VITE_DATADOG_APPLICATION_ID
   * - VITE_DATADOG_CLIENT_TOKEN
   * - VITE_DATADOG_SITE (optional, defaults to datadoghq.com)
+  * - VITE_DATADOG_ENV (optional, defaults to prod)
   */
  export const DATADOG_CONFIG = {
    applicationId: import.meta.env.VITE_DATADOG_APPLICATION_ID ?? "",
    clientToken: import.meta.env.VITE_DATADOG_CLIENT_TOKEN ?? "",
    site: import.meta.env.VITE_DATADOG_SITE ?? "datadoghq.com",
+   env: import.meta.env.VITE_DATADOG_ENV ?? "prod",
    service: "gram-elements",
  } as const;
package/src/lib/errorTracking.ts CHANGED
@@ -44,7 +44,7 @@ export function initErrorTracking(config: ErrorTrackingConfig = {}): void {
      clientToken: DATADOG_CONFIG.clientToken,
      site: DATADOG_CONFIG.site,
      service: DATADOG_CONFIG.service,
-     env: process.env.NODE_ENV || "production",
+     env: DATADOG_CONFIG.env,
      sessionSampleRate: 100,
      sessionReplaySampleRate: 100,
      trackUserInteractions: true,