npm - @gram-ai/elements - Versions diffs - 1.28.0 → 1.30.0 - Mend

@gram-ai/elements 1.28.0 → 1.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/dist/components/MessageContent.d.ts +20 -0
package/dist/components/MessageContent.parser.d.ts +12 -0
package/dist/components/MessageContent.test.d.ts +1 -0
package/dist/elements.cjs +1 -1
package/dist/elements.css +1 -1
package/dist/elements.js +14 -13
package/dist/{index-C4bFBGfl.cjs → index-COzPF-WM.cjs} +45 -45
package/dist/index-COzPF-WM.cjs.map +1 -0
package/dist/{index-D93pV0_o.js → index-CRhpKl-G.js} +5218 -5201
package/dist/index-CRhpKl-G.js.map +1 -0
package/dist/{index-CtZz13Cf.js → index-QUz5guSg.js} +11835 -11604
package/dist/index-QUz5guSg.js.map +1 -0
package/dist/index-fVcTljYT.cjs +194 -0
package/dist/index-fVcTljYT.cjs.map +1 -0
package/dist/index.d.ts +2 -0
package/dist/lib/contextCompaction.d.ts +58 -0
package/dist/lib/contextCompaction.test.d.ts +1 -0
package/dist/lib/errorTracking.config.d.ts +2 -0
package/dist/lib/tools.byte-cap.test.d.ts +1 -0
package/dist/lib/tools.d.ts +19 -0
package/dist/lib/tools.test.d.ts +1 -0
package/dist/plugins/index.d.ts +4 -1
package/dist/plugins/index.test.d.ts +1 -0
package/dist/plugins.cjs +1 -1
package/dist/plugins.js +1 -1
package/dist/{profiler-Ccma0l1p.js → profiler-DifNjGGB.js} +2 -2
package/dist/{profiler-Ccma0l1p.js.map → profiler-DifNjGGB.js.map} +1 -1
package/dist/{profiler-CjNa3A1d.cjs → profiler-KLtVMM14.cjs} +2 -2
package/dist/{profiler-CjNa3A1d.cjs.map → profiler-KLtVMM14.cjs.map} +1 -1
package/dist/{startRecording-DAURU74n.js → startRecording-C6xu9UA9.js} +2 -2
package/dist/{startRecording-DAURU74n.js.map → startRecording-C6xu9UA9.js.map} +1 -1
package/dist/{startRecording-jSovclaq.cjs → startRecording-YENzw_0G.cjs} +2 -2
package/dist/{startRecording-jSovclaq.cjs.map → startRecording-YENzw_0G.cjs.map} +1 -1
package/dist/types/index.d.ts +49 -0
package/dist/types/plugins.d.ts +5 -0
package/package.json +2 -2
package/src/components/MessageContent.parser.ts +39 -0
package/src/components/MessageContent.test.ts +110 -0
package/src/components/MessageContent.tsx +82 -0
package/src/contexts/ElementsProvider.tsx +57 -7
package/src/index.ts +2 -0
package/src/lib/contextCompaction.test.ts +201 -0
package/src/lib/contextCompaction.ts +211 -0
package/src/lib/errorTracking.config.ts +2 -0
package/src/lib/errorTracking.ts +1 -1
package/src/lib/tools.byte-cap.test.ts +132 -0
package/src/lib/tools.test.ts +259 -0
package/src/lib/tools.ts +122 -0
package/src/plugins/chart/index.ts +1 -0
package/src/plugins/chart/ui/bar-chart.tsx +9 -1
package/src/plugins/generative-ui/index.ts +1 -0
package/src/plugins/index.test.ts +62 -0
package/src/plugins/index.ts +14 -1
package/src/types/index.ts +55 -0
package/src/types/plugins.ts +6 -0
package/dist/index-BmTGnEaV.cjs +0 -190
package/dist/index-BmTGnEaV.cjs.map +0 -1
package/dist/index-C4bFBGfl.cjs.map +0 -1
package/dist/index-CtZz13Cf.js.map +0 -1
package/dist/index-D93pV0_o.js.map +0 -1

package/src/components/MessageContent.tsx ADDED Viewed

@@ -0,0 +1,82 @@
+"use client";
+import { FC, useMemo } from "react";
+import { ElementsContext } from "@/contexts/contexts";
+import { ToolExecutionProvider } from "@/contexts/ToolExecutionContext";
+import type { ElementsContextType, Model } from "@/types";
+import { recommended } from "@/plugins";
+import { chart } from "@/plugins/chart";
+import { generativeUI } from "@/plugins/generative-ui";
+import { parseSegments } from "./MessageContent.parser";
+/** Component signature shared by every fenced-code widget renderer. */
+type FencedBlockRenderer = FC<{ code: string }>;
+/** Maps a fenced-code language tag to the plugin component that renders it. */
+const SUPPORTED_LANGUAGES: Record<string, FencedBlockRenderer> = {
+  ui: generativeUI.Component as FencedBlockRenderer,
+  chart: chart.Component as FencedBlockRenderer,
+};
+// Minimal context value for the standalone renderer: it carries only what
+// useDensity()/useElements() read inside the chart and ui renderers. There is
+// no auth, no MCP and no runtime behind it, so every setter is a no-op.
+const STUB_CONTEXT: ElementsContextType = {
+  config: { projectSlug: "" },
+  plugins: recommended,
+  mcpTools: undefined,
+  model: "" as Model,
+  setModel: () => {},
+  isOpen: false,
+  setIsOpen: () => {},
+  isExpanded: false,
+  setIsExpanded: () => {},
+};
+/** Props accepted by the standalone `MessageContent` renderer. */
+export interface MessageContentProps {
+  /** Optional className applied to the root container. */
+  className?: string;
+  /**
+   * Raw assistant message content: markdown text that may contain
+   * ```chart and ```ui fenced code blocks.
+   */
+  content: string;
+}
+/**
+ * Standalone renderer for stored chat message content. Recognises the same
+ * `chart` and `ui` fenced code blocks that the live `<Chat />` component
+ * renders as widgets, but works without an `ElementsProvider`, MCP client,
+ * auth session, or assistant-ui runtime.
+ *
+ * Use in static viewers (agent session detail panel, replay, share) so a
+ * stored bar chart appears as a chart instead of as raw JSON. Plain markdown
+ * formatting is intentionally not applied — text segments render as
+ * preformatted text.
+ *
+ * Rendering rules:
+ * - whitespace-only text segments are skipped;
+ * - code segments whose language has no entry in SUPPORTED_LANGUAGES render
+ *   nothing.
+ */
+export const MessageContent: FC<MessageContentProps> = ({
+  content,
+  className,
+}) => {
+  // Re-parse only when the raw content string changes.
+  const segments = useMemo(() => parseSegments(content), [content]);
+  return (
+    <ElementsContext.Provider value={STUB_CONTEXT}>
+      {/* Empty tools so generative-ui's <ActionButton> renders disabled. */}
+      <ToolExecutionProvider tools={{}}>
+        <div className={className}>
+          {segments.map((seg, i) => {
+            if (seg.type === "text") {
+              if (seg.text.trim() === "") return null;
+              return (
+                <div key={i} className="whitespace-pre-wrap">
+                  {seg.text}
+                </div>
+              );
+            }
+            // Own-property lookup only: SUPPORTED_LANGUAGES is a plain object,
+            // so a fence tagged `constructor` or `toString` would otherwise
+            // resolve to an inherited Object.prototype member and be rendered
+            // as if it were a component.
+            // NOTE(review): parseSegments is not visible here — confirm
+            // whether it can emit arbitrary language tags.
+            const Component = Object.prototype.hasOwnProperty.call(
+              SUPPORTED_LANGUAGES,
+              seg.lang,
+            )
+              ? SUPPORTED_LANGUAGES[seg.lang]
+              : undefined;
+            if (!Component) return null;
+            return (
+              <div key={i} className="my-2">
+                <Component code={seg.code} />
+              </div>
+            );
+          })}
+        </div>
+      </ToolExecutionProvider>
+    </ElementsContext.Provider>
+  );
+};

package/src/contexts/ElementsProvider.tsx CHANGED Viewed

@@ -15,9 +15,11 @@ import {
   setFrontendToolApprovalConfig,
   toAISDKTools,
   wrapToolsWithApproval,
+  wrapToolsWithByteCap,
   type ApprovalHelpers,
   type FrontendTool,
 } from "@/lib/tools";
+import { compactForModel } from "@/lib/contextCompaction";
 import { cn } from "@/lib/utils";
 import { recommended } from "@/plugins";
 import { ElementsConfig, Model } from "@/types";
@@ -37,6 +39,7 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
 import {
   convertToModelMessages,
   createUIMessageStream,
+  lastAssistantMessageIsCompleteWithToolCalls,
   LanguageModel,
   smoothStream,
   stepCountIs,
@@ -188,6 +191,12 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
     toolsWithCustomComponents,
   );
+  // Read inside `sendMessages` via ref so prompt changes don't churn the
+  // transport useMemo identity. Same pattern as ensureValidHeadersRef /
+  // approvalHelpersRef below.
+  const systemPromptRef = useRef(systemPrompt);
+  systemPromptRef.current = systemPrompt;
   // Initialize error tracking on mount
   useEffect(() => {
     initErrorTracking({
@@ -366,12 +375,19 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
         } as ToolSet;
         // Wrap tools that require approval
-        const tools = wrapToolsWithApproval(
+        const approvedTools = wrapToolsWithApproval(
           combinedTools,
           config.tools?.toolsRequiringApproval,
           getApprovalHelpers(),
         );
+        // Cap oversized tool results so one greedy tool call (e.g. a wide log
+        // search) can't fill the context window in a single step.
+        const tools = wrapToolsWithByteCap(
+          approvedTools,
+          config.tools?.maxOutputBytes,
+        );
         // Stream the response
         const modelToUse = config.languageModel
           ? config.languageModel
@@ -387,10 +403,32 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
           const nonSystemMessages = cleanedMessages.filter(
             (m) => m.role !== "system",
           );
-          const modelMessages = convertToModelMessages(nonSystemMessages);
+          const rawModelMessages = convertToModelMessages(nonSystemMessages);
+          // Auto-compact older turns if the estimated input is approaching
+          // the model's context window. System prompt + last few turns are
+          // always preserved. No-op when the conversation is small.
+          const compaction = config.contextCompaction?.disabled
+            ? {
+                messages: rawModelMessages,
+                droppedCount: 0,
+                estimatedTokensBefore: 0,
+                estimatedTokensAfter: 0,
+              }
+            : compactForModel(rawModelMessages, model, {
+                maxTokens: config.contextCompaction?.maxTokens,
+                compactAtFraction: config.contextCompaction?.compactAtFraction,
+                keepRecent: config.contextCompaction?.keepRecent,
+              });
+          if (compaction.droppedCount > 0) {
+            console.warn(
+              `[elements] compacted ${compaction.droppedCount} older turn(s) from ${compaction.estimatedTokensBefore} → ${compaction.estimatedTokensAfter} est. tokens (model ${model})`,
+            );
+          }
+          const modelMessages = compaction.messages;
           const result = streamText({
-            system: systemPrompt,
+            system: systemPromptRef.current,
             model: modelToUse,
             messages: modelMessages,
             tools,
@@ -456,8 +494,12 @@ const ElementsProviderInner = ({ children, config }: ElementsProviderProps) => {
     [
       config.languageModel,
       config.tools?.toolsRequiringApproval,
+      config.tools?.maxOutputBytes,
+      config.contextCompaction?.disabled,
+      config.contextCompaction?.maxTokens,
+      config.contextCompaction?.compactAtFraction,
+      config.contextCompaction?.keepRecent,
       model,
-      systemPrompt,
       mcpTools,
       getApprovalHelpers,
       apiUrl,
@@ -606,9 +648,14 @@ const ElementsProviderWithHistory = ({
   });
   const initialThreadId = contextValue?.config.history?.initialThreadId;
-  // Hook factory for creating the base chat runtime
+  // Without `sendAutomaticallyWhen`, client-side frontend tools leave the turn
+  // half-finished: the tool-result is patched in but the agent never resumes,
+  // so the next user message lands on top of an unresolved tool-call sequence.
   const useChatRuntimeHook = useCallback(() => {
-    return useChatRuntime({ transport });
+    return useChatRuntime({
+      transport,
+      sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
+    });
   }, [transport]);
   const runtime = useRemoteThreadListRuntime({
@@ -691,7 +738,10 @@ const ElementsProviderWithoutHistory = ({
   executableTools,
   currentChatId,
 }: ElementsProviderWithoutHistoryProps) => {
-  const runtime = useChatRuntime({ transport });
+  const runtime = useChatRuntime({
+    transport,
+    sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
+  });
   // Populate runtimeRef so transport can access thread context
   useEffect(() => {

package/src/index.ts CHANGED Viewed

@@ -18,6 +18,8 @@ export { ChatHistory } from "@/components/ChatHistory";
 export { ShareButton } from "@/components/ShareButton";
 export type { ShareButtonProps } from "@/components/ShareButton";
 export { ToolFallback } from "@/components/assistant-ui/tool-fallback";
+export { MessageContent } from "@/components/MessageContent";
+export type { MessageContentProps } from "@/components/MessageContent";
 // Replay
 export { Replay } from "@/components/Replay";

package/src/lib/contextCompaction.test.ts ADDED Viewed

@@ -0,0 +1,201 @@
+import { describe, expect, it } from "vitest";
+import type { ModelMessage } from "ai";
+import {
+  compactBySlidingWindow,
+  compactForModel,
+  DEFAULT_CONTEXT_LIMIT,
+  estimateTokens,
+  getModelContextLimit,
+} from "./contextCompaction";
+/** Builds a minimal ModelMessage fixture with plain string content. */
+const msg = (
+  role: "system" | "user" | "assistant" | "tool",
+  content: string,
+): ModelMessage => {
+  const fixture = { role, content };
+  return fixture as ModelMessage;
+};
+describe("estimateTokens", () => {
+  it("returns roughly chars/4", () => {
+    // 400 payload chars ≈ 100 tokens; the JSON envelope pushes the estimate
+    // slightly above that.
+    const estimate = estimateTokens([msg("user", "a".repeat(400))]);
+    expect(estimate).toBeGreaterThan(100);
+    expect(estimate).toBeLessThan(200);
+  });
+  it("grows with message count", () => {
+    const hundredMessages: ModelMessage[] = [];
+    for (let i = 0; i < 100; i++) {
+      hundredMessages.push(msg("user", "hello"));
+    }
+    const single = estimateTokens([msg("user", "hello")]);
+    expect(estimateTokens(hundredMessages)).toBeGreaterThan(single * 50);
+  });
+});
+describe("getModelContextLimit", () => {
+  it("returns known mapping for Sonnet 4.6", () => {
+    const limit = getModelContextLimit("anthropic/claude-sonnet-4.6");
+    expect(limit).toBe(1_000_000);
+  });
+  it("returns known mapping for Claude 4 (non-1M)", () => {
+    const limit = getModelContextLimit("anthropic/claude-sonnet-4");
+    expect(limit).toBe(200_000);
+  });
+  it("returns DEFAULT_CONTEXT_LIMIT for unknown models", () => {
+    // An id that is not in the limits table falls back to the default.
+    const limit = getModelContextLimit("acme/very-new-model");
+    expect(limit).toBe(DEFAULT_CONTEXT_LIMIT);
+  });
+});
+// Core sliding-window behaviour: no-op under budget, oldest-first dropping,
+// system-message preservation, and the keepRecent floor.
+describe("compactBySlidingWindow", () => {
+  it("no-ops when under the limit", () => {
+    const messages = [msg("user", "hi"), msg("assistant", "hello")];
+    const result = compactBySlidingWindow(messages, 1_000_000);
+    expect(result.droppedCount).toBe(0);
+    // Same array reference: nothing is rebuilt on the early-return path.
+    expect(result.messages).toBe(messages);
+  });
+  it("drops oldest non-system turns to fit", () => {
+    // 10 bulky user/assistant pairs (20 messages), tiny limit → forces dropping
+    const messages: ModelMessage[] = [];
+    for (let i = 0; i < 10; i++) {
+      messages.push(msg("user", `query-${i} ` + "x".repeat(400)));
+      messages.push(msg("assistant", `reply-${i} ` + "y".repeat(400)));
+    }
+    const maxTokens = 500;
+    const result = compactBySlidingWindow(messages, maxTokens, 4);
+    expect(result.droppedCount).toBeGreaterThan(0);
+    expect(result.estimatedTokensAfter).toBeLessThanOrEqual(
+      result.estimatedTokensBefore,
+    );
+    // Last 4 are preserved verbatim and in order (not just the final one).
+    expect(result.messages.slice(-4)).toEqual(messages.slice(-4));
+    // Marker is prepended: with no system messages in this fixture it must be
+    // the very first message, not merely present somewhere.
+    const head = result.messages[0]!;
+    expect(head.role).toBe("assistant");
+    expect(
+      typeof head.content === "string" && head.content.includes("omitted"),
+    ).toBe(true);
+  });
+  it("always preserves system messages", () => {
+    const messages: ModelMessage[] = [
+      msg("system", "sys " + "s".repeat(1000)),
+      ...Array.from({ length: 20 }, (_, i) =>
+        msg("user", `q-${i} ` + "x".repeat(500)),
+      ),
+    ];
+    const result = compactBySlidingWindow(messages, 300, 2);
+    expect(result.droppedCount).toBeGreaterThan(0);
+    expect(result.messages[0]!.role).toBe("system");
+  });
+  it("preserves at least keepRecent messages even if over limit", () => {
+    const messages = Array.from({ length: 10 }, (_, i) =>
+      msg("user", "x".repeat(1000) + `-${i}`),
+    );
+    const result = compactBySlidingWindow(messages, 10, 3);
+    // keepRecent preserved even though we can't get under the limit
+    expect(result.messages.length).toBeGreaterThanOrEqual(3);
+    // Last 3 are intact
+    const tail = result.messages.slice(-3);
+    expect(tail).toEqual(messages.slice(-3));
+  });
+});
+// Guards the assistant(tool_calls) → tool grouping: a tool message must never
+// survive compaction without the assistant message that precedes it.
+describe("compactBySlidingWindow — tool message pairing", () => {
+  /** True for the synthetic "N earlier messages omitted" assistant marker. */
+  const isMarker = (m: ModelMessage): boolean =>
+    m.role === "assistant" &&
+    typeof m.content === "string" &&
+    m.content.includes("omitted");
+  it("never leaves a tool message at the head of the retained window", () => {
+    // Scenario from Devin: dropping oldest-first could split an
+    // assistant(tool_calls) → tool pair, leaving an orphan tool at the
+    // head of the retained set. Providers reject this with a 400.
+    const messages: ModelMessage[] = [
+      msg("user", "q1 " + "x".repeat(400)),
+      msg("assistant", "a1-with-tool-call " + "x".repeat(400)),
+      msg("tool", "t1-result " + "x".repeat(400)),
+      msg("assistant", "a1-final " + "x".repeat(400)),
+      msg("user", "q2 " + "x".repeat(400)),
+      msg("assistant", "a2-with-tool-call " + "x".repeat(400)),
+      msg("tool", "t2-result " + "x".repeat(400)),
+      msg("assistant", "a2-final " + "x".repeat(400)),
+    ];
+    const result = compactBySlidingWindow(messages, 400, 4);
+    expect(result.droppedCount).toBeGreaterThan(0);
+    // The retained non-system messages should never start with a tool.
+    const nonSystem = result.messages.filter((m) => m.role !== "system");
+    // Skip the synthetic assistant marker if present.
+    const firstReal = nonSystem.find((m) => !isMarker(m));
+    expect(firstReal?.role).not.toBe("tool");
+  });
+  it("drops an assistant+tool pair atomically (not one without the other)", () => {
+    // The bulk sits in the ASSISTANT message on purpose: dropping it alone
+    // would already bring the conversation under the limit, so a
+    // message-by-message implementation would stop there and strand the
+    // small tool result. (With the bulk in the tool message, such an
+    // implementation would drop the tool too and the test could not fail.)
+    const messages: ModelMessage[] = [
+      msg("user", "old"),
+      msg("assistant", "calling tool " + "x".repeat(2000)),
+      msg("tool", "result"),
+      msg("user", "recent " + "x".repeat(200)),
+      msg("assistant", "recent reply " + "x".repeat(200)),
+    ];
+    const result = compactBySlidingWindow(messages, 300, 2);
+    // "old" plus the whole assistant+tool group are gone: 1 + 2 messages.
+    expect(result.droppedCount).toBe(3);
+    // If the group was dropped atomically, both the assistant and its tool
+    // are gone together. If the bug was still present, we'd see the tool
+    // message lingering alone.
+    const nonSystem = result.messages.filter(
+      (m) => m.role !== "system" && !isMarker(m),
+    );
+    const hasLoneTool = nonSystem.some(
+      (m, i) =>
+        m.role === "tool" && (i === 0 || nonSystem[i - 1]!.role === "user"),
+    );
+    expect(hasLoneTool).toBe(false);
+  });
+  it("does not split a tool group when aligning the recent window", () => {
+    // keepRecent=3 would cut mid-group with naive slicing. Grouping should
+    // expand the recent window to keep the assistant+tools together.
+    const messages: ModelMessage[] = [
+      msg("user", "old " + "x".repeat(1000)),
+      msg("assistant", "calling 2 tools"),
+      msg("tool", "result1"),
+      msg("tool", "result2"),
+      msg("assistant", "final"),
+    ];
+    const result = compactBySlidingWindow(messages, 200, 3);
+    // Exclude the synthetic marker: it has role "assistant", so leaving it in
+    // would make an orphan tool at the head look correctly paired.
+    const kept = result.messages.filter(
+      (m) => m.role !== "system" && !isMarker(m),
+    );
+    // Both tool results sit inside the reserved recent window, so they must
+    // be present, and the first one must directly follow its assistant.
+    const firstTool = kept.findIndex((m) => m.role === "tool");
+    expect(firstTool).toBeGreaterThan(0);
+    expect(kept[firstTool - 1]?.role).toBe("assistant");
+  });
+});
+// Covers the wrapper's threshold arithmetic: ceiling × compactAtFraction.
+describe("compactForModel", () => {
+  it("uses 70% of the nominal ceiling by default", () => {
+    // Tiny conversations pass through untouched (same array reference).
+    const small = [msg("user", "hi")];
+    const result = compactForModel(small, "anthropic/claude-sonnet-4.6");
+    expect(result.droppedCount).toBe(0);
+    expect(result.messages).toBe(small);
+    // A conversation sized between 70% and 100% of the ceiling must be
+    // compacted under the default fraction, and left alone at fraction 1.
+    const messages = Array.from({ length: 10 }, (_, i) =>
+      msg("user", "x".repeat(300) + `-${i}`),
+    );
+    const estimated = estimateTokens(messages);
+    expect(estimated).toBeGreaterThan(700);
+    expect(estimated).toBeLessThan(1000);
+    const atDefault = compactForModel(messages, "acme/very-new-model", {
+      maxTokens: 1000,
+    });
+    expect(atDefault.droppedCount).toBeGreaterThan(0);
+    const atFullCeiling = compactForModel(messages, "acme/very-new-model", {
+      maxTokens: 1000,
+      compactAtFraction: 1,
+    });
+    expect(atFullCeiling.droppedCount).toBe(0);
+  });
+  it("honors explicit maxTokens override", () => {
+    const messages = Array.from({ length: 30 }, (_, i) =>
+      msg("user", "x".repeat(500) + `-${i}`),
+    );
+    const result = compactForModel(messages, "anthropic/claude-sonnet-4.6", {
+      maxTokens: 2000,
+      keepRecent: 2,
+    });
+    expect(result.droppedCount).toBeGreaterThan(0);
+  });
+});

package/src/lib/contextCompaction.ts ADDED Viewed

@@ -0,0 +1,211 @@
+import type { ModelMessage } from "ai";
+import { MODELS } from "./models";
+type KnownModelId = (typeof MODELS)[number];
+/**
+ * Default share of the token ceiling that triggers compaction. A conversation
+ * estimated at or below `ceiling × fraction` is passed through unchanged; one
+ * above it has its oldest non-system turns dropped until the estimate is back
+ * under that threshold (or nothing droppable remains).
+ */
+export const DEFAULT_COMPACTION_FRACTION = 0.7;
+/**
+ * Default count of trailing messages that compaction never drops, even when
+ * the conversation is still over the limit afterwards. This keeps the latest
+ * turn and its immediate predecessor available to the assistant.
+ */
+export const DEFAULT_KEEP_RECENT = 4;
+/**
+ * Token ceiling assumed for any model id without an explicit mapping. It is
+ * deliberately conservative: large enough to be useful for unknown models,
+ * small enough that compaction still triggers before upstream 400s.
+ */
+export const DEFAULT_CONTEXT_LIMIT = 200_000;
+/**
+ * Known input-token ceilings per model (nominal upstream maximum). Keyed by
+ * MODELS so TypeScript catches drift — adding a model id here that isn't in
+ * MODELS, or misspelling an id, is a compile error. Coverage is intentionally
+ * partial: models without an explicit entry fall back to DEFAULT_CONTEXT_LIMIT.
+ *
+ * Values are token counts. The table is read only through
+ * getModelContextLimit, which applies the fallback.
+ */
+const MODEL_CONTEXT_LIMITS: Partial<Record<KnownModelId, number>> = {
+  // Anthropic (1M tier where available, else 200K)
+  "anthropic/claude-opus-4.6": 1_000_000,
+  "anthropic/claude-opus-4.5": 1_000_000,
+  "anthropic/claude-opus-4.1": 200_000,
+  "anthropic/claude-sonnet-4.6": 1_000_000,
+  "anthropic/claude-sonnet-4.5": 1_000_000,
+  "anthropic/claude-sonnet-4": 200_000,
+  "anthropic/claude-haiku-4.5": 200_000,
+  // OpenAI
+  "openai/gpt-5.4": 400_000,
+  "openai/gpt-5.4-mini": 400_000,
+  "openai/gpt-5.1": 400_000,
+  "openai/gpt-5.1-codex": 400_000,
+  "openai/gpt-5": 400_000,
+  "openai/gpt-4.1": 1_000_000,
+  "openai/o4-mini": 200_000,
+  "openai/o3": 200_000,
+  // Google
+  "google/gemini-3.1-pro-preview": 1_000_000,
+  "google/gemini-2.5-pro": 1_000_000,
+  "google/gemini-2.5-flash": 1_000_000,
+  // Others
+  "deepseek/deepseek-r1": 128_000,
+  "deepseek/deepseek-v3.2": 128_000,
+  "meta-llama/llama-4-maverick": 1_000_000,
+  "x-ai/grok-4": 256_000,
+  "qwen/qwen3-coder": 256_000,
+  "moonshotai/kimi-k2.5": 128_000,
+  "mistralai/mistral-medium-3.1": 128_000,
+  "mistralai/codestral-2508": 256_000,
+  "mistralai/devstral-small": 128_000,
+};
+/**
+ * Returns the input-token ceiling for a given OpenRouter model id, or
+ * DEFAULT_CONTEXT_LIMIT if unknown.
+ *
+ * @param modelId - Model identifier, e.g. "anthropic/claude-sonnet-4.6".
+ * @returns The mapped ceiling in tokens, or DEFAULT_CONTEXT_LIMIT.
+ */
+export function getModelContextLimit(modelId: string): number {
+  // Only own entries of the table count. A plain indexed read would also
+  // resolve inherited keys such as "constructor" or "toString" and return a
+  // function instead of a number.
+  if (!Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_LIMITS, modelId)) {
+    return DEFAULT_CONTEXT_LIMIT;
+  }
+  return MODEL_CONTEXT_LIMITS[modelId as KnownModelId] ?? DEFAULT_CONTEXT_LIMIT;
+}
+/**
+ * Cheap, deterministic input-token estimate: the character length of the
+ * JSON-serialized conversation divided by four, rounded up. The chars/4 rule
+ * matches OpenAI's rule-of-thumb within ~15% for English prose; real
+ * tokens-per-char varies by model and content, but this is good enough to
+ * decide when to trigger compaction.
+ *
+ * @param messages - Conversation to measure.
+ * @returns Estimated token count.
+ */
+export function estimateTokens(messages: ModelMessage[]): number {
+  const CHARS_PER_TOKEN = 4;
+  const { length: charCount } = JSON.stringify(messages);
+  return Math.ceil(charCount / CHARS_PER_TOKEN);
+}
+/** Outcome of a compaction pass, including diagnostics for logging. */
+export interface CompactionResult {
+  /** Conversation to send: the input itself when nothing had to change. */
+  messages: ModelMessage[];
+  /** Token estimate of the input conversation. */
+  estimatedTokensBefore: number;
+  /** Token estimate of `messages` as returned. */
+  estimatedTokensAfter: number;
+  /** Number of original messages removed; 0 means no compaction happened. */
+  droppedCount: number;
+}
+/**
+ * Drops oldest non-system messages until the estimated token count is under
+ * maxTokens. Always preserves the last `keepRecent` messages and any system
+ * role messages. When any messages are dropped, prepends a synthetic assistant
+ * note so the model knows earlier context was elided.
+ *
+ * Notes on the returned value:
+ * - On the early-return path (estimate already within `maxTokens`, or no more
+ *   than `keepRecent` messages in total) the input array itself is returned.
+ * - Otherwise the conversation is rebuilt as: all system messages first, then
+ *   the marker (only if something was dropped), then the surviving older
+ *   groups, then the reserved recent groups.
+ * - `droppedCount` counts removed original messages only; the marker is not
+ *   part of that count, but it is part of `estimatedTokensAfter`.
+ * - The result can still exceed `maxTokens` when the system messages plus the
+ *   reserved recent groups are over budget on their own.
+ *
+ * @param messages - Conversation to compact.
+ * @param maxTokens - Budget that the `estimateTokens` result is compared to.
+ * @param keepRecent - Minimum number of trailing non-system messages to
+ *   reserve; more may be kept so a tool group is not split.
+ * @returns The (possibly rebuilt) conversation plus diagnostics.
+ */
+export function compactBySlidingWindow(
+  messages: ModelMessage[],
+  maxTokens: number,
+  keepRecent: number = DEFAULT_KEEP_RECENT,
+): CompactionResult {
+  const estimatedTokensBefore = estimateTokens(messages);
+  // Nothing to do: already within budget, or too short to drop anything.
+  if (estimatedTokensBefore <= maxTokens || messages.length <= keepRecent) {
+    return {
+      messages,
+      droppedCount: 0,
+      estimatedTokensBefore,
+      estimatedTokensAfter: estimatedTokensBefore,
+    };
+  }
+  const systemMessages = messages.filter((m) => m.role === "system");
+  const nonSystem = messages.filter((m) => m.role !== "system");
+  // Group consecutive `tool` messages with the assistant message that
+  // precedes them. OpenAI-compatible providers require every tool-result
+  // message to be immediately preceded by the assistant message holding its
+  // tool_calls — splitting these produces an invalid conversation that
+  // providers reject with a 400. Grouping ensures we drop or keep the
+  // full assistant+tools unit atomically.
+  const groups: ModelMessage[][] = [];
+  for (const m of nonSystem) {
+    // A tool message joins whatever group precedes it; a tool message with
+    // no preceding group starts a group of its own.
+    if (m.role === "tool" && groups.length > 0) {
+      groups[groups.length - 1]!.push(m);
+    } else {
+      groups.push([m]);
+    }
+  }
+  // Reserve the trailing groups that together contain at least `keepRecent`
+  // messages. Using groups (not raw messages) keeps assistant+tool pairs
+  // intact at the boundary between retained and dropped.
+  let recentMsgCount = 0;
+  let recentStart = groups.length;
+  while (recentStart > 0 && recentMsgCount < keepRecent) {
+    recentStart -= 1;
+    recentMsgCount += groups[recentStart]!.length;
+  }
+  const recentGroups = groups.slice(recentStart);
+  const droppableGroups = groups.slice(0, recentStart);
+  let droppedCount = 0;
+  let working = [
+    ...systemMessages,
+    ...droppableGroups.flat(),
+    ...recentGroups.flat(),
+  ];
+  // Drop whole groups, oldest first, re-estimating after each removal, until
+  // the estimate fits or only the reserved recent groups remain.
+  while (droppableGroups.length > 0 && estimateTokens(working) > maxTokens) {
+    const droppedGroup = droppableGroups.shift()!;
+    droppedCount += droppedGroup.length;
+    working = [
+      ...systemMessages,
+      ...droppableGroups.flat(),
+      ...recentGroups.flat(),
+    ];
+  }
+  if (droppedCount > 0) {
+    // Synthetic note placed right after the system messages so the model
+    // knows earlier turns were elided.
+    const marker: ModelMessage = {
+      role: "assistant",
+      content: `[${droppedCount} earlier message${
+        droppedCount === 1 ? "" : "s"
+      } omitted to stay under context length. If the user asks about them, say you no longer have that context and suggest they restate the relevant details.]`,
+    };
+    working = [
+      ...systemMessages,
+      marker,
+      ...droppableGroups.flat(),
+      ...recentGroups.flat(),
+    ];
+  }
+  return {
+    messages: working,
+    droppedCount,
+    estimatedTokensBefore,
+    estimatedTokensAfter: estimateTokens(working),
+  };
+}
+/** Optional tuning knobs for `compactForModel`; every field has a default. */
+export interface CompactionOptions {
+  /**
+   * Most-recent messages preserved verbatim. Defaults to DEFAULT_KEEP_RECENT.
+   */
+  keepRecent?: number;
+  /**
+   * Fraction of maxTokens at which compaction kicks in. Defaults to
+   * DEFAULT_COMPACTION_FRACTION.
+   */
+  compactAtFraction?: number;
+  /**
+   * Override the model's nominal input ceiling. Defaults to the value from
+   * getModelContextLimit for the given model id.
+   */
+  maxTokens?: number;
+}
+/**
+ * One-call entry point for compaction: resolves the token ceiling (explicit
+ * `maxTokens` or the model's mapped limit), scales it by `compactAtFraction`,
+ * and hands the floored result to compactBySlidingWindow as the budget.
+ *
+ * @param messages - Conversation to compact.
+ * @param modelId - Model id used to look up the ceiling when `maxTokens` is
+ *   not given.
+ * @param opts - Optional overrides; see CompactionOptions.
+ * @returns The (possibly unchanged) messages plus diagnostics.
+ */
+export function compactForModel(
+  messages: ModelMessage[],
+  modelId: string,
+  opts: CompactionOptions = {},
+): CompactionResult {
+  const nominalCeiling = opts.maxTokens ?? getModelContextLimit(modelId);
+  const triggerFraction = opts.compactAtFraction ?? DEFAULT_COMPACTION_FRACTION;
+  const budget = Math.floor(nominalCeiling * triggerFraction);
+  return compactBySlidingWindow(messages, budget, opts.keepRecent);
+}

package/src/lib/errorTracking.config.ts CHANGED Viewed

@@ -7,10 +7,12 @@
  * - VITE_DATADOG_APPLICATION_ID
  * - VITE_DATADOG_CLIENT_TOKEN
  * - VITE_DATADOG_SITE (optional, defaults to datadoghq.com)
+ * - VITE_DATADOG_ENV (optional, defaults to prod)
  */
 export const DATADOG_CONFIG = {
   applicationId: import.meta.env.VITE_DATADOG_APPLICATION_ID ?? "",
   clientToken: import.meta.env.VITE_DATADOG_CLIENT_TOKEN ?? "",
   site: import.meta.env.VITE_DATADOG_SITE ?? "datadoghq.com",
+  env: import.meta.env.VITE_DATADOG_ENV ?? "prod",
   service: "gram-elements",
 } as const;

package/src/lib/errorTracking.ts CHANGED Viewed

@@ -44,7 +44,7 @@ export function initErrorTracking(config: ErrorTrackingConfig = {}): void {
       clientToken: DATADOG_CONFIG.clientToken,
       site: DATADOG_CONFIG.site,
       service: DATADOG_CONFIG.service,
-      env: process.env.NODE_ENV || "production",
+      env: DATADOG_CONFIG.env,
       sessionSampleRate: 100,
       sessionReplaySampleRate: 100,
       trackUserInteractions: true,