npm - @librechat/agents - Versions diffs - 3.1.57 → 3.1.61 - Mend

@librechat/agents 3.1.57 → 3.1.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (214)

package/dist/cjs/agents/AgentContext.cjs +326 -62
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/common/enum.cjs +13 -0
package/dist/cjs/common/enum.cjs.map +1 -1
package/dist/cjs/events.cjs +7 -27
package/dist/cjs/events.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +303 -222
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/llm/init.cjs +60 -0
package/dist/cjs/llm/init.cjs.map +1 -0
package/dist/cjs/llm/invoke.cjs +90 -0
package/dist/cjs/llm/invoke.cjs.map +1 -0
package/dist/cjs/llm/openai/index.cjs +2 -0
package/dist/cjs/llm/openai/index.cjs.map +1 -1
package/dist/cjs/llm/request.cjs +41 -0
package/dist/cjs/llm/request.cjs.map +1 -0
package/dist/cjs/main.cjs +40 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/messages/cache.cjs +76 -89
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/messages/contextPruning.cjs +156 -0
package/dist/cjs/messages/contextPruning.cjs.map +1 -0
package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
package/dist/cjs/messages/core.cjs +23 -37
package/dist/cjs/messages/core.cjs.map +1 -1
package/dist/cjs/messages/format.cjs +156 -11
package/dist/cjs/messages/format.cjs.map +1 -1
package/dist/cjs/messages/prune.cjs +1161 -49
package/dist/cjs/messages/prune.cjs.map +1 -1
package/dist/cjs/messages/reducer.cjs +87 -0
package/dist/cjs/messages/reducer.cjs.map +1 -0
package/dist/cjs/run.cjs +81 -42
package/dist/cjs/run.cjs.map +1 -1
package/dist/cjs/stream.cjs +54 -7
package/dist/cjs/stream.cjs.map +1 -1
package/dist/cjs/summarization/index.cjs +75 -0
package/dist/cjs/summarization/index.cjs.map +1 -0
package/dist/cjs/summarization/node.cjs +663 -0
package/dist/cjs/summarization/node.cjs.map +1 -0
package/dist/cjs/tools/ToolNode.cjs +16 -8
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/cjs/tools/handlers.cjs +2 -0
package/dist/cjs/tools/handlers.cjs.map +1 -1
package/dist/cjs/utils/errors.cjs +115 -0
package/dist/cjs/utils/errors.cjs.map +1 -0
package/dist/cjs/utils/events.cjs +17 -0
package/dist/cjs/utils/events.cjs.map +1 -1
package/dist/cjs/utils/handlers.cjs +16 -0
package/dist/cjs/utils/handlers.cjs.map +1 -1
package/dist/cjs/utils/llm.cjs +10 -0
package/dist/cjs/utils/llm.cjs.map +1 -1
package/dist/cjs/utils/tokens.cjs +247 -14
package/dist/cjs/utils/tokens.cjs.map +1 -1
package/dist/cjs/utils/truncation.cjs +107 -0
package/dist/cjs/utils/truncation.cjs.map +1 -0
package/dist/esm/agents/AgentContext.mjs +325 -61
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/common/enum.mjs +13 -0
package/dist/esm/common/enum.mjs.map +1 -1
package/dist/esm/events.mjs +8 -28
package/dist/esm/events.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +307 -226
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
package/dist/esm/llm/init.mjs +58 -0
package/dist/esm/llm/init.mjs.map +1 -0
package/dist/esm/llm/invoke.mjs +87 -0
package/dist/esm/llm/invoke.mjs.map +1 -0
package/dist/esm/llm/openai/index.mjs +2 -0
package/dist/esm/llm/openai/index.mjs.map +1 -1
package/dist/esm/llm/request.mjs +38 -0
package/dist/esm/llm/request.mjs.map +1 -0
package/dist/esm/main.mjs +13 -3
package/dist/esm/main.mjs.map +1 -1
package/dist/esm/messages/cache.mjs +76 -89
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/messages/contextPruning.mjs +154 -0
package/dist/esm/messages/contextPruning.mjs.map +1 -0
package/dist/esm/messages/contextPruningSettings.mjs +50 -0
package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
package/dist/esm/messages/core.mjs +23 -37
package/dist/esm/messages/core.mjs.map +1 -1
package/dist/esm/messages/format.mjs +156 -11
package/dist/esm/messages/format.mjs.map +1 -1
package/dist/esm/messages/prune.mjs +1158 -52
package/dist/esm/messages/prune.mjs.map +1 -1
package/dist/esm/messages/reducer.mjs +83 -0
package/dist/esm/messages/reducer.mjs.map +1 -0
package/dist/esm/run.mjs +82 -43
package/dist/esm/run.mjs.map +1 -1
package/dist/esm/stream.mjs +54 -7
package/dist/esm/stream.mjs.map +1 -1
package/dist/esm/summarization/index.mjs +73 -0
package/dist/esm/summarization/index.mjs.map +1 -0
package/dist/esm/summarization/node.mjs +659 -0
package/dist/esm/summarization/node.mjs.map +1 -0
package/dist/esm/tools/ToolNode.mjs +16 -8
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/esm/tools/handlers.mjs +2 -0
package/dist/esm/tools/handlers.mjs.map +1 -1
package/dist/esm/utils/errors.mjs +111 -0
package/dist/esm/utils/errors.mjs.map +1 -0
package/dist/esm/utils/events.mjs +17 -1
package/dist/esm/utils/events.mjs.map +1 -1
package/dist/esm/utils/handlers.mjs +16 -0
package/dist/esm/utils/handlers.mjs.map +1 -1
package/dist/esm/utils/llm.mjs +10 -1
package/dist/esm/utils/llm.mjs.map +1 -1
package/dist/esm/utils/tokens.mjs +245 -15
package/dist/esm/utils/tokens.mjs.map +1 -1
package/dist/esm/utils/truncation.mjs +102 -0
package/dist/esm/utils/truncation.mjs.map +1 -0
package/dist/types/agents/AgentContext.d.ts +124 -6
package/dist/types/common/enum.d.ts +14 -1
package/dist/types/graphs/Graph.d.ts +22 -27
package/dist/types/index.d.ts +5 -0
package/dist/types/llm/init.d.ts +18 -0
package/dist/types/llm/invoke.d.ts +48 -0
package/dist/types/llm/request.d.ts +14 -0
package/dist/types/messages/contextPruning.d.ts +42 -0
package/dist/types/messages/contextPruningSettings.d.ts +44 -0
package/dist/types/messages/core.d.ts +1 -1
package/dist/types/messages/format.d.ts +17 -1
package/dist/types/messages/index.d.ts +3 -0
package/dist/types/messages/prune.d.ts +162 -1
package/dist/types/messages/reducer.d.ts +18 -0
package/dist/types/run.d.ts +12 -1
package/dist/types/summarization/index.d.ts +20 -0
package/dist/types/summarization/node.d.ts +29 -0
package/dist/types/tools/ToolNode.d.ts +3 -1
package/dist/types/types/graph.d.ts +44 -6
package/dist/types/types/index.d.ts +1 -0
package/dist/types/types/run.d.ts +30 -0
package/dist/types/types/stream.d.ts +31 -4
package/dist/types/types/summarize.d.ts +47 -0
package/dist/types/types/tools.d.ts +7 -0
package/dist/types/utils/errors.d.ts +28 -0
package/dist/types/utils/events.d.ts +13 -0
package/dist/types/utils/index.d.ts +2 -0
package/dist/types/utils/llm.d.ts +4 -0
package/dist/types/utils/tokens.d.ts +14 -1
package/dist/types/utils/truncation.d.ts +49 -0
package/package.json +3 -3
package/src/agents/AgentContext.ts +388 -58
package/src/agents/__tests__/AgentContext.test.ts +265 -5
package/src/common/enum.ts +13 -0
package/src/events.ts +9 -39
package/src/graphs/Graph.ts +468 -331
package/src/index.ts +7 -0
package/src/llm/anthropic/llm.spec.ts +3 -3
package/src/llm/anthropic/utils/message_inputs.ts +6 -4
package/src/llm/bedrock/llm.spec.ts +1 -1
package/src/llm/bedrock/utils/message_inputs.ts +6 -2
package/src/llm/init.ts +63 -0
package/src/llm/invoke.ts +144 -0
package/src/llm/request.ts +55 -0
package/src/messages/__tests__/observationMasking.test.ts +221 -0
package/src/messages/cache.ts +77 -102
package/src/messages/contextPruning.ts +191 -0
package/src/messages/contextPruningSettings.ts +90 -0
package/src/messages/core.ts +32 -53
package/src/messages/ensureThinkingBlock.test.ts +39 -39
package/src/messages/format.ts +227 -15
package/src/messages/formatAgentMessages.test.ts +511 -1
package/src/messages/index.ts +3 -0
package/src/messages/prune.ts +1548 -62
package/src/messages/reducer.ts +22 -0
package/src/run.ts +104 -51
package/src/scripts/bedrock-merge-test.ts +1 -1
package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
package/src/scripts/test-thinking-handoff.ts +1 -1
package/src/scripts/thinking-bedrock.ts +1 -1
package/src/scripts/thinking.ts +1 -1
package/src/specs/anthropic.simple.test.ts +1 -1
package/src/specs/multi-agent-summarization.test.ts +396 -0
package/src/specs/prune.test.ts +1196 -23
package/src/specs/summarization-unit.test.ts +868 -0
package/src/specs/summarization.test.ts +3827 -0
package/src/specs/summarize-prune.test.ts +376 -0
package/src/specs/thinking-handoff.test.ts +10 -10
package/src/specs/thinking-prune.test.ts +7 -4
package/src/specs/token-accounting-e2e.test.ts +1034 -0
package/src/specs/token-accounting-pipeline.test.ts +882 -0
package/src/specs/token-distribution-edge-case.test.ts +25 -26
package/src/splitStream.test.ts +42 -33
package/src/stream.ts +64 -11
package/src/summarization/__tests__/aggregator.test.ts +153 -0
package/src/summarization/__tests__/node.test.ts +708 -0
package/src/summarization/__tests__/trigger.test.ts +50 -0
package/src/summarization/index.ts +102 -0
package/src/summarization/node.ts +982 -0
package/src/tools/ToolNode.ts +25 -3
package/src/types/graph.ts +62 -7
package/src/types/index.ts +1 -0
package/src/types/run.ts +32 -0
package/src/types/stream.ts +45 -5
package/src/types/summarize.ts +58 -0
package/src/types/tools.ts +7 -0
package/src/utils/errors.ts +117 -0
package/src/utils/events.ts +31 -0
package/src/utils/handlers.ts +18 -0
package/src/utils/index.ts +2 -0
package/src/utils/llm.ts +12 -0
package/src/utils/tokens.ts +336 -18
package/src/utils/truncation.ts +124 -0
package/src/scripts/image.ts +0 -180

package/dist/types/messages/format.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { AIMessage, ToolMessage, BaseMessage, HumanMessage, SystemMessage } from '@langchain/core/messages';
 import type { MessageContentImageUrl } from '@langchain/core/messages';
 import type { MessageContentComplex, TPayload } from '@/types';
+import type { RunnableConfig } from '@langchain/core/runnables';
 import { Providers } from '@/common';
 interface MediaMessageParams {
     message: {
@@ -108,6 +109,20 @@ export declare const labelContentByAgent: (contentParts: MessageContentComplex[]
 export declare const formatAgentMessages: (payload: TPayload, indexTokenCountMap?: Record<number, number | undefined>, tools?: Set<string>) => {
     messages: Array<HumanMessage | AIMessage | SystemMessage | ToolMessage>;
     indexTokenCountMap?: Record<number, number>;
+    /** Cross-run summary extracted from the payload. Should be forwarded to the
+     *  agent run so it can be included in the system message via AgentContext. */
+    summary?: {
+        text: string;
+        tokenCount: number;
+    };
+    /** When a summary boundary sliced content from a message, the token count
+     *  was proportionally reduced. Returned so the caller can log it. */
+    boundaryTokenAdjustment?: {
+        original: number;
+        adjusted: number;
+        remainingChars: number;
+        totalChars: number;
+    };
 };
 /**
  * Adds a value at key 0 for system messages and shifts all key indices by one in an indexTokenCountMap.
@@ -131,7 +146,8 @@ export declare function shiftIndexTokenCountMap(indexTokenCountMap: Record<numbe
  *
  * @param messages - Array of messages to process
  * @param provider - The provider being used (unused but kept for future compatibility)
+ * @param config - Optional RunnableConfig for structured agent logging
  * @returns The messages array with tool sequences converted to buffer strings if necessary
  */
-export declare function ensureThinkingBlockInMessages(messages: BaseMessage[], _provider: Providers): BaseMessage[];
+export declare function ensureThinkingBlockInMessages(messages: BaseMessage[], _provider: Providers, config?: RunnableConfig): BaseMessage[];
 export {};

package/dist/types/messages/index.d.ts CHANGED Viewed

@@ -5,3 +5,6 @@ export * from './format';
 export * from './cache';
 export * from './content';
 export * from './tools';
+export * from './contextPruning';
+export * from './contextPruningSettings';
+export * from './reducer';

package/dist/types/messages/prune.d.ts CHANGED Viewed

@@ -1,6 +1,9 @@
 import { BaseMessage, UsageMetadata } from '@langchain/core/messages';
 import type { TokenCounter } from '@/types/run';
+import type { ContextPruningConfig } from '@/types/graph';
 import { ContentTypes, Providers } from '@/common';
+/** Default fraction of the token budget reserved as headroom (5 %). */
+export declare const DEFAULT_RESERVE_RATIO = 0.05;
 export type PruneMessagesFactoryParams = {
     provider?: Providers;
     maxTokens: number;
@@ -8,12 +11,90 @@ export type PruneMessagesFactoryParams = {
     tokenCounter: TokenCounter;
     indexTokenCountMap: Record<string, number | undefined>;
     thinkingEnabled?: boolean;
+    /** Context pruning configuration for position-based tool result degradation. */
+    contextPruningConfig?: ContextPruningConfig;
+    /**
+     * When true, context pressure fading (pre-flight tool result truncation)
+     * is skipped.  Summarization replaces pruning as the primary context
+     * management strategy — the summarizer needs full un-truncated tool results
+     * to produce an accurate summary.  Hard pruning still runs as a fallback
+     * when summarization is skipped or capped.
+     */
+    summarizationEnabled?: boolean;
+    /**
+     * Returns the current instruction-token overhead (system message + tool schemas + summary).
+     * Called on each prune invocation so the budget reflects dynamic changes
+     * (e.g. summary added between turns).  When messages don't include a leading
+     * SystemMessage, these tokens are subtracted from the available budget so
+     * the pruner correctly reserves space for the system prompt that will be
+     * prepended later by `buildSystemRunnable`.
+     */
+    getInstructionTokens?: () => number;
+    /**
+     * Fraction of the effective token budget to reserve as headroom (0–1).
+     * When set, pruning triggers at `effectiveMax * (1 - reserveRatio)` instead of
+     * filling the context window to 100%.  Defaults to 5 % (0.05) when omitted.
+     */
+    reserveRatio?: number;
+    /**
+     * Initial calibration ratio from a previous run's persisted contextMeta.
+     * Seeds the running EMA so new messages are scaled immediately instead
+     * of waiting for the first provider response.  Ignored when <= 0.
+     */
+    calibrationRatio?: number;
+    /** Optional diagnostic log callback wired by the graph for observability. */
+    log?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: Record<string, unknown>) => void;
 };
 export type PruneMessagesParams = {
     messages: BaseMessage[];
     usageMetadata?: Partial<UsageMetadata>;
     startType?: ReturnType<BaseMessage['getType']>;
+    /**
+     * Usage from the most recent LLM call only (not accumulated).
+     * When provided, calibration uses this instead of usageMetadata
+     * to avoid inflated ratios from N×cacheRead accumulation.
+     */
+    lastCallUsage?: {
+        totalTokens: number;
+        inputTokens?: number;
+    };
+    /**
+     * Whether the token data is fresh (from a just-completed LLM call).
+     * When false, provider calibration is skipped to avoid applying
+     * stale ratios.
+     */
+    totalTokensFresh?: boolean;
 };
+export declare function repairOrphanedToolMessages({ context, allMessages, tokenCounter, indexTokenCountMap, }: {
+    context: BaseMessage[];
+    allMessages: BaseMessage[];
+    tokenCounter: TokenCounter;
+    indexTokenCountMap: Record<string, number | undefined>;
+}): {
+    context: BaseMessage[];
+    reclaimedTokens: number;
+    droppedOrphanCount: number;
+    /** Messages removed from context during orphan repair.  These should be
+     *  appended to `messagesToRefine` so that summarization can still see them
+     *  (e.g. a ToolMessage whose parent AI was pruned). */
+    droppedMessages: BaseMessage[];
+};
+/**
+ * Lightweight structural cleanup: strips orphan tool_use blocks from AI messages
+ * and drops orphan ToolMessages whose AI counterpart is missing.
+ *
+ * Unlike `repairOrphanedToolMessages`, this does NOT track tokens — it is
+ * intended as a final safety net in Graph.ts right before model invocation
+ * to prevent Anthropic/Bedrock structural validation errors.
+ *
+ * Uses duck-typing instead of `getType()` because messages at this stage
+ * may be plain objects (from LangGraph state serialization) rather than
+ * proper BaseMessage class instances.
+ *
+ * Includes a fast-path: if every tool_call has a matching tool_result and
+ * vice-versa, the original array is returned immediately with zero allocation.
+ */
+export declare function sanitizeOrphanToolBlocks(messages: BaseMessage[]): BaseMessage[];
 /**
  * Calculates the total tokens from a single usage object
  *
@@ -34,7 +115,7 @@ export type PruningResult = {
  * @param options Configuration options for processing messages
  * @returns Object containing the message context, remaining tokens, messages not included, and summary index
  */
-export declare function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, indexTokenCountMap, startType: _startType, thinkingEnabled, tokenCounter, thinkingStartIndex: _thinkingStartIndex, reasoningType, }: {
+export declare function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, indexTokenCountMap, startType: _startType, thinkingEnabled, tokenCounter, thinkingStartIndex: _thinkingStartIndex, reasoningType, instructionTokens: _instructionTokens, }: {
     messages: BaseMessage[];
     maxContextTokens: number;
     indexTokenCountMap: Record<string, number | undefined>;
@@ -43,9 +124,89 @@ export declare function getMessagesWithinTokenLimit({ messages: _messages, maxCo
     tokenCounter: TokenCounter;
     thinkingStartIndex?: number;
     reasoningType?: ContentTypes.THINKING | ContentTypes.REASONING_CONTENT;
+    /**
+     * Token overhead for instructions (system message + tool schemas + summary)
+     * that are NOT included in `messages`.  When messages[0] is already a
+     * SystemMessage the budget is deducted from its indexTokenCountMap entry
+     * as before; otherwise this value is subtracted from the available budget.
+     */
+    instructionTokens?: number;
 }): PruningResult;
 export declare function checkValidNumber(value: unknown): value is number;
+/**
+ * Observation masking: replaces consumed ToolMessage content with tight
+ * head+tail truncations that serve as informative placeholders.
+ *
+ * A ToolMessage is "consumed" when a subsequent AI message exists that is NOT
+ * purely tool calls — meaning the model has already read and acted on the
+ * result. Unconsumed results (the latest tool outputs the model hasn't
+ * responded to yet) are left intact so the model can still use them.
+ *
+ * AI messages are never masked — they contain the model's own reasoning and
+ * conclusions, which is what prevents the model from repeating work after
+ * its tool results are masked.
+ *
+ * @returns The number of tool messages that were masked.
+ */
+export declare function maskConsumedToolResults(params: {
+    messages: BaseMessage[];
+    indexTokenCountMap: Record<string, number | undefined>;
+    tokenCounter: TokenCounter;
+    /** Raw-space token budget available for all consumed tool results combined.
+     *  When provided, the budget is distributed across consumed results weighted
+     *  by recency (newest get the most, oldest get MASKED_RESULT_MAX_CHARS min).
+     *  When omitted, falls back to a flat MASKED_RESULT_MAX_CHARS per result. */
+    availableRawBudget?: number;
+    /** When provided, original (pre-masking) content is stored here keyed by
+     *  message index — only for entries that actually get truncated. */
+    originalContentStore?: Map<number, string>;
+    /** Called after storing content with the char length of the stored entry. */
+    onContentStored?: (charLength: number) => void;
+}): number;
+/**
+ * Pre-flight truncation: truncates oversized ToolMessage content before the
+ * main backward-iteration pruning runs. Unlike the ingestion guard (which caps
+ * at tool-execution time), pre-flight truncation applies per-turn based on the
+ * current context window budget (which may have shrunk due to growing conversation).
+ *
+ * After truncation, recounts tokens via tokenCounter and updates indexTokenCountMap
+ * so subsequent pruning works with accurate counts.
+ *
+ * @returns The number of tool messages that were truncated.
+ */
+export declare function preFlightTruncateToolResults(params: {
+    messages: BaseMessage[];
+    maxContextTokens: number;
+    indexTokenCountMap: Record<string, number | undefined>;
+    tokenCounter: TokenCounter;
+}): number;
+/**
+ * Pre-flight truncation: truncates oversized `tool_use` input fields in AI messages.
+ *
+ * Tool call inputs (arguments) can be very large — e.g., code evaluation payloads from
+ * MCP tools like chrome-devtools. Since these tool calls have already been executed,
+ * the model only needs a summary of what was called, not the full arguments. Truncating
+ * them before pruning can prevent entire messages from being dropped.
+ *
+ * Uses 15% of the context window (in estimated characters, ~4 chars/token) as the
+ * per-input cap, capped at 200K chars.
+ *
+ * @returns The number of AI messages that had tool_use inputs truncated.
+ */
+export declare function preFlightTruncateToolCallInputs(params: {
+    messages: BaseMessage[];
+    maxContextTokens: number;
+    indexTokenCountMap: Record<string, number | undefined>;
+    tokenCounter: TokenCounter;
+}): number;
 export declare function createPruneMessages(factoryParams: PruneMessagesFactoryParams): (params: PruneMessagesParams) => {
     context: BaseMessage[];
     indexTokenCountMap: Record<string, number | undefined>;
+    messagesToRefine?: BaseMessage[];
+    prePruneContextTokens?: number;
+    remainingContextTokens?: number;
+    contextPressure?: number;
+    originalToolContent?: Map<number, string>;
+    calibrationRatio?: number;
+    resolvedInstructionOverhead?: number;
 };

package/dist/types/messages/reducer.d.ts CHANGED Viewed

@@ -1,5 +1,23 @@
 import { BaseMessage, BaseMessageLike } from '@langchain/core/messages';
 export declare const REMOVE_ALL_MESSAGES = "__remove_all__";
+/**
+ * Creates a message that instructs messagesStateReducer to remove ALL
+ * existing messages from state.  Messages appearing after this one in
+ * the array become the new state.
+ *
+ * Usage (in a node return value):
+ * ```ts
+ * return { messages: [createRemoveAllMessage(), ...survivingMessages] };
+ * ```
+ *
+ * This works because the reducer checks for `getType() === 'remove'`
+ * with `id === REMOVE_ALL_MESSAGES` and discards everything before it.
+ *
+ * NOTE: Uses RemoveMessage from @langchain/core with a sentinel id so
+ * the reducer can distinguish a "remove-all" marker from a single-message
+ * removal.
+ */
+export declare function createRemoveAllMessage(): BaseMessage;
 export type Messages = Array<BaseMessage | BaseMessageLike> | BaseMessage | BaseMessageLike;
 /**
  * Prebuilt reducer that combines returned messages.

package/dist/types/run.d.ts CHANGED Viewed

@@ -10,21 +10,32 @@ export declare class Run<_T extends t.BaseGraphState> {
     private tokenCounter?;
     private handlerRegistry?;
     private indexTokenCountMap?;
+    calibrationRatio: number;
     graphRunnable?: t.CompiledStateWorkflow;
     Graph: StandardGraph | MultiAgentGraph | undefined;
     returnContent: boolean;
     private skipCleanup;
+    private _streamResult;
     private constructor();
     private createLegacyGraph;
     private createMultiAgentGraph;
     static create<T extends t.BaseGraphState>(config: t.RunConfig): Promise<Run<T>>;
     getRunMessages(): BaseMessage[] | undefined;
+    /**
+     * Returns the current calibration ratio (EMA of provider-vs-estimate token ratios).
+     * Hosts should persist this value and pass it back as `RunConfig.calibrationRatio`
+     * on the next run for the same conversation so the pruner starts with an accurate
+     * scaling factor instead of the default (1).
+     */
+    getCalibrationRatio(): number;
+    getResolvedInstructionOverhead(): number | undefined;
+    getToolCount(): number;
     /**
      * Creates a custom event callback handler that intercepts custom events
      * and processes them through our handler registry instead of EventStreamCallbackHandler
      */
     private createCustomEventCallback;
-    processStream(inputs: t.IState, config: Partial<RunnableConfig> & {
+    processStream(inputs: t.IState, callerConfig: Partial<RunnableConfig> & {
         version: 'v1' | 'v2';
         run_id?: string;
     }, streamOptions?: t.EventStreamOptions): Promise<MessageContentComplex[] | undefined>;

package/dist/types/summarization/index.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import type { SummarizationTrigger } from '@/types';
+/**
+ * Determines whether summarization should be triggered based on the configured trigger
+ * and current context state.
+ *
+ * Default behavior (no trigger configured): returns `true` whenever messages were pruned.
+ * This is intentional — when an admin enables summarization without specifying a trigger,
+ * summarization fires on any context overflow that causes pruning.
+ *
+ * When a trigger IS configured but required runtime data is missing (e.g., maxContextTokens
+ * unavailable for a token_ratio trigger), returns `false` — we cannot evaluate the condition,
+ * so we do not fire.
+ */
+export declare function shouldTriggerSummarization(params: {
+    trigger?: SummarizationTrigger;
+    maxContextTokens?: number;
+    prePruneContextTokens?: number;
+    remainingContextTokens?: number;
+    messagesToRefineCount: number;
+}): boolean;

package/dist/types/summarization/node.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
+import type { RunnableConfig } from '@langchain/core/runnables';
+import type { BaseMessage } from '@langchain/core/messages';
+import type { AgentContext } from '@/agents/AgentContext';
+import type * as t from '@/types';
+/** Structured checkpoint prompt for fresh summarization (no prior summary). */
+export declare const DEFAULT_SUMMARIZATION_PROMPT = "Hold on, before you continue I need you to write me a checkpoint of everything so far. Your context window is filling up and this checkpoint replaces the messages above, so capture everything you need to pick right back up.\n\nDon't second-guess or fact-check anything you did, your tool results reflect exactly what happened. Just record what you did and what you observed. Only the checkpoint, don't respond to me or continue the conversation.\n\n## Checkpoint\n\n## Goal\nWhat I asked you to do and any sub-goals you identified.\n\n## Constraints & Preferences\nAny rules, preferences, or configuration I established.\n\n## Progress\n### Done\n- What you completed and the outcomes\n\n### In Progress\n- What you're currently working on\n\n## Key Decisions\nDecisions you made and why.\n\n## Next Steps\nConcrete task actions remaining, in priority order.\n\n## Critical Context\nExact identifiers, names, error messages, URLs, and details you need to preserve verbatim.\n\nRules:\n- Record what you did and observed, don't judge or re-evaluate it\n- For each tool call: the tool name, key inputs, and the outcome\n- Preserve exact identifiers, names, errors, and references verbatim\n- Short declarative sentences\n- Skip empty sections";
+/** Prompt for re-compaction when a prior summary exists. */
+export declare const DEFAULT_UPDATE_SUMMARIZATION_PROMPT = "Hold on again, update your checkpoint. Merge the new messages into your existing checkpoint and give me a single consolidated replacement.\n\nKeep it roughly the same length as your last checkpoint. Compress older details to make room for what's new, don't just append. Give recent actions more detail, compress older items to one-liners.\n\nDon't fact-check or second-guess anything, your tool results are ground truth. Only the checkpoint, don't respond to me or continue the conversation.\n\nRules:\n- Merge new progress into existing sections, don't duplicate headers\n- Compress older completed items into one-line entries\n- Move items from \"In Progress\" to \"Done\" when you completed them\n- Update \"Next Steps\" to reflect current task priorities.\n- For each new tool call: the tool name, key inputs, and the outcome\n- Preserve exact identifiers, names, errors, and references verbatim\n- Skip empty sections";
+interface CreateSummarizeNodeParams {
+    agentContext: AgentContext;
+    graph: {
+        contentData: t.RunStep[];
+        contentIndexMap: Map<string, number>;
+        config?: RunnableConfig;
+        runId?: string;
+        isMultiAgent: boolean;
+        dispatchRunStep: (runStep: t.RunStep, config?: RunnableConfig) => Promise<void>;
+        dispatchRunStepCompleted: (stepId: string, result: t.StepCompleted, config?: RunnableConfig) => Promise<void>;
+    };
+    generateStepId: (stepKey: string) => [string, number];
+}
+export declare function createSummarizeNode({ agentContext, graph, generateStepId, }: CreateSummarizeNodeParams): (state: {
+    messages: BaseMessage[];
+    summarizationRequest?: t.SummarizationNodeInput;
+}, config?: RunnableConfig) => Promise<{
+    summarizationRequest: undefined;
+    messages?: BaseMessage[];
+}>;
+export {};

package/dist/types/tools/ToolNode.d.ts CHANGED Viewed

@@ -26,7 +26,9 @@ export declare class ToolNode<T = any> extends RunnableCallable<T, T> {
     private agentId?;
     /** Tool names that bypass event dispatch and execute directly (e.g., graph-managed handoff tools) */
     private directToolNames?;
-    constructor({ tools, toolMap, name, tags, errorHandler, toolCallStepIds, handleToolErrors, loadRuntimeTools, toolRegistry, sessions, eventDrivenMode, agentId, directToolNames, }: t.ToolNodeConstructorParams);
+    /** Maximum characters allowed in a single tool result before truncation. */
+    private maxToolResultChars;
+    constructor({ tools, toolMap, name, tags, errorHandler, toolCallStepIds, handleToolErrors, loadRuntimeTools, toolRegistry, sessions, eventDrivenMode, agentId, directToolNames, maxContextTokens, maxToolResultChars, }: t.ToolNodeConstructorParams);
     /**
      * Returns cached programmatic tools, computing once on first access.
      * Single iteration builds both toolMap and toolDefs simultaneously.

package/dist/types/types/graph.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import type { ToolMap, ToolEndEvent, GenericTool, LCTool } from '@/types/tools';
 import type { Providers, Callback, GraphNodeKeys } from '@/common';
 import type { StandardGraph, MultiAgentGraph } from '@/graphs';
 import type { ClientOptions } from '@/types/llm';
+import type { SummarizationNodeInput, SummarizeCompleteEvent, SummarizationConfig, SummarizeStartEvent, SummarizeDeltaEvent } from '@/types/summarize';
 import type { RunStep, RunStepDeltaEvent, MessageDeltaEvent, ReasoningDeltaEvent } from '@/types/stream';
 import type { TokenCounter } from '@/types/run';
 /** Interface for bound model with stream and invoke methods */
@@ -28,12 +29,23 @@ export type SystemCallbacks = {
 export type BaseGraphState = {
     messages: BaseMessage[];
 };
+export type AgentSubgraphState = BaseGraphState & {
+    summarizationRequest?: SummarizationNodeInput;
+};
 export type MultiAgentGraphState = BaseGraphState & {
     agentMessages?: BaseMessage[];
 };
 export type IState = BaseGraphState;
+export interface AgentLogEvent {
+    level: 'debug' | 'info' | 'warn' | 'error';
+    scope: 'prune' | 'summarize' | 'graph' | 'sanitize' | (string & {});
+    message: string;
+    data?: Record<string, unknown>;
+    runId?: string;
+    agentId?: string;
+}
 export interface EventHandler {
-    handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | {
+    handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | SummarizeStartEvent | SummarizeDeltaEvent | SummarizeCompleteEvent | AgentLogEvent | {
         result: ToolEndEvent;
     }, metadata?: Record<string, unknown>, graph?: StandardGraph | MultiAgentGraph): void | Promise<void>;
 }
@@ -62,17 +74,16 @@ export type CompiledMultiAgentWorkflow = CompiledStateGraph<StateType<{
     messages: BinaryOperatorAggregate<BaseMessage[], BaseMessage[]>;
     agentMessages: BinaryOperatorAggregate<BaseMessage[], BaseMessage[]>;
 }, StateDefinition>;
-export type CompiledAgentWorfklow = CompiledStateGraph<{
-    messages: BaseMessage[];
-}, {
-    messages?: BaseMessage[] | undefined;
-}, '__start__' | `agent=${string}` | `tools=${string}`, {
+export type CompiledAgentWorfklow = CompiledStateGraph<AgentSubgraphState, Partial<AgentSubgraphState>, '__start__' | `agent=${string}` | `tools=${string}` | `summarize=${string}`, {
     messages: BinaryOperatorAggregate<BaseMessage[], BaseMessage[]>;
+    summarizationRequest: BinaryOperatorAggregate<SummarizationNodeInput | undefined, SummarizationNodeInput | undefined>;
 }, {
     messages: BinaryOperatorAggregate<BaseMessage[], BaseMessage[]>;
+    summarizationRequest: BinaryOperatorAggregate<SummarizationNodeInput | undefined, SummarizationNodeInput | undefined>;
 }, StateDefinition, {
     [x: `agent=${string}`]: Partial<BaseGraphState>;
     [x: `tools=${string}`]: any;
+    [x: `summarize=${string}`]: any;
 }>;
 export type SystemRunnable = Runnable<BaseMessage[], (BaseMessage | SystemMessage)[], RunnableConfig<Record<string, unknown>>> | undefined;
 /**
@@ -201,6 +212,7 @@ export type StandardGraphInput = {
     agents: AgentInputs[];
     tokenCounter?: TokenCounter;
     indexTokenCountMap?: Record<string, number>;
+    calibrationRatio?: number;
 };
 export type GraphEdge = {
     /** Agent ID, use a list for multiple sources */
@@ -270,4 +282,30 @@ export interface AgentInputs {
      * in tool binding without requiring tool_search.
      */
     discoveredTools?: string[];
+    summarizationEnabled?: boolean;
+    summarizationConfig?: SummarizationConfig;
+    /** Cross-run summary from a previous run, forwarded from formatAgentMessages.
+     *  Injected into the system message via AgentContext.buildInstructionsString(). */
+    initialSummary?: {
+        text: string;
+        tokenCount: number;
+    };
+    contextPruningConfig?: ContextPruningConfig;
+    maxToolResultChars?: number;
+}
+export interface ContextPruningConfig {
+    enabled?: boolean;
+    keepLastAssistants?: number;
+    softTrimRatio?: number;
+    hardClearRatio?: number;
+    minPrunableToolChars?: number;
+    softTrim?: {
+        maxChars?: number;
+        headChars?: number;
+        tailChars?: number;
+    };
+    hardClear?: {
+        enabled?: boolean;
+        placeholder?: string;
+    };
 }

package/dist/types/types/index.d.ts CHANGED Viewed

@@ -3,3 +3,4 @@ export * from './llm';
 export * from './run';
 export * from './stream';
 export * from './tools';
+export * from './summarize';

package/dist/types/types/run.d.ts CHANGED Viewed

@@ -103,11 +103,41 @@ export type RunConfig = {
     returnContent?: boolean;
     tokenCounter?: TokenCounter;
     indexTokenCountMap?: Record<string, number>;
+    /**
+     * Calibration ratio from a previous run's contextMeta.
+     * Seeds the pruner's EMA so new messages are scaled immediately.
+     *
+     * Hosts should persist the value returned by `Run.getCalibrationRatio()`
+     * after each run and pass it back here on subsequent runs for the same
+     * conversation. Without this, the EMA resets to 1 on every new Run instance.
+     */
+    calibrationRatio?: number;
     /** Skip post-stream cleanup (clearHeavyState) — useful for tests that inspect graph state after processStream */
     skipCleanup?: boolean;
 };
 export type ProvidedCallbacks = (BaseCallbackHandler | CallbackHandlerMethods)[] | undefined;
 export type TokenCounter = (message: BaseMessage) => number;
+/** Structured breakdown of how context token budget is consumed. */
+export type TokenBudgetBreakdown = {
+    /** Total context window budget (maxContextTokens). */
+    maxContextTokens: number;
+    /** Total instruction tokens (system + tools + summary). */
+    instructionTokens: number;
+    /** Tokens from the system message text alone. */
+    systemMessageTokens: number;
+    /** Tokens from tool schema definitions. */
+    toolSchemaTokens: number;
+    /** Tokens from the conversation summary. */
+    summaryTokens: number;
+    /** Number of registered tools. */
+    toolCount: number;
+    /** Number of messages in the conversation. */
+    messageCount: number;
+    /** Total tokens consumed by messages (excluding system). */
+    messageTokens: number;
+    /** Tokens available for messages after instructions. */
+    availableForMessages: number;
+};
 export type EventStreamOptions = {
     callbacks?: g.ClientCallbacks;
     keepContent?: boolean;

package/dist/types/types/stream.d.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import type { ToolCall, ToolCallChunk } from '@langchain/core/messages/tool';
 import type { LLMResult, Generation } from '@langchain/core/outputs';
 import type { AnthropicContentBlock } from '@/llm/anthropic/types';
 import type { Command } from '@langchain/langgraph';
+import type { SummarizeCompleteEvent } from '@/types/summarize';
 import type { ToolEndEvent } from '@/types/tools';
 import { StepTypes, ContentTypes, GraphEvents } from '@/common/enum';
 export type HandleLLMEnd = (output: LLMResult, runId: string, parentRunId?: string, tags?: string[]) => void;
@@ -52,6 +53,7 @@ export type RunStep = {
     index: number;
     stepIndex?: number;
     stepDetails: StepDetails;
+    summary?: SummaryContentBlock;
     usage?: null | object;
 };
 /**
@@ -69,7 +71,11 @@ export interface RunStepDeltaEvent {
     delta: ToolCallDelta;
 }
 export type StepDetails = MessageCreationDetails | ToolCallsDetails;
-export type StepCompleted = ToolCallCompleted;
+export type SummaryCompleted = {
+    type: 'summary';
+    summary: SummaryContentBlock;
+};
+export type StepCompleted = ToolCallCompleted | SummaryCompleted;
 export type MessageCreationDetails = {
     type: StepTypes.MESSAGE_CREATION;
     message_creation: {
@@ -116,6 +122,7 @@ export type ToolCallsDetails = {
 export type ToolCallDelta = {
     type: StepTypes;
     tool_calls?: ToolCallChunk[];
+    summary?: SummaryContentBlock;
     auth?: string;
     expires_at?: number;
 };
@@ -202,11 +209,25 @@ export type ReasoningDeltaUpdate = {
     type: ContentTypes.THINK;
     think: string;
 };
-export type ContentType = 'text' | 'image_url' | 'tool_call' | 'think' | string;
+export type ContentType = 'text' | 'image_url' | 'tool_call' | 'think' | 'summary' | string;
 export type ReasoningContentText = {
     type: ContentTypes.THINK;
     think: string;
 };
+export type SummaryBoundary = {
+    messageId: string;
+    contentIndex: number;
+};
+export type SummaryContentBlock = {
+    type: ContentTypes.SUMMARY;
+    content?: MessageContentComplex[];
+    tokenCount?: number;
+    boundary?: SummaryBoundary;
+    summaryVersion?: number;
+    model?: string;
+    provider?: string;
+    createdAt?: string;
+};
 /** Vertex AI / Google Common - Reasoning Content Block Format */
 export type GoogleReasoningContentText = {
     type: ContentTypes.REASONING;
@@ -258,7 +279,7 @@ export type ToolResultContent = {
     input?: string | Record<string, unknown>;
     index?: number;
 };
-export type MessageContentComplex = (ToolResultContent | ThinkingContentText | AgentUpdate | ToolCallContent | ReasoningContentText | MessageContentText | MessageContentImageUrl | (Record<string, any> & {
+export type MessageContentComplex = (ToolResultContent | ThinkingContentText | SummaryContentBlock | AgentUpdate | ToolCallContent | ReasoningContentText | MessageContentText | MessageContentImageUrl | (Record<string, any> & {
     type?: 'text' | 'image_url' | 'think' | 'thinking' | string;
 }) | (Record<string, any> & {
     type?: never;
@@ -297,9 +318,15 @@ export type SplitStreamHandlers = Partial<{
         data: ReasoningDeltaEvent;
     }) => void;
 }>;
+export type SummarizeDeltaData = {
+    id: string;
+    delta: {
+        summary: SummaryContentBlock;
+    };
+};
 export type ContentAggregator = ({ event, data, }: {
     event: GraphEvents;
-    data: RunStep | MessageDeltaEvent | RunStepDeltaEvent | {
+    data: RunStep | AgentUpdate | MessageDeltaEvent | ReasoningDeltaEvent | RunStepDeltaEvent | SummarizeDeltaData | SummarizeCompleteEvent | {
         result: ToolEndEvent;
     };
 }) => void;

package/dist/types/types/summarize.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+import type { SummaryContentBlock } from '@/types/stream';
+import type { Providers } from '@/common';
+export type SummarizationTrigger = {
+    type: 'token_ratio' | 'remaining_tokens' | 'messages_to_refine' | (string & {});
+    value: number;
+};
+export type SummarizationConfig = {
+    provider?: Providers;
+    model?: string;
+    parameters?: Record<string, unknown>;
+    prompt?: string;
+    updatePrompt?: string;
+    trigger?: SummarizationTrigger;
+    maxSummaryTokens?: number;
+    /** Fraction of the token budget reserved as headroom (0–1). Defaults to 0.05. */
+    reserveRatio?: number;
+};
+export interface SummarizeResult {
+    text: string;
+    tokenCount: number;
+    model?: string;
+    provider?: string;
+}
+export interface SummarizationNodeInput {
+    remainingContextTokens: number;
+    agentId: string;
+}
+export interface SummarizeStartEvent {
+    agentId: string;
+    provider: string;
+    model?: string;
+    messagesToRefineCount: number;
+    /** Which summarization cycle this is (1-based, increments each time summarization fires) */
+    summaryVersion: number;
+}
+export interface SummarizeDeltaEvent {
+    id: string;
+    delta: {
+        summary: SummaryContentBlock;
+    };
+}
+export interface SummarizeCompleteEvent {
+    id: string;
+    agentId: string;
+    summary?: SummaryContentBlock;
+    error?: string;
+}

package/dist/types/types/tools.d.ts CHANGED Viewed

@@ -39,6 +39,13 @@ export type ToolNodeOptions = {
     agentId?: string;
     /** Tool names that must be executed directly (via runTool) even in event-driven mode (e.g., graph-managed handoff tools) */
     directToolNames?: Set<string>;
+    /** Max context tokens for the agent — used to compute tool result truncation limits. */
+    maxContextTokens?: number;
+    /**
+     * Maximum characters allowed in a single tool result before truncation.
+     * When provided, takes precedence over the value computed from maxContextTokens.
+     */
+    maxToolResultChars?: number;
 };
 export type ToolNodeConstructorParams = ToolRefs & ToolNodeOptions;
 export type ToolEndEvent = {