@librechat/agents 3.2.34 → 3.2.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +119 -9
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/agents/projection.cjs +25 -0
- package/dist/cjs/agents/projection.cjs.map +1 -0
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +106 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +26 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +20 -0
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/main.cjs +7 -0
- package/dist/cjs/messages/budget.cjs +23 -0
- package/dist/cjs/messages/budget.cjs.map +1 -0
- package/dist/cjs/messages/cache.cjs +1 -0
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/index.cjs +1 -0
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +91 -2
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +4 -3
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +121 -11
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/agents/projection.mjs +25 -0
- package/dist/esm/agents/projection.mjs.map +1 -0
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +107 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +26 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +20 -0
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/main.mjs +6 -4
- package/dist/esm/messages/budget.mjs +23 -0
- package/dist/esm/messages/budget.mjs.map +1 -0
- package/dist/esm/messages/cache.mjs +1 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/index.mjs +1 -0
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +91 -2
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +4 -3
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +37 -4
- package/dist/types/agents/projection.d.ts +26 -0
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/messages/budget.d.ts +11 -0
- package/dist/types/messages/cache.d.ts +7 -0
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/types.d.ts +7 -0
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +172 -8
- package/src/agents/__tests__/AgentContext.test.ts +235 -2
- package/src/agents/__tests__/projection.test.ts +73 -0
- package/src/agents/projection.ts +46 -0
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +168 -0
- package/src/index.ts +3 -0
- package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
- package/src/llm/anthropic/utils/message_inputs.ts +78 -16
- package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
- package/src/llm/bedrock/utils/message_inputs.ts +35 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/messages/budget.ts +32 -0
- package/src/messages/cache.ts +1 -1
- package/src/messages/content.ts +24 -32
- package/src/messages/index.ts +1 -0
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/search/format.test.ts +242 -0
- package/src/tools/search/format.ts +122 -5
- package/src/tools/search/tool.ts +5 -1
- package/src/tools/search/types.ts +7 -0
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { SystemMessage } from '@langchain/core/messages';
|
|
2
2
|
import type { UsageMetadata, BaseMessage } from '@langchain/core/messages';
|
|
3
3
|
import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
|
|
4
|
-
import type { createPruneMessages } from '@/messages';
|
|
5
4
|
import type * as t from '@/types';
|
|
6
5
|
import { ContentTypes, Providers } from '@/common';
|
|
6
|
+
import { createPruneMessages } from '@/messages';
|
|
7
7
|
/**
|
|
8
8
|
* Encapsulates agent-specific state that can vary between agents in a multi-agent system
|
|
9
9
|
*/
|
|
@@ -60,6 +60,11 @@ export declare class AgentContext {
|
|
|
60
60
|
dynamicInstructionTokens: number;
|
|
61
61
|
/** Token count for tool schemas only. */
|
|
62
62
|
toolSchemaTokens: number;
|
|
63
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name.
|
|
64
|
+
* `undefined` when not calculated (e.g. cached aggregate schema tokens). */
|
|
65
|
+
toolTokenCounts?: Record<string, number>;
|
|
66
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
67
|
+
deferredToolNames: string[];
|
|
63
68
|
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
|
|
64
69
|
calibrationRatio: number;
|
|
65
70
|
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
|
|
@@ -330,9 +335,8 @@ export declare class AgentContext {
|
|
|
330
335
|
* Returns a structured breakdown of how the context token budget is consumed.
|
|
331
336
|
* Useful for diagnostics when context overflow or pruning issues occur.
|
|
332
337
|
*
|
|
333
|
-
* Note: `
|
|
334
|
-
*
|
|
335
|
-
* recomputed when `markToolsAsDiscovered` is called mid-run.
|
|
338
|
+
* Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
|
|
339
|
+
* so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
|
|
336
340
|
*/
|
|
337
341
|
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown;
|
|
338
342
|
/**
|
|
@@ -340,6 +344,35 @@ export declare class AgentContext {
|
|
|
340
344
|
* for inclusion in error messages and diagnostics.
|
|
341
345
|
*/
|
|
342
346
|
formatTokenBudgetBreakdown(messages?: BaseMessage[]): string;
|
|
347
|
+
/**
|
|
348
|
+
* Projects the context-usage snapshot for an arbitrary message set WITHOUT
|
|
349
|
+
* invoking the model — the pre-send / page-load / window-switch counterpart to
|
|
350
|
+
* the live `ON_CONTEXT_USAGE` snapshot. Runs the same pruner + budget math the
|
|
351
|
+
* graph uses (`createPruneMessages` → `getTokenBudgetBreakdown` →
|
|
352
|
+
* `syncBudgetDerivedFields`) so projected numbers match a real call. Returns
|
|
353
|
+
* null when the context lacks the tokenizer or window needed to prune. Omits
|
|
354
|
+
* the live post-format reconciliation (provider-specific, invoke-time) — a
|
|
355
|
+
* small, acceptable delta for a pre-send estimate.
|
|
356
|
+
*
|
|
357
|
+
* Safe to call off the hot path: the supplied `messages` are never mutated
|
|
358
|
+
* (each is passed as a clone — the pruner both replaces tool-result slots and
|
|
359
|
+
* unshifts reasoning blocks into AI content arrays in place), and this
|
|
360
|
+
* context's own state is untouched apart from refreshing stale instruction
|
|
361
|
+
* counts (idempotent, exactly what a real call does). Token counts are
|
|
362
|
+
* recounted for the supplied messages (the context's `indexTokenCountMap` is
|
|
363
|
+
* keyed to the live run's branch and would missum an arbitrary branch) unless
|
|
364
|
+
* the caller passes a map it guarantees matches. Calibration is NOT re-derived
|
|
365
|
+
* from this context's live usage (a fresh pruner would compare the prior
|
|
366
|
+
* call's provider input against the whole projected branch); the learned
|
|
367
|
+
* `calibrationRatio` is applied as a static seed, and callers may override it
|
|
368
|
+
* with a persisted ratio via `opts.calibrationRatio`.
|
|
369
|
+
*/
|
|
370
|
+
projectContextUsage(messages: BaseMessage[], opts?: {
|
|
371
|
+
runId?: string;
|
|
372
|
+
agentId?: string;
|
|
373
|
+
calibrationRatio?: number;
|
|
374
|
+
indexTokenCountMap?: Record<string, number | undefined>;
|
|
375
|
+
}): t.ContextUsageEvent | null;
|
|
343
376
|
/**
|
|
344
377
|
* Updates the last-call usage with data from the most recent LLM response.
|
|
345
378
|
* Unlike `currentUsage` which accumulates, this captures only the single call.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
2
|
+
import type * as t from '@/types';
|
|
3
|
+
export interface ProjectAgentContextUsageParams {
|
|
4
|
+
/** Same `AgentInputs` a run is built from (instructions, tools, model, window). */
|
|
5
|
+
agent: t.AgentInputs;
|
|
6
|
+
/** Branch messages to project, in send order (no leading system message). */
|
|
7
|
+
messages: BaseMessage[];
|
|
8
|
+
tokenCounter: t.TokenCounter;
|
|
9
|
+
/** Per-message counts aligned to `messages` (e.g. from `formatAgentMessages`).
|
|
10
|
+
* When omitted, counts are recounted via `tokenCounter`. */
|
|
11
|
+
indexTokenCountMap?: Record<string, number>;
|
|
12
|
+
/** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
|
|
13
|
+
calibrationRatio?: number;
|
|
14
|
+
runId?: string;
|
|
15
|
+
agentId?: string;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Projects a pre-send context-usage snapshot for a branch under an agent config
|
|
19
|
+
* WITHOUT invoking the model — the host-side (page-load / branch-switch /
|
|
20
|
+
* window-switch) counterpart to the live `ON_CONTEXT_USAGE` event. Builds a
|
|
21
|
+
* throwaway `AgentContext` from the same `AgentInputs` a run uses, awaits its
|
|
22
|
+
* instruction/tool token accounting, then runs the shared pruner + budget math
|
|
23
|
+
* via `AgentContext.projectContextUsage` (which never mutates the supplied
|
|
24
|
+
* messages). Returns null when the config has no tokenizer or context window.
|
|
25
|
+
*/
|
|
26
|
+
export declare function projectAgentContextUsage({ agent, messages, tokenCounter, indexTokenCountMap, calibrationRatio, runId, agentId, }: ProjectAgentContextUsageParams): Promise<t.ContextUsageEvent | null>;
|
|
@@ -29,6 +29,8 @@ export declare enum GraphEvents {
|
|
|
29
29
|
ON_SUBAGENT_UPDATE = "on_subagent_update",
|
|
30
30
|
/** [Custom] Diagnostic logging event for context management observability */
|
|
31
31
|
ON_AGENT_LOG = "on_agent_log",
|
|
32
|
+
/** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
|
|
33
|
+
ON_CONTEXT_USAGE = "on_context_usage",
|
|
32
34
|
/** Custom event, emitted by system */
|
|
33
35
|
ON_CUSTOM_EVENT = "on_custom_event",
|
|
34
36
|
/** Emitted when a chat model starts processing. */
|
|
@@ -140,6 +142,17 @@ export declare enum Constants {
|
|
|
140
142
|
/** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
|
|
141
143
|
ANTHROPIC_SERVER_TOOL_PREFIX = "srvtoolu_",
|
|
142
144
|
SKILL_TOOL = "skill",
|
|
145
|
+
/**
|
|
146
|
+
* Callback-metadata keys stamped by `attemptInvoke` /
|
|
147
|
+
* `tryFallbackProviders` carrying the provider (SDK `Providers` enum
|
|
148
|
+
* value) and configured model that actually served a model invocation.
|
|
149
|
+
* Unlike `ls_provider` — which derived providers inherit from their base
|
|
150
|
+
* class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
|
|
151
|
+
* SDK's own routing, including fallback-provider calls. Consumed by the
|
|
152
|
+
* subagent usage-capture handler to tag billing events.
|
|
153
|
+
*/
|
|
154
|
+
INVOKED_PROVIDER = "__invoked_provider",
|
|
155
|
+
INVOKED_MODEL = "__invoked_model",
|
|
143
156
|
READ_FILE = "read_file",
|
|
144
157
|
BASH_TOOL = "bash_tool",
|
|
145
158
|
BASH_PROGRAMMATIC_TOOL_CALLING = "run_tools_with_bash",
|
|
@@ -179,7 +179,14 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
|
|
|
179
179
|
agentContexts: Map<string, AgentContext>;
|
|
180
180
|
/** Default agent ID to use */
|
|
181
181
|
defaultAgentId: string;
|
|
182
|
-
|
|
182
|
+
/**
|
|
183
|
+
* Host sink for model usage emitted inside subagent child runs. Threaded
|
|
184
|
+
* into each `SubagentExecutor` this graph creates (and from there into
|
|
185
|
+
* child graphs, so nested subagents report too). See
|
|
186
|
+
* {@link t.StandardGraphInput.subagentUsageSink}.
|
|
187
|
+
*/
|
|
188
|
+
subagentUsageSink?: t.SubagentUsageSink;
|
|
189
|
+
constructor({ runId, signal, agents, langfuse, tokenCounter, indexTokenCountMap, calibrationRatio, subagentUsageSink, }: t.StandardGraphInput);
|
|
183
190
|
resetValues(keepContent?: boolean): void;
|
|
184
191
|
clearHeavyState(): void;
|
|
185
192
|
getRunStep(stepId: string): t.RunStep | undefined;
|
package/dist/types/index.d.ts
CHANGED
|
@@ -3,8 +3,8 @@ import type { RunnableConfig } from '@langchain/core/runnables';
|
|
|
3
3
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
4
4
|
import type { ToolOutputReferenceRegistry } from '@/tools/toolOutputReferences';
|
|
5
5
|
import type * as t from '@/types';
|
|
6
|
-
import { ChatModelStreamHandler } from '@/stream';
|
|
7
6
|
import { Providers } from '@/common';
|
|
7
|
+
import { ChatModelStreamHandler } from '@/stream';
|
|
8
8
|
/**
|
|
9
9
|
* Context passed to `attemptInvoke`. Matches the subset of Graph that
|
|
10
10
|
* `ChatModelStreamHandler.handle` needs *plus* the explicit
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type * as t from '@/types';
|
|
2
|
+
/**
|
|
3
|
+
* Reconciles a context-usage breakdown's instruction/available/message fields
|
|
4
|
+
* from the pruner's budget metrics. `messageTokens` and `availableForMessages`
|
|
5
|
+
* are DERIVED from `contextBudget` / `effectiveInstructionTokens` /
|
|
6
|
+
* `remainingContextTokens` rather than summed from the index map — that map is
|
|
7
|
+
* keyed by pre-prune indices, so summing it over the kept context would missum.
|
|
8
|
+
* Shared by the live snapshot path (`Graph.createCallModel`) and the pre-send
|
|
9
|
+
* projection (`AgentContext.projectContextUsage`) so both yield identical numbers.
|
|
10
|
+
*/
|
|
11
|
+
export declare function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void;
|
|
@@ -3,6 +3,13 @@ import type { AnthropicMessage } from '@/types/messages';
|
|
|
3
3
|
type MessageWithContent = {
|
|
4
4
|
content?: string | MessageContentComplex[];
|
|
5
5
|
};
|
|
6
|
+
/**
|
|
7
|
+
* Clones a message with new content. For LangChain BaseMessage instances,
|
|
8
|
+
* constructs a proper class instance so that `instanceof` checks are preserved
|
|
9
|
+
* in downstream code (e.g., ensureThinkingBlockInMessages).
|
|
10
|
+
* For plain objects (AnthropicMessage), uses object spread.
|
|
11
|
+
*/
|
|
12
|
+
export declare function cloneMessage<T extends MessageWithContent>(message: T, content: string | MessageContentComplex[]): T;
|
|
6
13
|
/**
|
|
7
14
|
* Anthropic API: Adds cache control to the appropriate user messages in the payload.
|
|
8
15
|
* Strips ALL existing cache control (both Anthropic and Bedrock formats) from all messages,
|
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
2
|
+
/**
|
|
3
|
+
* Whether {@link formatContentStrings} will flatten this message's content:
|
|
4
|
+
* a human/ai/system message whose content is an array of text-only blocks.
|
|
5
|
+
*/
|
|
6
|
+
export declare const isLegacyConvertible: (message: BaseMessage) => boolean;
|
|
2
7
|
/**
|
|
3
8
|
* Formats an array of messages for LangChain, making sure all content fields are strings
|
|
4
9
|
* @param {Array<HumanMessage | AIMessage | SystemMessage | ToolMessage>} payload - The array of messages to format.
|
|
@@ -220,4 +220,8 @@ export declare function createPruneMessages(factoryParams: PruneMessagesFactoryP
|
|
|
220
220
|
originalToolContent?: Map<number, string>;
|
|
221
221
|
calibrationRatio?: number;
|
|
222
222
|
resolvedInstructionOverhead?: number;
|
|
223
|
+
/** Usable budget this call: maxTokens minus output reserve */
|
|
224
|
+
contextBudget?: number;
|
|
225
|
+
/** Calibrated instruction overhead actually applied this call */
|
|
226
|
+
effectiveInstructionTokens?: number;
|
|
223
227
|
};
|
package/dist/types/run.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare class Run<_T extends t.BaseGraphState> {
|
|
|
15
15
|
private toolOutputReferences?;
|
|
16
16
|
private eagerEventToolExecution?;
|
|
17
17
|
private toolExecution?;
|
|
18
|
+
private subagentUsageSink?;
|
|
18
19
|
private indexTokenCountMap?;
|
|
19
20
|
calibrationRatio: number;
|
|
20
21
|
graphRunnable?: t.CompiledStateWorkflow;
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import type * as t from './types';
|
|
2
|
-
|
|
2
|
+
/** Resolves the per-search highlight budget from config, the
|
|
3
|
+
* `SEARCH_MAX_LLM_OUTPUT_CHARS` env var, or the default (50,000 chars). */
|
|
4
|
+
export declare function resolveMaxLLMOutputChars(maxOutputChars?: number): number;
|
|
5
|
+
export declare function formatResultsForLLM(turn: number, results: t.SearchResultData, maxOutputChars?: number): {
|
|
3
6
|
output: string;
|
|
4
7
|
references: t.ResultReference[];
|
|
5
8
|
};
|
|
@@ -189,6 +189,13 @@ export type SafeSearchLevel = 0 | 1 | 2;
|
|
|
189
189
|
export type Logger = WinstonLogger;
|
|
190
190
|
export interface SearchToolConfig extends SearchConfig, ProcessSourcesConfig, FirecrawlConfig {
|
|
191
191
|
tavilyScraperOptions?: TavilyScraperConfig;
|
|
192
|
+
/** Max chars of highlight content this tool feeds the MODEL per search (the
|
|
193
|
+
* dominant, otherwise-unbounded part of the output). Distinct from
|
|
194
|
+
* `maxContentLength`, which caps scraped/reranked content per source — full
|
|
195
|
+
* content always remains in the `WEB_SEARCH` artifact. Defaults to 50,000;
|
|
196
|
+
* also configurable via the `SEARCH_MAX_LLM_OUTPUT_CHARS` env var. Hosts that
|
|
197
|
+
* know the context window (e.g. LibreChat) pass a window-relative value. */
|
|
198
|
+
maxOutputChars?: number;
|
|
192
199
|
logger?: Logger;
|
|
193
200
|
safeSearch?: SafeSearchLevel;
|
|
194
201
|
jinaApiKey?: string;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
2
|
-
import type { AgentInputs, StandardGraphInput, ResolvedSubagentConfig, SubagentConfig, TokenCounter } from '@/types';
|
|
2
|
+
import type { AgentInputs, StandardGraphInput, ResolvedSubagentConfig, SubagentConfig, SubagentUsageSink, TokenCounter } from '@/types';
|
|
3
3
|
import type { HookRegistry } from '@/hooks';
|
|
4
4
|
import type { AgentContext } from '@/agents/AgentContext';
|
|
5
5
|
import type { StandardGraph } from '@/graphs/Graph';
|
|
@@ -88,6 +88,15 @@ export type SubagentExecutorOptions = {
|
|
|
88
88
|
* post-`createWorkflow`, so `createAgentNode` must capture lazily).
|
|
89
89
|
*/
|
|
90
90
|
parentHandlerRegistry?: HandlerRegistry | (() => HandlerRegistry | undefined);
|
|
91
|
+
/**
|
|
92
|
+
* Receives a usage event for every model call the child run makes. The
|
|
93
|
+
* child workflow executes via `invoke()` with a detached callbacks array,
|
|
94
|
+
* so its `on_chat_model_end` events never reach the parent's handler
|
|
95
|
+
* registry — without this sink, child token usage is invisible to the
|
|
96
|
+
* host (unbilled model calls). Forwarded into the child graph's input so
|
|
97
|
+
* nested subagents report through the same sink.
|
|
98
|
+
*/
|
|
99
|
+
usageSink?: SubagentUsageSink;
|
|
91
100
|
};
|
|
92
101
|
export declare class SubagentExecutor {
|
|
93
102
|
private readonly configs;
|
|
@@ -99,6 +108,7 @@ export declare class SubagentExecutor {
|
|
|
99
108
|
private readonly tokenCounter?;
|
|
100
109
|
private readonly maxDepth;
|
|
101
110
|
private readonly createChildGraph;
|
|
111
|
+
private readonly usageSink?;
|
|
102
112
|
private readonly resolveParentHandlerRegistry?;
|
|
103
113
|
constructor(options: SubagentExecutorOptions);
|
|
104
114
|
/** Snapshot of the parent's registry at the moment a subagent is dispatched. */
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { BaseMessage, AIMessageChunk, SystemMessage } from '@langchain/core/messages';
|
|
1
|
+
import type { BaseMessage, AIMessageChunk, SystemMessage, UsageMetadata } from '@langchain/core/messages';
|
|
2
2
|
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
|
|
3
3
|
import type { START, StateGraph, StateGraphArgs } from '@langchain/langgraph';
|
|
4
4
|
import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
|
|
@@ -7,10 +7,10 @@ import type { GoogleAIToolType } from '@langchain/google-common';
|
|
|
7
7
|
import type { SummarizationNodeInput, SummarizeCompleteEvent, SummarizationConfig, SummarizeStartEvent, SummarizeDeltaEvent } from '@/types/summarize';
|
|
8
8
|
import type { ToolMap, ToolEndEvent, GenericTool, LCTool, ToolExecuteBatchRequest } from '@/types/tools';
|
|
9
9
|
import type { RunStep, RunStepDeltaEvent, MessageDeltaEvent, ReasoningDeltaEvent } from '@/types/stream';
|
|
10
|
+
import type { TokenCounter, TokenBudgetBreakdown } from '@/types/run';
|
|
10
11
|
import type { Providers, Callback, GraphNodeKeys } from '@/common';
|
|
11
12
|
import type { StandardGraph, MultiAgentGraph } from '@/graphs';
|
|
12
13
|
import type { ClientOptions } from '@/types/llm';
|
|
13
|
-
import type { TokenCounter } from '@/types/run';
|
|
14
14
|
/** Interface for bound model with stream and invoke methods */
|
|
15
15
|
export interface ChatModel {
|
|
16
16
|
stream?: (messages: BaseMessage[], config?: RunnableConfig) => Promise<AsyncIterable<AIMessageChunk>>;
|
|
@@ -44,8 +44,31 @@ export interface AgentLogEvent {
|
|
|
44
44
|
runId?: string;
|
|
45
45
|
agentId?: string;
|
|
46
46
|
}
|
|
47
|
+
/**
|
|
48
|
+
* Per-model-call context window usage snapshot, dispatched after pruning and
|
|
49
|
+
* before the model invocation. Dispatched once per `callModel` invocation:
|
|
50
|
+
* fallback retries reuse the snapshot since the prompt is identical — budget
|
|
51
|
+
* numbers reflect the primary provider's tokenizer, and the calibration
|
|
52
|
+
* ratio self-corrects from whichever provider reports usage.
|
|
53
|
+
*/
|
|
54
|
+
export interface ContextUsageEvent {
|
|
55
|
+
runId?: string;
|
|
56
|
+
agentId?: string;
|
|
57
|
+
/** Structural token budget snapshot from AgentContext.getTokenBudgetBreakdown */
|
|
58
|
+
breakdown: TokenBudgetBreakdown;
|
|
59
|
+
/** Usable budget this call: maxContextTokens minus output reserve */
|
|
60
|
+
contextBudget?: number;
|
|
61
|
+
/** Calibrated instruction overhead actually applied this call */
|
|
62
|
+
effectiveInstructionTokens?: number;
|
|
63
|
+
/** Calibrated message tokens before pruning (excluding instructions) */
|
|
64
|
+
prePruneContextTokens?: number;
|
|
65
|
+
/** Tokens still free after instructions + pruned messages */
|
|
66
|
+
remainingContextTokens?: number;
|
|
67
|
+
/** EMA ratio of provider-reported vs locally estimated token counts */
|
|
68
|
+
calibrationRatio?: number;
|
|
69
|
+
}
|
|
47
70
|
export interface EventHandler {
|
|
48
|
-
handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | SummarizeStartEvent | SummarizeDeltaEvent | SummarizeCompleteEvent | SubagentUpdateEvent | AgentLogEvent | ToolExecuteBatchRequest | {
|
|
71
|
+
handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | SummarizeStartEvent | SummarizeDeltaEvent | SummarizeCompleteEvent | SubagentUpdateEvent | AgentLogEvent | ContextUsageEvent | ToolExecuteBatchRequest | {
|
|
49
72
|
result: ToolEndEvent;
|
|
50
73
|
}, metadata?: Record<string, unknown>, graph?: StandardGraph | MultiAgentGraph): void | Promise<void>;
|
|
51
74
|
}
|
|
@@ -199,6 +222,17 @@ export type StandardGraphInput = {
|
|
|
199
222
|
tokenCounter?: TokenCounter;
|
|
200
223
|
indexTokenCountMap?: Record<string, number>;
|
|
201
224
|
calibrationRatio?: number;
|
|
225
|
+
/**
|
|
226
|
+
* Receives a {@link SubagentUsageEvent} for every model call made inside
|
|
227
|
+
* a subagent child run spawned from this graph (including nested
|
|
228
|
+
* subagents and child-side summarization calls). Child graphs run via
|
|
229
|
+
* `invoke()` outside the host's `streamEvents` loop, so their
|
|
230
|
+
* `on_chat_model_end` events never reach the run's handler registry —
|
|
231
|
+
* this sink is the only way hosts can observe child token usage for
|
|
232
|
+
* billing/accounting. Parent-graph model calls are NOT reported here;
|
|
233
|
+
* they already flow through the registry's `CHAT_MODEL_END` handler.
|
|
234
|
+
*/
|
|
235
|
+
subagentUsageSink?: SubagentUsageSink;
|
|
202
236
|
};
|
|
203
237
|
export type GraphEdge = {
|
|
204
238
|
/** Agent ID, use a list for multiple sources */
|
|
@@ -289,6 +323,58 @@ export interface SubagentUpdateEvent {
|
|
|
289
323
|
/** ISO timestamp for ordering / display. */
|
|
290
324
|
timestamp: string;
|
|
291
325
|
}
|
|
326
|
+
/**
|
|
327
|
+
* Token usage for a single model call made inside a subagent child run.
|
|
328
|
+
* Emitted through {@link SubagentUsageSink} as each call completes, so
|
|
329
|
+
* hosts can bill child-run model usage that never reaches the parent
|
|
330
|
+
* run's `CHAT_MODEL_END` handler (child graphs execute via `invoke()`
|
|
331
|
+
* outside the host's `streamEvents` loop).
|
|
332
|
+
*/
|
|
333
|
+
export interface SubagentUsageEvent {
|
|
334
|
+
/** Usage metadata reported by the child's model call. */
|
|
335
|
+
usage: UsageMetadata;
|
|
336
|
+
/**
|
|
337
|
+
* Model that produced this usage. Per-call `ls_model_name` from the
|
|
338
|
+
* model's callback metadata when available (covers child-side
|
|
339
|
+
* summarization or any call that differs from the configured model),
|
|
340
|
+
* then the fallback-invocation's configured model (`INVOKED_MODEL`
|
|
341
|
+
* metadata), then the subagent config's `clientOptions` model.
|
|
342
|
+
*/
|
|
343
|
+
model?: string;
|
|
344
|
+
/**
|
|
345
|
+
* Provider that actually served this call — the SDK `Providers` enum
|
|
346
|
+
* value stamped per-invocation by `attemptInvoke` (`INVOKED_PROVIDER`
|
|
347
|
+
* metadata), so fallback-served calls are attributed to the fallback
|
|
348
|
+
* provider, not the configured primary. Falls back to the subagent
|
|
349
|
+
* config's provider. Never LangSmith's `ls_provider` string — derived
|
|
350
|
+
* providers inherit that from their base class, and hosts key
|
|
351
|
+
* pricing/cache semantics off the enum.
|
|
352
|
+
*/
|
|
353
|
+
provider?: string;
|
|
354
|
+
/** Subagent `type` identifier from the SubagentConfig. */
|
|
355
|
+
subagentType: string;
|
|
356
|
+
/** Child run ID (unique per subagent execution). */
|
|
357
|
+
subagentRunId: string;
|
|
358
|
+
/** Child agent ID assigned to this subagent execution. */
|
|
359
|
+
subagentAgentId: string;
|
|
360
|
+
/**
|
|
361
|
+
* ROOT run ID of the host run that owns billing. For nested subagents
|
|
362
|
+
* each forwarding layer rewrites this upward, so events from any depth
|
|
363
|
+
* surface with the outermost run's ID — never an intermediate
|
|
364
|
+
* `*_sub_*` child id (use {@link subagentRunId} to identify the
|
|
365
|
+
* emitting child).
|
|
366
|
+
*/
|
|
367
|
+
runId: string;
|
|
368
|
+
}
|
|
369
|
+
/**
|
|
370
|
+
* Host-provided callback receiving {@link SubagentUsageEvent}s. Invoked as
|
|
371
|
+
* each child model call completes. May return a promise — the executor
|
|
372
|
+
* awaits each dispatch (so all usage is recorded before the child's result
|
|
373
|
+
* resolves to the parent) and swallows both synchronous throws and
|
|
374
|
+
* rejections; implementations should still be cheap, as they sit on the
|
|
375
|
+
* child's model-call path.
|
|
376
|
+
*/
|
|
377
|
+
export type SubagentUsageSink = (event: SubagentUsageEvent) => void | Promise<void>;
|
|
292
378
|
export type LangfuseToolOutputTracingConfig = {
|
|
293
379
|
/**
|
|
294
380
|
* Whether tool outputs should be exported to Langfuse. Defaults to
|
|
@@ -111,6 +111,15 @@ export type RunConfig = {
|
|
|
111
111
|
*/
|
|
112
112
|
langfuse?: g.LangfuseConfig;
|
|
113
113
|
customHandlers?: Record<string, g.EventHandler>;
|
|
114
|
+
/**
|
|
115
|
+
* Receives token usage for every model call made inside subagent child
|
|
116
|
+
* runs (including nested subagents). Child graphs execute via `invoke()`
|
|
117
|
+
* outside this run's `streamEvents` loop, so their model-end events never
|
|
118
|
+
* reach `customHandlers` — without this sink, child usage is invisible to
|
|
119
|
+
* the host. Parent-graph calls are not reported here; they flow through
|
|
120
|
+
* the registered `CHAT_MODEL_END` handler as usual.
|
|
121
|
+
*/
|
|
122
|
+
subagentUsageSink?: g.SubagentUsageSink;
|
|
114
123
|
/**
|
|
115
124
|
* Pre-constructed hook registry for this run. Hooks fire at lifecycle
|
|
116
125
|
* points in `processStream` (RunStart, UserPromptSubmit, Stop,
|
|
@@ -225,6 +234,10 @@ export type TokenBudgetBreakdown = {
|
|
|
225
234
|
messageTokens: number;
|
|
226
235
|
/** Tokens available for messages after instructions. */
|
|
227
236
|
availableForMessages: number;
|
|
237
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name. */
|
|
238
|
+
toolTokenCounts?: Record<string, number>;
|
|
239
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
240
|
+
deferredToolNames?: string[];
|
|
228
241
|
};
|
|
229
242
|
export type EventStreamOptions = {
|
|
230
243
|
callbacks?: g.ClientCallbacks;
|
|
@@ -15,6 +15,13 @@ export declare function estimateAnthropicImageTokens(width: number, height: numb
|
|
|
15
15
|
export declare function estimateOpenAIImageTokens(width: number, height: number, detail?: string): number;
|
|
16
16
|
export declare function encodingForModel(model: string): EncodingName;
|
|
17
17
|
export declare function getTokenCountForMessage(message: BaseMessage, getTokenCount: (text: string) => number, encoding?: EncodingName): number;
|
|
18
|
+
/**
|
|
19
|
+
* Largest-remainder apportionment: scales each count by `multiplier` and
|
|
20
|
+
* distributes the rounding remainder so the results sum exactly to
|
|
21
|
+
* `targetTotal`. Keeps per-item breakdowns reconciled with an aggregate
|
|
22
|
+
* computed as a single rounded product of the summed raw counts.
|
|
23
|
+
*/
|
|
24
|
+
export declare function apportionTokenCounts(rawCounts: Record<string, number>, multiplier: number, targetTotal: number): Record<string, number>;
|
|
18
25
|
/**
|
|
19
26
|
* Creates a token counter function using the specified encoding.
|
|
20
27
|
* Lazily loads the encoding data on first use via dynamic import.
|