@librechat/agents 3.2.34 → 3.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +47 -10
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +121 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +47 -10
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +122 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -3
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +7 -3
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +69 -6
- package/src/agents/__tests__/AgentContext.test.ts +6 -2
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +196 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/messages/content.ts +24 -32
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { nanoid } from 'nanoid';
|
|
2
2
|
import { HumanMessage } from '@langchain/core/messages';
|
|
3
3
|
import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
|
|
4
|
+
import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
|
|
5
|
+
import type { ChatGeneration, LLMResult } from '@langchain/core/outputs';
|
|
4
6
|
import type { Callbacks } from '@langchain/core/callbacks/manager';
|
|
5
|
-
import type { BaseMessage } from '@langchain/core/messages';
|
|
6
7
|
import type {
|
|
7
8
|
AgentInputs,
|
|
8
9
|
MessageDeltaEvent,
|
|
@@ -16,6 +17,7 @@ import type {
|
|
|
16
17
|
SubagentConfig,
|
|
17
18
|
SubagentUpdateEvent,
|
|
18
19
|
SubagentUpdatePhase,
|
|
20
|
+
SubagentUsageSink,
|
|
19
21
|
ToolExecuteBatchRequest,
|
|
20
22
|
ToolCallDelta,
|
|
21
23
|
TokenCounter,
|
|
@@ -24,7 +26,7 @@ import type { AggregatedHookResult, HookRegistry } from '@/hooks';
|
|
|
24
26
|
import type { AgentContext } from '@/agents/AgentContext';
|
|
25
27
|
import type { StandardGraph } from '@/graphs/Graph';
|
|
26
28
|
import type { HandlerRegistry } from '@/events';
|
|
27
|
-
import { GraphEvents, Callback, StepTypes } from '@/common';
|
|
29
|
+
import { Constants, GraphEvents, Callback, StepTypes } from '@/common';
|
|
28
30
|
import { executeHooks } from '@/hooks';
|
|
29
31
|
|
|
30
32
|
const DEFAULT_MAX_TURNS = 25;
|
|
@@ -236,6 +238,15 @@ export type SubagentExecutorOptions = {
|
|
|
236
238
|
* post-`createWorkflow`, so `createAgentNode` must capture lazily).
|
|
237
239
|
*/
|
|
238
240
|
parentHandlerRegistry?: HandlerRegistry | (() => HandlerRegistry | undefined);
|
|
241
|
+
/**
|
|
242
|
+
* Receives a usage event for every model call the child run makes. The
|
|
243
|
+
* child workflow executes via `invoke()` with a detached callbacks array,
|
|
244
|
+
* so its `on_chat_model_end` events never reach the parent's handler
|
|
245
|
+
* registry — without this sink, child token usage is invisible to the
|
|
246
|
+
* host (unbilled model calls). Forwarded into the child graph's input so
|
|
247
|
+
* nested subagents report through the same sink.
|
|
248
|
+
*/
|
|
249
|
+
usageSink?: SubagentUsageSink;
|
|
239
250
|
};
|
|
240
251
|
|
|
241
252
|
export class SubagentExecutor {
|
|
@@ -248,6 +259,7 @@ export class SubagentExecutor {
|
|
|
248
259
|
private readonly tokenCounter?: TokenCounter;
|
|
249
260
|
private readonly maxDepth: number;
|
|
250
261
|
private readonly createChildGraph: ChildGraphFactory;
|
|
262
|
+
private readonly usageSink?: SubagentUsageSink;
|
|
251
263
|
private readonly resolveParentHandlerRegistry?: () =>
|
|
252
264
|
| HandlerRegistry
|
|
253
265
|
| undefined;
|
|
@@ -262,6 +274,7 @@ export class SubagentExecutor {
|
|
|
262
274
|
this.tokenCounter = options.tokenCounter;
|
|
263
275
|
this.maxDepth = options.maxDepth ?? 1;
|
|
264
276
|
this.createChildGraph = options.createChildGraph;
|
|
277
|
+
this.usageSink = options.usageSink;
|
|
265
278
|
const rawRegistry = options.parentHandlerRegistry;
|
|
266
279
|
if (typeof rawRegistry === 'function') {
|
|
267
280
|
this.resolveParentHandlerRegistry = rawRegistry;
|
|
@@ -351,12 +364,35 @@ export class SubagentExecutor {
|
|
|
351
364
|
const childRunId = `${this.parentRunId}_sub_${nanoid(8)}`;
|
|
352
365
|
const maxTurns = config.maxTurns ?? DEFAULT_MAX_TURNS;
|
|
353
366
|
|
|
367
|
+
const hostUsageSink = this.usageSink;
|
|
354
368
|
const childGraph = this.createChildGraph({
|
|
355
369
|
runId: childRunId,
|
|
356
370
|
signal: this.parentSignal,
|
|
357
371
|
agents: [childInputs],
|
|
358
372
|
langfuse: this.langfuse,
|
|
359
373
|
tokenCounter: this.tokenCounter,
|
|
374
|
+
/**
|
|
375
|
+
* Forwarded so the child graph's own `SubagentExecutor` (created in
|
|
376
|
+
* its `createAgentNode` when `allowNested` keeps subagentConfigs)
|
|
377
|
+
* reports nested-child usage through the same host sink. Each nesting
|
|
378
|
+
* level attaches its own capture callback — `workflow.invoke` replaces
|
|
379
|
+
* the inherited callback chain, so a single top-level handler would
|
|
380
|
+
* never see grandchild model calls.
|
|
381
|
+
*
|
|
382
|
+
* The wrapper rewrites `runId` to THIS executor's parent run: nested
|
|
383
|
+
* executors emit with their own `parentRunId` (a `*_sub_*` child id),
|
|
384
|
+
* and each wrapper layer rewrites upward, so by the time an event
|
|
385
|
+
* reaches the host sink its `runId` is the ROOT run — hosts keying
|
|
386
|
+
* billing by run id never see intermediate child run ids there
|
|
387
|
+
* (`subagentRunId` still identifies the emitting child).
|
|
388
|
+
*/
|
|
389
|
+
subagentUsageSink:
|
|
390
|
+
hostUsageSink == null
|
|
391
|
+
? undefined
|
|
392
|
+
: /** Returns the host sink's result so async sinks stay awaited
|
|
393
|
+
* through every wrapper layer. */
|
|
394
|
+
(event): void | Promise<void> =>
|
|
395
|
+
hostUsageSink({ ...event, runId: this.parentRunId }),
|
|
360
396
|
});
|
|
361
397
|
|
|
362
398
|
let forwarding: ForwarderCallback | undefined;
|
|
@@ -402,7 +438,31 @@ export class SubagentExecutor {
|
|
|
402
438
|
* `runName` gives the child a distinct LangSmith trace root (avoids
|
|
403
439
|
* nested trace pollution).
|
|
404
440
|
*/
|
|
405
|
-
const
|
|
441
|
+
const callbackHandlers: BaseCallbackHandler[] = [];
|
|
442
|
+
if (forwarder) {
|
|
443
|
+
callbackHandlers.push(forwarder);
|
|
444
|
+
}
|
|
445
|
+
/**
|
|
446
|
+
* Usage capture rides the same detached callbacks array. Because
|
|
447
|
+
* `callbacks` REPLACES the inherited chain (see above), the host's
|
|
448
|
+
* `CHAT_MODEL_END` handler never observes the child's model calls —
|
|
449
|
+
* this handler is the child-side equivalent of `ModelEndHandler`,
|
|
450
|
+
* reporting per-call usage to the host's sink for billing.
|
|
451
|
+
*/
|
|
452
|
+
if (this.usageSink) {
|
|
453
|
+
callbackHandlers.push(
|
|
454
|
+
createUsageCaptureHandler({
|
|
455
|
+
sink: this.usageSink,
|
|
456
|
+
subagentType,
|
|
457
|
+
subagentRunId: childRunId,
|
|
458
|
+
subagentAgentId: childAgentId,
|
|
459
|
+
parentRunId: this.parentRunId,
|
|
460
|
+
provider: config.agentInputs.provider,
|
|
461
|
+
fallbackModel: extractConfiguredModel(config.agentInputs),
|
|
462
|
+
})
|
|
463
|
+
);
|
|
464
|
+
}
|
|
465
|
+
const callbacks: Callbacks = callbackHandlers;
|
|
406
466
|
/**
|
|
407
467
|
* Inherit the parent's host `configurable` — host-set fields
|
|
408
468
|
* (`requestBody`, `user`, `userMCPAuthMap`, etc.) AND the run-
|
|
@@ -719,6 +779,164 @@ export class SubagentExecutor {
|
|
|
719
779
|
}
|
|
720
780
|
}
|
|
721
781
|
|
|
782
|
+
/**
 * Creates the callback handler that reports a child run's model usage to the
 * host sink — the child-run counterpart of a host `CHAT_MODEL_END` handler.
 *
 * At chat-model start the handler remembers, per LangChain callback run id,
 * which model (`ls_model_name`, else `Constants.INVOKED_MODEL`) and which
 * provider (`Constants.INVOKED_PROVIDER`) the call carried in its metadata.
 * At LLM end it pairs that record with the `usage_metadata` found on the
 * result's generations and emits a {@link SubagentUsageEvent} through `sink`.
 *
 * The handler is meant to be placed in the child `workflow.invoke` callbacks
 * array, where it sees the model calls made inside that child graph. Deeper
 * subagent levels are not observed by it: each nesting level replaces the
 * callback chain and attaches its own capture handler through the
 * `subagentUsageSink` forwarded on the child graph's input.
 */
function createUsageCaptureHandler(args: {
  sink: SubagentUsageSink;
  subagentType: string;
  subagentRunId: string;
  subagentAgentId: string;
  parentRunId: string;
  /**
   * Provider enum from the child config; used as the event's provider when
   * a call carries no `INVOKED_PROVIDER` metadata.
   */
  provider?: string;
  /**
   * Model from the child config; used when a call carries neither
   * `ls_model_name` nor `INVOKED_MODEL` metadata.
   */
  fallbackModel?: string;
}): BaseCallbackHandler {
  const {
    sink,
    subagentType,
    subagentRunId,
    subagentAgentId,
    parentRunId,
    provider,
    fallbackModel,
  } = args;

  /**
   * Attribution captured at chat-model start, keyed by the LangChain callback
   * run id and consumed (then removed) when the same call ends or errors.
   */
  const attributionByCall = new Map<
    string,
    { model?: string; provider?: string }
  >();

  /**
   * Returns the first `usage_metadata` found among the choices of a single
   * generation group, or `undefined` when no choice reports usage.
   */
  const firstReportedUsage = (
    choices: LLMResult['generations'][number]
  ): UsageMetadata | undefined => {
    for (const choice of choices) {
      const message = (choice as ChatGeneration | undefined)?.message;
      const reported = (
        message as { usage_metadata?: UsageMetadata } | undefined
      )?.usage_metadata;
      if (reported != null) {
        return reported;
      }
    }
    return undefined;
  };

  const usageHandler = BaseCallbackHandler.fromMethods({
    handleChatModelStart: (
      _llm: unknown,
      _messages: unknown,
      runId: string,
      _parentRunId?: string,
      _extraParams?: Record<string, unknown>,
      _tags?: string[],
      metadata?: Record<string, unknown>
    ): void => {
      const invokedModel =
        asNonEmptyString(metadata?.ls_model_name) ??
        asNonEmptyString(metadata?.[Constants.INVOKED_MODEL]);
      const invokedProvider = asNonEmptyString(
        metadata?.[Constants.INVOKED_PROVIDER]
      );
      /** Nothing worth remembering — the config defaults apply at end time. */
      if (invokedModel == null && invokedProvider == null) {
        return;
      }
      attributionByCall.set(runId, {
        model: invokedModel,
        provider: invokedProvider,
      });
    },
    handleLLMEnd: async (output: LLMResult, runId: string): Promise<void> => {
      const attribution = attributionByCall.get(runId);
      attributionByCall.delete(runId);
      const model = attribution?.model ?? fallbackModel;
      const servedBy = attribution?.provider ?? provider;
      for (const choices of output.generations) {
        /**
         * One event at most per generation group: only the first choice that
         * reports usage is used, so repeated request-level usage on sibling
         * choices is not emitted again.
         */
        const usage = firstReportedUsage(choices);
        if (usage == null) {
          continue;
        }
        /**
         * The sink is awaited so an async host sink settles inside
         * `handleLLMEnd`; errors are swallowed because a throwing or
         * rejecting sink must not break the child run.
         */
        try {
          await sink({
            usage,
            model,
            provider: servedBy,
            subagentType,
            subagentRunId,
            subagentAgentId,
            runId: parentRunId,
          });
        } catch {
          /* observational — a throwing/rejecting host sink must not break the child run */
        }
      }
    },
    handleLLMError: (_err: unknown, runId: string): void => {
      attributionByCall.delete(runId);
    },
  });
  /**
   * Handlers are awaited so usage is dispatched in step with each model call
   * rather than in the background.
   */
  usageHandler.awaitHandlers = true;
  return usageHandler;
}
|
|
914
|
+
|
|
915
|
+
/**
 * Narrows an arbitrary value to a string with at least one character.
 *
 * @param value - Any value, typically read from loosely typed metadata.
 * @returns `value` itself when it is a non-empty string, otherwise `undefined`.
 */
function asNonEmptyString(value: unknown): string | undefined {
  if (typeof value !== 'string') {
    return undefined;
  }
  return value.length > 0 ? value : undefined;
}
|
|
918
|
+
|
|
919
|
+
/**
 * Best-effort lookup of the model configured on a subagent's client options.
 *
 * The options are probed for `model` first and `modelName` second, because
 * providers disagree on which key they use. The result only serves as a
 * fallback for calls that carry no `ls_model_name`.
 *
 * @param agentInputs - Subagent inputs whose `clientOptions` are inspected.
 * @returns The first non-empty string found under `model` or `modelName`,
 *   or `undefined` when neither key holds one.
 */
function extractConfiguredModel(agentInputs: AgentInputs): string | undefined {
  const options = agentInputs.clientOptions as
    | { model?: unknown; modelName?: unknown }
    | undefined;
  for (const key of ['model', 'modelName'] as const) {
    const candidate = options?.[key];
    if (typeof candidate === 'string' && candidate !== '') {
      return candidate;
    }
  }
  return undefined;
}
|
|
939
|
+
|
|
722
940
|
function sanitizeChildConfigurable(
|
|
723
941
|
parentConfigurable: Record<string, unknown> | undefined
|
|
724
942
|
): Record<string, unknown> {
|
package/src/types/graph.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type {
|
|
|
3
3
|
BaseMessage,
|
|
4
4
|
AIMessageChunk,
|
|
5
5
|
SystemMessage,
|
|
6
|
+
UsageMetadata,
|
|
6
7
|
} from '@langchain/core/messages';
|
|
7
8
|
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
|
|
8
9
|
import type { START, StateGraph, StateGraphArgs } from '@langchain/langgraph';
|
|
@@ -29,10 +30,10 @@ import type {
|
|
|
29
30
|
MessageDeltaEvent,
|
|
30
31
|
ReasoningDeltaEvent,
|
|
31
32
|
} from '@/types/stream';
|
|
33
|
+
import type { TokenCounter, TokenBudgetBreakdown } from '@/types/run';
|
|
32
34
|
import type { Providers, Callback, GraphNodeKeys } from '@/common';
|
|
33
35
|
import type { StandardGraph, MultiAgentGraph } from '@/graphs';
|
|
34
36
|
import type { ClientOptions } from '@/types/llm';
|
|
35
|
-
import type { TokenCounter } from '@/types/run';
|
|
36
37
|
|
|
37
38
|
/** Interface for bound model with stream and invoke methods */
|
|
38
39
|
export interface ChatModel {
|
|
@@ -89,6 +90,30 @@ export interface AgentLogEvent {
|
|
|
89
90
|
agentId?: string;
|
|
90
91
|
}
|
|
91
92
|
|
|
93
|
+
/**
 * Snapshot of context-window usage for one model call, dispatched after
 * pruning and before the model is invoked.
 *
 * It is dispatched once per `callModel` invocation. Fallback retries reuse the
 * same snapshot because the prompt is identical: the budget numbers reflect
 * the primary provider's tokenizer, while the calibration ratio self-corrects
 * from whichever provider reports usage.
 */
export interface ContextUsageEvent {
  runId?: string;
  agentId?: string;
  /** Structural token budget snapshot produced by `AgentContext.getTokenBudgetBreakdown`. */
  breakdown: TokenBudgetBreakdown;
  /** Usable budget for this call: `maxContextTokens` minus the output reserve. */
  contextBudget?: number;
  /** Calibrated instruction overhead that was actually applied to this call. */
  effectiveInstructionTokens?: number;
  /** Calibrated message tokens before pruning, not counting instructions. */
  prePruneContextTokens?: number;
  /** Tokens still free once instructions and the pruned messages are accounted for. */
  remainingContextTokens?: number;
  /** EMA ratio of provider-reported to locally estimated token counts. */
  calibrationRatio?: number;
}
|
|
116
|
+
|
|
92
117
|
export interface EventHandler {
|
|
93
118
|
handle(
|
|
94
119
|
event: string,
|
|
@@ -104,6 +129,7 @@ export interface EventHandler {
|
|
|
104
129
|
| SummarizeCompleteEvent
|
|
105
130
|
| SubagentUpdateEvent
|
|
106
131
|
| AgentLogEvent
|
|
132
|
+
| ContextUsageEvent
|
|
107
133
|
| ToolExecuteBatchRequest
|
|
108
134
|
| { result: ToolEndEvent },
|
|
109
135
|
metadata?: Record<string, unknown>,
|
|
@@ -299,6 +325,17 @@ export type StandardGraphInput = {
|
|
|
299
325
|
tokenCounter?: TokenCounter;
|
|
300
326
|
indexTokenCountMap?: Record<string, number>;
|
|
301
327
|
calibrationRatio?: number;
|
|
328
|
+
/**
|
|
329
|
+
* Receives a {@link SubagentUsageEvent} for every model call made inside
|
|
330
|
+
* a subagent child run spawned from this graph (including nested
|
|
331
|
+
* subagents and child-side summarization calls). Child graphs run via
|
|
332
|
+
* `invoke()` outside the host's `streamEvents` loop, so their
|
|
333
|
+
* `on_chat_model_end` events never reach the run's handler registry —
|
|
334
|
+
* this sink is the only way hosts can observe child token usage for
|
|
335
|
+
* billing/accounting. Parent-graph model calls are NOT reported here;
|
|
336
|
+
* they already flow through the registry's `CHAT_MODEL_END` handler.
|
|
337
|
+
*/
|
|
338
|
+
subagentUsageSink?: SubagentUsageSink;
|
|
302
339
|
};
|
|
303
340
|
|
|
304
341
|
export type GraphEdge = {
|
|
@@ -409,6 +446,62 @@ export interface SubagentUpdateEvent {
|
|
|
409
446
|
timestamp: string;
|
|
410
447
|
}
|
|
411
448
|
|
|
449
|
+
/**
 * Token usage of one model call made inside a subagent child run.
 *
 * Events are delivered through {@link SubagentUsageSink} as each call
 * completes. They let hosts bill child-run model usage, which never reaches
 * the parent run's `CHAT_MODEL_END` handler because child graphs execute via
 * `invoke()` outside the host's `streamEvents` loop.
 */
export interface SubagentUsageEvent {
  /** Usage metadata as reported by the child's model call. */
  usage: UsageMetadata;
  /**
   * Model that produced this usage, resolved in order of preference:
   * the per-call `ls_model_name` from the model's callback metadata (covers
   * child-side summarization or any call that differs from the configured
   * model), then the fallback invocation's configured model
   * (`INVOKED_MODEL` metadata), then the model from the subagent config's
   * `clientOptions`.
   */
  model?: string;
  /**
   * Provider that actually served the call: the SDK `Providers` enum value
   * stamped per invocation by `attemptInvoke` (`INVOKED_PROVIDER` metadata),
   * so a fallback-served call is attributed to the fallback provider rather
   * than the configured primary. When that metadata is absent, the subagent
   * config's provider is used. This is never LangSmith's `ls_provider`
   * string — derived providers inherit that from their base class, and hosts
   * key pricing/cache semantics off the enum.
   */
  provider?: string;
  /** The `type` identifier of the subagent, taken from its SubagentConfig. */
  subagentType: string;
  /** Run ID of the child run; unique per subagent execution. */
  subagentRunId: string;
  /** Agent ID assigned to the child for this subagent execution. */
  subagentAgentId: string;
  /**
   * ROOT run ID of the host run that owns billing. With nested subagents
   * every forwarding layer rewrites this value upward, so events from any
   * depth surface with the outermost run's ID and never with an intermediate
   * `*_sub_*` child id. Use {@link subagentRunId} to identify the child that
   * emitted the event.
   */
  runId: string;
}
|
|
492
|
+
|
|
493
|
+
/**
 * Callback supplied by the host to receive {@link SubagentUsageEvent}s, called
 * as each child model call completes.
 *
 * The sink may be asynchronous. The executor awaits every dispatch, so all
 * usage is recorded before the child's result resolves to the parent, and it
 * swallows both synchronous throws and promise rejections. Implementations
 * should nevertheless stay cheap because they run on the child's model-call
 * path.
 */
export type SubagentUsageSink = (
  event: SubagentUsageEvent
) => void | Promise<void>;
|
|
504
|
+
|
|
412
505
|
export type LangfuseToolOutputTracingConfig = {
|
|
413
506
|
/**
|
|
414
507
|
* Whether tool outputs should be exported to Langfuse. Defaults to
|
package/src/types/run.ts
CHANGED
|
@@ -125,6 +125,15 @@ export type RunConfig = {
|
|
|
125
125
|
*/
|
|
126
126
|
langfuse?: g.LangfuseConfig;
|
|
127
127
|
customHandlers?: Record<string, g.EventHandler>;
|
|
128
|
+
/**
|
|
129
|
+
* Receives token usage for every model call made inside subagent child
|
|
130
|
+
* runs (including nested subagents). Child graphs execute via `invoke()`
|
|
131
|
+
* outside this run's `streamEvents` loop, so their model-end events never
|
|
132
|
+
* reach `customHandlers` — without this sink, child usage is invisible to
|
|
133
|
+
* the host. Parent-graph calls are not reported here; they flow through
|
|
134
|
+
* the registered `CHAT_MODEL_END` handler as usual.
|
|
135
|
+
*/
|
|
136
|
+
subagentUsageSink?: g.SubagentUsageSink;
|
|
128
137
|
/**
|
|
129
138
|
* Pre-constructed hook registry for this run. Hooks fire at lifecycle
|
|
130
139
|
* points in `processStream` (RunStart, UserPromptSubmit, Stop,
|
|
@@ -242,6 +251,10 @@ export type TokenBudgetBreakdown = {
|
|
|
242
251
|
messageTokens: number;
|
|
243
252
|
/** Tokens available for messages after instructions. */
|
|
244
253
|
availableForMessages: number;
|
|
254
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name. */
|
|
255
|
+
toolTokenCounts?: Record<string, number>;
|
|
256
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
257
|
+
deferredToolNames?: string[];
|
|
245
258
|
};
|
|
246
259
|
|
|
247
260
|
export type EventStreamOptions = {
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { apportionTokenCounts } from '@/utils/tokens';
|
|
2
|
+
|
|
3
|
+
describe('apportionTokenCounts', () => {
  /** Adds up every per-entry count of an apportioned result. */
  const totalOf = (counts: Record<string, number>): number => {
    let total = 0;
    for (const value of Object.values(counts)) {
      total += value;
    }
    return total;
  };

  it('sums exactly to a ceil-of-sum aggregate despite per-entry fractions', () => {
    const scale = 1.4;
    const perTool = { add: 33, search: 33, fetch: 33 };
    const expectedTotal = Math.ceil((33 + 33 + 33) * scale);

    const apportioned = apportionTokenCounts(perTool, scale, expectedTotal);

    expect(totalOf(apportioned)).toBe(expectedTotal);
    expect(Object.keys(apportioned).sort()).toEqual(['add', 'fetch', 'search']);
  });

  it('gives larger remainders priority when distributing leftovers', () => {
    const { a, b } = apportionTokenCounts({ a: 10, b: 19 }, 1.05, 31);

    expect(a + b).toBe(31);
    expect(b).toBe(20);
    expect(a).toBe(11);
  });

  it('handles calibration-style rescaling to an arbitrary target', () => {
    const expectedTotal = 451;
    const original = { a: 100, b: 200, c: 300 };

    const rescaled = apportionTokenCounts(
      original,
      expectedTotal / 600,
      expectedTotal
    );

    expect(totalOf(rescaled)).toBe(expectedTotal);
  });

  it('returns an empty map for no entries', () => {
    const apportioned = apportionTokenCounts({}, 1.4, 10);

    expect(apportioned).toEqual({});
  });
});
|
package/src/utils/tokens.ts
CHANGED
|
@@ -395,6 +395,39 @@ export function getTokenCountForMessage(
|
|
|
395
395
|
return numTokens;
|
|
396
396
|
}
|
|
397
397
|
|
|
398
|
+
/**
 * Largest-remainder apportionment: scales each count by `multiplier`, floors
 * it, and then reconciles the rounding gap so the results sum to
 * `targetTotal`. This keeps a per-item breakdown consistent with an aggregate
 * computed as a single rounded product of the summed raw counts.
 *
 * Reconciliation works in both directions:
 * - When the floored sum falls short, units are handed out one at a time,
 *   largest fractional remainder first, cycling through the entries if the
 *   shortfall exceeds their number.
 * - When the floored sum overshoots, units are taken back one at a time,
 *   smallest fractional remainder first, never driving an entry below zero.
 *   If every entry reaches zero first, the remaining excess is dropped.
 *
 * A fractional `targetTotal` is reconciled to the next integer up. When the
 * gap is not finite (for example `targetTotal` is `Infinity` or `NaN`), the
 * floored values are returned unchanged instead of looping.
 *
 * @param rawCounts - Unscaled counts keyed by name.
 * @param multiplier - Factor applied to every raw count before flooring.
 * @param targetTotal - Aggregate the returned counts should add up to.
 * @returns A prototype-less map of name to reconciled integer count; empty
 *   when `rawCounts` has no entries.
 */
export function apportionTokenCounts(
  rawCounts: Record<string, number>,
  multiplier: number,
  targetTotal: number
): Record<string, number> {
  /** Prototype-less so arbitrary names cannot collide with `Object.prototype` members. */
  const result: Record<string, number> = Object.create(null);
  const remainders: Array<{ name: string; remainder: number }> = [];
  let floorSum = 0;
  for (const [name, rawCount] of Object.entries(rawCounts)) {
    const scaled = rawCount * multiplier;
    const floored = Math.floor(scaled);
    result[name] = floored;
    floorSum += floored;
    remainders.push({ name, remainder: scaled - floored });
  }

  /** Whole units still to add (positive) or to take back (negative). */
  const gap = Math.ceil(targetTotal - floorSum);
  if (remainders.length === 0 || !Number.isFinite(gap) || gap === 0) {
    return result;
  }

  if (gap > 0) {
    /** Shortfall: largest remainders are topped up first. */
    remainders.sort((a, b) => b.remainder - a.remainder);
    for (let handedOut = 0; handedOut < gap; handedOut++) {
      const entry = remainders[handedOut % remainders.length];
      if (entry === undefined) {
        break;
      }
      result[entry.name] = (result[entry.name] ?? 0) + 1;
    }
    return result;
  }

  /** Overshoot: smallest remainders give a unit back first. */
  remainders.sort((a, b) => a.remainder - b.remainder);
  let excess = -gap;
  while (excess > 0) {
    let removedAny = false;
    for (const { name } of remainders) {
      if (excess === 0) {
        break;
      }
      const current = result[name] ?? 0;
      if (current > 0) {
        result[name] = current - 1;
        excess--;
        removedAny = true;
      }
    }
    /** Every entry is already at zero (or below) — nothing left to take back. */
    if (!removedAny) {
      break;
    }
  }
  return result;
}
|
|
430
|
+
|
|
398
431
|
/**
|
|
399
432
|
* Anthropic's API consistently reports ~10% more tokens than the local
|
|
400
433
|
* claude tokenizer due to internal message framing and content encoding.
|