@librechat/agents 3.2.33 → 3.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +47 -10
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +121 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +21 -2
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +6 -0
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +48 -1
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +19 -0
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +20 -2
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +41 -4
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +47 -10
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +122 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +22 -3
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +6 -0
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +48 -1
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +19 -0
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -3
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +21 -3
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +41 -4
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
- package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +7 -3
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/llm/vertexai/index.d.ts +10 -0
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/ToolNode.d.ts +8 -0
- package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/types/tools.d.ts +10 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
- package/src/agents/AgentContext.ts +69 -6
- package/src/agents/__tests__/AgentContext.test.ts +6 -2
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +196 -0
- package/src/llm/bedrock/index.ts +40 -0
- package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
- package/src/llm/bedrock/utils/index.ts +1 -0
- package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
- package/src/llm/bedrock/utils/message_outputs.ts +43 -0
- package/src/llm/google/utils/common.test.ts +64 -0
- package/src/llm/google/utils/common.ts +18 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/llm/openai/index.ts +95 -1
- package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
- package/src/llm/vertexai/index.ts +31 -0
- package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
- package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
- package/src/messages/content.ts +24 -32
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/stream.ts +40 -6
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/ToolNode.ts +85 -3
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
- package/src/tools/streamedToolCallSeals.ts +37 -9
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/types/tools.ts +10 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokens.mjs","names":[],"sources":["../../../src/utils/tokens.ts"],"sourcesContent":["import { Tokenizer } from 'ai-tokenizer';\nimport type { BaseMessage } from '@langchain/core/messages';\nimport { ContentTypes } from '@/common/enum';\n\nexport type EncodingName = 'o200k_base' | 'claude';\n\n/** Anthropic minimum image token cost. */\nconst ANTHROPIC_IMAGE_MIN_TOKENS = 1024;\n/** Anthropic divisor: tokens = width × height / 750. */\nconst ANTHROPIC_IMAGE_DIVISOR = 750;\n/** OpenAI low-detail fixed cost. */\nconst OPENAI_IMAGE_LOW_TOKENS = 85;\n/** OpenAI high-detail tile size. */\nconst OPENAI_IMAGE_TILE_SIZE = 512;\n/** OpenAI high-detail tokens per tile. */\nconst OPENAI_IMAGE_TOKENS_PER_TILE = 170;\n/** Google Gemini fixed per-image cost. */\nconst _GEMINI_IMAGE_TOKENS = 258;\n/** Safety margin for image and document token estimates (5% overestimate). */\nconst IMAGE_TOKEN_SAFETY_MARGIN = 1.05;\n\n/**\n * Anthropic PDF: each page costs image tokens + text tokens.\n * Typical range is 1500-3000 tokens/page. Using 2000 as midpoint.\n */\nconst ANTHROPIC_PDF_TOKENS_PER_PAGE = 2000;\n/** OpenAI PDF: each page rendered as high-detail image. ~1500 tokens typical. */\nconst OPENAI_PDF_TOKENS_PER_PAGE = 1500;\n/** Gemini PDF: fixed 258 tokens per page. */\nconst _GEMINI_PDF_TOKENS_PER_PAGE = 258;\n/** Approximate base64 bytes per PDF page for page count estimation. */\nconst BASE64_BYTES_PER_PDF_PAGE = 75_000;\n/** Fallback token cost for URL-referenced documents without local data. */\nconst URL_DOCUMENT_FALLBACK_TOKENS = 2000;\n\n/**\n * Extracts image dimensions from the first bytes of a base64-encoded\n * PNG, JPEG, GIF, or WebP without decoding the full image.\n * Returns null if the format is unrecognized or data is too short.\n */\nexport function extractImageDimensions(\n base64Data: string\n): { width: number; height: number } | null {\n const raw = base64Data.startsWith('data:')\n ? base64Data.slice(base64Data.indexOf(',') + 1)\n : base64Data;\n\n if (raw.length < 32) {\n return null;\n }\n\n const bytes = new Uint8Array(Buffer.from(raw.slice(0, 80), 'base64'));\n\n if (bytes[0] === 0x89 && bytes[1] === 0x50) {\n // PNG: width at bytes 16-19, height at 20-23 (big-endian)\n const width =\n (bytes[16] << 24) | (bytes[17] << 16) | (bytes[18] << 8) | bytes[19];\n const height =\n (bytes[20] << 24) | (bytes[21] << 16) | (bytes[22] << 8) | bytes[23];\n return { width, height };\n }\n\n if (bytes[0] === 0xff && bytes[1] === 0xd8) {\n // JPEG: scan for SOF0 (0xFFC0) or SOF2 (0xFFC2) marker\n for (let i = 2; i < bytes.length - 9; i++) {\n if (\n bytes[i] === 0xff &&\n (bytes[i + 1] === 0xc0 || bytes[i + 1] === 0xc2)\n ) {\n const height = (bytes[i + 5] << 8) | bytes[i + 6];\n const width = (bytes[i + 7] << 8) | bytes[i + 8];\n return { width, height };\n }\n }\n return null;\n }\n\n if (bytes[0] === 0x47 && bytes[1] === 0x49 && bytes[2] === 0x46) {\n // GIF: width at bytes 6-7, height at 8-9 (little-endian)\n const width = bytes[6] | (bytes[7] << 8);\n const height = bytes[8] | (bytes[9] << 8);\n return { width, height };\n }\n\n if (\n bytes[0] === 0x52 &&\n bytes[1] === 0x49 &&\n bytes[2] === 0x46 &&\n bytes[3] === 0x46 &&\n bytes[8] === 0x57 &&\n bytes[9] === 0x45 &&\n bytes[10] === 0x42 &&\n bytes[11] === 0x50\n ) {\n // WebP VP8: width at bytes 26-27, height at 28-29\n if (bytes.length > 29) {\n const width = (bytes[26] | (bytes[27] << 8)) & 0x3fff;\n const height = (bytes[28] | (bytes[29] << 8)) & 0x3fff;\n return { width, height };\n }\n return null;\n }\n\n return null;\n}\n\n/** Estimates image token cost for Anthropic/Bedrock (Claude). */\nexport function estimateAnthropicImageTokens(\n width: number,\n height: number\n): number {\n return Math.max(\n ANTHROPIC_IMAGE_MIN_TOKENS,\n Math.ceil((width * height) / ANTHROPIC_IMAGE_DIVISOR)\n );\n}\n\n/** Estimates image token cost for OpenAI (high detail). */\nexport function estimateOpenAIImageTokens(\n width: number,\n height: number,\n detail: string = 'high'\n): number {\n if (detail === 'low') {\n return OPENAI_IMAGE_LOW_TOKENS;\n }\n const tiles =\n Math.ceil(width / OPENAI_IMAGE_TILE_SIZE) *\n Math.ceil(height / OPENAI_IMAGE_TILE_SIZE);\n return OPENAI_IMAGE_LOW_TOKENS + tiles * OPENAI_IMAGE_TOKENS_PER_TILE;\n}\n\n/**\n * Estimates token cost for an image content block.\n * Extracts dimensions from base64 header when available.\n * Falls back to Anthropic minimum (1024) when dimensions can't be determined.\n */\nfunction estimateImageBlockTokens(\n block: Record<string, unknown>,\n encoding: EncodingName\n): number {\n let base64Data: string | undefined;\n\n if (block.type === ContentTypes.IMAGE_URL || block.type === 'image_url') {\n const imageUrl = block.image_url as string | { url?: string } | undefined;\n const url = typeof imageUrl === 'string' ? imageUrl : imageUrl?.url;\n if (typeof url === 'string' && url.startsWith('data:')) {\n base64Data = url;\n } else {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n } else if (block.type === 'image') {\n const source = block.source as { type?: string; data?: string } | undefined;\n if (source?.type === 'base64' && typeof source.data === 'string') {\n base64Data = source.data;\n } else {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n } else {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n\n const dims = extractImageDimensions(base64Data);\n if (dims == null) {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n\n if (encoding === 'claude') {\n return estimateAnthropicImageTokens(dims.width, dims.height);\n }\n return estimateOpenAIImageTokens(dims.width, dims.height);\n}\n\n/**\n * Estimates token cost for a document/file content block.\n * Handles both LangChain standard format (`type: 'file'` with `source_type`)\n * and Anthropic format (`type: 'document'` with `source`).\n *\n * - Plain text: tokenized directly via `getTokenCount`.\n * - Base64 PDF: page count estimated from base64 length × per-page cost.\n * - URL reference: conservative flat estimate.\n */\nfunction estimateDocumentBlockTokens(\n block: Record<string, unknown>,\n encoding: EncodingName,\n getTokenCount: (text: string) => number\n): number {\n const pdfTokensPerPage =\n encoding === 'claude'\n ? ANTHROPIC_PDF_TOKENS_PER_PAGE\n : OPENAI_PDF_TOKENS_PER_PAGE;\n\n // LangChain standard format: type='file', source_type, data/text/url, mime_type\n const sourceType = block.source_type as string | undefined;\n if (typeof sourceType === 'string') {\n const mimeType = ((block.mime_type as string | undefined) ?? '').split(\n ';'\n )[0];\n\n if (sourceType === 'text' && typeof block.text === 'string') {\n return getTokenCount(block.text as string);\n }\n\n if (sourceType === 'base64' && typeof block.data === 'string') {\n if (mimeType === 'application/pdf' || mimeType === '') {\n const pageEstimate = Math.max(\n 1,\n Math.ceil((block.data as string).length / BASE64_BYTES_PER_PDF_PAGE)\n );\n return pageEstimate * pdfTokensPerPage;\n }\n // Image inside a file block — delegate to image estimation\n if (mimeType.startsWith('image/')) {\n return estimateImageBlockTokens(\n {\n ...block,\n type: 'image',\n source: { type: 'base64', data: block.data },\n },\n encoding\n );\n }\n return getTokenCount(block.data as string);\n }\n\n if (sourceType === 'url') {\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n // Anthropic format: type='document', source: { type, data, media_type }\n const source = block.source as\n | {\n type?: string;\n data?: string;\n media_type?: string;\n content?: unknown[];\n }\n | undefined;\n\n if (source == null) {\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n if (source.type === 'text' && typeof source.data === 'string') {\n return getTokenCount(source.data);\n }\n\n if (source.type === 'base64' && typeof source.data === 'string') {\n const mediaType = (source.media_type ?? '').split(';')[0];\n if (mediaType === 'application/pdf' || mediaType === '') {\n const pageEstimate = Math.max(\n 1,\n Math.ceil(source.data.length / BASE64_BYTES_PER_PDF_PAGE)\n );\n return pageEstimate * pdfTokensPerPage;\n }\n if (mediaType.startsWith('image/')) {\n return estimateImageBlockTokens(\n { type: 'image', source: { type: 'base64', data: source.data } },\n encoding\n );\n }\n return getTokenCount(source.data);\n }\n\n if (source.type === 'url') {\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n // content-type source (wraps other blocks like images)\n if (source.type === 'content' && Array.isArray(source.content)) {\n let total = 0;\n for (const inner of source.content) {\n if (inner != null && typeof inner === 'object' && 'type' in inner) {\n const innerBlock = inner as Record<string, unknown>;\n if (innerBlock.type === 'image') {\n total += estimateImageBlockTokens(innerBlock, encoding);\n }\n }\n }\n return total > 0 ? total : URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n return URL_DOCUMENT_FALLBACK_TOKENS;\n}\n\nconst tokenizers: Partial<Record<EncodingName, Tokenizer>> = {};\n\nasync function getTokenizer(\n encoding: EncodingName = 'o200k_base'\n): Promise<Tokenizer> {\n const cached = tokenizers[encoding];\n if (cached) {\n return cached;\n }\n const data =\n encoding === 'claude'\n ? await import('ai-tokenizer/encoding/claude')\n : await import('ai-tokenizer/encoding/o200k_base');\n const instance = new Tokenizer(data);\n tokenizers[encoding] = instance;\n return instance;\n}\n\nexport function encodingForModel(model: string): EncodingName {\n if (model.toLowerCase().includes('claude')) {\n return 'claude';\n }\n return 'o200k_base';\n}\n\nexport function getTokenCountForMessage(\n message: BaseMessage,\n getTokenCount: (text: string) => number,\n encoding: EncodingName = 'o200k_base'\n): number {\n const tokensPerMessage = 3;\n\n type ContentBlock = Record<string, unknown> & {\n type?: string;\n tool_call?: { name?: string; args?: string; output?: string };\n };\n\n const processValue = (value: unknown): void => {\n if (Array.isArray(value)) {\n for (const raw of value) {\n const item = raw as ContentBlock | null | undefined;\n if (item == null || typeof item.type !== 'string') {\n continue;\n }\n if (item.type === ContentTypes.ERROR) {\n continue;\n }\n\n if (\n item.type === ContentTypes.IMAGE_URL ||\n item.type === 'image_url' ||\n item.type === 'image'\n ) {\n numTokens += Math.ceil(\n estimateImageBlockTokens(item, encoding) * IMAGE_TOKEN_SAFETY_MARGIN\n );\n continue;\n }\n\n if (\n item.type === 'document' ||\n item.type === 'file' ||\n item.type === ContentTypes.IMAGE_FILE\n ) {\n numTokens += Math.ceil(\n estimateDocumentBlockTokens(item, encoding, getTokenCount) *\n IMAGE_TOKEN_SAFETY_MARGIN\n );\n continue;\n }\n\n if (item.type === ContentTypes.TOOL_CALL && item.tool_call != null) {\n const toolName = item.tool_call.name;\n if (typeof toolName === 'string' && toolName.length > 0) {\n numTokens += getTokenCount(toolName);\n }\n const args = item.tool_call.args;\n if (typeof args === 'string' && args.length > 0) {\n numTokens += getTokenCount(args);\n }\n const output = item.tool_call.output;\n if (typeof output === 'string' && output.length > 0) {\n numTokens += getTokenCount(output);\n }\n continue;\n }\n\n const nestedValue = item[item.type];\n if (nestedValue == null) {\n continue;\n }\n\n processValue(nestedValue);\n }\n } else if (typeof value === 'string') {\n numTokens += getTokenCount(value);\n } else if (typeof value === 'number') {\n numTokens += getTokenCount(value.toString());\n } else if (typeof value === 'boolean') {\n numTokens += getTokenCount(value.toString());\n }\n };\n\n let numTokens = tokensPerMessage;\n processValue(message.content);\n return numTokens;\n}\n\n/**\n * Anthropic's API consistently reports ~10% more tokens than the local\n * claude tokenizer due to internal message framing and content encoding.\n * Verified empirically across content types via the count_tokens endpoint.\n */\nconst CLAUDE_TOKEN_CORRECTION = 1.1;\n\n/**\n * Creates a token counter function using the specified encoding.\n * Lazily loads the encoding data on first use via dynamic import.\n */\nexport const createTokenCounter = async (\n encoding: EncodingName = 'o200k_base'\n): Promise<(message: BaseMessage) => number> => {\n const tok = await getTokenizer(encoding);\n const countTokens = (text: string): number => tok.count(text);\n const isClaude = encoding === 'claude';\n return (message: BaseMessage): number => {\n const count = getTokenCountForMessage(message, countTokens, encoding);\n return isClaude ? Math.ceil(count * CLAUDE_TOKEN_CORRECTION) : count;\n };\n};\n\n/** Utility to manage the token encoder lifecycle explicitly. */\nexport const TokenEncoderManager = {\n async initialize(): Promise<void> {\n // No-op: ai-tokenizer is synchronously initialized from bundled data.\n },\n\n reset(): void {\n for (const key of Object.keys(tokenizers)) {\n delete tokenizers[key as EncodingName];\n }\n },\n\n isInitialized(): boolean {\n return Object.keys(tokenizers).length > 0;\n },\n};\n"],"mappings":";;;;AAOA,MAAM,6BAA6B;;AAEnC,MAAM,0BAA0B;;AAEhC,MAAM,0BAA0B;;AAEhC,MAAM,yBAAyB;;AAE/B,MAAM,+BAA+B;;AAIrC,MAAM,4BAA4B;;;;;AAMlC,MAAM,gCAAgC;;AAEtC,MAAM,6BAA6B;;AAInC,MAAM,4BAA4B;;AAElC,MAAM,+BAA+B;;;;;;AAOrC,SAAgB,uBACd,YAC0C;CAC1C,MAAM,MAAM,WAAW,WAAW,OAAO,IACrC,WAAW,MAAM,WAAW,QAAQ,GAAG,IAAI,CAAC,IAC5C;CAEJ,IAAI,IAAI,SAAS,IACf,OAAO;CAGT,MAAM,QAAQ,IAAI,WAAW,OAAO,KAAK,IAAI,MAAM,GAAG,EAAE,GAAG,QAAQ,CAAC;CAEpE,IAAI,MAAM,OAAO,OAAQ,MAAM,OAAO,IAMpC,OAAO;EAAE,OAHN,MAAM,OAAO,KAAO,MAAM,OAAO,KAAO,MAAM,OAAO,IAAK,MAAM;EAGnD,QADb,MAAM,OAAO,KAAO,MAAM,OAAO,KAAO,MAAM,OAAO,IAAK,MAAM;CAC5C;CAGzB,IAAI,MAAM,OAAO,OAAQ,MAAM,OAAO,KAAM;EAE1C,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,SAAS,GAAG,KACpC,IACE,MAAM,OAAO,QACZ,MAAM,IAAI,OAAO,OAAQ,MAAM,IAAI,OAAO,MAC3C;GACA,MAAM,SAAU,MAAM,IAAI,MAAM,IAAK,MAAM,IAAI;GAE/C,OAAO;IAAE,OADM,MAAM,IAAI,MAAM,IAAK,MAAM,IAAI;IAC9B;GAAO;EACzB;EAEF,OAAO;CACT;CAEA,IAAI,MAAM,OAAO,MAAQ,MAAM,OAAO,MAAQ,MAAM,OAAO,IAIzD,OAAO;EAAE,OAFK,MAAM,KAAM,MAAM,MAAM;EAEtB,QADD,MAAM,KAAM,MAAM,MAAM;CAChB;CAGzB,IACE,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,QAAQ,MACd,MAAM,QAAQ,IACd;EAEA,IAAI,MAAM,SAAS,IAGjB,OAAO;GAAE,QAFM,MAAM,MAAO,MAAM,OAAO,KAAM;GAE/B,SADA,MAAM,MAAO,MAAM,OAAO,KAAM;EACzB;EAEzB,OAAO;CACT;CAEA,OAAO;AACT;;AAGA,SAAgB,6BACd,OACA,QACQ;CACR,OAAO,KAAK,IACV,4BACA,KAAK,KAAM,QAAQ,SAAU,uBAAuB,CACtD;AACF;;AAGA,SAAgB,0BACd,OACA,QACA,SAAiB,QACT;CACR,IAAI,WAAW,OACb,OAAO;CAKT,OAAO,0BAFL,KAAK,KAAK,QAAQ,sBAAsB,IACxC,KAAK,KAAK,SAAS,sBAAsB,IACF;AAC3C;;;;;;AAOA,SAAS,yBACP,OACA,UACQ;CACR,IAAI;CAEJ,IAAI,MAAM,SAAA,eAAmC,MAAM,SAAS,aAAa;EACvE,MAAM,WAAW,MAAM;EACvB,MAAM,MAAM,OAAO,aAAa,WAAW,WAAW,UAAU;EAChE,IAAI,OAAO,QAAQ,YAAY,IAAI,WAAW,OAAO,GACnD,aAAa;OAEb,OAAO;CAEX,OAAO,IAAI,MAAM,SAAS,SAAS;EACjC,MAAM,SAAS,MAAM;EACrB,IAAI,QAAQ,SAAS,YAAY,OAAO,OAAO,SAAS,UACtD,aAAa,OAAO;OAEpB,OAAO;CAEX,OACE,OAAO;CAGT,MAAM,OAAO,uBAAuB,UAAU;CAC9C,IAAI,QAAQ,MACV,OAAO;CAGT,IAAI,aAAa,UACf,OAAO,6BAA6B,KAAK,OAAO,KAAK,MAAM;CAE7D,OAAO,0BAA0B,KAAK,OAAO,KAAK,MAAM;AAC1D;;;;;;;;;;AAWA,SAAS,4BACP,OACA,UACA,eACQ;CACR,MAAM,mBACJ,aAAa,WACT,gCACA;CAGN,MAAM,aAAa,MAAM;CACzB,IAAI,OAAO,eAAe,UAAU;EAClC,MAAM,YAAa,MAAM,aAAoC,GAAA,CAAI,MAC/D,GACF,CAAC,CAAC;EAEF,IAAI,eAAe,UAAU,OAAO,MAAM,SAAS,UACjD,OAAO,cAAc,MAAM,IAAc;EAG3C,IAAI,eAAe,YAAY,OAAO,MAAM,SAAS,UAAU;GAC7D,IAAI,aAAa,qBAAqB,aAAa,IAKjD,OAJqB,KAAK,IACxB,GACA,KAAK,KAAM,MAAM,KAAgB,SAAS,yBAAyB,CAEnD,IAAI;GAGxB,IAAI,SAAS,WAAW,QAAQ,GAC9B,OAAO,yBACL;IACE,GAAG;IACH,MAAM;IACN,QAAQ;KAAE,MAAM;KAAU,MAAM,MAAM;IAAK;GAC7C,GACA,QACF;GAEF,OAAO,cAAc,MAAM,IAAc;EAC3C;EAEA,IAAI,eAAe,OACjB,OAAO;EAGT,OAAO;CACT;CAGA,MAAM,SAAS,MAAM;CASrB,IAAI,UAAU,MACZ,OAAO;CAGT,IAAI,OAAO,SAAS,UAAU,OAAO,OAAO,SAAS,UACnD,OAAO,cAAc,OAAO,IAAI;CAGlC,IAAI,OAAO,SAAS,YAAY,OAAO,OAAO,SAAS,UAAU;EAC/D,MAAM,aAAa,OAAO,cAAc,GAAA,CAAI,MAAM,GAAG,CAAC,CAAC;EACvD,IAAI,cAAc,qBAAqB,cAAc,IAKnD,OAJqB,KAAK,IACxB,GACA,KAAK,KAAK,OAAO,KAAK,SAAS,yBAAyB,CAExC,IAAI;EAExB,IAAI,UAAU,WAAW,QAAQ,GAC/B,OAAO,yBACL;GAAE,MAAM;GAAS,QAAQ;IAAE,MAAM;IAAU,MAAM,OAAO;GAAK;EAAE,GAC/D,QACF;EAEF,OAAO,cAAc,OAAO,IAAI;CAClC;CAEA,IAAI,OAAO,SAAS,OAClB,OAAO;CAIT,IAAI,OAAO,SAAS,aAAa,MAAM,QAAQ,OAAO,OAAO,GAAG;EAC9D,IAAI,QAAQ;EACZ,KAAK,MAAM,SAAS,OAAO,SACzB,IAAI,SAAS,QAAQ,OAAO,UAAU,YAAY,UAAU,OAAO;GACjE,MAAM,aAAa;GACnB,IAAI,WAAW,SAAS,SACtB,SAAS,yBAAyB,YAAY,QAAQ;EAE1D;EAEF,OAAO,QAAQ,IAAI,QAAQ;CAC7B;CAEA,OAAO;AACT;AAEA,MAAM,aAAuD,CAAC;AAE9D,eAAe,aACb,WAAyB,cACL;CACpB,MAAM,SAAS,WAAW;CAC1B,IAAI,QACF,OAAO;CAMT,MAAM,WAAW,IAAI,UAHnB,aAAa,WACT,MAAM,OAAO,kCACb,MAAM,OAAO,mCACgB;CACnC,WAAW,YAAY;CACvB,OAAO;AACT;AAEA,SAAgB,iBAAiB,OAA6B;CAC5D,IAAI,MAAM,YAAY,CAAC,CAAC,SAAS,QAAQ,GACvC,OAAO;CAET,OAAO;AACT;AAEA,SAAgB,wBACd,SACA,eACA,WAAyB,cACjB;CACR,MAAM,mBAAmB;CAOzB,MAAM,gBAAgB,UAAyB;EAC7C,IAAI,MAAM,QAAQ,KAAK,GACrB,KAAK,MAAM,OAAO,OAAO;GACvB,MAAM,OAAO;GACb,IAAI,QAAQ,QAAQ,OAAO,KAAK,SAAS,UACvC;GAEF,IAAI,KAAK,SAAA,SACP;GAGF,IACE,KAAK,SAAA,eACL,KAAK,SAAS,eACd,KAAK,SAAS,SACd;IACA,aAAa,KAAK,KAChB,yBAAyB,MAAM,QAAQ,IAAI,yBAC7C;IACA;GACF;GAEA,IACE,KAAK,SAAS,cACd,KAAK,SAAS,UACd,KAAK,SAAA,cACL;IACA,aAAa,KAAK,KAChB,4BAA4B,MAAM,UAAU,aAAa,IACvD,yBACJ;IACA;GACF;GAEA,IAAI,KAAK,SAAA,eAAmC,KAAK,aAAa,MAAM;IAClE,MAAM,WAAW,KAAK,UAAU;IAChC,IAAI,OAAO,aAAa,YAAY,SAAS,SAAS,GACpD,aAAa,cAAc,QAAQ;IAErC,MAAM,OAAO,KAAK,UAAU;IAC5B,IAAI,OAAO,SAAS,YAAY,KAAK,SAAS,GAC5C,aAAa,cAAc,IAAI;IAEjC,MAAM,SAAS,KAAK,UAAU;IAC9B,IAAI,OAAO,WAAW,YAAY,OAAO,SAAS,GAChD,aAAa,cAAc,MAAM;IAEnC;GACF;GAEA,MAAM,cAAc,KAAK,KAAK;GAC9B,IAAI,eAAe,MACjB;GAGF,aAAa,WAAW;EAC1B;OACK,IAAI,OAAO,UAAU,UAC1B,aAAa,cAAc,KAAK;OAC3B,IAAI,OAAO,UAAU,UAC1B,aAAa,cAAc,MAAM,SAAS,CAAC;OACtC,IAAI,OAAO,UAAU,WAC1B,aAAa,cAAc,MAAM,SAAS,CAAC;CAE/C;CAEA,IAAI,YAAY;CAChB,aAAa,QAAQ,OAAO;CAC5B,OAAO;AACT;;;;;;AAOA,MAAM,0BAA0B;;;;;AAMhC,MAAa,qBAAqB,OAChC,WAAyB,iBACqB;CAC9C,MAAM,MAAM,MAAM,aAAa,QAAQ;CACvC,MAAM,eAAe,SAAyB,IAAI,MAAM,IAAI;CAC5D,MAAM,WAAW,aAAa;CAC9B,QAAQ,YAAiC;EACvC,MAAM,QAAQ,wBAAwB,SAAS,aAAa,QAAQ;EACpE,OAAO,WAAW,KAAK,KAAK,QAAQ,uBAAuB,IAAI;CACjE;AACF;;AAGA,MAAa,sBAAsB;CACjC,MAAM,aAA4B,CAElC;CAEA,QAAc;EACZ,KAAK,MAAM,OAAO,OAAO,KAAK,UAAU,GACtC,OAAO,WAAW;CAEtB;CAEA,gBAAyB;EACvB,OAAO,OAAO,KAAK,UAAU,CAAC,CAAC,SAAS;CAC1C;AACF"}
|
|
1
|
+
{"version":3,"file":"tokens.mjs","names":[],"sources":["../../../src/utils/tokens.ts"],"sourcesContent":["import { Tokenizer } from 'ai-tokenizer';\nimport type { BaseMessage } from '@langchain/core/messages';\nimport { ContentTypes } from '@/common/enum';\n\nexport type EncodingName = 'o200k_base' | 'claude';\n\n/** Anthropic minimum image token cost. */\nconst ANTHROPIC_IMAGE_MIN_TOKENS = 1024;\n/** Anthropic divisor: tokens = width × height / 750. */\nconst ANTHROPIC_IMAGE_DIVISOR = 750;\n/** OpenAI low-detail fixed cost. */\nconst OPENAI_IMAGE_LOW_TOKENS = 85;\n/** OpenAI high-detail tile size. */\nconst OPENAI_IMAGE_TILE_SIZE = 512;\n/** OpenAI high-detail tokens per tile. */\nconst OPENAI_IMAGE_TOKENS_PER_TILE = 170;\n/** Google Gemini fixed per-image cost. */\nconst _GEMINI_IMAGE_TOKENS = 258;\n/** Safety margin for image and document token estimates (5% overestimate). */\nconst IMAGE_TOKEN_SAFETY_MARGIN = 1.05;\n\n/**\n * Anthropic PDF: each page costs image tokens + text tokens.\n * Typical range is 1500-3000 tokens/page. Using 2000 as midpoint.\n */\nconst ANTHROPIC_PDF_TOKENS_PER_PAGE = 2000;\n/** OpenAI PDF: each page rendered as high-detail image. ~1500 tokens typical. */\nconst OPENAI_PDF_TOKENS_PER_PAGE = 1500;\n/** Gemini PDF: fixed 258 tokens per page. */\nconst _GEMINI_PDF_TOKENS_PER_PAGE = 258;\n/** Approximate base64 bytes per PDF page for page count estimation. */\nconst BASE64_BYTES_PER_PDF_PAGE = 75_000;\n/** Fallback token cost for URL-referenced documents without local data. */\nconst URL_DOCUMENT_FALLBACK_TOKENS = 2000;\n\n/**\n * Extracts image dimensions from the first bytes of a base64-encoded\n * PNG, JPEG, GIF, or WebP without decoding the full image.\n * Returns null if the format is unrecognized or data is too short.\n */\nexport function extractImageDimensions(\n base64Data: string\n): { width: number; height: number } | null {\n const raw = base64Data.startsWith('data:')\n ? base64Data.slice(base64Data.indexOf(',') + 1)\n : base64Data;\n\n if (raw.length < 32) {\n return null;\n }\n\n const bytes = new Uint8Array(Buffer.from(raw.slice(0, 80), 'base64'));\n\n if (bytes[0] === 0x89 && bytes[1] === 0x50) {\n // PNG: width at bytes 16-19, height at 20-23 (big-endian)\n const width =\n (bytes[16] << 24) | (bytes[17] << 16) | (bytes[18] << 8) | bytes[19];\n const height =\n (bytes[20] << 24) | (bytes[21] << 16) | (bytes[22] << 8) | bytes[23];\n return { width, height };\n }\n\n if (bytes[0] === 0xff && bytes[1] === 0xd8) {\n // JPEG: scan for SOF0 (0xFFC0) or SOF2 (0xFFC2) marker\n for (let i = 2; i < bytes.length - 9; i++) {\n if (\n bytes[i] === 0xff &&\n (bytes[i + 1] === 0xc0 || bytes[i + 1] === 0xc2)\n ) {\n const height = (bytes[i + 5] << 8) | bytes[i + 6];\n const width = (bytes[i + 7] << 8) | bytes[i + 8];\n return { width, height };\n }\n }\n return null;\n }\n\n if (bytes[0] === 0x47 && bytes[1] === 0x49 && bytes[2] === 0x46) {\n // GIF: width at bytes 6-7, height at 8-9 (little-endian)\n const width = bytes[6] | (bytes[7] << 8);\n const height = bytes[8] | (bytes[9] << 8);\n return { width, height };\n }\n\n if (\n bytes[0] === 0x52 &&\n bytes[1] === 0x49 &&\n bytes[2] === 0x46 &&\n bytes[3] === 0x46 &&\n bytes[8] === 0x57 &&\n bytes[9] === 0x45 &&\n bytes[10] === 0x42 &&\n bytes[11] === 0x50\n ) {\n // WebP VP8: width at bytes 26-27, height at 28-29\n if (bytes.length > 29) {\n const width = (bytes[26] | (bytes[27] << 8)) & 0x3fff;\n const height = (bytes[28] | (bytes[29] << 8)) & 0x3fff;\n return { width, height };\n }\n return null;\n }\n\n return null;\n}\n\n/** Estimates image token cost for Anthropic/Bedrock (Claude). */\nexport function estimateAnthropicImageTokens(\n width: number,\n height: number\n): number {\n return Math.max(\n ANTHROPIC_IMAGE_MIN_TOKENS,\n Math.ceil((width * height) / ANTHROPIC_IMAGE_DIVISOR)\n );\n}\n\n/** Estimates image token cost for OpenAI (high detail). */\nexport function estimateOpenAIImageTokens(\n width: number,\n height: number,\n detail: string = 'high'\n): number {\n if (detail === 'low') {\n return OPENAI_IMAGE_LOW_TOKENS;\n }\n const tiles =\n Math.ceil(width / OPENAI_IMAGE_TILE_SIZE) *\n Math.ceil(height / OPENAI_IMAGE_TILE_SIZE);\n return OPENAI_IMAGE_LOW_TOKENS + tiles * OPENAI_IMAGE_TOKENS_PER_TILE;\n}\n\n/**\n * Estimates token cost for an image content block.\n * Extracts dimensions from base64 header when available.\n * Falls back to Anthropic minimum (1024) when dimensions can't be determined.\n */\nfunction estimateImageBlockTokens(\n block: Record<string, unknown>,\n encoding: EncodingName\n): number {\n let base64Data: string | undefined;\n\n if (block.type === ContentTypes.IMAGE_URL || block.type === 'image_url') {\n const imageUrl = block.image_url as string | { url?: string } | undefined;\n const url = typeof imageUrl === 'string' ? imageUrl : imageUrl?.url;\n if (typeof url === 'string' && url.startsWith('data:')) {\n base64Data = url;\n } else {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n } else if (block.type === 'image') {\n const source = block.source as { type?: string; data?: string } | undefined;\n if (source?.type === 'base64' && typeof source.data === 'string') {\n base64Data = source.data;\n } else {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n } else {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n\n const dims = extractImageDimensions(base64Data);\n if (dims == null) {\n return ANTHROPIC_IMAGE_MIN_TOKENS;\n }\n\n if (encoding === 'claude') {\n return estimateAnthropicImageTokens(dims.width, dims.height);\n }\n return estimateOpenAIImageTokens(dims.width, dims.height);\n}\n\n/**\n * Estimates token cost for a document/file content block.\n * Handles both LangChain standard format (`type: 'file'` with `source_type`)\n * and Anthropic format (`type: 'document'` with `source`).\n *\n * - Plain text: tokenized directly via `getTokenCount`.\n * - Base64 PDF: page count estimated from base64 length × per-page cost.\n * - URL reference: conservative flat estimate.\n */\nfunction estimateDocumentBlockTokens(\n block: Record<string, unknown>,\n encoding: EncodingName,\n getTokenCount: (text: string) => number\n): number {\n const pdfTokensPerPage =\n encoding === 'claude'\n ? ANTHROPIC_PDF_TOKENS_PER_PAGE\n : OPENAI_PDF_TOKENS_PER_PAGE;\n\n // LangChain standard format: type='file', source_type, data/text/url, mime_type\n const sourceType = block.source_type as string | undefined;\n if (typeof sourceType === 'string') {\n const mimeType = ((block.mime_type as string | undefined) ?? '').split(\n ';'\n )[0];\n\n if (sourceType === 'text' && typeof block.text === 'string') {\n return getTokenCount(block.text as string);\n }\n\n if (sourceType === 'base64' && typeof block.data === 'string') {\n if (mimeType === 'application/pdf' || mimeType === '') {\n const pageEstimate = Math.max(\n 1,\n Math.ceil((block.data as string).length / BASE64_BYTES_PER_PDF_PAGE)\n );\n return pageEstimate * pdfTokensPerPage;\n }\n // Image inside a file block — delegate to image estimation\n if (mimeType.startsWith('image/')) {\n return estimateImageBlockTokens(\n {\n ...block,\n type: 'image',\n source: { type: 'base64', data: block.data },\n },\n encoding\n );\n }\n return getTokenCount(block.data as string);\n }\n\n if (sourceType === 'url') {\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n // Anthropic format: type='document', source: { type, data, media_type }\n const source = block.source as\n | {\n type?: string;\n data?: string;\n media_type?: string;\n content?: unknown[];\n }\n | undefined;\n\n if (source == null) {\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n if (source.type === 'text' && typeof source.data === 'string') {\n return getTokenCount(source.data);\n }\n\n if (source.type === 'base64' && typeof source.data === 'string') {\n const mediaType = (source.media_type ?? '').split(';')[0];\n if (mediaType === 'application/pdf' || mediaType === '') {\n const pageEstimate = Math.max(\n 1,\n Math.ceil(source.data.length / BASE64_BYTES_PER_PDF_PAGE)\n );\n return pageEstimate * pdfTokensPerPage;\n }\n if (mediaType.startsWith('image/')) {\n return estimateImageBlockTokens(\n { type: 'image', source: { type: 'base64', data: source.data } },\n encoding\n );\n }\n return getTokenCount(source.data);\n }\n\n if (source.type === 'url') {\n return URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n // content-type source (wraps other blocks like images)\n if (source.type === 'content' && Array.isArray(source.content)) {\n let total = 0;\n for (const inner of source.content) {\n if (inner != null && typeof inner === 'object' && 'type' in inner) {\n const innerBlock = inner as Record<string, unknown>;\n if (innerBlock.type === 'image') {\n total += estimateImageBlockTokens(innerBlock, encoding);\n }\n }\n }\n return total > 0 ? total : URL_DOCUMENT_FALLBACK_TOKENS;\n }\n\n return URL_DOCUMENT_FALLBACK_TOKENS;\n}\n\nconst tokenizers: Partial<Record<EncodingName, Tokenizer>> = {};\n\nasync function getTokenizer(\n encoding: EncodingName = 'o200k_base'\n): Promise<Tokenizer> {\n const cached = tokenizers[encoding];\n if (cached) {\n return cached;\n }\n const data =\n encoding === 'claude'\n ? await import('ai-tokenizer/encoding/claude')\n : await import('ai-tokenizer/encoding/o200k_base');\n const instance = new Tokenizer(data);\n tokenizers[encoding] = instance;\n return instance;\n}\n\nexport function encodingForModel(model: string): EncodingName {\n if (model.toLowerCase().includes('claude')) {\n return 'claude';\n }\n return 'o200k_base';\n}\n\nexport function getTokenCountForMessage(\n message: BaseMessage,\n getTokenCount: (text: string) => number,\n encoding: EncodingName = 'o200k_base'\n): number {\n const tokensPerMessage = 3;\n\n type ContentBlock = Record<string, unknown> & {\n type?: string;\n tool_call?: { name?: string; args?: string; output?: string };\n };\n\n const processValue = (value: unknown): void => {\n if (Array.isArray(value)) {\n for (const raw of value) {\n const item = raw as ContentBlock | null | undefined;\n if (item == null || typeof item.type !== 'string') {\n continue;\n }\n if (item.type === ContentTypes.ERROR) {\n continue;\n }\n\n if (\n item.type === ContentTypes.IMAGE_URL ||\n item.type === 'image_url' ||\n item.type === 'image'\n ) {\n numTokens += Math.ceil(\n estimateImageBlockTokens(item, encoding) * IMAGE_TOKEN_SAFETY_MARGIN\n );\n continue;\n }\n\n if (\n item.type === 'document' ||\n item.type === 'file' ||\n item.type === ContentTypes.IMAGE_FILE\n ) {\n numTokens += Math.ceil(\n estimateDocumentBlockTokens(item, encoding, getTokenCount) *\n IMAGE_TOKEN_SAFETY_MARGIN\n );\n continue;\n }\n\n if (item.type === ContentTypes.TOOL_CALL && item.tool_call != null) {\n const toolName = item.tool_call.name;\n if (typeof toolName === 'string' && toolName.length > 0) {\n numTokens += getTokenCount(toolName);\n }\n const args = item.tool_call.args;\n if (typeof args === 'string' && args.length > 0) {\n numTokens += getTokenCount(args);\n }\n const output = item.tool_call.output;\n if (typeof output === 'string' && output.length > 0) {\n numTokens += getTokenCount(output);\n }\n continue;\n }\n\n const nestedValue = item[item.type];\n if (nestedValue == null) {\n continue;\n }\n\n processValue(nestedValue);\n }\n } else if (typeof value === 'string') {\n numTokens += getTokenCount(value);\n } else if (typeof value === 'number') {\n numTokens += getTokenCount(value.toString());\n } else if (typeof value === 'boolean') {\n numTokens += getTokenCount(value.toString());\n }\n };\n\n let numTokens = tokensPerMessage;\n processValue(message.content);\n return numTokens;\n}\n\n/**\n * Largest-remainder apportionment: scales each count by `multiplier` and\n * distributes the rounding remainder so the results sum exactly to\n * `targetTotal`. Keeps per-item breakdowns reconciled with an aggregate\n * computed as a single rounded product of the summed raw counts.\n */\nexport function apportionTokenCounts(\n rawCounts: Record<string, number>,\n multiplier: number,\n targetTotal: number\n): Record<string, number> {\n const result: Record<string, number> = Object.create(null);\n const remainders: Array<{ name: string; remainder: number }> = [];\n let floorSum = 0;\n for (const [name, rawCount] of Object.entries(rawCounts)) {\n const scaled = rawCount * multiplier;\n const floored = Math.floor(scaled);\n result[name] = floored;\n floorSum += floored;\n remainders.push({ name, remainder: scaled - floored });\n }\n let leftover = targetTotal - floorSum;\n if (leftover <= 0 || remainders.length === 0) {\n return result;\n }\n remainders.sort((a, b) => b.remainder - a.remainder);\n for (let i = 0; leftover > 0; i = (i + 1) % remainders.length) {\n result[remainders[i].name] += 1;\n leftover--;\n }\n return result;\n}\n\n/**\n * Anthropic's API consistently reports ~10% more tokens than the local\n * claude tokenizer due to internal message framing and content encoding.\n * Verified empirically across content types via the count_tokens endpoint.\n */\nconst CLAUDE_TOKEN_CORRECTION = 1.1;\n\n/**\n * Creates a token counter function using the specified encoding.\n * Lazily loads the encoding data on first use via dynamic import.\n */\nexport const createTokenCounter = async (\n encoding: EncodingName = 'o200k_base'\n): Promise<(message: BaseMessage) => number> => {\n const tok = await getTokenizer(encoding);\n const countTokens = (text: string): number => tok.count(text);\n const isClaude = encoding === 'claude';\n return (message: BaseMessage): number => {\n const count = getTokenCountForMessage(message, countTokens, encoding);\n return isClaude ? Math.ceil(count * CLAUDE_TOKEN_CORRECTION) : count;\n };\n};\n\n/** Utility to manage the token encoder lifecycle explicitly. */\nexport const TokenEncoderManager = {\n async initialize(): Promise<void> {\n // No-op: ai-tokenizer is synchronously initialized from bundled data.\n },\n\n reset(): void {\n for (const key of Object.keys(tokenizers)) {\n delete tokenizers[key as EncodingName];\n }\n },\n\n isInitialized(): boolean {\n return Object.keys(tokenizers).length > 0;\n },\n};\n"],"mappings":";;;;AAOA,MAAM,6BAA6B;;AAEnC,MAAM,0BAA0B;;AAEhC,MAAM,0BAA0B;;AAEhC,MAAM,yBAAyB;;AAE/B,MAAM,+BAA+B;;AAIrC,MAAM,4BAA4B;;;;;AAMlC,MAAM,gCAAgC;;AAEtC,MAAM,6BAA6B;;AAInC,MAAM,4BAA4B;;AAElC,MAAM,+BAA+B;;;;;;AAOrC,SAAgB,uBACd,YAC0C;CAC1C,MAAM,MAAM,WAAW,WAAW,OAAO,IACrC,WAAW,MAAM,WAAW,QAAQ,GAAG,IAAI,CAAC,IAC5C;CAEJ,IAAI,IAAI,SAAS,IACf,OAAO;CAGT,MAAM,QAAQ,IAAI,WAAW,OAAO,KAAK,IAAI,MAAM,GAAG,EAAE,GAAG,QAAQ,CAAC;CAEpE,IAAI,MAAM,OAAO,OAAQ,MAAM,OAAO,IAMpC,OAAO;EAAE,OAHN,MAAM,OAAO,KAAO,MAAM,OAAO,KAAO,MAAM,OAAO,IAAK,MAAM;EAGnD,QADb,MAAM,OAAO,KAAO,MAAM,OAAO,KAAO,MAAM,OAAO,IAAK,MAAM;CAC5C;CAGzB,IAAI,MAAM,OAAO,OAAQ,MAAM,OAAO,KAAM;EAE1C,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,SAAS,GAAG,KACpC,IACE,MAAM,OAAO,QACZ,MAAM,IAAI,OAAO,OAAQ,MAAM,IAAI,OAAO,MAC3C;GACA,MAAM,SAAU,MAAM,IAAI,MAAM,IAAK,MAAM,IAAI;GAE/C,OAAO;IAAE,OADM,MAAM,IAAI,MAAM,IAAK,MAAM,IAAI;IAC9B;GAAO;EACzB;EAEF,OAAO;CACT;CAEA,IAAI,MAAM,OAAO,MAAQ,MAAM,OAAO,MAAQ,MAAM,OAAO,IAIzD,OAAO;EAAE,OAFK,MAAM,KAAM,MAAM,MAAM;EAEtB,QADD,MAAM,KAAM,MAAM,MAAM;CAChB;CAGzB,IACE,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,OAAO,MACb,MAAM,QAAQ,MACd,MAAM,QAAQ,IACd;EAEA,IAAI,MAAM,SAAS,IAGjB,OAAO;GAAE,QAFM,MAAM,MAAO,MAAM,OAAO,KAAM;GAE/B,SADA,MAAM,MAAO,MAAM,OAAO,KAAM;EACzB;EAEzB,OAAO;CACT;CAEA,OAAO;AACT;;AAGA,SAAgB,6BACd,OACA,QACQ;CACR,OAAO,KAAK,IACV,4BACA,KAAK,KAAM,QAAQ,SAAU,uBAAuB,CACtD;AACF;;AAGA,SAAgB,0BACd,OACA,QACA,SAAiB,QACT;CACR,IAAI,WAAW,OACb,OAAO;CAKT,OAAO,0BAFL,KAAK,KAAK,QAAQ,sBAAsB,IACxC,KAAK,KAAK,SAAS,sBAAsB,IACF;AAC3C;;;;;;AAOA,SAAS,yBACP,OACA,UACQ;CACR,IAAI;CAEJ,IAAI,MAAM,SAAA,eAAmC,MAAM,SAAS,aAAa;EACvE,MAAM,WAAW,MAAM;EACvB,MAAM,MAAM,OAAO,aAAa,WAAW,WAAW,UAAU;EAChE,IAAI,OAAO,QAAQ,YAAY,IAAI,WAAW,OAAO,GACnD,aAAa;OAEb,OAAO;CAEX,OAAO,IAAI,MAAM,SAAS,SAAS;EACjC,MAAM,SAAS,MAAM;EACrB,IAAI,QAAQ,SAAS,YAAY,OAAO,OAAO,SAAS,UACtD,aAAa,OAAO;OAEpB,OAAO;CAEX,OACE,OAAO;CAGT,MAAM,OAAO,uBAAuB,UAAU;CAC9C,IAAI,QAAQ,MACV,OAAO;CAGT,IAAI,aAAa,UACf,OAAO,6BAA6B,KAAK,OAAO,KAAK,MAAM;CAE7D,OAAO,0BAA0B,KAAK,OAAO,KAAK,MAAM;AAC1D;;;;;;;;;;AAWA,SAAS,4BACP,OACA,UACA,eACQ;CACR,MAAM,mBACJ,aAAa,WACT,gCACA;CAGN,MAAM,aAAa,MAAM;CACzB,IAAI,OAAO,eAAe,UAAU;EAClC,MAAM,YAAa,MAAM,aAAoC,GAAA,CAAI,MAC/D,GACF,CAAC,CAAC;EAEF,IAAI,eAAe,UAAU,OAAO,MAAM,SAAS,UACjD,OAAO,cAAc,MAAM,IAAc;EAG3C,IAAI,eAAe,YAAY,OAAO,MAAM,SAAS,UAAU;GAC7D,IAAI,aAAa,qBAAqB,aAAa,IAKjD,OAJqB,KAAK,IACxB,GACA,KAAK,KAAM,MAAM,KAAgB,SAAS,yBAAyB,CAEnD,IAAI;GAGxB,IAAI,SAAS,WAAW,QAAQ,GAC9B,OAAO,yBACL;IACE,GAAG;IACH,MAAM;IACN,QAAQ;KAAE,MAAM;KAAU,MAAM,MAAM;IAAK;GAC7C,GACA,QACF;GAEF,OAAO,cAAc,MAAM,IAAc;EAC3C;EAEA,IAAI,eAAe,OACjB,OAAO;EAGT,OAAO;CACT;CAGA,MAAM,SAAS,MAAM;CASrB,IAAI,UAAU,MACZ,OAAO;CAGT,IAAI,OAAO,SAAS,UAAU,OAAO,OAAO,SAAS,UACnD,OAAO,cAAc,OAAO,IAAI;CAGlC,IAAI,OAAO,SAAS,YAAY,OAAO,OAAO,SAAS,UAAU;EAC/D,MAAM,aAAa,OAAO,cAAc,GAAA,CAAI,MAAM,GAAG,CAAC,CAAC;EACvD,IAAI,cAAc,qBAAqB,cAAc,IAKnD,OAJqB,KAAK,IACxB,GACA,KAAK,KAAK,OAAO,KAAK,SAAS,yBAAyB,CAExC,IAAI;EAExB,IAAI,UAAU,WAAW,QAAQ,GAC/B,OAAO,yBACL;GAAE,MAAM;GAAS,QAAQ;IAAE,MAAM;IAAU,MAAM,OAAO;GAAK;EAAE,GAC/D,QACF;EAEF,OAAO,cAAc,OAAO,IAAI;CAClC;CAEA,IAAI,OAAO,SAAS,OAClB,OAAO;CAIT,IAAI,OAAO,SAAS,aAAa,MAAM,QAAQ,OAAO,OAAO,GAAG;EAC9D,IAAI,QAAQ;EACZ,KAAK,MAAM,SAAS,OAAO,SACzB,IAAI,SAAS,QAAQ,OAAO,UAAU,YAAY,UAAU,OAAO;GACjE,MAAM,aAAa;GACnB,IAAI,WAAW,SAAS,SACtB,SAAS,yBAAyB,YAAY,QAAQ;EAE1D;EAEF,OAAO,QAAQ,IAAI,QAAQ;CAC7B;CAEA,OAAO;AACT;AAEA,MAAM,aAAuD,CAAC;AAE9D,eAAe,aACb,WAAyB,cACL;CACpB,MAAM,SAAS,WAAW;CAC1B,IAAI,QACF,OAAO;CAMT,MAAM,WAAW,IAAI,UAHnB,aAAa,WACT,MAAM,OAAO,kCACb,MAAM,OAAO,mCACgB;CACnC,WAAW,YAAY;CACvB,OAAO;AACT;AAEA,SAAgB,iBAAiB,OAA6B;CAC5D,IAAI,MAAM,YAAY,CAAC,CAAC,SAAS,QAAQ,GACvC,OAAO;CAET,OAAO;AACT;AAEA,SAAgB,wBACd,SACA,eACA,WAAyB,cACjB;CACR,MAAM,mBAAmB;CAOzB,MAAM,gBAAgB,UAAyB;EAC7C,IAAI,MAAM,QAAQ,KAAK,GACrB,KAAK,MAAM,OAAO,OAAO;GACvB,MAAM,OAAO;GACb,IAAI,QAAQ,QAAQ,OAAO,KAAK,SAAS,UACvC;GAEF,IAAI,KAAK,SAAA,SACP;GAGF,IACE,KAAK,SAAA,eACL,KAAK,SAAS,eACd,KAAK,SAAS,SACd;IACA,aAAa,KAAK,KAChB,yBAAyB,MAAM,QAAQ,IAAI,yBAC7C;IACA;GACF;GAEA,IACE,KAAK,SAAS,cACd,KAAK,SAAS,UACd,KAAK,SAAA,cACL;IACA,aAAa,KAAK,KAChB,4BAA4B,MAAM,UAAU,aAAa,IACvD,yBACJ;IACA;GACF;GAEA,IAAI,KAAK,SAAA,eAAmC,KAAK,aAAa,MAAM;IAClE,MAAM,WAAW,KAAK,UAAU;IAChC,IAAI,OAAO,aAAa,YAAY,SAAS,SAAS,GACpD,aAAa,cAAc,QAAQ;IAErC,MAAM,OAAO,KAAK,UAAU;IAC5B,IAAI,OAAO,SAAS,YAAY,KAAK,SAAS,GAC5C,aAAa,cAAc,IAAI;IAEjC,MAAM,SAAS,KAAK,UAAU;IAC9B,IAAI,OAAO,WAAW,YAAY,OAAO,SAAS,GAChD,aAAa,cAAc,MAAM;IAEnC;GACF;GAEA,MAAM,cAAc,KAAK,KAAK;GAC9B,IAAI,eAAe,MACjB;GAGF,aAAa,WAAW;EAC1B;OACK,IAAI,OAAO,UAAU,UAC1B,aAAa,cAAc,KAAK;OAC3B,IAAI,OAAO,UAAU,UAC1B,aAAa,cAAc,MAAM,SAAS,CAAC;OACtC,IAAI,OAAO,UAAU,WAC1B,aAAa,cAAc,MAAM,SAAS,CAAC;CAE/C;CAEA,IAAI,YAAY;CAChB,aAAa,QAAQ,OAAO;CAC5B,OAAO;AACT;;;;;;;AAQA,SAAgB,qBACd,WACA,YACA,aACwB;CACxB,MAAM,SAAiC,OAAO,OAAO,IAAI;CACzD,MAAM,aAAyD,CAAC;CAChE,IAAI,WAAW;CACf,KAAK,MAAM,CAAC,MAAM,aAAa,OAAO,QAAQ,SAAS,GAAG;EACxD,MAAM,SAAS,WAAW;EAC1B,MAAM,UAAU,KAAK,MAAM,MAAM;EACjC,OAAO,QAAQ;EACf,YAAY;EACZ,WAAW,KAAK;GAAE;GAAM,WAAW,SAAS;EAAQ,CAAC;CACvD;CACA,IAAI,WAAW,cAAc;CAC7B,IAAI,YAAY,KAAK,WAAW,WAAW,GACzC,OAAO;CAET,WAAW,MAAM,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;CACnD,KAAK,IAAI,IAAI,GAAG,WAAW,GAAG,KAAK,IAAI,KAAK,WAAW,QAAQ;EAC7D,OAAO,WAAW,EAAE,CAAC,SAAS;EAC9B;CACF;CACA,OAAO;AACT;;;;;;AAOA,MAAM,0BAA0B;;;;;AAMhC,MAAa,qBAAqB,OAChC,WAAyB,iBACqB;CAC9C,MAAM,MAAM,MAAM,aAAa,QAAQ;CACvC,MAAM,eAAe,SAAyB,IAAI,MAAM,IAAI;CAC5D,MAAM,WAAW,aAAa;CAC9B,QAAQ,YAAiC;EACvC,MAAM,QAAQ,wBAAwB,SAAS,aAAa,QAAQ;EACpE,OAAO,WAAW,KAAK,KAAK,QAAQ,uBAAuB,IAAI;CACjE;AACF;;AAGA,MAAa,sBAAsB;CACjC,MAAM,aAA4B,CAElC;CAEA,QAAc;EACZ,KAAK,MAAM,OAAO,OAAO,KAAK,UAAU,GACtC,OAAO,WAAW;CAEtB;CAEA,gBAAyB;EACvB,OAAO,OAAO,KAAK,UAAU,CAAC,CAAC,SAAS;CAC1C;AACF"}
|
|
@@ -60,6 +60,11 @@ export declare class AgentContext {
|
|
|
60
60
|
dynamicInstructionTokens: number;
|
|
61
61
|
/** Token count for tool schemas only. */
|
|
62
62
|
toolSchemaTokens: number;
|
|
63
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name.
|
|
64
|
+
* `undefined` when not calculated (e.g. cached aggregate schema tokens). */
|
|
65
|
+
toolTokenCounts?: Record<string, number>;
|
|
66
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
67
|
+
deferredToolNames: string[];
|
|
63
68
|
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
|
|
64
69
|
calibrationRatio: number;
|
|
65
70
|
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
|
|
@@ -330,9 +335,8 @@ export declare class AgentContext {
|
|
|
330
335
|
* Returns a structured breakdown of how the context token budget is consumed.
|
|
331
336
|
* Useful for diagnostics when context overflow or pruning issues occur.
|
|
332
337
|
*
|
|
333
|
-
* Note: `
|
|
334
|
-
*
|
|
335
|
-
* recomputed when `markToolsAsDiscovered` is called mid-run.
|
|
338
|
+
* Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
|
|
339
|
+
* so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
|
|
336
340
|
*/
|
|
337
341
|
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown;
|
|
338
342
|
/**
|
|
@@ -29,6 +29,8 @@ export declare enum GraphEvents {
|
|
|
29
29
|
ON_SUBAGENT_UPDATE = "on_subagent_update",
|
|
30
30
|
/** [Custom] Diagnostic logging event for context management observability */
|
|
31
31
|
ON_AGENT_LOG = "on_agent_log",
|
|
32
|
+
/** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
|
|
33
|
+
ON_CONTEXT_USAGE = "on_context_usage",
|
|
32
34
|
/** Custom event, emitted by system */
|
|
33
35
|
ON_CUSTOM_EVENT = "on_custom_event",
|
|
34
36
|
/** Emitted when a chat model starts processing. */
|
|
@@ -140,6 +142,17 @@ export declare enum Constants {
|
|
|
140
142
|
/** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
|
|
141
143
|
ANTHROPIC_SERVER_TOOL_PREFIX = "srvtoolu_",
|
|
142
144
|
SKILL_TOOL = "skill",
|
|
145
|
+
/**
|
|
146
|
+
* Callback-metadata keys stamped by `attemptInvoke` /
|
|
147
|
+
* `tryFallbackProviders` carrying the provider (SDK `Providers` enum
|
|
148
|
+
* value) and configured model that actually served a model invocation.
|
|
149
|
+
* Unlike `ls_provider` — which derived providers inherit from their base
|
|
150
|
+
* class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
|
|
151
|
+
* SDK's own routing, including fallback-provider calls. Consumed by the
|
|
152
|
+
* subagent usage-capture handler to tag billing events.
|
|
153
|
+
*/
|
|
154
|
+
INVOKED_PROVIDER = "__invoked_provider",
|
|
155
|
+
INVOKED_MODEL = "__invoked_model",
|
|
143
156
|
READ_FILE = "read_file",
|
|
144
157
|
BASH_TOOL = "bash_tool",
|
|
145
158
|
BASH_PROGRAMMATIC_TOOL_CALLING = "run_tools_with_bash",
|
|
@@ -179,7 +179,14 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
|
|
|
179
179
|
agentContexts: Map<string, AgentContext>;
|
|
180
180
|
/** Default agent ID to use */
|
|
181
181
|
defaultAgentId: string;
|
|
182
|
-
|
|
182
|
+
/**
|
|
183
|
+
* Host sink for model usage emitted inside subagent child runs. Threaded
|
|
184
|
+
* into each `SubagentExecutor` this graph creates (and from there into
|
|
185
|
+
* child graphs, so nested subagents report too). See
|
|
186
|
+
* {@link t.StandardGraphInput.subagentUsageSink}.
|
|
187
|
+
*/
|
|
188
|
+
subagentUsageSink?: t.SubagentUsageSink;
|
|
189
|
+
constructor({ runId, signal, agents, langfuse, tokenCounter, indexTokenCountMap, calibrationRatio, subagentUsageSink, }: t.StandardGraphInput);
|
|
183
190
|
resetValues(keepContent?: boolean): void;
|
|
184
191
|
clearHeavyState(): void;
|
|
185
192
|
getRunStep(stepId: string): t.RunStep | undefined;
|
|
@@ -2,4 +2,4 @@
|
|
|
2
2
|
* Bedrock Converse utility exports.
|
|
3
3
|
*/
|
|
4
4
|
export { convertToConverseMessages, extractImageInfo, langchainReasoningBlockToBedrockReasoningBlock, concatenateLangchainReasoningBlocks, } from './message_inputs';
|
|
5
|
-
export { convertConverseMessageToLangChainMessage, handleConverseStreamContentBlockStart, handleConverseStreamContentBlockDelta, handleConverseStreamMetadata, bedrockReasoningBlockToLangchainReasoningBlock, bedrockReasoningDeltaToLangchainPartialReasoningBlock, } from './message_outputs';
|
|
5
|
+
export { convertConverseMessageToLangChainMessage, createConverseToolUseStopChunk, handleConverseStreamContentBlockStart, handleConverseStreamContentBlockDelta, handleConverseStreamMetadata, bedrockReasoningBlockToLangchainReasoningBlock, bedrockReasoningDeltaToLangchainPartialReasoningBlock, } from './message_outputs';
|
|
@@ -25,6 +25,15 @@ export declare function handleConverseStreamContentBlockDelta(contentBlockDelta:
|
|
|
25
25
|
* Handle a content block start event from Bedrock Converse stream.
|
|
26
26
|
*/
|
|
27
27
|
export declare function handleConverseStreamContentBlockStart(contentBlockStart: ContentBlockStartEvent): ChatGenerationChunk | null;
|
|
28
|
+
/**
|
|
29
|
+
* Build the chunk emitted when a Converse `contentBlockStop` event closes a
|
|
30
|
+
* toolUse block. The Converse protocol guarantees a block's input is complete
|
|
31
|
+
* at `contentBlockStop`, so this chunk carries an explicit streamed tool-call
|
|
32
|
+
* seal for that block index. The empty `args` delta merges as a no-op into the
|
|
33
|
+
* accumulated tool call; id/name are omitted so the chunk matches the existing
|
|
34
|
+
* entry purely by index.
|
|
35
|
+
*/
|
|
36
|
+
export declare function createConverseToolUseStopChunk(contentBlockIndex: number): ChatGenerationChunk;
|
|
28
37
|
/**
|
|
29
38
|
* Handle a metadata event from Bedrock Converse stream.
|
|
30
39
|
*/
|
|
@@ -3,8 +3,8 @@ import type { RunnableConfig } from '@langchain/core/runnables';
|
|
|
3
3
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
4
4
|
import type { ToolOutputReferenceRegistry } from '@/tools/toolOutputReferences';
|
|
5
5
|
import type * as t from '@/types';
|
|
6
|
-
import { ChatModelStreamHandler } from '@/stream';
|
|
7
6
|
import { Providers } from '@/common';
|
|
7
|
+
import { ChatModelStreamHandler } from '@/stream';
|
|
8
8
|
/**
|
|
9
9
|
* Context passed to `attemptInvoke`. Matches the subset of Graph that
|
|
10
10
|
* `ChatModelStreamHandler.handle` needs *plus* the explicit
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ChatGoogle } from '@langchain/google-gauth';
|
|
2
|
+
import { AIMessageChunk } from '@langchain/core/messages';
|
|
2
3
|
import type { GeminiContent, GoogleAIModelRequestParams, GoogleAbstractedClient } from '@langchain/google-common';
|
|
3
4
|
import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
|
|
4
5
|
import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
|
|
@@ -29,6 +30,15 @@ import type { GoogleThinkingConfig, VertexAIClientOptions } from '@/types';
|
|
|
29
30
|
* overriding `_convertToUsageMetadata`.
|
|
30
31
|
*/
|
|
31
32
|
export declare function repairStreamUsageMetadata(current: UsageMetadata | undefined, generationInfoUsage: UsageMetadata | undefined): UsageMetadata | undefined;
|
|
33
|
+
/**
|
|
34
|
+
* The Gemini API delivers function calls as complete objects — never as
|
|
35
|
+
* partial arg deltas. `@langchain/google-common` pre-parses each streamed
|
|
36
|
+
* functionCall part into `tool_calls` (invalid args land in
|
|
37
|
+
* `invalid_tool_calls` instead), so a chunk whose tool-call chunks all parsed
|
|
38
|
+
* cleanly is sealed on arrival for eager tool execution. Anything that fails
|
|
39
|
+
* the parse check is left unstamped and falls back to the lazy path.
|
|
40
|
+
*/
|
|
41
|
+
export declare function sealCompleteStreamedToolCalls(message: AIMessageChunk): void;
|
|
32
42
|
/**
|
|
33
43
|
* Fixes thought signatures on functionCall parts in the formatted Gemini request.
|
|
34
44
|
*
|
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
2
|
+
/**
|
|
3
|
+
* Whether {@link formatContentStrings} will flatten this message's content:
|
|
4
|
+
* a human/ai/system message whose content is an array of text-only blocks.
|
|
5
|
+
*/
|
|
6
|
+
export declare const isLegacyConvertible: (message: BaseMessage) => boolean;
|
|
2
7
|
/**
|
|
3
8
|
* Formats an array of messages for LangChain, making sure all content fields are strings
|
|
4
9
|
* @param {Array<HumanMessage | AIMessage | SystemMessage | ToolMessage>} payload - The array of messages to format.
|
|
@@ -220,4 +220,8 @@ export declare function createPruneMessages(factoryParams: PruneMessagesFactoryP
|
|
|
220
220
|
originalToolContent?: Map<number, string>;
|
|
221
221
|
calibrationRatio?: number;
|
|
222
222
|
resolvedInstructionOverhead?: number;
|
|
223
|
+
/** Usable budget this call: maxTokens minus output reserve */
|
|
224
|
+
contextBudget?: number;
|
|
225
|
+
/** Calibrated instruction overhead actually applied this call */
|
|
226
|
+
effectiveInstructionTokens?: number;
|
|
223
227
|
};
|
package/dist/types/run.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare class Run<_T extends t.BaseGraphState> {
|
|
|
15
15
|
private toolOutputReferences?;
|
|
16
16
|
private eagerEventToolExecution?;
|
|
17
17
|
private toolExecution?;
|
|
18
|
+
private subagentUsageSink?;
|
|
18
19
|
private indexTokenCountMap?;
|
|
19
20
|
calibrationRatio: number;
|
|
20
21
|
graphRunnable?: t.CompiledStateWorkflow;
|
|
@@ -350,6 +350,14 @@ export declare class ToolNode<T = any> extends RunnableCallable<T, T> {
|
|
|
350
350
|
*/
|
|
351
351
|
private dispatchPostToolBatchAndInjectContext;
|
|
352
352
|
private dispatchStepCompleted;
|
|
353
|
+
/**
|
|
354
|
+
* Emits the completed run step for a single host-reported result before
|
|
355
|
+
* the batch resolves. Mirrors the batch loop's output formatting exactly;
|
|
356
|
+
* callers gate on the no-hooks/no-HITL configuration, so the raw result
|
|
357
|
+
* content here is also the final content. Returns whether the event was
|
|
358
|
+
* actually dispatched so the caller can fall back to batch-time emission.
|
|
359
|
+
*/
|
|
360
|
+
private dispatchEarlyToolCompletion;
|
|
353
361
|
/**
|
|
354
362
|
* Converts InjectedMessage instances to LangChain HumanMessage objects.
|
|
355
363
|
* Both 'user' and 'system' roles become HumanMessage to avoid provider
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
export declare const STREAMED_TOOL_CALL_ADAPTER_METADATA_KEY = "lc_streamed_tool_call_adapter";
|
|
2
2
|
export declare const STREAMED_TOOL_CALL_SEAL_METADATA_KEY = "lc_streamed_tool_call_seal";
|
|
3
3
|
export declare const OPENAI_RESPONSES_STREAMED_TOOL_CALL_ADAPTER = "openai_responses";
|
|
4
|
-
export
|
|
4
|
+
export declare const BEDROCK_CONVERSE_STREAMED_TOOL_CALL_ADAPTER = "bedrock_converse";
|
|
5
|
+
export declare const GOOGLE_STREAMED_TOOL_CALL_ADAPTER = "google_genai";
|
|
6
|
+
export declare const OPENAI_CHAT_SEQUENTIAL_STREAMED_TOOL_CALL_ADAPTER = "openai_chat_sequential";
|
|
7
|
+
export type StreamedToolCallAdapter = typeof OPENAI_RESPONSES_STREAMED_TOOL_CALL_ADAPTER | typeof BEDROCK_CONVERSE_STREAMED_TOOL_CALL_ADAPTER | typeof GOOGLE_STREAMED_TOOL_CALL_ADAPTER | typeof OPENAI_CHAT_SEQUENTIAL_STREAMED_TOOL_CALL_ADAPTER;
|
|
8
|
+
export declare function streamedToolCallAdapterAllowsSequentialSeal(metadata: Record<string, unknown> | undefined): boolean;
|
|
5
9
|
export type StreamedToolCallSeal = {
|
|
6
10
|
kind: 'single';
|
|
7
11
|
id?: string;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
2
|
-
import type { AgentInputs, StandardGraphInput, ResolvedSubagentConfig, SubagentConfig, TokenCounter } from '@/types';
|
|
2
|
+
import type { AgentInputs, StandardGraphInput, ResolvedSubagentConfig, SubagentConfig, SubagentUsageSink, TokenCounter } from '@/types';
|
|
3
3
|
import type { HookRegistry } from '@/hooks';
|
|
4
4
|
import type { AgentContext } from '@/agents/AgentContext';
|
|
5
5
|
import type { StandardGraph } from '@/graphs/Graph';
|
|
@@ -88,6 +88,15 @@ export type SubagentExecutorOptions = {
|
|
|
88
88
|
* post-`createWorkflow`, so `createAgentNode` must capture lazily).
|
|
89
89
|
*/
|
|
90
90
|
parentHandlerRegistry?: HandlerRegistry | (() => HandlerRegistry | undefined);
|
|
91
|
+
/**
|
|
92
|
+
* Receives a usage event for every model call the child run makes. The
|
|
93
|
+
* child workflow executes via `invoke()` with a detached callbacks array,
|
|
94
|
+
* so its `on_chat_model_end` events never reach the parent's handler
|
|
95
|
+
* registry — without this sink, child token usage is invisible to the
|
|
96
|
+
* host (unbilled model calls). Forwarded into the child graph's input so
|
|
97
|
+
* nested subagents report through the same sink.
|
|
98
|
+
*/
|
|
99
|
+
usageSink?: SubagentUsageSink;
|
|
91
100
|
};
|
|
92
101
|
export declare class SubagentExecutor {
|
|
93
102
|
private readonly configs;
|
|
@@ -99,6 +108,7 @@ export declare class SubagentExecutor {
|
|
|
99
108
|
private readonly tokenCounter?;
|
|
100
109
|
private readonly maxDepth;
|
|
101
110
|
private readonly createChildGraph;
|
|
111
|
+
private readonly usageSink?;
|
|
102
112
|
private readonly resolveParentHandlerRegistry?;
|
|
103
113
|
constructor(options: SubagentExecutorOptions);
|
|
104
114
|
/** Snapshot of the parent's registry at the moment a subagent is dispatched. */
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { BaseMessage, AIMessageChunk, SystemMessage } from '@langchain/core/messages';
|
|
1
|
+
import type { BaseMessage, AIMessageChunk, SystemMessage, UsageMetadata } from '@langchain/core/messages';
|
|
2
2
|
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
|
|
3
3
|
import type { START, StateGraph, StateGraphArgs } from '@langchain/langgraph';
|
|
4
4
|
import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
|
|
@@ -7,10 +7,10 @@ import type { GoogleAIToolType } from '@langchain/google-common';
|
|
|
7
7
|
import type { SummarizationNodeInput, SummarizeCompleteEvent, SummarizationConfig, SummarizeStartEvent, SummarizeDeltaEvent } from '@/types/summarize';
|
|
8
8
|
import type { ToolMap, ToolEndEvent, GenericTool, LCTool, ToolExecuteBatchRequest } from '@/types/tools';
|
|
9
9
|
import type { RunStep, RunStepDeltaEvent, MessageDeltaEvent, ReasoningDeltaEvent } from '@/types/stream';
|
|
10
|
+
import type { TokenCounter, TokenBudgetBreakdown } from '@/types/run';
|
|
10
11
|
import type { Providers, Callback, GraphNodeKeys } from '@/common';
|
|
11
12
|
import type { StandardGraph, MultiAgentGraph } from '@/graphs';
|
|
12
13
|
import type { ClientOptions } from '@/types/llm';
|
|
13
|
-
import type { TokenCounter } from '@/types/run';
|
|
14
14
|
/** Interface for bound model with stream and invoke methods */
|
|
15
15
|
export interface ChatModel {
|
|
16
16
|
stream?: (messages: BaseMessage[], config?: RunnableConfig) => Promise<AsyncIterable<AIMessageChunk>>;
|
|
@@ -44,8 +44,31 @@ export interface AgentLogEvent {
|
|
|
44
44
|
runId?: string;
|
|
45
45
|
agentId?: string;
|
|
46
46
|
}
|
|
47
|
+
/**
|
|
48
|
+
* Per-model-call context window usage snapshot, dispatched after pruning and
|
|
49
|
+
* before the model invocation. Dispatched once per `callModel` invocation:
|
|
50
|
+
* fallback retries reuse the snapshot since the prompt is identical — budget
|
|
51
|
+
* numbers reflect the primary provider's tokenizer, and the calibration
|
|
52
|
+
* ratio self-corrects from whichever provider reports usage.
|
|
53
|
+
*/
|
|
54
|
+
export interface ContextUsageEvent {
|
|
55
|
+
runId?: string;
|
|
56
|
+
agentId?: string;
|
|
57
|
+
/** Structural token budget snapshot from AgentContext.getTokenBudgetBreakdown */
|
|
58
|
+
breakdown: TokenBudgetBreakdown;
|
|
59
|
+
/** Usable budget this call: maxContextTokens minus output reserve */
|
|
60
|
+
contextBudget?: number;
|
|
61
|
+
/** Calibrated instruction overhead actually applied this call */
|
|
62
|
+
effectiveInstructionTokens?: number;
|
|
63
|
+
/** Calibrated message tokens before pruning (excluding instructions) */
|
|
64
|
+
prePruneContextTokens?: number;
|
|
65
|
+
/** Tokens still free after instructions + pruned messages */
|
|
66
|
+
remainingContextTokens?: number;
|
|
67
|
+
/** EMA ratio of provider-reported vs locally estimated token counts */
|
|
68
|
+
calibrationRatio?: number;
|
|
69
|
+
}
|
|
47
70
|
export interface EventHandler {
|
|
48
|
-
handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | SummarizeStartEvent | SummarizeDeltaEvent | SummarizeCompleteEvent | SubagentUpdateEvent | AgentLogEvent | ToolExecuteBatchRequest | {
|
|
71
|
+
handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | SummarizeStartEvent | SummarizeDeltaEvent | SummarizeCompleteEvent | SubagentUpdateEvent | AgentLogEvent | ContextUsageEvent | ToolExecuteBatchRequest | {
|
|
49
72
|
result: ToolEndEvent;
|
|
50
73
|
}, metadata?: Record<string, unknown>, graph?: StandardGraph | MultiAgentGraph): void | Promise<void>;
|
|
51
74
|
}
|
|
@@ -199,6 +222,17 @@ export type StandardGraphInput = {
|
|
|
199
222
|
tokenCounter?: TokenCounter;
|
|
200
223
|
indexTokenCountMap?: Record<string, number>;
|
|
201
224
|
calibrationRatio?: number;
|
|
225
|
+
/**
|
|
226
|
+
* Receives a {@link SubagentUsageEvent} for every model call made inside
|
|
227
|
+
* a subagent child run spawned from this graph (including nested
|
|
228
|
+
* subagents and child-side summarization calls). Child graphs run via
|
|
229
|
+
* `invoke()` outside the host's `streamEvents` loop, so their
|
|
230
|
+
* `on_chat_model_end` events never reach the run's handler registry —
|
|
231
|
+
* this sink is the only way hosts can observe child token usage for
|
|
232
|
+
* billing/accounting. Parent-graph model calls are NOT reported here;
|
|
233
|
+
* they already flow through the registry's `CHAT_MODEL_END` handler.
|
|
234
|
+
*/
|
|
235
|
+
subagentUsageSink?: SubagentUsageSink;
|
|
202
236
|
};
|
|
203
237
|
export type GraphEdge = {
|
|
204
238
|
/** Agent ID, use a list for multiple sources */
|
|
@@ -289,6 +323,58 @@ export interface SubagentUpdateEvent {
|
|
|
289
323
|
/** ISO timestamp for ordering / display. */
|
|
290
324
|
timestamp: string;
|
|
291
325
|
}
|
|
326
|
+
/**
|
|
327
|
+
* Token usage for a single model call made inside a subagent child run.
|
|
328
|
+
* Emitted through {@link SubagentUsageSink} as each call completes, so
|
|
329
|
+
* hosts can bill child-run model usage that never reaches the parent
|
|
330
|
+
* run's `CHAT_MODEL_END` handler (child graphs execute via `invoke()`
|
|
331
|
+
* outside the host's `streamEvents` loop).
|
|
332
|
+
*/
|
|
333
|
+
export interface SubagentUsageEvent {
|
|
334
|
+
/** Usage metadata reported by the child's model call. */
|
|
335
|
+
usage: UsageMetadata;
|
|
336
|
+
/**
|
|
337
|
+
* Model that produced this usage. Per-call `ls_model_name` from the
|
|
338
|
+
* model's callback metadata when available (covers child-side
|
|
339
|
+
* summarization or any call that differs from the configured model),
|
|
340
|
+
* then the fallback-invocation's configured model (`INVOKED_MODEL`
|
|
341
|
+
* metadata), then the subagent config's `clientOptions` model.
|
|
342
|
+
*/
|
|
343
|
+
model?: string;
|
|
344
|
+
/**
|
|
345
|
+
* Provider that actually served this call — the SDK `Providers` enum
|
|
346
|
+
* value stamped per-invocation by `attemptInvoke` (`INVOKED_PROVIDER`
|
|
347
|
+
* metadata), so fallback-served calls are attributed to the fallback
|
|
348
|
+
* provider, not the configured primary. Falls back to the subagent
|
|
349
|
+
* config's provider. Never LangSmith's `ls_provider` string — derived
|
|
350
|
+
* providers inherit that from their base class, and hosts key
|
|
351
|
+
* pricing/cache semantics off the enum.
|
|
352
|
+
*/
|
|
353
|
+
provider?: string;
|
|
354
|
+
/** Subagent `type` identifier from the SubagentConfig. */
|
|
355
|
+
subagentType: string;
|
|
356
|
+
/** Child run ID (unique per subagent execution). */
|
|
357
|
+
subagentRunId: string;
|
|
358
|
+
/** Child agent ID assigned to this subagent execution. */
|
|
359
|
+
subagentAgentId: string;
|
|
360
|
+
/**
|
|
361
|
+
* ROOT run ID of the host run that owns billing. For nested subagents
|
|
362
|
+
* each forwarding layer rewrites this upward, so events from any depth
|
|
363
|
+
* surface with the outermost run's ID — never an intermediate
|
|
364
|
+
* `*_sub_*` child id (use {@link subagentRunId} to identify the
|
|
365
|
+
* emitting child).
|
|
366
|
+
*/
|
|
367
|
+
runId: string;
|
|
368
|
+
}
|
|
369
|
+
/**
|
|
370
|
+
* Host-provided callback receiving {@link SubagentUsageEvent}s. Invoked as
|
|
371
|
+
* each child model call completes. May return a promise — the executor
|
|
372
|
+
* awaits each dispatch (so all usage is recorded before the child's result
|
|
373
|
+
* resolves to the parent) and swallows both synchronous throws and
|
|
374
|
+
* rejections; implementations should still be cheap, as they sit on the
|
|
375
|
+
* child's model-call path.
|
|
376
|
+
*/
|
|
377
|
+
export type SubagentUsageSink = (event: SubagentUsageEvent) => void | Promise<void>;
|
|
292
378
|
export type LangfuseToolOutputTracingConfig = {
|
|
293
379
|
/**
|
|
294
380
|
* Whether tool outputs should be exported to Langfuse. Defaults to
|
|
@@ -111,6 +111,15 @@ export type RunConfig = {
|
|
|
111
111
|
*/
|
|
112
112
|
langfuse?: g.LangfuseConfig;
|
|
113
113
|
customHandlers?: Record<string, g.EventHandler>;
|
|
114
|
+
/**
|
|
115
|
+
* Receives token usage for every model call made inside subagent child
|
|
116
|
+
* runs (including nested subagents). Child graphs execute via `invoke()`
|
|
117
|
+
* outside this run's `streamEvents` loop, so their model-end events never
|
|
118
|
+
* reach `customHandlers` — without this sink, child usage is invisible to
|
|
119
|
+
* the host. Parent-graph calls are not reported here; they flow through
|
|
120
|
+
* the registered `CHAT_MODEL_END` handler as usual.
|
|
121
|
+
*/
|
|
122
|
+
subagentUsageSink?: g.SubagentUsageSink;
|
|
114
123
|
/**
|
|
115
124
|
* Pre-constructed hook registry for this run. Hooks fire at lifecycle
|
|
116
125
|
* points in `processStream` (RunStart, UserPromptSubmit, Stop,
|
|
@@ -225,6 +234,10 @@ export type TokenBudgetBreakdown = {
|
|
|
225
234
|
messageTokens: number;
|
|
226
235
|
/** Tokens available for messages after instructions. */
|
|
227
236
|
availableForMessages: number;
|
|
237
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name. */
|
|
238
|
+
toolTokenCounts?: Record<string, number>;
|
|
239
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
240
|
+
deferredToolNames?: string[];
|
|
228
241
|
};
|
|
229
242
|
export type EventStreamOptions = {
|
|
230
243
|
callbacks?: g.ClientCallbacks;
|
|
@@ -382,6 +382,16 @@ export type ToolExecuteBatchRequest = {
|
|
|
382
382
|
resolve: (results: ToolExecuteResult[]) => void;
|
|
383
383
|
/** Promise rejector - handler calls this on fatal error */
|
|
384
384
|
reject: (error: Error) => void;
|
|
385
|
+
/**
|
|
386
|
+
* Optional per-call result channel. When present, the handler MAY invoke
|
|
387
|
+
* this as each tool call settles (before the final `resolve`) so the
|
|
388
|
+
* graph can emit that call's completion event without waiting for the
|
|
389
|
+
* slowest call in the batch. Purely an emission fast-path: the handler
|
|
390
|
+
* must still pass every result to `resolve`, which remains the
|
|
391
|
+
* authoritative batch outcome. Only provided when no post-tool hooks or
|
|
392
|
+
* human-in-the-loop flows could change a result after execution.
|
|
393
|
+
*/
|
|
394
|
+
onResult?: (result: ToolExecuteResult) => void;
|
|
385
395
|
};
|
|
386
396
|
/**
|
|
387
397
|
* A message injected into graph state by any tool execution handler.
|
|
@@ -15,6 +15,13 @@ export declare function estimateAnthropicImageTokens(width: number, height: numb
|
|
|
15
15
|
export declare function estimateOpenAIImageTokens(width: number, height: number, detail?: string): number;
|
|
16
16
|
export declare function encodingForModel(model: string): EncodingName;
|
|
17
17
|
export declare function getTokenCountForMessage(message: BaseMessage, getTokenCount: (text: string) => number, encoding?: EncodingName): number;
|
|
18
|
+
/**
|
|
19
|
+
* Largest-remainder apportionment: scales each count by `multiplier` and
|
|
20
|
+
* distributes the rounding remainder so the results sum exactly to
|
|
21
|
+
* `targetTotal`. Keeps per-item breakdowns reconciled with an aggregate
|
|
22
|
+
* computed as a single rounded product of the summed raw counts.
|
|
23
|
+
*/
|
|
24
|
+
export declare function apportionTokenCounts(rawCounts: Record<string, number>, multiplier: number, targetTotal: number): Record<string, number>;
|
|
18
25
|
/**
|
|
19
26
|
* Creates a token counter function using the specified encoding.
|
|
20
27
|
* Lazily loads the encoding data on first use via dynamic import.
|