@dvina/agents 0.14.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/eval/index.d.mts +58 -13
- package/dist/eval/index.d.ts +58 -13
- package/dist/eval/index.js +672 -29
- package/dist/eval/index.js.map +1 -1
- package/dist/eval/index.mjs +673 -30
- package/dist/eval/index.mjs.map +1 -1
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +75 -0
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +75 -0
- package/dist/index.mjs.map +1 -1
- package/dist/{model-resolver-DjKRXKtu.d.mts → model-resolver-DSJRvrqA.d.mts} +2 -5
- package/dist/{model-resolver-DjKRXKtu.d.ts → model-resolver-DSJRvrqA.d.ts} +2 -5
- package/package.json +1 -1
package/dist/eval/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/eval/index.ts","../../src/eval/config.ts","../../src/eval/suite.ts","../../src/eval/target.ts","../../src/runtime/langchain/model-resolver.ts","../../src/runtime/langchain/utils.ts","../../src/eval/expectations.ts","../../src/eval/evaluators/language.ts","../../src/eval/evaluators/response-content.ts","../../src/eval/evaluators/no-tool-calls.ts","../../src/eval/evaluators/any-tool-called.ts"],"sourcesContent":["// ── Configuration ───────────────────────────────────────────────────\nexport { configureEvals } from './config';\nexport type { EvalConfig, CreateTargetFn } from './config';\n\n// ── Suite API ────────────────────────────────────────────────────────\nexport { defineSuite, runEvals, human, ai, toolResult, fromToolSpecs } from './suite';\nexport type { SuiteConfig, TestCase, ToolDef } from './suite';\n\n// ── Expectations ─────────────────────────────────────────────────────\nexport { toolsCalled, llmJudge, noTools, anyToolCalled, respondsInLanguage, contains, notContains } from './expectations';\nexport type { Expectation } from './expectations';\n","import type { Agent, Message, ToolDefinition } from '../core/agent.interface';\nimport type { LangchainModelConfig } from '../runtime/langchain/model-resolver';\n\n/** Factory that creates a fresh Agent per test case. Receives the model string and extra suite-level tools. */\nexport type CreateTargetFn = (model: string, extraTools: ToolDefinition[]) => Agent | Promise<Agent>;\n\nexport interface EvalConfig {\n\t/** Required for model-based target and LLM evaluators (respondsInLanguage, llmJudge). */\n\tmodelConfig: LangchainModelConfig;\n\t/** Models to evaluate. Every registered suite is run once per model. */\n\tmodels: string[];\n\t/** Model for evaluators needing LLM calls (language detection, LLM-as-judge). */\n\tevaluatorModel: string;\n\t/** LangSmith experiment (dataset) name. All suites share this single experiment for easy comparison across runs. */\n\texperimentName: string;\n\t/** System prompt for model-based target. Ignored when createTarget is used. Can be overridden per-suite or per-case. */\n\tsystemPrompt?: string;\n\t/** Factory that creates a fresh Agent per test case. Receives the current model string from the models array. */\n\tcreateTarget?: CreateTargetFn;\n\t/** Transforms test case messages before sending to target. Simulates production preprocessing (e.g., message enrichment). */\n\tprepareMessages?: (messages: Message[]) => Message[] | Promise<Message[]>;\n}\n\nlet _config: EvalConfig | null = null;\n\nexport function configureEvals(config: EvalConfig): void {\n\t_config = config;\n}\n\nexport function getEvalConfig(): EvalConfig {\n\tif (!_config) {\n\t\tthrow new Error('Evals not configured. Call configureEvals() in your vitest setupFiles.');\n\t}\n\treturn _config;\n}\n\n","import * as ls from 'langsmith/vitest';\nimport { AIMessage, BaseMessage } from '@langchain/core/messages';\nimport { createEvalTarget, runAgentTarget, type MockToolDef } from './target';\nimport { type Expectation } from './expectations';\nimport { getEvalConfig, type CreateTargetFn } from './config';\nimport {\n\ttype Message,\n\ttype HumanMessage,\n\ttype AiMessage,\n\ttype ToolMessage,\n\ttype ToolSpec,\n} from '../core/agent.interface';\n\n// ── Message builders ─────────────────────────────────────────────────\n\nexport function human(content: string): HumanMessage {\n\treturn { role: 'human', content: [{ type: 'text', text: content }] };\n}\n\nexport function ai(content: string, toolCalls?: string[]): AiMessage {\n\treturn { role: 'ai', content, ...(toolCalls ? { toolCalls: toolCalls.map((name) => ({ name })) } : {}) };\n}\n\nexport function toolResult(name: string, output: string): ToolMessage {\n\treturn { role: 'tool', name, output };\n}\n\nexport interface ToolDef {\n\tdescription: string;\n\t/** A plain key→description record, or a ZodObject passed through from a ToolSpec. */\n\tschema?: Record<string, string> | import('zod').ZodObject<any>;\n\t/** Auto-stringified if not a string or function. */\n\tresponse: unknown | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\nexport interface TestCase {\n\t/** Test name. Defaults to the last human message content if omitted. */\n\tname?: string;\n\tmessages: Message[];\n\tsystemPrompt?: string;\n\t/** Override suite-level tools for this case. */\n\ttools?: Record<string, ToolDef>;\n\t/** Transforms messages before sending to target. Overrides suite-level and global hooks. */\n\tprepareMessages?: (messages: Message[]) => Message[] | Promise<Message[]>;\n\texpect: Expectation[];\n}\n\ntype TargetFn = (inputs: {\n\tsystemPrompt?: string;\n\tmessages: Message[];\n\ttools: MockToolDef[];\n}) => Promise<{ messages: BaseMessage[] }>;\n\nexport interface SuiteConfig {\n\t/** Custom target function, or model string override. Auto-created from global config if omitted. */\n\ttarget?: TargetFn | string;\n\t/** Factory that creates a fresh Agent per test case. Overrides global createTarget. */\n\tcreateTarget?: CreateTargetFn;\n\t/** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */\n\tsystemPrompt?: string;\n\ttools?: Record<string, ToolDef>;\n\t/** Transforms messages before sending to target. Overrides global hook; can be overridden per-case. */\n\tprepareMessages?: (messages: Message[]) => Message[] | Promise<Message[]>;\n\tcases: TestCase[];\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\n/**\n * Converts a `ToolSpec[]` (from a real tool provider) into the\n * `Record<string, ToolDef>` that `defineSuite` expects.\n *\n * `responses` maps tool names to canned mock responses. Tools without an\n * entry in `responses` default to `''`.\n */\nexport function fromToolSpecs(\n\tspecs: ToolSpec[],\n\tresponses: Record<string, ToolDef['response']> = {},\n): Record<string, ToolDef> {\n\treturn Object.fromEntries(\n\t\tspecs.map((spec) => [\n\t\t\tspec.name,\n\t\t\t{\n\t\t\t\tdescription: spec.description,\n\t\t\t\tschema: spec.inputSchema,\n\t\t\t\tresponse: responses[spec.name] ?? '',\n\t\t\t} satisfies ToolDef,\n\t\t]),\n\t);\n}\n\nfunction toMockTools(defs: Record<string, ToolDef>): MockToolDef[] {\n\treturn Object.entries(defs).map(([name, def]) => ({\n\t\tname,\n\t\tdescription: def.description,\n\t\tschema: def.schema ?? {},\n\t\tresponse:\n\t\t\ttypeof def.response === 'function'\n\t\t\t\t? (def.response as MockToolDef['response'])\n\t\t\t\t: typeof def.response === 'string'\n\t\t\t\t\t? def.response\n\t\t\t\t\t: JSON.stringify(def.response),\n\t}));\n}\n\nfunction lastHumanContent(messages: Message[]): string {\n\tfor (let i = messages.length - 1; i >= 0; i--) {\n\t\tconst msg = messages[i];\n\t\tif (msg.role === 'human') {\n\t\t\tconst textBlock = msg.content.find((c) => c.type === 'text');\n\t\t\treturn textBlock ? textBlock.text : '';\n\t\t}\n\t}\n\treturn '';\n}\n\nfunction resolveModelTarget(config: SuiteConfig, model: string): TargetFn {\n\tif (typeof config.target === 'function') return config.target;\n\tconst evalConfig = getEvalConfig();\n\tconst targetModel = typeof config.target === 'string' ? config.target : model;\n\treturn createEvalTarget(evalConfig.modelConfig, targetModel);\n}\n\nfunction resolveCreateTarget(config: SuiteConfig): CreateTargetFn | undefined {\n\treturn config.createTarget ?? getEvalConfig().createTarget;\n}\n\n// ── Suite registry ───────────────────────────────────────────────────\n\ninterface RegisteredSuite {\n\tname: string;\n\tconfig: SuiteConfig;\n}\n\nconst _suites: RegisteredSuite[] = [];\n\n/**\n * Registers an eval suite. Does not create tests on its own — call\n * `runEvals()` after all suites are registered to emit a single\n * LangSmith experiment containing every test case.\n */\nexport function defineSuite(name: string, config: SuiteConfig): void {\n\t_suites.push({ name, config });\n}\n\n/**\n * Emits all registered suites under a single `ls.describe` block so\n * every test case lands in one LangSmith experiment / dataset.\n *\n * Call this once, after importing all suite files.\n *\n * Individual suites are grouped with native `describe` blocks for\n * readability; test names are prefixed with the suite name\n * (e.g. \"discovery > should use search tool\").\n */\nexport function runEvals(): void {\n\tconst evalConfig = getEvalConfig();\n\n\tls.describe(evalConfig.experimentName, () => {\n\t\tfor (const currentModel of evalConfig.models) {\n\t\t\tfor (const { name: suiteName, config } of _suites) {\n\t\t\t\tconst suiteTools = config.tools ?? {};\n\t\t\t\tconst createTarget = config.target ? undefined : resolveCreateTarget(config);\n\n\t\t\t\tconst categoryLabel = suiteName.charAt(0).toUpperCase() + suiteName.slice(1);\n\t\t\t\tconst model = typeof config.target === 'string' ? config.target : currentModel;\n\n\t\t\t\tfor (const tc of config.cases) {\n\t\t\t\t\tconst testName = tc.name ?? lastHumanContent(tc.messages);\n\t\t\t\t\tconst caseToolDefs = tc.tools ?? suiteTools;\n\t\t\t\t\tconst tools = toMockTools(caseToolDefs);\n\t\t\t\t\tconst ctx = { message: lastHumanContent(tc.messages) };\n\n\t\t\t\t\tconst resolved = tc.expect.map((exp) => exp(ctx));\n\t\t\t\t\tconst evaluators = resolved.map((r) => r.evaluator);\n\t\t\t\t\tconst referenceOutputs = Object.assign({}, ...resolved.map((r) => r.referenceOutputs));\n\n\t\t\t\t\tconst fullTestName = `[${categoryLabel}] > ${testName}`;\n\n\t\t\t\t\tls.test(\n\t\t\t\t\t\t`${fullTestName} (${model})`,\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tinputs: {\n\t\t\t\t\t\t\t\tname: fullTestName,\n\t\t\t\t\t\t\t\tcategory: categoryLabel,\n\t\t\t\t\t\t\t\tmodel,\n\t\t\t\t\t\t\t\tmessages: tc.messages,\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\treferenceOutputs,\n\t\t\t\t\t\t},\n\t\t\t\t\t\tasync ({ referenceOutputs: refOut }) => {\n\t\t\t\t\t\t\tlet output: { messages: BaseMessage[] };\n\n\t\t\t\t\t\t\t// Resolution order: case > suite > global > identity\n\t\t\t\t\t\t\tconst prepareMessages =\n\t\t\t\t\t\t\t\ttc.prepareMessages ?? config.prepareMessages ?? getEvalConfig().prepareMessages;\n\t\t\t\t\t\t\tconst preparedMessages = prepareMessages ? await prepareMessages(tc.messages) : tc.messages;\n\n\t\t\t\t\t\t\tif (createTarget) {\n\t\t\t\t\t\t\t\toutput = await runAgentTarget(\n\t\t\t\t\t\t\t\t\tcreateTarget,\n\t\t\t\t\t\t\t\t\tcurrentModel,\n\t\t\t\t\t\t\t\t\tpreparedMessages,\n\t\t\t\t\t\t\t\t\tcaseToolDefs,\n\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\tconst target = resolveModelTarget(config, currentModel);\n\t\t\t\t\t\t\t\tconst globalPrompt = getEvalConfig().systemPrompt;\n\t\t\t\t\t\t\t\tconst systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;\n\t\t\t\t\t\t\t\toutput = await target({\n\t\t\t\t\t\t\t\t\tmessages: preparedMessages,\n\t\t\t\t\t\t\t\t\ttools,\n\t\t\t\t\t\t\t\t\t...(systemPrompt ? { systemPrompt } : {}),\n\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tconst calledTools = output.messages\n\t\t\t\t\t\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t\t\t\t\t\t.flatMap((m) => (m as AIMessage).tool_calls ?? [])\n\t\t\t\t\t\t\t\t.map((tc) => tc.name);\n\n\t\t\t\t\t\t\tls.logOutputs({\n\t\t\t\t\t\t\t\ttools_called: calledTools.length > 0 ? calledTools.join(' | ') : 'none',\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\tfor (const evaluator of evaluators) {\n\t\t\t\t\t\t\t\tawait evaluator({ outputs: output, referenceOutputs: refOut ?? {} });\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t},\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t});\n}\n","import { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { tool } from '@langchain/core/tools';\nimport { AIMessage, BaseMessage, SystemMessage, ToolMessage } from '@langchain/core/messages';\nimport { z } from 'zod';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../runtime/langchain/model-resolver';\nimport { type CreateTargetFn, getEvalConfig } from './config';\nimport type { AgentResult, Message as AgentMessage, ToolCallContentBlock, ToolDefinition } from '../core/agent.interface';\nimport { convertToLangchainMessages } from '../runtime/langchain/utils';\nimport type { ToolDef } from './suite';\n\nexport interface MockToolDef {\n\tname: string;\n\tdescription: string;\n\tschema: z.ZodObject<any> | Record<string, unknown>;\n\t/**\n\t * Canned response the mock tool returns.\n\t * Can be a static string, or a function that receives input and returns a response.\n\t * If a function is provided, it receives the full invocation count as a second arg\n\t * to support scenarios like \"first call fails, second call succeeds\".\n\t */\n\tresponse: string | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\nexport interface EvalTargetInput {\n\tsystemPrompt?: string;\n\tmessages: AgentMessage[];\n\ttools: MockToolDef[];\n}\n\nconst MAX_AGENT_LOOPS = 10;\n\n/**\n * Strips reasoning/thinking content blocks from an AIMessage so it can be\n * safely replayed as conversation history.\n *\n * Reasoning blocks (OpenAI `reasoning`, Anthropic `thinking`) are\n * output-only artifacts. The APIs reject them when sent back as input,\n * producing errors like \"Item of type 'reasoning' was provided without its\n * required following item.\" Stripping them does not affect model quality\n * because reasoning happens fresh on every new invocation.\n */\nexport function stripReasoningBlocks(message: AIMessage): AIMessage {\n\tif (!Array.isArray(message.content)) return message;\n\n\tconst filtered = message.content.filter(\n\t\t(block: any) => block.type !== 'reasoning' && block.type !== 'thinking',\n\t);\n\n\t// If all content was reasoning, keep an empty string so the message\n\t// remains structurally valid\n\tconst newContent = filtered.length > 0 ? filtered : '';\n\n\treturn new AIMessage({\n\t\tcontent: newContent,\n\t\ttool_calls: message.tool_calls,\n\t\tid: message.id,\n\t\tresponse_metadata: message.response_metadata,\n\t\tusage_metadata: message.usage_metadata,\n\t});\n}\n\n/**\n * Creates a LangSmith-compatible target function that runs an agentic loop\n * with mock tools and returns the full message trajectory.\n */\nexport function createEvalTarget(modelConfig: LangchainModelConfig, modelString: string) {\n\treturn async (inputs: EvalTargetInput): Promise<{ messages: BaseMessage[] }> => {\n\t\tconst resolver = new LangchainModelResolver(modelConfig);\n\t\tconst model = resolver.resolve(modelString) as BaseChatModel;\n\n\t\t// Track invocation counts per tool for stateful mock responses\n\t\tconst toolCallCounts: Record<string, number> = {};\n\n\t\t// Create langchain tools from mock definitions\n\t\tconst langchainTools = inputs.tools.map((mockTool) => {\n\t\t\ttoolCallCounts[mockTool.name] = 0;\n\n\t\t\treturn tool(\n\t\t\t\tasync (toolInput: Record<string, unknown>) => {\n\t\t\t\t\ttoolCallCounts[mockTool.name]++;\n\t\t\t\t\tif (typeof mockTool.response === 'function') {\n\t\t\t\t\t\treturn mockTool.response(toolInput, toolCallCounts[mockTool.name]);\n\t\t\t\t\t}\n\t\t\t\t\treturn mockTool.response;\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\tname: mockTool.name,\n\t\t\t\t\tdescription: mockTool.description,\n\t\t\t\t\tschema:\n\t\t\t\t\t\tmockTool.schema instanceof z.ZodObject\n\t\t\t\t\t\t\t? mockTool.schema\n\t\t\t\t\t\t\t: z.object(\n\t\t\t\t\t\t\t\t\tObject.fromEntries(\n\t\t\t\t\t\t\t\t\t\tObject.entries(mockTool.schema).map(([key, val]) => {\n\t\t\t\t\t\t\t\t\t\t\tif (typeof val === 'string') return [key, z.string().describe(val)];\n\t\t\t\t\t\t\t\t\t\t\tif (typeof val === 'number') return [key, z.number().describe(String(val))];\n\t\t\t\t\t\t\t\t\t\t\treturn [key, z.any()];\n\t\t\t\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t\t\t\t),\n\t\t\t\t\t\t\t\t),\n\t\t\t\t},\n\t\t\t);\n\t\t});\n\n\t\tconst boundModel = langchainTools.length > 0 ? model.bindTools!(langchainTools) : model;\n\n\t\tconst messages: BaseMessage[] = [];\n\n\t\tif (inputs.systemPrompt) {\n\t\t\tmessages.push(new SystemMessage(inputs.systemPrompt));\n\t\t}\n\n\t\t// Convert and push all messages (history + final human)\n\t\tmessages.push(...convertToLangchainMessages(inputs.messages));\n\n\t\t// Agentic loop: keep calling model until it stops making tool calls\n\t\tlet loopCount = 0;\n\t\twhile (loopCount < MAX_AGENT_LOOPS) {\n\t\t\tloopCount++;\n\n\t\t\tconst response = await boundModel.invoke(messages);\n\t\t\tmessages.push(stripReasoningBlocks(response as AIMessage) as BaseMessage);\n\n\t\t\tconst aiMessage = response as AIMessage;\n\t\t\tif (!aiMessage.tool_calls || aiMessage.tool_calls.length === 0) {\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t// Execute tool calls and add results\n\t\t\tfor (const tc of aiMessage.tool_calls) {\n\t\t\t\tconst mockTool = langchainTools.find((t) => t.name === tc.name);\n\t\t\t\tif (mockTool) {\n\t\t\t\t\tconst result = await mockTool.invoke(tc.args);\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: typeof result === 'string' ? result : JSON.stringify(result),\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t} else {\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: `Tool \"${tc.name}\" not found`,\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\treturn { messages };\n\t};\n}\n\n// ── Agent-based target ──────────────────────────────────────────────\n\n/**\n * Converts an `AgentResult` (from `Agent.run()`) into LangChain `BaseMessage[]`\n * so existing evaluators (trajectory match, no-tool-calls, response-content, language) work unchanged.\n *\n * Consecutive `tool_call` content blocks are grouped into a single `AIMessage` with `tool_calls`,\n * followed by one `ToolMessage` per call.\n */\nexport function agentResultToMessages(inputMessages: AgentMessage[], result: AgentResult): BaseMessage[] {\n\t// Include input messages for trajectory context\n\tconst messages: BaseMessage[] = convertToLangchainMessages(inputMessages);\n\n\t// Group content blocks into BaseMessages\n\tlet pendingToolCalls: { id: string; name: string; args: Record<string, unknown>; output: string }[] = [];\n\n\tfor (const block of result.content) {\n\t\tif (block.type === 'tool_call') {\n\t\t\tconst tc = block as ToolCallContentBlock;\n\t\t\tpendingToolCalls.push({\n\t\t\t\tid: tc.toolCallId,\n\t\t\t\tname: tc.name,\n\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\toutput: tc.output,\n\t\t\t});\n\t\t} else if (block.type === 'text') {\n\t\t\t// Flush any pending tool calls before the text block\n\t\t\tif (pendingToolCalls.length > 0) {\n\t\t\t\tmessages.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: '',\n\t\t\t\t\t\ttool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args })),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t\tfor (const tc of pendingToolCalls) {\n\t\t\t\t\tmessages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));\n\t\t\t\t}\n\t\t\t\tpendingToolCalls = [];\n\t\t\t}\n\t\t\tmessages.push(new AIMessage(block.output));\n\t\t}\n\t}\n\n\t// Flush remaining tool calls (agent ended mid-tool-use, unlikely but safe)\n\tif (pendingToolCalls.length > 0) {\n\t\tmessages.push(\n\t\t\tnew AIMessage({\n\t\t\t\tcontent: '',\n\t\t\t\ttool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args })),\n\t\t\t}),\n\t\t);\n\t\tfor (const tc of pendingToolCalls) {\n\t\t\tmessages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));\n\t\t}\n\t}\n\n\treturn messages;\n}\n\n/**\n * Converts eval `Record<string, ToolDef>` into `ToolDefinition[]` with mock `exec` functions,\n * suitable for passing to an `AgentFactory.createAgent()` call.\n */\nexport function toolDefsToDefinitions(defs: Record<string, ToolDef>): ToolDefinition[] {\n\tconst callCounts: Record<string, number> = {};\n\n\treturn Object.entries(defs).map(([name, def]) => {\n\t\tcallCounts[name] = 0;\n\n\t\treturn {\n\t\t\tname,\n\t\t\ttoolKit: 'eval-mock',\n\t\t\tdescription: def.description,\n\t\t\tinputSchema:\n\t\t\t\tdef.schema instanceof z.ZodObject\n\t\t\t\t\t? def.schema\n\t\t\t\t\t: z.object(\n\t\t\t\t\t\t\tObject.fromEntries(\n\t\t\t\t\t\t\t\tObject.entries(def.schema ?? {}).map(([key, val]) => {\n\t\t\t\t\t\t\t\t\tif (typeof val === 'string') return [key, z.string().describe(val)];\n\t\t\t\t\t\t\t\t\treturn [key, z.any()];\n\t\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t\t),\n\t\t\t\t\t\t),\n\t\t\texec: async (input: Record<string, unknown>) => {\n\t\t\t\tcallCounts[name]++;\n\t\t\t\tif (typeof def.response === 'function') {\n\t\t\t\t\treturn (def.response as (input: Record<string, unknown>, callCount: number) => string)(\n\t\t\t\t\t\tinput,\n\t\t\t\t\t\tcallCounts[name],\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t\treturn typeof def.response === 'string' ? def.response : JSON.stringify(def.response);\n\t\t\t},\n\t\t} satisfies ToolDefinition;\n\t});\n}\n\n/**\n * Runs a real `Agent` as the eval target. Creates a fresh agent per invocation via the factory,\n * sends human messages, and converts the `AgentResult` to `{ messages: BaseMessage[] }`.\n */\nexport async function runAgentTarget(\n\tcreateTarget: CreateTargetFn,\n\tmodel: string,\n\tevalMessages: AgentMessage[],\n\textraToolDefs: Record<string, ToolDef>,\n): Promise<{ messages: BaseMessage[] }> {\n\tconst extraTools = Object.keys(extraToolDefs).length > 0 ? toolDefsToDefinitions(extraToolDefs) : [];\n\tconst agent = await createTarget(model, extraTools);\n\n\tconst result = await agent.run({\n\t\tthreadId: `eval_${Date.now()}_${Math.random().toString(36).slice(2)}`,\n\t\tmessages: evalMessages,\n\t});\n\n\treturn { messages: agentResultToMessages(evalMessages, result) };\n}\n","import { BaseLanguageModel } from '@langchain/core/language_models/base';\nimport { ChatAnthropic } from '@langchain/anthropic';\nimport { AzureChatOpenAI, ChatOpenAI } from '@langchain/openai';\nimport { ReasoningEffort } from 'openai/resources';\n\nexport type LangchainOpenAIConfig = {\n\tapiKey: string;\n};\n\nexport type AzureModelProvider = 'openai' | 'anthropic';\n\nexport type LangchainAzureResourceConfig = {\n\tapiKey: string;\n\tmodels: {\n\t\tmodel: string;\n\t\tprovider: AzureModelProvider;\n\t\tendpoint: string;\n\t\tapiVersion: string;\n\t\tdeploymentName: string;\n\t}[];\n};\n\nexport type ResourceName = string;\n\nexport type LangchainModelConfig = {\n\topenai?: Record<string, LangchainOpenAIConfig>;\n\tazure?: Record<ResourceName, LangchainAzureResourceConfig>;\n};\n\nexport class LangchainModelResolver {\n\tconstructor(private config: LangchainModelConfig) {}\n\n\tresolve(modelString: string, tags?: string[], reasoningEffort?: ReasoningEffort): BaseLanguageModel {\n\t\tconst parts = modelString.split(':');\n\n\t\tif (parts.length === 1) {\n\t\t\tconst fullModelString = this.resolveFullModelString(modelString);\n\t\t\treturn this.resolve(fullModelString, tags, reasoningEffort);\n\t\t}\n\n\t\tif (parts.length === 2) {\n\t\t\tconst [provider, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, modelName, modelName, tags, reasoningEffort);\n\t\t}\n\n\t\tif (parts.length === 3) {\n\t\t\tconst [provider, configName, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, configName, modelName, tags, reasoningEffort);\n\t\t}\n\n\t\tthrow new Error(\n\t\t\t'Model string must follow format \"modelName\", \"provider:modelName\", or \"provider:configName:modelName\"',\n\t\t);\n\t}\n\n\tprivate resolveFullModelString(modelName: string): string {\n\t\tfor (const [provider, resources] of Object.entries(this.config)) {\n\t\t\tif (provider === 'openai') {\n\t\t\t\tif (modelName in (resources as Record<string, unknown>)) {\n\t\t\t\t\treturn `openai:${modelName}`;\n\t\t\t\t}\n\t\t\t} else if (provider === 'azure') {\n\t\t\t\tfor (const [resource, config] of Object.entries(\n\t\t\t\t\tresources as Record<string, { models: { model: string }[] }>,\n\t\t\t\t)) {\n\t\t\t\t\tif (config.models.some((m) => m.model === modelName)) {\n\t\t\t\t\t\treturn `azure:${resource}:${modelName}`;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tthrow new Error(`Model \"${modelName}\" not found in model config`);\n\t}\n\n\tprivate resolveByProvider(\n\t\tprovider: string,\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): BaseLanguageModel {\n\t\tswitch (provider) {\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveOpenAI(configName, modelName, tags, reasoningEffort);\n\t\t\tcase 'azure':\n\t\t\t\treturn this.resolveAzure(configName, modelName, tags, reasoningEffort);\n\t\t\tdefault:\n\t\t\t\tthrow new Error(`Unsupported model provider: ${provider}`);\n\t\t}\n\t}\n\n\tprivate resolveOpenAI(\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): ChatOpenAI {\n\t\tconst providerConfig = this.config.openai?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"openai\" is missing`);\n\t\t}\n\n\t\treturn new ChatOpenAI({\n\t\t\tapiKey: providerConfig.apiKey,\n\t\t\tmodelName: modelName,\n\t\t\ttags: tags,\n\t\t\t...(reasoningEffort && {\n\t\t\t\treasoning: {\n\t\t\t\t\teffort: reasoningEffort,\n\t\t\t\t\tsummary: 'auto',\n\t\t\t\t},\n\t\t\t\tuseResponsesApi: true,\n\t\t\t}),\n\t\t});\n\t}\n\n\tprivate resolveAzure(\n\t\tresourceName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): BaseLanguageModel {\n\t\tconst resource = this.config.azure?.[resourceName];\n\t\tif (!resource) {\n\t\t\tthrow new Error(`Resource \"${resourceName}\" for provider \"azure\" is missing`);\n\t\t}\n\n\t\tconst modelEntry = resource.models.find((m) => m.model === modelName);\n\t\tif (!modelEntry) {\n\t\t\tthrow new Error(`Model \"${modelName}\" not found in Azure resource \"${resourceName}\"`);\n\t\t}\n\n\t\tswitch (modelEntry.provider) {\n\t\t\tcase 'anthropic':\n\t\t\t\treturn this.resolveAzureAnthropic(resource, modelEntry, tags, reasoningEffort);\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveAzureOpenAI(resource, modelEntry, tags, reasoningEffort);\n\t\t}\n\t}\n\n\tprivate resolveAzureOpenAI(\n\t\tresource: LangchainAzureResourceConfig,\n\t\tmodelEntry: LangchainAzureResourceConfig['models'][number],\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): AzureChatOpenAI {\n\t\t/**\n\t\t * OpenAI reasoning models require the Responses API which AzureChatOpenAI\n\t\t * does not support. We rewrite the endpoint to the Azure Responses API path.\n\t\t */\n\t\tconst endpoint = reasoningEffort\n\t\t\t? `${modelEntry.endpoint.replace(/\\/$/, '')}/openai/responses?api-version=${modelEntry.apiVersion}`\n\t\t\t: modelEntry.endpoint;\n\n\t\treturn new AzureChatOpenAI({\n\t\t\tmodel: modelEntry.model,\n\t\t\tazureOpenAIApiKey: resource.apiKey,\n\t\t\tazureOpenAIEndpoint: endpoint,\n\t\t\tazureOpenAIApiDeploymentName: modelEntry.deploymentName,\n\t\t\tazureOpenAIApiVersion: modelEntry.apiVersion,\n\t\t\ttags: tags,\n\t\t\t...(reasoningEffort && {\n\t\t\t\treasoning: {\n\t\t\t\t\teffort: reasoningEffort,\n\t\t\t\t\tsummary: 'auto',\n\t\t\t\t},\n\t\t\t}),\n\t\t});\n\t}\n\n\tprivate static readonly THINKING_BUDGET: Record<string, number> = {\n\t\tminimal: 1024,\n\t\tlow: 4096,\n\t\tmedium: 10000,\n\t\thigh: 16000,\n\t\txhigh: 32000,\n\t};\n\n\tprivate resolveAzureAnthropic(\n\t\tresource: LangchainAzureResourceConfig,\n\t\tmodelEntry: LangchainAzureResourceConfig['models'][number],\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): ChatAnthropic {\n\t\tconst budgetTokens = reasoningEffort ? LangchainModelResolver.THINKING_BUDGET[reasoningEffort] : undefined;\n\n\t\treturn new ChatAnthropic({\n\t\t\tmodel: modelEntry.model,\n\t\t\tapiKey: resource.apiKey,\n\t\t\tclientOptions: { baseURL: modelEntry.endpoint },\n\t\t\ttags: tags,\n\t\t\t...(budgetTokens && {\n\t\t\t\tmaxTokens: budgetTokens * 2,\n\t\t\t\tthinking: {\n\t\t\t\t\ttype: 'enabled',\n\t\t\t\t\tbudget_tokens: budgetTokens,\n\t\t\t\t},\n\t\t\t}),\n\t\t});\n\t}\n}\n","import { Message } from '@core/agent.interface';\nimport { AIMessage, BaseMessage, HumanMessage, ToolMessage } from 'langchain';\n\nexport function convertToLangchainMessages(messages: Message[]): BaseMessage[] {\n\tconst result: BaseMessage[] = [];\n\tlet tcIdx = 0;\n\tlet pendingToolCallIds: string[] = [];\n\n\tfor (const msg of messages) {\n\t\tif (msg.role === 'human') {\n\t\t\tresult.push(\n\t\t\t\tnew HumanMessage({\n\t\t\t\t\tcontent: msg.content.map((c) => {\n\t\t\t\t\t\tif (c.type === 'image') {\n\t\t\t\t\t\t\treturn { type: 'image_url', image_url: { url: c.url } };\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn c;\n\t\t\t\t\t}) as any,\n\t\t\t\t}),\n\t\t\t);\n\t\t} else if (msg.role === 'ai') {\n\t\t\tif (msg.toolCalls && msg.toolCalls.length > 0) {\n\t\t\t\tpendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);\n\t\t\t\tresult.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: msg.content,\n\t\t\t\t\t\ttool_calls: msg.toolCalls.map((tc, i) => ({\n\t\t\t\t\t\t\tid: pendingToolCallIds[i],\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\t\t\t})),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t} else {\n\t\t\t\tresult.push(new AIMessage(msg.content));\n\t\t\t}\n\t\t} else if (msg.role === 'tool') {\n\t\t\tconst toolCallId = pendingToolCallIds.shift();\n\t\t\tif (!toolCallId)\n\t\t\t\tthrow new Error(`ToolMessage for \"${msg.name}\" without a preceding AiMessage with toolCalls`);\n\t\t\tresult.push(\n\t\t\t\tnew ToolMessage({\n\t\t\t\t\tcontent: msg.output,\n\t\t\t\t\ttool_call_id: toolCallId,\n\t\t\t\t\tname: msg.name,\n\t\t\t\t}),\n\t\t\t);\n\t\t}\n\t}\n\n\treturn result;\n}\n","import * as ls from 'langsmith/vitest';\nimport { createTrajectoryMatchEvaluator, createTrajectoryLLMAsJudge, TRAJECTORY_ACCURACY_PROMPT } from 'agentevals';\nimport { createLanguageEvaluator } from './evaluators/language';\nimport { createResponseContentEvaluator } from './evaluators/response-content';\nimport { createNoToolCallsEvaluator } from './evaluators/no-tool-calls';\nimport { createAnyToolCalledEvaluator } from './evaluators/any-tool-called';\nimport { getEvalConfig } from './config';\n\n// ── Types ────────────────────────────────────────────────────────────\n\ntype EvaluatorFn = (args: { outputs: Record<string, any>; referenceOutputs: Record<string, any> }) => Promise<any>;\n\ninterface ResolvedExpectation {\n\tevaluator: EvaluatorFn;\n\treferenceOutputs: Record<string, unknown>;\n}\n\n/** A factory that receives test context and returns an evaluator + its referenceOutputs. */\nexport type Expectation = (ctx: { message: string }) => ResolvedExpectation;\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nfunction withTrajectoryGuard(evaluator: any, key: string): EvaluatorFn {\n\treturn async ({ outputs, referenceOutputs }) => {\n\t\tif (!referenceOutputs?.referenceTrajectory) {\n\t\t\treturn { key, score: true, comment: 'No referenceTrajectory specified, skipping' };\n\t\t}\n\t\treturn evaluator({ outputs, referenceOutputs: referenceOutputs.referenceTrajectory });\n\t};\n}\n\nfunction buildTrajectory(message: string, toolNames: string[]): Record<string, unknown>[] {\n\tconst trajectory: Record<string, unknown>[] = [];\n\tlet tcIdx = 0;\n\n\ttrajectory.push({ role: 'user', content: message });\n\n\tfor (const name of toolNames) {\n\t\tconst id = `tc${++tcIdx}`;\n\t\ttrajectory.push({\n\t\t\trole: 'assistant',\n\t\t\tcontent: '',\n\t\t\ttool_calls: [{ function: { name, arguments: '{}' }, id, type: 'function' }],\n\t\t});\n\t\ttrajectory.push({ role: 'tool', content: '...', tool_call_id: id });\n\t}\n\n\ttrajectory.push({ role: 'assistant', content: '...' });\n\n\treturn trajectory;\n}\n\n// ── Expectation functions ────────────────────────────────────────────\n\n/**\n * Expect the agent to call tools in order (superset trajectory match).\n * Empty `[]` means the agent should answer directly without calling any tools.\n */\nexport function toolsCalled(tools: string[]): Expectation {\n\treturn (ctx) => ({\n\t\tevaluator: ls.wrapEvaluator(\n\t\t\twithTrajectoryGuard(\n\t\t\t\tcreateTrajectoryMatchEvaluator({ trajectoryMatchMode: 'superset', toolArgsMatchMode: 'ignore' }) as any,\n\t\t\t\t'trajectory_match',\n\t\t\t),\n\t\t),\n\t\treferenceOutputs: { referenceTrajectory: buildTrajectory(ctx.message, tools) },\n\t});\n}\n\n/**\n * Run an LLM-as-judge evaluator on the trajectory.\n * Requires `toolsCalled` in the same expect array.\n * Uses the globally configured evaluator model.\n */\nexport function llmJudge(): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(\n\t\t\t\twithTrajectoryGuard(\n\t\t\t\t\tcreateTrajectoryLLMAsJudge({ prompt: TRAJECTORY_ACCURACY_PROMPT, model }) as any,\n\t\t\t\t\t'trajectory_llm_judge',\n\t\t\t\t),\n\t\t\t),\n\t\t\treferenceOutputs: {},\n\t\t};\n\t};\n}\n\n/**\n * Assert the agent made zero tool calls.\n * Optionally allow specific tools via `except` — calls to those tools\n * are permitted (but not required), while any other tool call fails.\n */\nexport function noTools(options?: { except: string[] }): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createNoToolCallsEvaluator()),\n\t\treferenceOutputs: {\n\t\t\texpectNoToolCalls: true,\n\t\t\t...(options?.except?.length ? { exceptTools: options.except } : {}),\n\t\t},\n\t});\n}\n\n/**\n * Assert the response is in the given language (ISO 639-1 code).\n * Uses the globally configured evaluator model for language detection.\n * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').\n */\nexport function respondsInLanguage(code: string): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(createLanguageEvaluator(config.modelConfig, model)),\n\t\t\treferenceOutputs: { expectedLanguage: code },\n\t\t};\n\t};\n}\n\n/**\n * Assert that at least one tool call was made.\n * When `tools` is provided, at least one of those specific tools must\n * appear in the trajectory. When omitted, any tool call satisfies it.\n */\nexport function anyToolCalled(tools?: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createAnyToolCalledEvaluator()),\n\t\treferenceOutputs: {\n\t\t\texpectAnyToolCall: true,\n\t\t\t...(tools?.length ? { anyToolsExpected: tools } : {}),\n\t\t},\n\t});\n}\n\n/** Assert the response contains all given strings. */\nexport function contains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseContains: strings },\n\t});\n}\n\n/** Assert the response does not contain any of the given strings. */\nexport function notContains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseMustNotContain: strings },\n\t});\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\nimport { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../../runtime/langchain/model-resolver';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * is in the expected language. Uses a cheap LLM call for language detection.\n */\nexport function createLanguageEvaluator(modelConfig: LangchainModelConfig, model: string) {\n\tconst resolver = new LangchainModelResolver(modelConfig);\n\tconst judge = resolver.resolve(model) as BaseChatModel;\n\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst expectedLanguage = referenceOutputs?.expectedLanguage;\n\t\tif (!expectedLanguage) {\n\t\t\treturn { key: 'language_match', score: true, comment: 'No expected language specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'language_match', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content);\n\n\t\t// Use LLM to detect the language\n\t\tconst detection = await judge.invoke([\n\t\t\t{\n\t\t\t\trole: 'system',\n\t\t\t\tcontent: 'You are a language detection tool. Respond with ONLY the ISO 639-1 language code (e.g., \"en\", \"tr\", \"de\", \"fr\") of the text provided. Nothing else.',\n\t\t\t},\n\t\t\t{\n\t\t\t\trole: 'user',\n\t\t\t\tcontent: responseText,\n\t\t\t},\n\t\t]);\n\n\t\tconst detectedLanguage = (typeof detection.content === 'string' ? detection.content : '').trim().toLowerCase();\n\n\t\tconst matches = detectedLanguage === expectedLanguage.toLowerCase();\n\n\t\treturn {\n\t\t\tkey: 'language_match',\n\t\t\tscore: matches,\n\t\t\tcomment: matches\n\t\t\t\t? `Response language matches expected: ${expectedLanguage}`\n\t\t\t\t: `Expected \"${expectedLanguage}\" but detected \"${detectedLanguage}\"`,\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * contains expected strings and doesn't contain forbidden strings.\n */\nexport function createResponseContentEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst mustContain: string[] = referenceOutputs?.responseContains || [];\n\t\tconst mustNotContain: string[] = referenceOutputs?.responseMustNotContain || [];\n\n\t\tif (mustContain.length === 0 && mustNotContain.length === 0) {\n\t\t\treturn { key: 'response_content', score: true, comment: 'No content assertions specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'response_content', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = (typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content)).toLowerCase();\n\n\t\tconst failures: string[] = [];\n\n\t\tfor (const expected of mustContain) {\n\t\t\tif (!responseText.includes(expected.toLowerCase())) {\n\t\t\t\tfailures.push(`Missing expected text: \"${expected}\"`);\n\t\t\t}\n\t\t}\n\n\t\tfor (const forbidden of mustNotContain) {\n\t\t\tif (responseText.includes(forbidden.toLowerCase())) {\n\t\t\t\tfailures.push(`Contains forbidden text: \"${forbidden}\"`);\n\t\t\t}\n\t\t}\n\n\t\tconst passed = failures.length === 0;\n\n\t\treturn {\n\t\t\tkey: 'response_content',\n\t\t\tscore: passed,\n\t\t\tcomment: passed ? 'All content assertions passed' : failures.join('; '),\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that asserts the agent made zero tool calls.\n * When `referenceOutputs.exceptTools` is set, calls to those tools are\n * allowed (but not required) — only calls to non-excepted tools cause failure.\n */\nexport function createNoToolCallsEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\t// Only run this evaluator if the reference explicitly expects no tool calls\n\t\tif (referenceOutputs?.maxToolCalls !== 0 && referenceOutputs?.expectNoToolCalls !== true) {\n\t\t\treturn { key: 'no_tool_calls', score: true, comment: 'No tool call restriction specified, skipping' };\n\t\t}\n\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst exceptTools: string[] = referenceOutputs?.exceptTools ?? [];\n\n\t\tconst toolCalls = messages\n\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t.flatMap((m) => (m as AIMessage).tool_calls || []);\n\n\t\tconst disallowedCalls = exceptTools.length > 0 ? toolCalls.filter((tc) => !exceptTools.includes(tc.name)) : toolCalls;\n\n\t\tconst passed = disallowedCalls.length === 0;\n\n\t\tif (exceptTools.length > 0) {\n\t\t\treturn {\n\t\t\t\tkey: 'no_tool_calls',\n\t\t\t\tscore: passed,\n\t\t\t\tcomment: passed\n\t\t\t\t\t? `No disallowed tool calls made (allowed: ${exceptTools.join(', ')})`\n\t\t\t\t\t: `Agent made ${disallowedCalls.length} disallowed tool call(s): ${disallowedCalls.map((tc) => tc.name).join(', ')}`,\n\t\t\t};\n\t\t}\n\n\t\treturn {\n\t\t\tkey: 'no_tool_calls',\n\t\t\tscore: passed,\n\t\t\tcomment: passed\n\t\t\t\t? 'No tool calls made (as expected)'\n\t\t\t\t: `Agent made ${toolCalls.length} tool call(s): ${toolCalls.map((tc) => tc.name).join(', ')}`,\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that asserts at least one tool was called.\n * When `referenceOutputs.anyToolsExpected` contains tool names, at least\n * one of those specific tools must appear. When the list is empty, any\n * tool call satisfies the expectation.\n */\nexport function createAnyToolCalledEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tif (referenceOutputs?.expectAnyToolCall !== true) {\n\t\t\treturn { key: 'any_tool_called', score: true, comment: 'No any-tool-call expectation specified, skipping' };\n\t\t}\n\n\t\tconst expectedTools: string[] = referenceOutputs?.anyToolsExpected ?? [];\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\n\t\tconst calledToolNames = messages\n\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t.flatMap((m) => (m as AIMessage).tool_calls || [])\n\t\t\t.map((tc) => tc.name);\n\n\t\t// No specific tools requested — any tool call satisfies the expectation\n\t\tif (expectedTools.length === 0) {\n\t\t\tconst passed = calledToolNames.length > 0;\n\t\t\treturn {\n\t\t\t\tkey: 'any_tool_called',\n\t\t\t\tscore: passed,\n\t\t\t\tcomment: passed\n\t\t\t\t\t? `Agent called tool(s): ${calledToolNames.join(', ')}`\n\t\t\t\t\t: 'Agent made no tool calls (expected at least one)',\n\t\t\t};\n\t\t}\n\n\t\tconst matchedTools = expectedTools.filter((name) => calledToolNames.includes(name));\n\t\tconst passed = matchedTools.length > 0;\n\n\t\treturn {\n\t\t\tkey: 'any_tool_called',\n\t\t\tscore: passed,\n\t\t\tcomment: passed\n\t\t\t\t? `Called expected tool(s): ${matchedTools.join(', ')}`\n\t\t\t\t: `None of the expected tools were called (expected one of: ${expectedTools.join(', ')}; actual: ${calledToolNames.length > 0 ? calledToolNames.join(', ') : 'none'})`,\n\t\t};\n\t};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACuBA,IAAI,UAA6B;AAE1B,SAAS,eAAe,QAA0B;AACxD,YAAU;AACX;AAEO,SAAS,gBAA4B;AAC3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,wEAAwE;AAAA,EACzF;AACA,SAAO;AACR;;;AClCA,SAAoB;AACpB,IAAAA,mBAAuC;;;ACAvC,mBAAqB;AACrB,sBAAmE;AACnE,iBAAkB;;;ACFlB,uBAA8B;AAC9B,oBAA4C;AA2BrC,IAAM,yBAAN,MAAM,wBAAuB;AAAA,EACnC,YAAoB,QAA8B;AAA9B;AAAA,EAA+B;AAAA,EAEnD,QAAQ,aAAqB,MAAiB,iBAAsD;AACnG,UAAM,QAAQ,YAAY,MAAM,GAAG;AAEnC,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,kBAAkB,KAAK,uBAAuB,WAAW;AAC/D,aAAO,KAAK,QAAQ,iBAAiB,MAAM,eAAe;AAAA,IAC3D;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,SAAS,IAAI;AAC9B,aAAO,KAAK,kBAAkB,UAAU,WAAW,WAAW,MAAM,eAAe;AAAA,IACpF;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,YAAY,SAAS,IAAI;AAC1C,aAAO,KAAK,kBAAkB,UAAU,YAAY,WAAW,MAAM,eAAe;AAAA,IACrF;AAEA,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAAA,EAEQ,uBAAuB,WAA2B;AACzD,eAAW,CAAC,UAAU,SAAS,KAAK,OAAO,QAAQ,KAAK,MAAM,GAAG;AAChE,UAAI,aAAa,UAAU;AAC1B,YAAI,aAAc,WAAuC;AACxD,iBAAO,UAAU,SAAS;AAAA,QAC3B;AAAA,MACD,WAAW,aAAa,SAAS;AAChC,mBAAW,CAAC,UAAU,MAAM,KAAK,OAAO;AAAA,UACvC;AAAA,QACD,GAAG;AACF,cAAI,OAAO,OAAO,KAAK,CAAC,MAAM,EAAE,UAAU,SAAS,GAAG;AACrD,mBAAO,SAAS,QAAQ,IAAI,SAAS;AAAA,UACtC;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAEA,UAAM,IAAI,MAAM,UAAU,SAAS,6BAA6B;AAAA,EACjE;AAAA,EAEQ,kBACP,UACA,YACA,WACA,MACA,iBACoB;AACpB,YAAQ,UAAU;AAAA,MACjB,KAAK;AACJ,eAAO,KAAK,cAAc,YAAY,WAAW,MAAM,eAAe;AAAA,MACvE,KAAK;AACJ,eAAO,KAAK,aAAa,YAAY,WAAW,MAAM,eAAe;AAAA,MACtE;AACC,cAAM,IAAI,MAAM,+BAA+B,QAAQ,EAAE;AAAA,IAC3D;AAAA,EACD;AAAA,EAEQ,cACP,YACA,WACA,MACA,iBACa;AACb,UAAM,iBAAiB,KAAK,OAAO,SAAS,UAAU;AACtD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,oCAAoC;AAAA,IACjF;AAEA,WAAO,IAAI,yBAAW;AAAA,MACrB,QAAQ,eAAe;AAAA,MACvB;AAAA,MACA;AAAA,MACA,GAAI,mBAAmB;AAAA,QACtB,WAAW;AAAA,UACV,QAAQ;AAAA,UACR,SAAS;AAAA,QACV;AAAA,QACA,iBAAiB;AAAA,MAClB;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,aACP,cACA,WACA,MACA,iBACoB;AACpB,UAAM,WAAW,KAAK,OAAO,QAAQ,YAAY;AACjD,QAAI,CAAC,UAAU;AACd,YAAM,IAAI,MAAM,aAAa,YAAY,mCAAmC;AAAA,IAC7E;AAEA,UAAM,aAAa,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,UAAU,SAAS;AACpE,QAAI,CAAC,YAAY;AAChB,YAAM,IAAI,MAAM,UAAU,SAAS,kCAAkC,YAAY,GAAG;AAAA,IACrF;AAEA,YAAQ,WAAW,UAAU;AAAA,MAC5B,KAAK;AACJ,eAAO,KAAK,sBAAsB,UAAU,YAAY,MAAM,eAAe;AAAA,MAC9E,KAAK;AACJ,eAAO,KAAK,mBAAmB,UAAU,YAAY,MAAM,eAAe;AAAA,IAC5E;AAAA,EACD;AAAA,EAEQ,mBACP,UACA,YACA,MACA,iBACkB;AAKlB,UAAM,WAAW,kBACd,GAAG,WAAW,SAAS,QAAQ,OAAO,EAAE,CAAC,iCAAiC,WAAW,UAAU,KAC/F,WAAW;AAEd,WAAO,IAAI,8BAAgB;AAAA,MAC1B,OAAO,WAAW;AAAA,MAClB,mBAAmB,SAAS;AAAA,MAC5B,qBAAqB;AAAA,MACrB,8BAA8B,WAAW;AAAA,MACzC,uBAAuB,WAAW;AAAA,MAClC;AAAA,MACA,GAAI,mBAAmB;AAAA,QACtB,WAAW;AAAA,UACV,QAAQ;AAAA,UACR,SAAS;AAAA,QACV;AAAA,MACD;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEA,OAAwB,kBAA0C;AAAA,IACjE,SAAS;AAAA,IACT,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,EACR;AAAA,EAEQ,sBACP,UACA,YACA,MACA,iBACgB;AAChB,UAAM,eAAe,kBAAkB,wBAAuB,gBAAgB,eAAe,IAAI;AAEjG,WAAO,IAAI,+BAAc;AAAA,MACxB,OAAO,WAAW;AAAA,MAClB,QAAQ,SAAS;AAAA,MACjB,eAAe,EAAE,SAAS,WAAW,SAAS;AAAA,MAC9C;AAAA,MACA,GAAI,gBAAgB;AAAA,QACnB,WAAW,eAAe;AAAA,QAC1B,UAAU;AAAA,UACT,MAAM;AAAA,UACN,eAAe;AAAA,QAChB;AAAA,MACD;AAAA,IACD,CAAC;AAAA,EACF;AACD;;;ACxMA,uBAAkE;AAE3D,SAAS,2BAA2B,UAAoC;AAC9E,QAAM,SAAwB,CAAC;AAC/B,MAAI,QAAQ;AACZ,MAAI,qBAA+B,CAAC;AAEpC,aAAW,OAAO,UAAU;AAC3B,QAAI,IAAI,SAAS,SAAS;AACzB,aAAO;AAAA,QACN,IAAI,8BAAa;AAAA,UAChB,SAAS,IAAI,QAAQ,IAAI,CAAC,MAAM;AAC/B,gBAAI,EAAE,SAAS,SAAS;AACvB,qBAAO,EAAE,MAAM,aAAa,WAAW,EAAE,KAAK,EAAE,IAAI,EAAE;AAAA,YACvD;AACA,mBAAO;AAAA,UACR,CAAC;AAAA,QACF,CAAC;AAAA,MACF;AAAA,IACD,WAAW,IAAI,SAAS,MAAM;AAC7B,UAAI,IAAI,aAAa,IAAI,UAAU,SAAS,GAAG;AAC9C,6BAAqB,IAAI,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,EAAE;AAC5D,eAAO;AAAA,UACN,IAAI,2BAAU;AAAA,YACb,SAAS,IAAI;AAAA,YACb,YAAY,IAAI,UAAU,IAAI,CAAC,IAAI,OAAO;AAAA,cACzC,IAAI,mBAAmB,CAAC;AAAA,cACxB,MAAM,GAAG;AAAA,cACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,YAC1C,EAAE;AAAA,UACH,CAAC;AAAA,QACF;AAAA,MACD,OAAO;AACN,eAAO,KAAK,IAAI,2BAAU,IAAI,OAAO,CAAC;AAAA,MACvC;AAAA,IACD,WAAW,IAAI,SAAS,QAAQ;AAC/B,YAAM,aAAa,mBAAmB,MAAM;AAC5C,UAAI,CAAC;AACJ,cAAM,IAAI,MAAM,oBAAoB,IAAI,IAAI,gDAAgD;AAC7F,aAAO;AAAA,QACN,IAAI,6BAAY;AAAA,UACf,SAAS,IAAI;AAAA,UACb,cAAc;AAAA,UACd,MAAM,IAAI;AAAA,QACX,CAAC;AAAA,MACF;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;;;AFtBA,IAAM,kBAAkB;AAYjB,SAAS,qBAAqB,SAA+B;AACnE,MAAI,CAAC,MAAM,QAAQ,QAAQ,OAAO,EAAG,QAAO;AAE5C,QAAM,WAAW,QAAQ,QAAQ;AAAA,IAChC,CAAC,UAAe,MAAM,SAAS,eAAe,MAAM,SAAS;AAAA,EAC9D;AAIA,QAAM,aAAa,SAAS,SAAS,IAAI,WAAW;AAEpD,SAAO,IAAI,0BAAU;AAAA,IACpB,SAAS;AAAA,IACT,YAAY,QAAQ;AAAA,IACpB,IAAI,QAAQ;AAAA,IACZ,mBAAmB,QAAQ;AAAA,IAC3B,gBAAgB,QAAQ;AAAA,EACzB,CAAC;AACF;AAMO,SAAS,iBAAiB,aAAmC,aAAqB;AACxF,SAAO,OAAO,WAAkE;AAC/E,UAAM,WAAW,IAAI,uBAAuB,WAAW;AACvD,UAAM,QAAQ,SAAS,QAAQ,WAAW;AAG1C,UAAM,iBAAyC,CAAC;AAGhD,UAAM,iBAAiB,OAAO,MAAM,IAAI,CAAC,aAAa;AACrD,qBAAe,SAAS,IAAI,IAAI;AAEhC,iBAAO;AAAA,QACN,OAAO,cAAuC;AAC7C,yBAAe,SAAS,IAAI;AAC5B,cAAI,OAAO,SAAS,aAAa,YAAY;AAC5C,mBAAO,SAAS,SAAS,WAAW,eAAe,SAAS,IAAI,CAAC;AAAA,UAClE;AACA,iBAAO,SAAS;AAAA,QACjB;AAAA,QACA;AAAA,UACC,MAAM,SAAS;AAAA,UACf,aAAa,SAAS;AAAA,UACtB,QACC,SAAS,kBAAkB,aAAE,YAC1B,SAAS,SACT,aAAE;AAAA,YACF,OAAO;AAAA,cACN,OAAO,QAAQ,SAAS,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,MAAM;AACnD,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,GAAG,CAAC;AAClE,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,OAAO,GAAG,CAAC,CAAC;AAC1E,uBAAO,CAAC,KAAK,aAAE,IAAI,CAAC;AAAA,cACrB,CAAC;AAAA,YACF;AAAA,UACD;AAAA,QACJ;AAAA,MACD;AAAA,IACD,CAAC;AAED,UAAM,aAAa,eAAe,SAAS,IAAI,MAAM,UAAW,cAAc,IAAI;AAElF,UAAM,WAA0B,CAAC;AAEjC,QAAI,OAAO,cAAc;AACxB,eAAS,KAAK,IAAI,8BAAc,OAAO,YAAY,CAAC;AAAA,IACrD;AAGA,aAAS,KAAK,GAAG,2BAA2B,OAAO,QAAQ,CAAC;AAG5D,QAAI,YAAY;AAChB,WAAO,YAAY,iBAAiB;AACnC;AAEA,YAAM,WAAW,MAAM,WAAW,OAAO,QAAQ;AACjD,eAAS,KAAK,qBAAqB,QAAqB,CAAgB;AAExE,YAAM,YAAY;AAClB,UAAI,CAAC,UAAU,cAAc,UAAU,WAAW,WAAW,GAAG;AAC/D;AAAA,MACD;AAGA,iBAAW,MAAM,UAAU,YAAY;AACtC,cAAM,WAAW,eAAe,KAAK,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI;AAC9D,YAAI,UAAU;AACb,gBAAM,SAAS,MAAM,SAAS,OAAO,GAAG,IAAI;AAC5C,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,OAAO,WAAW,WAAW,SAAS,KAAK,UAAU,MAAM;AAAA,cACpE,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD,OAAO;AACN,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,SAAS,GAAG,IAAI;AAAA,cACzB,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAEA,WAAO,EAAE,SAAS;AAAA,EACnB;AACD;AAWO,SAAS,sBAAsB,eAA+B,QAAoC;AAExG,QAAM,WAA0B,2BAA2B,aAAa;AAGxE,MAAI,mBAAkG,CAAC;AAEvG,aAAW,SAAS,OAAO,SAAS;AACnC,QAAI,MAAM,SAAS,aAAa;AAC/B,YAAM,KAAK;AACX,uBAAiB,KAAK;AAAA,QACrB,IAAI,GAAG;AAAA,QACP,MAAM,GAAG;AAAA,QACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,QACzC,QAAQ,GAAG;AAAA,MACZ,CAAC;AAAA,IACF,WAAW,MAAM,SAAS,QAAQ;AAEjC,UAAI,iBAAiB,SAAS,GAAG;AAChC,iBAAS;AAAA,UACR,IAAI,0BAAU;AAAA,YACb,SAAS;AAAA,YACT,YAAY,iBAAiB,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,IAAI,MAAM,GAAG,MAAM,MAAM,GAAG,KAAK,EAAE;AAAA,UACvF,CAAC;AAAA,QACF;AACA,mBAAW,MAAM,kBAAkB;AAClC,mBAAS,KAAK,IAAI,4BAAY,EAAE,SAAS,GAAG,QAAQ,cAAc,GAAG,IAAI,MAAM,GAAG,KAAK,CAAC,CAAC;AAAA,QAC1F;AACA,2BAAmB,CAAC;AAAA,MACrB;AACA,eAAS,KAAK,IAAI,0BAAU,MAAM,MAAM,CAAC;AAAA,IAC1C;AAAA,EACD;AAGA,MAAI,iBAAiB,SAAS,GAAG;AAChC,aAAS;AAAA,MACR,IAAI,0BAAU;AAAA,QACb,SAAS;AAAA,QACT,YAAY,iBAAiB,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,IAAI,MAAM,GAAG,MAAM,MAAM,GAAG,KAAK,EAAE;AAAA,MACvF,CAAC;AAAA,IACF;AACA,eAAW,MAAM,kBAAkB;AAClC,eAAS,KAAK,IAAI,4BAAY,EAAE,SAAS,GAAG,QAAQ,cAAc,GAAG,IAAI,MAAM,GAAG,KAAK,CAAC,CAAC;AAAA,IAC1F;AAAA,EACD;AAEA,SAAO;AACR;AAMO,SAAS,sBAAsB,MAAiD;AACtF,QAAM,aAAqC,CAAC;AAE5C,SAAO,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,GAAG,MAAM;AAChD,eAAW,IAAI,IAAI;AAEnB,WAAO;AAAA,MACN;AAAA,MACA,SAAS;AAAA,MACT,aAAa,IAAI;AAAA,MACjB,aACC,IAAI,kBAAkB,aAAE,YACrB,IAAI,SACJ,aAAE;AAAA,QACF,OAAO;AAAA,UACN,OAAO,QAAQ,IAAI,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,MAAM;AACpD,gBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,GAAG,CAAC;AAClE,mBAAO,CAAC,KAAK,aAAE,IAAI,CAAC;AAAA,UACrB,CAAC;AAAA,QACF;AAAA,MACD;AAAA,MACH,MAAM,OAAO,UAAmC;AAC/C,mBAAW,IAAI;AACf,YAAI,OAAO,IAAI,aAAa,YAAY;AACvC,iBAAQ,IAAI;AAAA,YACX;AAAA,YACA,WAAW,IAAI;AAAA,UAChB;AAAA,QACD;AACA,eAAO,OAAO,IAAI,aAAa,WAAW,IAAI,WAAW,KAAK,UAAU,IAAI,QAAQ;AAAA,MACrF;AAAA,IACD;AAAA,EACD,CAAC;AACF;AAMA,eAAsB,eACrB,cACA,OACA,cACA,eACuC;AACvC,QAAM,aAAa,OAAO,KAAK,aAAa,EAAE,SAAS,IAAI,sBAAsB,aAAa,IAAI,CAAC;AACnG,QAAM,QAAQ,MAAM,aAAa,OAAO,UAAU;AAElD,QAAM,SAAS,MAAM,MAAM,IAAI;AAAA,IAC9B,UAAU,QAAQ,KAAK,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,CAAC,CAAC;AAAA,IACnE,UAAU;AAAA,EACX,CAAC;AAED,SAAO,EAAE,UAAU,sBAAsB,cAAc,MAAM,EAAE;AAChE;;;ADlQO,SAAS,MAAM,SAA+B;AACpD,SAAO,EAAE,MAAM,SAAS,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,QAAQ,CAAC,EAAE;AACpE;AAEO,SAAS,GAAG,SAAiB,WAAiC;AACpE,SAAO,EAAE,MAAM,MAAM,SAAS,GAAI,YAAY,EAAE,WAAW,UAAU,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE,EAAE,IAAI,CAAC,EAAG;AACxG;AAEO,SAAS,WAAW,MAAc,QAA6B;AACrE,SAAO,EAAE,MAAM,QAAQ,MAAM,OAAO;AACrC;AAkDO,SAAS,cACf,OACA,YAAiD,CAAC,GACxB;AAC1B,SAAO,OAAO;AAAA,IACb,MAAM,IAAI,CAAC,SAAS;AAAA,MACnB,KAAK;AAAA,MACL;AAAA,QACC,aAAa,KAAK;AAAA,QAClB,QAAQ,KAAK;AAAA,QACb,UAAU,UAAU,KAAK,IAAI,KAAK;AAAA,MACnC;AAAA,IACD,CAAC;AAAA,EACF;AACD;AAEA,SAAS,YAAY,MAA8C;AAClE,SAAO,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,GAAG,OAAO;AAAA,IACjD;AAAA,IACA,aAAa,IAAI;AAAA,IACjB,QAAQ,IAAI,UAAU,CAAC;AAAA,IACvB,UACC,OAAO,IAAI,aAAa,aACpB,IAAI,WACL,OAAO,IAAI,aAAa,WACvB,IAAI,WACJ,KAAK,UAAU,IAAI,QAAQ;AAAA,EACjC,EAAE;AACH;AAEA,SAAS,iBAAiB,UAA6B;AACtD,WAAS,IAAI,SAAS,SAAS,GAAG,KAAK,GAAG,KAAK;AAC9C,UAAM,MAAM,SAAS,CAAC;AACtB,QAAI,IAAI,SAAS,SAAS;AACzB,YAAM,YAAY,IAAI,QAAQ,KAAK,CAAC,MAAM,EAAE,SAAS,MAAM;AAC3D,aAAO,YAAY,UAAU,OAAO;AAAA,IACrC;AAAA,EACD;AACA,SAAO;AACR;AAEA,SAAS,mBAAmB,QAAqB,OAAyB;AACzE,MAAI,OAAO,OAAO,WAAW,WAAY,QAAO,OAAO;AACvD,QAAM,aAAa,cAAc;AACjC,QAAM,cAAc,OAAO,OAAO,WAAW,WAAW,OAAO,SAAS;AACxE,SAAO,iBAAiB,WAAW,aAAa,WAAW;AAC5D;AAEA,SAAS,oBAAoB,QAAiD;AAC7E,SAAO,OAAO,gBAAgB,cAAc,EAAE;AAC/C;AASA,IAAM,UAA6B,CAAC;AAO7B,SAAS,YAAY,MAAc,QAA2B;AACpE,UAAQ,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B;AAYO,SAAS,WAAiB;AAChC,QAAM,aAAa,cAAc;AAEjC,EAAG,YAAS,WAAW,gBAAgB,MAAM;AAC5C,eAAW,gBAAgB,WAAW,QAAQ;AAC7C,iBAAW,EAAE,MAAM,WAAW,OAAO,KAAK,SAAS;AAClD,cAAM,aAAa,OAAO,SAAS,CAAC;AACpC,cAAM,eAAe,OAAO,SAAS,SAAY,oBAAoB,MAAM;AAE3E,cAAM,gBAAgB,UAAU,OAAO,CAAC,EAAE,YAAY,IAAI,UAAU,MAAM,CAAC;AAC3E,cAAM,QAAQ,OAAO,OAAO,WAAW,WAAW,OAAO,SAAS;AAElE,mBAAW,MAAM,OAAO,OAAO;AAC9B,gBAAM,WAAW,GAAG,QAAQ,iBAAiB,GAAG,QAAQ;AACxD,gBAAM,eAAe,GAAG,SAAS;AACjC,gBAAM,QAAQ,YAAY,YAAY;AACtC,gBAAM,MAAM,EAAE,SAAS,iBAAiB,GAAG,QAAQ,EAAE;AAErD,gBAAM,WAAW,GAAG,OAAO,IAAI,CAAC,QAAQ,IAAI,GAAG,CAAC;AAChD,gBAAM,aAAa,SAAS,IAAI,CAAC,MAAM,EAAE,SAAS;AAClD,gBAAM,mBAAmB,OAAO,OAAO,CAAC,GAAG,GAAG,SAAS,IAAI,CAAC,MAAM,EAAE,gBAAgB,CAAC;AAErF,gBAAM,eAAe,IAAI,aAAa,OAAO,QAAQ;AAErD,UAAG;AAAA,YACF,GAAG,YAAY,KAAK,KAAK;AAAA,YACzB;AAAA,cACC,QAAQ;AAAA,gBACP,MAAM;AAAA,gBACN,UAAU;AAAA,gBACV;AAAA,gBACA,UAAU,GAAG;AAAA,cACd;AAAA,cACA;AAAA,YACD;AAAA,YACA,OAAO,EAAE,kBAAkB,OAAO,MAAM;AACvC,kBAAI;AAGJ,oBAAM,kBACL,GAAG,mBAAmB,OAAO,mBAAmB,cAAc,EAAE;AACjE,oBAAM,mBAAmB,kBAAkB,MAAM,gBAAgB,GAAG,QAAQ,IAAI,GAAG;AAEnF,kBAAI,cAAc;AACjB,yBAAS,MAAM;AAAA,kBACd;AAAA,kBACA;AAAA,kBACA;AAAA,kBACA;AAAA,gBACD;AAAA,cACD,OAAO;AACN,sBAAM,SAAS,mBAAmB,QAAQ,YAAY;AACtD,sBAAM,eAAe,cAAc,EAAE;AACrC,sBAAM,eAAe,GAAG,gBAAgB,OAAO,gBAAgB;AAC/D,yBAAS,MAAM,OAAO;AAAA,kBACrB,UAAU;AAAA,kBACV;AAAA,kBACA,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC;AAAA,gBACxC,CAAC;AAAA,cACF;AAEA,oBAAM,cAAc,OAAO,SACzB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC,EAChD,IAAI,CAACC,QAAOA,IAAG,IAAI;AAErB,cAAG,cAAW;AAAA,gBACb,cAAc,YAAY,SAAS,IAAI,YAAY,KAAK,KAAK,IAAI;AAAA,cAClE,CAAC;AACD,yBAAW,aAAa,YAAY;AACnC,sBAAM,UAAU,EAAE,SAAS,QAAQ,kBAAkB,UAAU,CAAC,EAAE,CAAC;AAAA,cACpE;AAAA,YACD;AAAA,UACD;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC;AACF;;;AIzOA,IAAAC,MAAoB;AACpB,wBAAuG;;;ACDvG,IAAAC,mBAAuC;AAQhC,SAAS,wBAAwB,aAAmC,OAAe;AACzF,QAAM,WAAW,IAAI,uBAAuB,WAAW;AACvD,QAAM,QAAQ,SAAS,QAAQ,KAAK;AAEpC,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,mBAAmB,kBAAkB;AAC3C,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,KAAK,kBAAkB,OAAO,MAAM,SAAS,2CAA2C;AAAA,IAClG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,kBAAkB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC5F;AAEA,UAAM,eAAe,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO;AAG7H,UAAM,YAAY,MAAM,MAAM,OAAO;AAAA,MACpC;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,MACA;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,IACD,CAAC;AAED,UAAM,oBAAoB,OAAO,UAAU,YAAY,WAAW,UAAU,UAAU,IAAI,KAAK,EAAE,YAAY;AAE7G,UAAM,UAAU,qBAAqB,iBAAiB,YAAY;AAElE,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,UACN,uCAAuC,gBAAgB,KACvD,aAAa,gBAAgB,mBAAmB,gBAAgB;AAAA,IACpE;AAAA,EACD;AACD;;;AC1DA,IAAAC,mBAAuC;AAMhC,SAAS,iCAAiC;AAChD,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,cAAwB,kBAAkB,oBAAoB,CAAC;AACrE,UAAM,iBAA2B,kBAAkB,0BAA0B,CAAC;AAE9E,QAAI,YAAY,WAAW,KAAK,eAAe,WAAW,GAAG;AAC5D,aAAO,EAAE,KAAK,oBAAoB,OAAO,MAAM,SAAS,4CAA4C;AAAA,IACrG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,oBAAoB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC9F;AAEA,UAAM,gBAAgB,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO,GAAG,YAAY;AAE7I,UAAM,WAAqB,CAAC;AAE5B,eAAW,YAAY,aAAa;AACnC,UAAI,CAAC,aAAa,SAAS,SAAS,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,2BAA2B,QAAQ,GAAG;AAAA,MACrD;AAAA,IACD;AAEA,eAAW,aAAa,gBAAgB;AACvC,UAAI,aAAa,SAAS,UAAU,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,6BAA6B,SAAS,GAAG;AAAA,MACxD;AAAA,IACD;AAEA,UAAM,SAAS,SAAS,WAAW;AAEnC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SAAS,kCAAkC,SAAS,KAAK,IAAI;AAAA,IACvE;AAAA,EACD;AACD;;;ACrDA,IAAAC,mBAAuC;AAOhC,SAAS,6BAA6B;AAC5C,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AAEL,QAAI,kBAAkB,iBAAiB,KAAK,kBAAkB,sBAAsB,MAAM;AACzF,aAAO,EAAE,KAAK,iBAAiB,OAAO,MAAM,SAAS,+CAA+C;AAAA,IACrG;AAEA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,cAAwB,kBAAkB,eAAe,CAAC;AAEhE,UAAM,YAAY,SAChB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC;AAElD,UAAM,kBAAkB,YAAY,SAAS,IAAI,UAAU,OAAO,CAAC,OAAO,CAAC,YAAY,SAAS,GAAG,IAAI,CAAC,IAAI;AAE5G,UAAM,SAAS,gBAAgB,WAAW;AAE1C,QAAI,YAAY,SAAS,GAAG;AAC3B,aAAO;AAAA,QACN,KAAK;AAAA,QACL,OAAO;AAAA,QACP,SAAS,SACN,2CAA2C,YAAY,KAAK,IAAI,CAAC,MACjE,cAAc,gBAAgB,MAAM,6BAA6B,gBAAgB,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,MACpH;AAAA,IACD;AAEA,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SACN,qCACA,cAAc,UAAU,MAAM,kBAAkB,UAAU,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,IAC7F;AAAA,EACD;AACD;;;ACjDA,IAAAC,mBAAuC;AAQhC,SAAS,+BAA+B;AAC9C,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,QAAI,kBAAkB,sBAAsB,MAAM;AACjD,aAAO,EAAE,KAAK,mBAAmB,OAAO,MAAM,SAAS,mDAAmD;AAAA,IAC3G;AAEA,UAAM,gBAA0B,kBAAkB,oBAAoB,CAAC;AACvE,UAAM,WAA0B,QAAQ,YAAY,CAAC;AAErD,UAAM,kBAAkB,SACtB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC,EAChD,IAAI,CAAC,OAAO,GAAG,IAAI;AAGrB,QAAI,cAAc,WAAW,GAAG;AAC/B,YAAMC,UAAS,gBAAgB,SAAS;AACxC,aAAO;AAAA,QACN,KAAK;AAAA,QACL,OAAOA;AAAA,QACP,SAASA,UACN,yBAAyB,gBAAgB,KAAK,IAAI,CAAC,KACnD;AAAA,MACJ;AAAA,IACD;AAEA,UAAM,eAAe,cAAc,OAAO,CAAC,SAAS,gBAAgB,SAAS,IAAI,CAAC;AAClF,UAAM,SAAS,aAAa,SAAS;AAErC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SACN,4BAA4B,aAAa,KAAK,IAAI,CAAC,KACnD,4DAA4D,cAAc,KAAK,IAAI,CAAC,aAAa,gBAAgB,SAAS,IAAI,gBAAgB,KAAK,IAAI,IAAI,MAAM;AAAA,IACrK;AAAA,EACD;AACD;;;AJ7BA,SAAS,oBAAoB,WAAgB,KAA0B;AACtE,SAAO,OAAO,EAAE,SAAS,iBAAiB,MAAM;AAC/C,QAAI,CAAC,kBAAkB,qBAAqB;AAC3C,aAAO,EAAE,KAAK,OAAO,MAAM,SAAS,6CAA6C;AAAA,IAClF;AACA,WAAO,UAAU,EAAE,SAAS,kBAAkB,iBAAiB,oBAAoB,CAAC;AAAA,EACrF;AACD;AAEA,SAAS,gBAAgB,SAAiB,WAAgD;AACzF,QAAM,aAAwC,CAAC;AAC/C,MAAI,QAAQ;AAEZ,aAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,QAAQ,CAAC;AAElD,aAAW,QAAQ,WAAW;AAC7B,UAAM,KAAK,KAAK,EAAE,KAAK;AACvB,eAAW,KAAK;AAAA,MACf,MAAM;AAAA,MACN,SAAS;AAAA,MACT,YAAY,CAAC,EAAE,UAAU,EAAE,MAAM,WAAW,KAAK,GAAG,IAAI,MAAM,WAAW,CAAC;AAAA,IAC3E,CAAC;AACD,eAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,OAAO,cAAc,GAAG,CAAC;AAAA,EACnE;AAEA,aAAW,KAAK,EAAE,MAAM,aAAa,SAAS,MAAM,CAAC;AAErD,SAAO;AACR;AAQO,SAAS,YAAY,OAA8B;AACzD,SAAO,CAAC,SAAS;AAAA,IAChB,WAAc;AAAA,MACb;AAAA,YACC,kDAA+B,EAAE,qBAAqB,YAAY,mBAAmB,SAAS,CAAC;AAAA,QAC/F;AAAA,MACD;AAAA,IACD;AAAA,IACA,kBAAkB,EAAE,qBAAqB,gBAAgB,IAAI,SAAS,KAAK,EAAE;AAAA,EAC9E;AACD;AAOO,SAAS,WAAwB;AACvC,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO;AACrB,WAAO;AAAA,MACN,WAAc;AAAA,QACb;AAAA,cACC,8CAA2B,EAAE,QAAQ,8CAA4B,MAAM,CAAC;AAAA,UACxE;AAAA,QACD;AAAA,MACD;AAAA,MACA,kBAAkB,CAAC;AAAA,IACpB;AAAA,EACD;AACD;AAOO,SAAS,QAAQ,SAA6C;AACpE,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,2BAA2B,CAAC;AAAA,IACxD,kBAAkB;AAAA,MACjB,mBAAmB;AAAA,MACnB,GAAI,SAAS,QAAQ,SAAS,EAAE,aAAa,QAAQ,OAAO,IAAI,CAAC;AAAA,IAClE;AAAA,EACD;AACD;AAOO,SAAS,mBAAmB,MAA2B;AAC7D,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO;AACrB,WAAO;AAAA,MACN,WAAc,kBAAc,wBAAwB,OAAO,aAAa,KAAK,CAAC;AAAA,MAC9E,kBAAkB,EAAE,kBAAkB,KAAK;AAAA,IAC5C;AAAA,EACD;AACD;AAOO,SAAS,cAAc,OAA+B;AAC5D,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,6BAA6B,CAAC;AAAA,IAC1D,kBAAkB;AAAA,MACjB,mBAAmB;AAAA,MACnB,GAAI,OAAO,SAAS,EAAE,kBAAkB,MAAM,IAAI,CAAC;AAAA,IACpD;AAAA,EACD;AACD;AAGO,SAAS,SAAS,SAAgC;AACxD,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,kBAAkB,QAAQ;AAAA,EAC/C;AACD;AAGO,SAAS,YAAY,SAAgC;AAC3D,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,wBAAwB,QAAQ;AAAA,EACrD;AACD;","names":["import_messages","tc","ls","import_messages","import_messages","import_messages","import_messages","passed"]}
|
|
1
|
+
{"version":3,"sources":["../../src/eval/index.ts","../../src/eval/config.ts","../../src/eval/suite.ts","../../src/eval/target.ts","../../src/runtime/langchain/model-resolver.ts","../../src/runtime/langchain/utils.ts","../../src/eval/expectations.ts","../../src/eval/evaluators/language.ts","../../src/eval/evaluators/llm-judge.ts","../../node_modules/openevals/dist/utils.js","../../node_modules/openevals/dist/json/match.js","../../node_modules/openevals/dist/llm.js","../../node_modules/openevals/dist/code/base.js","../../node_modules/openevals/dist/simulators/multiturn.js","../../node_modules/openevals/dist/simulators/prebuilts.js","../../src/eval/evaluators/response-content.ts","../../src/eval/evaluators/no-tool-calls.ts","../../src/eval/evaluators/any-tool-called.ts","../../src/eval/evaluators/tool-input.ts"],"sourcesContent":["// ── Configuration ───────────────────────────────────────────────────\nexport { configureEvals } from './config';\nexport type { EvalConfig, CreateTargetFn, ToolWrapper } from './config';\n\n// ── Suite API ────────────────────────────────────────────────────────\nexport { defineSuite, runEvals, human, ai, toolResult, fromToolSpecs } from './suite';\nexport type { SuiteConfig, TestCase, ToolDef, ExecutionMode } from './suite';\n\n// ── Expectations ─────────────────────────────────────────────────────\nexport { toolsCalled, llmJudge, noTools, anyToolCalled, respondsInLanguage, contains, notContains } from './expectations';\nexport type { Expectation, ToolExpectation } from './expectations';\nexport type { ToolCallExpectation } from './evaluators/tool-input';\n","import type { Agent, Message, ToolDefinition } from '../core/agent.interface';\nimport type { LangchainModelConfig } from '../runtime/langchain/model-resolver';\n\n/** Optional hook applied by the eval runner to wrap every tool for tracking and stop detection. */\nexport type ToolWrapper = (tools: ToolDefinition[]) => ToolDefinition[];\n\n/** Factory that creates a fresh Agent per test case. Receives the model string and extra suite-level tools. */\nexport type CreateTargetFn = (\n\tmodel: string,\n\textraTools: ToolDefinition[],\n\t/** When provided, the factory MUST apply this to the final merged tool array (built-in + extra) before creating the agent. */\n\twrapTools?: ToolWrapper,\n) => Agent | Promise<Agent>;\n\nexport interface EvalConfig {\n\t/** Required for model-based target and LLM evaluators (respondsInLanguage, llmJudge). */\n\tmodelConfig: LangchainModelConfig;\n\t/** Models to evaluate. Every registered suite is run once per model. */\n\tmodels: string[];\n\t/** Model for evaluators needing LLM calls (language detection, LLM-as-judge). */\n\tevaluatorModel: string;\n\t/** LangSmith experiment (dataset) name. All suites share this single experiment for easy comparison across runs. */\n\texperimentName: string;\n\t/** System prompt for model-based target. Ignored when createTarget is used. Can be overridden per-suite or per-case. */\n\tsystemPrompt?: string;\n\t/** Factory that creates a fresh Agent per test case. Receives the current model string from the models array. */\n\tcreateTarget?: CreateTargetFn;\n\t/** Transforms test case messages before sending to target. Simulates production preprocessing (e.g., message enrichment). */\n\tprepareMessages?: (messages: Message[]) => Message[] | Promise<Message[]>;\n}\n\nlet _config: EvalConfig | null = null;\n\nexport function configureEvals(config: EvalConfig): void {\n\t_config = config;\n}\n\nexport function getEvalConfig(): EvalConfig {\n\tif (!_config) {\n\t\tthrow new Error('Evals not configured. Call configureEvals() in your vitest setupFiles.');\n\t}\n\treturn _config;\n}\n\n","import * as ls from 'langsmith/vitest';\nimport { AIMessage, BaseMessage } from '@langchain/core/messages';\nimport { createEvalTarget, runAgentTarget, type MockToolDef, type EvalTargetInput } from './target';\nimport { type Expectation } from './expectations';\nimport { getEvalConfig, type CreateTargetFn } from './config';\nimport {\n\ttype Message,\n\ttype HumanMessage,\n\ttype AiMessage,\n\ttype ToolMessage,\n\ttype ToolSpec,\n} from '../core/agent.interface';\n\n// ── Message builders ─────────────────────────────────────────────────\n\nexport function human(content: string): HumanMessage {\n\treturn { role: 'human', content: [{ type: 'text', text: content }] };\n}\n\nexport function ai(content: string, toolCalls?: string[]): AiMessage {\n\treturn { role: 'ai', content, ...(toolCalls ? { toolCalls: toolCalls.map((name) => ({ name })) } : {}) };\n}\n\nexport function toolResult(name: string, output: string): ToolMessage {\n\treturn { role: 'tool', name, output };\n}\n\nexport interface ToolDef {\n\tdescription: string;\n\t/** A plain key→description record, or a ZodObject passed through from a ToolSpec. */\n\tschema?: Record<string, string> | import('zod').ZodObject<any>;\n\t/** Auto-stringified if not a string or function. */\n\tresponse: unknown | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\n/**\n * Controls how the eval target executes.\n * - `single-turn`: one model invocation + tool execution, then stop.\n * - `stop-after-tool`: run until the listed tools have been called `count`\n * times cumulatively, then stop. Defaults to 1 (stop on the first match).\n */\nexport type ExecutionMode =\n\t| { type: 'single-turn' }\n\t| { type: 'stop-after-tool'; tools: string[]; count?: number };\n\nexport interface TestCase {\n\t/** Test name. Defaults to the last human message content if omitted. */\n\tname?: string;\n\tmessages: Message[];\n\tsystemPrompt?: string;\n\t/** Override suite-level tools for this case. */\n\ttools?: Record<string, ToolDef>;\n\t/** Transforms messages before sending to target. Overrides suite-level and global hooks. */\n\tprepareMessages?: (messages: Message[]) => Message[] | Promise<Message[]>;\n\t/** Controls target execution. Omit for default behavior (run until the agent stops on its own). */\n\texecutionMode?: ExecutionMode;\n\texpect: Expectation[];\n}\n\ntype TargetFn = (inputs: EvalTargetInput) => Promise<{ messages: BaseMessage[] }>;\n\nexport interface SuiteConfig {\n\t/** Custom target function, or model string override. Auto-created from global config if omitted. */\n\ttarget?: TargetFn | string;\n\t/** Factory that creates a fresh Agent per test case. Overrides global createTarget. */\n\tcreateTarget?: CreateTargetFn;\n\t/** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */\n\tsystemPrompt?: string;\n\ttools?: Record<string, ToolDef>;\n\t/** Transforms messages before sending to target. Overrides global hook; can be overridden per-case. */\n\tprepareMessages?: (messages: Message[]) => Message[] | Promise<Message[]>;\n\tcases: TestCase[];\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\n/**\n * Converts a `ToolSpec[]` (from a real tool provider) into the\n * `Record<string, ToolDef>` that `defineSuite` expects.\n *\n * `responses` maps tool names to canned mock responses. Tools without an\n * entry in `responses` default to `''`.\n */\nexport function fromToolSpecs(\n\tspecs: ToolSpec[],\n\tresponses: Record<string, ToolDef['response']> = {},\n): Record<string, ToolDef> {\n\treturn Object.fromEntries(\n\t\tspecs.map((spec) => [\n\t\t\tspec.name,\n\t\t\t{\n\t\t\t\tdescription: spec.description,\n\t\t\t\tschema: spec.inputSchema,\n\t\t\t\tresponse: responses[spec.name] ?? '',\n\t\t\t} satisfies ToolDef,\n\t\t]),\n\t);\n}\n\nfunction toMockTools(defs: Record<string, ToolDef>): MockToolDef[] {\n\treturn Object.entries(defs).map(([name, def]) => ({\n\t\tname,\n\t\tdescription: def.description,\n\t\tschema: def.schema ?? {},\n\t\tresponse:\n\t\t\ttypeof def.response === 'function'\n\t\t\t\t? (def.response as MockToolDef['response'])\n\t\t\t\t: typeof def.response === 'string'\n\t\t\t\t\t? def.response\n\t\t\t\t\t: JSON.stringify(def.response),\n\t}));\n}\n\nfunction lastHumanContent(messages: Message[]): string {\n\tfor (let i = messages.length - 1; i >= 0; i--) {\n\t\tconst msg = messages[i];\n\t\tif (msg.role === 'human') {\n\t\t\tconst textBlock = msg.content.find((c) => c.type === 'text');\n\t\t\treturn textBlock ? textBlock.text : '';\n\t\t}\n\t}\n\treturn '';\n}\n\nfunction resolveModelTarget(config: SuiteConfig, model: string): TargetFn {\n\tif (typeof config.target === 'function') return config.target;\n\tconst evalConfig = getEvalConfig();\n\tconst targetModel = typeof config.target === 'string' ? config.target : model;\n\treturn createEvalTarget(evalConfig.modelConfig, targetModel);\n}\n\nfunction resolveCreateTarget(config: SuiteConfig): CreateTargetFn | undefined {\n\treturn config.createTarget ?? getEvalConfig().createTarget;\n}\n\n// ── Suite registry ───────────────────────────────────────────────────\n\ninterface RegisteredSuite {\n\tname: string;\n\tconfig: SuiteConfig;\n}\n\nconst _suites: RegisteredSuite[] = [];\n\n/**\n * Registers an eval suite. Does not create tests on its own — call\n * `runEvals()` after all suites are registered to emit a single\n * LangSmith experiment containing every test case.\n */\nexport function defineSuite(name: string, config: SuiteConfig): void {\n\t_suites.push({ name, config });\n}\n\n/**\n * Emits all registered suites under a single `ls.describe` block so\n * every test case lands in one LangSmith experiment / dataset.\n *\n * Call this once, after importing all suite files.\n *\n * Individual suites are grouped with native `describe` blocks for\n * readability; test names are prefixed with the suite name\n * (e.g. \"discovery > should use search tool\").\n */\nexport function runEvals(): void {\n\tconst evalConfig = getEvalConfig();\n\n\tls.describe(evalConfig.experimentName, () => {\n\t\tfor (const currentModel of evalConfig.models) {\n\t\t\tfor (const { name: suiteName, config } of _suites) {\n\t\t\t\tconst suiteTools = config.tools ?? {};\n\t\t\t\tconst createTarget = config.target ? undefined : resolveCreateTarget(config);\n\n\t\t\t\tconst categoryLabel = suiteName.charAt(0).toUpperCase() + suiteName.slice(1);\n\t\t\t\tconst model = typeof config.target === 'string' ? config.target : currentModel;\n\n\t\t\t\tfor (const tc of config.cases) {\n\t\t\t\t\tconst testName = tc.name ?? lastHumanContent(tc.messages);\n\t\t\t\t\tconst caseToolDefs = tc.tools ?? suiteTools;\n\t\t\t\t\tconst tools = toMockTools(caseToolDefs);\n\t\t\t\t\tconst ctx = { message: lastHumanContent(tc.messages) };\n\n\t\t\t\t\tconst resolved = tc.expect.map((exp) => exp(ctx));\n\t\t\t\t\tconst evaluators = resolved.map((r) => r.evaluator);\n\t\t\t\t\tconst referenceOutputs = Object.assign({}, ...resolved.map((r) => r.referenceOutputs));\n\n\t\t\t\t\tconst fullTestName = `[${categoryLabel}] > ${testName}`;\n\n\t\t\t\t\tls.test(\n\t\t\t\t\t\t`${fullTestName} (${model})`,\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tinputs: {\n\t\t\t\t\t\t\t\tname: fullTestName,\n\t\t\t\t\t\t\t\tcategory: categoryLabel,\n\t\t\t\t\t\t\t\tmodel,\n\t\t\t\t\t\t\t\tmessages: tc.messages,\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\treferenceOutputs,\n\t\t\t\t\t\t},\n\t\t\t\t\t\tasync ({ referenceOutputs: refOut }) => {\n\t\t\t\t\t\t\tlet output: { messages: BaseMessage[] };\n\n\t\t\t\t\t\t\t// Resolution order: case > suite > global > identity\n\t\t\t\t\t\t\tconst prepareMessages =\n\t\t\t\t\t\t\t\ttc.prepareMessages ?? config.prepareMessages ?? getEvalConfig().prepareMessages;\n\t\t\t\t\t\t\tconst preparedMessages = prepareMessages ? await prepareMessages(tc.messages) : tc.messages;\n\n\t\t\t\t\t\t\tif (createTarget) {\n\t\t\t\t\t\t\t\toutput = await runAgentTarget(\n\t\t\t\t\t\t\t\t\tcreateTarget,\n\t\t\t\t\t\t\t\t\tcurrentModel,\n\t\t\t\t\t\t\t\t\tpreparedMessages,\n\t\t\t\t\t\t\t\t\tcaseToolDefs,\n\t\t\t\t\t\t\t\t\ttc.executionMode,\n\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\tconst target = resolveModelTarget(config, currentModel);\n\t\t\t\t\t\t\t\tconst globalPrompt = getEvalConfig().systemPrompt;\n\t\t\t\t\t\t\t\tconst systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;\n\t\t\t\t\t\t\t\toutput = await target({\n\t\t\t\t\t\t\t\t\tmessages: preparedMessages,\n\t\t\t\t\t\t\t\t\ttools,\n\t\t\t\t\t\t\t\t\texecutionMode: tc.executionMode,\n\t\t\t\t\t\t\t\t\t...(systemPrompt ? { systemPrompt } : {}),\n\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tconst calledTools = output.messages\n\t\t\t\t\t\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t\t\t\t\t\t.flatMap((m) => (m as AIMessage).tool_calls ?? [])\n\t\t\t\t\t\t\t\t.map((tc) => tc.name);\n\n\t\t\t\t\t\t\tls.logOutputs({\n\t\t\t\t\t\t\t\ttools_called: calledTools.length > 0 ? calledTools.join(' | ') : 'none',\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\tfor (const evaluator of evaluators) {\n\t\t\t\t\t\t\t\tawait evaluator({ outputs: output, referenceOutputs: refOut ?? {} });\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t},\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t});\n}\n","import { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { tool } from '@langchain/core/tools';\nimport { AIMessage, BaseMessage, SystemMessage, ToolMessage } from '@langchain/core/messages';\nimport { z } from 'zod';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../runtime/langchain/model-resolver';\nimport { type CreateTargetFn, getEvalConfig } from './config';\nimport type {\n\tAgent,\n\tAgentResult,\n\tMessage as AgentMessage,\n\tToolCallContentBlock,\n\tToolDefinition,\n} from '../core/agent.interface';\nimport { convertToLangchainMessages } from '../runtime/langchain/utils';\nimport type { ToolDef, ExecutionMode } from './suite';\n\nexport interface MockToolDef {\n\tname: string;\n\tdescription: string;\n\tschema: z.ZodObject<any> | Record<string, unknown>;\n\t/**\n\t * Canned response the mock tool returns.\n\t * Can be a static string, or a function that receives input and returns a response.\n\t * If a function is provided, it receives the full invocation count as a second arg\n\t * to support scenarios like \"first call fails, second call succeeds\".\n\t */\n\tresponse: string | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\nexport interface EvalTargetInput {\n\tsystemPrompt?: string;\n\tmessages: AgentMessage[];\n\ttools: MockToolDef[];\n\texecutionMode?: ExecutionMode;\n}\n\nconst MAX_AGENT_LOOPS = 10;\n\n/**\n * Strips reasoning/thinking content blocks from an AIMessage so it can be\n * safely replayed as conversation history.\n *\n * Reasoning blocks (OpenAI `reasoning`, Anthropic `thinking`) are\n * output-only artifacts. The APIs reject them when sent back as input,\n * producing errors like \"Item of type 'reasoning' was provided without its\n * required following item.\" Stripping them does not affect model quality\n * because reasoning happens fresh on every new invocation.\n */\nexport function stripReasoningBlocks(message: AIMessage): AIMessage {\n\tif (!Array.isArray(message.content)) return message;\n\n\tconst filtered = message.content.filter((block: any) => block.type !== 'reasoning' && block.type !== 'thinking');\n\n\t// If all content was reasoning, keep an empty string so the message\n\t// remains structurally valid\n\tconst newContent = filtered.length > 0 ? filtered : '';\n\n\treturn new AIMessage({\n\t\tcontent: newContent,\n\t\ttool_calls: message.tool_calls,\n\t\tid: message.id,\n\t\tresponse_metadata: message.response_metadata,\n\t\tusage_metadata: message.usage_metadata,\n\t});\n}\n\n/**\n * Creates a LangSmith-compatible target function that runs an agentic loop\n * with mock tools and returns the full message trajectory.\n */\nexport function createEvalTarget(modelConfig: LangchainModelConfig, modelString: string) {\n\treturn async (inputs: EvalTargetInput): Promise<{ messages: BaseMessage[] }> => {\n\t\tconst resolver = new LangchainModelResolver(modelConfig);\n\t\tconst model = resolver.resolve(modelString) as BaseChatModel;\n\n\t\t// Track invocation counts per tool for stateful mock responses\n\t\tconst toolCallCounts: Record<string, number> = {};\n\n\t\t// Create langchain tools from mock definitions\n\t\tconst langchainTools = inputs.tools.map((mockTool) => {\n\t\t\ttoolCallCounts[mockTool.name] = 0;\n\n\t\t\treturn tool(\n\t\t\t\tasync (toolInput: Record<string, unknown>) => {\n\t\t\t\t\ttoolCallCounts[mockTool.name]++;\n\t\t\t\t\tif (typeof mockTool.response === 'function') {\n\t\t\t\t\t\treturn mockTool.response(toolInput, toolCallCounts[mockTool.name]);\n\t\t\t\t\t}\n\t\t\t\t\treturn mockTool.response;\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\tname: mockTool.name,\n\t\t\t\t\tdescription: mockTool.description,\n\t\t\t\t\tschema:\n\t\t\t\t\t\tmockTool.schema instanceof z.ZodObject\n\t\t\t\t\t\t\t? mockTool.schema\n\t\t\t\t\t\t\t: z.object(\n\t\t\t\t\t\t\t\t\tObject.fromEntries(\n\t\t\t\t\t\t\t\t\t\tObject.entries(mockTool.schema).map(([key, val]) => {\n\t\t\t\t\t\t\t\t\t\t\tif (typeof val === 'string') return [key, z.string().describe(val)];\n\t\t\t\t\t\t\t\t\t\t\tif (typeof val === 'number') return [key, z.number().describe(String(val))];\n\t\t\t\t\t\t\t\t\t\t\treturn [key, z.any()];\n\t\t\t\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t\t\t\t),\n\t\t\t\t\t\t\t\t),\n\t\t\t\t},\n\t\t\t);\n\t\t});\n\n\t\tconst boundModel = langchainTools.length > 0 ? model.bindTools!(langchainTools) : model;\n\n\t\tconst messages: BaseMessage[] = [];\n\n\t\tif (inputs.systemPrompt) {\n\t\t\tmessages.push(new SystemMessage(inputs.systemPrompt));\n\t\t}\n\n\t\t// Convert and push all messages (history + final human)\n\t\tmessages.push(...convertToLangchainMessages(inputs.messages));\n\n\t\tconst stopTools = inputs.executionMode?.type === 'stop-after-tool' ? inputs.executionMode.tools : [];\n\t\tconst stopCount =\n\t\t\tinputs.executionMode?.type === 'stop-after-tool' ? (inputs.executionMode.count ?? 1) : 1;\n\t\tconst singleTurn = inputs.executionMode?.type === 'single-turn';\n\t\tlet cumulativeHits = 0;\n\n\t\t// Agentic loop: keep calling model until it stops making tool calls\n\t\tlet loopCount = 0;\n\t\twhile (loopCount < MAX_AGENT_LOOPS) {\n\t\t\tloopCount++;\n\n\t\t\tconst response = await boundModel.invoke(messages);\n\t\t\tmessages.push(stripReasoningBlocks(response as AIMessage) as BaseMessage);\n\n\t\t\tconst aiMessage = response as AIMessage;\n\t\t\tif (!aiMessage.tool_calls || aiMessage.tool_calls.length === 0) {\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t// Execute tool calls and add results\n\t\t\tlet shouldStop = false;\n\t\t\tfor (const tc of aiMessage.tool_calls) {\n\t\t\t\tconst mockTool = langchainTools.find((t) => t.name === tc.name);\n\t\t\t\tif (mockTool) {\n\t\t\t\t\tconst result = await mockTool.invoke(tc.args);\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: typeof result === 'string' ? result : JSON.stringify(result),\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t} else {\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: `Tool \"${tc.name}\" not found`,\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t}\n\n\t\t\t\tif (stopTools.includes(tc.name)) {\n\t\t\t\t\tcumulativeHits++;\n\t\t\t\t\tif (cumulativeHits >= stopCount) {\n\t\t\t\t\t\tshouldStop = true;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (singleTurn || shouldStop) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\n\t\treturn { messages };\n\t};\n}\n\n// ── Agent-based target ──────────────────────────────────────────────\n\n/**\n * Converts an `AgentResult` (from `Agent.run()`) into LangChain `BaseMessage[]`\n * so existing evaluators (trajectory match, no-tool-calls, response-content, language) work unchanged.\n *\n * Consecutive `tool_call` content blocks are grouped into a single `AIMessage` with `tool_calls`,\n * followed by one `ToolMessage` per call.\n */\nexport function agentResultToMessages(inputMessages: AgentMessage[], result: AgentResult): BaseMessage[] {\n\t// Include input messages for trajectory context\n\tconst messages: BaseMessage[] = convertToLangchainMessages(inputMessages);\n\n\t// Group content blocks into BaseMessages\n\tlet pendingToolCalls: { id: string; name: string; args: Record<string, unknown>; output: string }[] = [];\n\n\tfor (const block of result.content) {\n\t\tif (block.type === 'tool_call') {\n\t\t\tconst tc = block as ToolCallContentBlock;\n\t\t\tpendingToolCalls.push({\n\t\t\t\tid: tc.toolCallId,\n\t\t\t\tname: tc.name,\n\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\toutput: tc.output,\n\t\t\t});\n\t\t} else if (block.type === 'text') {\n\t\t\t// Flush any pending tool calls before the text block\n\t\t\tif (pendingToolCalls.length > 0) {\n\t\t\t\tmessages.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: '',\n\t\t\t\t\t\ttool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args })),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t\tfor (const tc of pendingToolCalls) {\n\t\t\t\t\tmessages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));\n\t\t\t\t}\n\t\t\t\tpendingToolCalls = [];\n\t\t\t}\n\t\t\tmessages.push(new AIMessage(block.output));\n\t\t}\n\t}\n\n\t// Flush remaining tool calls (agent ended mid-tool-use, unlikely but safe)\n\tif (pendingToolCalls.length > 0) {\n\t\tmessages.push(\n\t\t\tnew AIMessage({\n\t\t\t\tcontent: '',\n\t\t\t\ttool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args })),\n\t\t\t}),\n\t\t);\n\t\tfor (const tc of pendingToolCalls) {\n\t\t\tmessages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));\n\t\t}\n\t}\n\n\treturn messages;\n}\n\n/**\n * Converts eval `Record<string, ToolDef>` into `ToolDefinition[]` with mock `exec` functions,\n * suitable for passing to an `AgentFactory.createAgent()` call.\n */\nexport function toolDefsToDefinitions(defs: Record<string, ToolDef>): ToolDefinition[] {\n\tconst callCounts: Record<string, number> = {};\n\n\treturn Object.entries(defs).map(([name, def]) => {\n\t\tcallCounts[name] = 0;\n\n\t\treturn {\n\t\t\tname,\n\t\t\ttoolKit: 'eval-mock',\n\t\t\tdescription: def.description,\n\t\t\tinputSchema:\n\t\t\t\tdef.schema instanceof z.ZodObject\n\t\t\t\t\t? def.schema\n\t\t\t\t\t: z.object(\n\t\t\t\t\t\t\tObject.fromEntries(\n\t\t\t\t\t\t\t\tObject.entries(def.schema ?? {}).map(([key, val]) => {\n\t\t\t\t\t\t\t\t\tif (typeof val === 'string') return [key, z.string().describe(val)];\n\t\t\t\t\t\t\t\t\treturn [key, z.any()];\n\t\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t\t),\n\t\t\t\t\t\t),\n\t\t\texec: async (input: Record<string, unknown>) => {\n\t\t\t\tcallCounts[name]++;\n\t\t\t\tif (typeof def.response === 'function') {\n\t\t\t\t\treturn (def.response as (input: Record<string, unknown>, callCount: number) => string)(\n\t\t\t\t\t\tinput,\n\t\t\t\t\t\tcallCounts[name],\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t\treturn typeof def.response === 'string' ? def.response : JSON.stringify(def.response);\n\t\t\t},\n\t\t} satisfies ToolDefinition;\n\t});\n}\n\n/** Tracks tool calls captured by wrapped tool exec functions. */\ninterface TrackedToolCall {\n\tname: string;\n\tinput: Record<string, unknown>;\n\toutput: string;\n\ttoolCallId: string;\n}\n\n/**\n * Wraps tool definitions to:\n * 1. Track every tool call (name, input, output) in the `tracker` array.\n * 2. Trigger `abortController.abort()` when a stop condition is met.\n *\n * For `single-turn`, every tool triggers abort after execution.\n * For `stop-after-tool`, only the listed tools trigger abort. The optional\n * `count` (default 1) controls how many cumulative matching calls are\n * allowed before the abort fires.\n */\nfunction wrapToolDefsForExecution(\n\tdefs: ToolDefinition[],\n\ttracker: TrackedToolCall[],\n\tabortController: AbortController,\n\texecutionMode: ExecutionMode,\n): ToolDefinition[] {\n\tconst stopTools = executionMode.type === 'stop-after-tool' ? executionMode.tools : [];\n\tconst stopCount = executionMode.type === 'stop-after-tool' ? (executionMode.count ?? 1) : 1;\n\tlet cumulativeHits = 0;\n\n\treturn defs.map((def) => ({\n\t\t...def,\n\t\texec: async (input: Record<string, unknown>) => {\n\t\t\tconst result = await def.exec(input);\n\t\t\tconst output = typeof result === 'string' ? result : JSON.stringify(result);\n\n\t\t\ttracker.push({\n\t\t\t\tname: def.name,\n\t\t\t\tinput,\n\t\t\t\toutput,\n\t\t\t\ttoolCallId: `eval_tc_${tracker.length}`,\n\t\t\t});\n\n\t\t\t// Abort on: single-turn (any tool), or stop-after-tool (cumulative match count reached)\n\t\t\tif (executionMode.type === 'single-turn') {\n\t\t\t\tabortController.abort();\n\t\t\t} else if (stopTools.includes(def.name)) {\n\t\t\t\tcumulativeHits++;\n\t\t\t\tif (cumulativeHits >= stopCount) {\n\t\t\t\t\tabortController.abort();\n\t\t\t\t}\n\t\t\t}\n\n\t\t\treturn result;\n\t\t},\n\t}));\n}\n\n/**\n * Builds a BaseMessage[] trajectory from tracked tool calls.\n * Used when the agent is aborted mid-execution and `agent.run()` doesn't\n * return a result.\n */\nfunction buildTrajectoryFromTrackedCalls(\n\tinputMessages: AgentMessage[],\n\ttrackedCalls: TrackedToolCall[],\n): BaseMessage[] {\n\tconst messages: BaseMessage[] = convertToLangchainMessages(inputMessages);\n\n\tif (trackedCalls.length > 0) {\n\t\t// Build an AIMessage with tool_calls, followed by ToolMessages\n\t\tmessages.push(\n\t\t\tnew AIMessage({\n\t\t\t\tcontent: '',\n\t\t\t\ttool_calls: trackedCalls.map((tc) => ({\n\t\t\t\t\tid: tc.toolCallId,\n\t\t\t\t\tname: tc.name,\n\t\t\t\t\targs: tc.input,\n\t\t\t\t})),\n\t\t\t}),\n\t\t);\n\t\tfor (const tc of trackedCalls) {\n\t\t\tmessages.push(\n\t\t\t\tnew ToolMessage({\n\t\t\t\t\tcontent: tc.output,\n\t\t\t\t\ttool_call_id: tc.toolCallId,\n\t\t\t\t\tname: tc.name,\n\t\t\t\t}),\n\t\t\t);\n\t\t}\n\t}\n\n\treturn messages;\n}\n\n/**\n * Runs a real `Agent` as the eval target. Creates a fresh agent per invocation via the factory,\n * sends human messages, and converts the `AgentResult` to `{ messages: BaseMessage[] }`.\n *\n * When an `executionMode` is provided, tool definitions are wrapped to abort\n * the agent once the stop condition is met. Tool calls are tracked so the\n * trajectory can be reconstructed even if the agent is aborted mid-execution.\n */\nexport async function runAgentTarget(\n\tcreateTarget: CreateTargetFn,\n\tmodel: string,\n\tevalMessages: AgentMessage[],\n\textraToolDefs: Record<string, ToolDef>,\n\texecutionMode?: ExecutionMode,\n): Promise<{ messages: BaseMessage[] }> {\n\tconst extraTools = Object.keys(extraToolDefs).length > 0 ? toolDefsToDefinitions(extraToolDefs) : [];\n\tconst tracker: TrackedToolCall[] = [];\n\tlet abortController: AbortController | undefined;\n\n\tif (executionMode) {\n\t\tabortController = new AbortController();\n\t}\n\n\t// When an executionMode is active, pass a wrapTools hook to createTarget\n\t// so that ALL tools the agent receives (built-in + extra) are wrapped\n\t// with tracking and stop detection. The hook must be applied by the\n\t// createTarget implementation to the final merged tool array.\n\tconst wrapTools =\n\t\texecutionMode && abortController\n\t\t\t? (tools: ToolDefinition[]) => wrapToolDefsForExecution(tools, tracker, abortController, executionMode)\n\t\t\t: undefined;\n\n\tconst agent = await createTarget(model, extraTools, wrapTools);\n\tconst signal = abortController?.signal;\n\n\ttry {\n\t\t// Race the agent against an abort-triggered promise.\n\t\t// LangGraph swallows AbortErrors thrown inside tool nodes (converting\n\t\t// them to tool error messages), so relying on signal propagation alone\n\t\t// is not sufficient. Instead we race against a promise that rejects as\n\t\t// soon as the abort fires, guaranteeing we break out of the agent loop.\n\t\tconst agentPromise = agent.run({\n\t\t\tthreadId: `eval_${Date.now()}_${Math.random().toString(36).slice(2)}`,\n\t\t\tmessages: evalMessages,\n\t\t\tsignal,\n\t\t});\n\n\t\tif (abortController) {\n\t\t\tconst abortPromise = new Promise<never>((_, reject) => {\n\t\t\t\tconst onAbort = () => reject(new DOMException('Eval execution aborted', 'AbortError'));\n\t\t\t\tif (signal!.aborted) {\n\t\t\t\t\tonAbort();\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t\tsignal!.addEventListener('abort', onAbort, { once: true });\n\t\t\t});\n\n\t\t\tconst result = await Promise.race([agentPromise, abortPromise]);\n\t\t\treturn { messages: agentResultToMessages(evalMessages, result) };\n\t\t}\n\n\t\tconst result = await agentPromise;\n\t\treturn { messages: agentResultToMessages(evalMessages, result) };\n\t} catch (error: any) {\n\t\t// AbortError is expected when execution mode triggers early stop.\n\t\t// Build the trajectory from tracked tool calls captured before the abort.\n\t\tif (error.name === 'AbortError' || signal?.aborted) {\n\t\t\treturn { messages: buildTrajectoryFromTrackedCalls(evalMessages, tracker) };\n\t\t}\n\t\tthrow error;\n\t}\n}\n","import { BaseLanguageModel } from '@langchain/core/language_models/base';\nimport { ChatAnthropic } from '@langchain/anthropic';\nimport { AzureChatOpenAI, ChatOpenAI } from '@langchain/openai';\nimport { ReasoningEffort } from 'openai/resources';\n\nexport type LangchainOpenAIConfig = {\n\tapiKey: string;\n};\n\nexport type AzureModelProvider = 'openai' | 'anthropic';\n\nexport type LangchainAzureResourceConfig = {\n\tapiKey: string;\n\tmodels: {\n\t\tmodel: string;\n\t\tprovider: AzureModelProvider;\n\t\tendpoint: string;\n\t\tapiVersion: string;\n\t\tdeploymentName: string;\n\t}[];\n};\n\nexport type ResourceName = string;\n\nexport type LangchainModelConfig = {\n\topenai?: Record<string, LangchainOpenAIConfig>;\n\tazure?: Record<ResourceName, LangchainAzureResourceConfig>;\n};\n\nexport class LangchainModelResolver {\n\tconstructor(private config: LangchainModelConfig) {}\n\n\tresolve(modelString: string, tags?: string[], reasoningEffort?: ReasoningEffort): BaseLanguageModel {\n\t\tconst parts = modelString.split(':');\n\n\t\tif (parts.length === 1) {\n\t\t\tconst fullModelString = this.resolveFullModelString(modelString);\n\t\t\treturn this.resolve(fullModelString, tags, reasoningEffort);\n\t\t}\n\n\t\tif (parts.length === 2) {\n\t\t\tconst [provider, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, modelName, modelName, tags, reasoningEffort);\n\t\t}\n\n\t\tif (parts.length === 3) {\n\t\t\tconst [provider, configName, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, configName, modelName, tags, reasoningEffort);\n\t\t}\n\n\t\tthrow new Error(\n\t\t\t'Model string must follow format \"modelName\", \"provider:modelName\", or \"provider:configName:modelName\"',\n\t\t);\n\t}\n\n\tprivate resolveFullModelString(modelName: string): string {\n\t\tfor (const [provider, resources] of Object.entries(this.config)) {\n\t\t\tif (provider === 'openai') {\n\t\t\t\tif (modelName in (resources as Record<string, unknown>)) {\n\t\t\t\t\treturn `openai:${modelName}`;\n\t\t\t\t}\n\t\t\t} else if (provider === 'azure') {\n\t\t\t\tfor (const [resource, config] of Object.entries(\n\t\t\t\t\tresources as Record<string, { models: { model: string }[] }>,\n\t\t\t\t)) {\n\t\t\t\t\tif (config.models.some((m) => m.model === modelName)) {\n\t\t\t\t\t\treturn `azure:${resource}:${modelName}`;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tthrow new Error(`Model \"${modelName}\" not found in model config`);\n\t}\n\n\tprivate resolveByProvider(\n\t\tprovider: string,\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): BaseLanguageModel {\n\t\tswitch (provider) {\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveOpenAI(configName, modelName, tags, reasoningEffort);\n\t\t\tcase 'azure':\n\t\t\t\treturn this.resolveAzure(configName, modelName, tags, reasoningEffort);\n\t\t\tdefault:\n\t\t\t\tthrow new Error(`Unsupported model provider: ${provider}`);\n\t\t}\n\t}\n\n\tprivate resolveOpenAI(\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): ChatOpenAI {\n\t\tconst providerConfig = this.config.openai?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"openai\" is missing`);\n\t\t}\n\n\t\treturn new ChatOpenAI({\n\t\t\tapiKey: providerConfig.apiKey,\n\t\t\tmodelName: modelName,\n\t\t\ttags: tags,\n\t\t\t...(reasoningEffort && {\n\t\t\t\treasoning: {\n\t\t\t\t\teffort: reasoningEffort,\n\t\t\t\t\tsummary: 'auto',\n\t\t\t\t},\n\t\t\t\tuseResponsesApi: true,\n\t\t\t}),\n\t\t});\n\t}\n\n\tprivate resolveAzure(\n\t\tresourceName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): BaseLanguageModel {\n\t\tconst resource = this.config.azure?.[resourceName];\n\t\tif (!resource) {\n\t\t\tthrow new Error(`Resource \"${resourceName}\" for provider \"azure\" is missing`);\n\t\t}\n\n\t\tconst modelEntry = resource.models.find((m) => m.model === modelName);\n\t\tif (!modelEntry) {\n\t\t\tthrow new Error(`Model \"${modelName}\" not found in Azure resource \"${resourceName}\"`);\n\t\t}\n\n\t\tswitch (modelEntry.provider) {\n\t\t\tcase 'anthropic':\n\t\t\t\treturn this.resolveAzureAnthropic(resource, modelEntry, tags, reasoningEffort);\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveAzureOpenAI(resource, modelEntry, tags, reasoningEffort);\n\t\t}\n\t}\n\n\tprivate resolveAzureOpenAI(\n\t\tresource: LangchainAzureResourceConfig,\n\t\tmodelEntry: LangchainAzureResourceConfig['models'][number],\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): AzureChatOpenAI {\n\t\t/**\n\t\t * OpenAI reasoning models require the Responses API which AzureChatOpenAI\n\t\t * does not support. We rewrite the endpoint to the Azure Responses API path.\n\t\t */\n\t\tconst endpoint = reasoningEffort\n\t\t\t? `${modelEntry.endpoint.replace(/\\/$/, '')}/openai/responses?api-version=${modelEntry.apiVersion}`\n\t\t\t: modelEntry.endpoint;\n\n\t\treturn new AzureChatOpenAI({\n\t\t\tmodel: modelEntry.model,\n\t\t\tazureOpenAIApiKey: resource.apiKey,\n\t\t\tazureOpenAIEndpoint: endpoint,\n\t\t\tazureOpenAIApiDeploymentName: modelEntry.deploymentName,\n\t\t\tazureOpenAIApiVersion: modelEntry.apiVersion,\n\t\t\ttags: tags,\n\t\t\t...(reasoningEffort && {\n\t\t\t\treasoning: {\n\t\t\t\t\teffort: reasoningEffort,\n\t\t\t\t\tsummary: 'auto',\n\t\t\t\t},\n\t\t\t}),\n\t\t});\n\t}\n\n\tprivate static readonly THINKING_BUDGET: Record<string, number> = {\n\t\tminimal: 1024,\n\t\tlow: 4096,\n\t\tmedium: 10000,\n\t\thigh: 16000,\n\t\txhigh: 32000,\n\t};\n\n\tprivate resolveAzureAnthropic(\n\t\tresource: LangchainAzureResourceConfig,\n\t\tmodelEntry: LangchainAzureResourceConfig['models'][number],\n\t\ttags?: string[],\n\t\treasoningEffort?: ReasoningEffort,\n\t): ChatAnthropic {\n\t\tconst budgetTokens = reasoningEffort ? LangchainModelResolver.THINKING_BUDGET[reasoningEffort] : undefined;\n\n\t\treturn new ChatAnthropic({\n\t\t\tmodel: modelEntry.model,\n\t\t\tapiKey: resource.apiKey,\n\t\t\tclientOptions: { baseURL: modelEntry.endpoint },\n\t\t\ttags: tags,\n\t\t\t...(budgetTokens && {\n\t\t\t\tmaxTokens: budgetTokens * 2,\n\t\t\t\tthinking: {\n\t\t\t\t\ttype: 'enabled',\n\t\t\t\t\tbudget_tokens: budgetTokens,\n\t\t\t\t},\n\t\t\t}),\n\t\t});\n\t}\n}\n","import { Message } from '@core/agent.interface';\nimport { AIMessage, BaseMessage, HumanMessage, ToolMessage } from 'langchain';\n\nexport function convertToLangchainMessages(messages: Message[]): BaseMessage[] {\n\tconst result: BaseMessage[] = [];\n\tlet tcIdx = 0;\n\tlet pendingToolCallIds: string[] = [];\n\n\tfor (const msg of messages) {\n\t\tif (msg.role === 'human') {\n\t\t\tresult.push(\n\t\t\t\tnew HumanMessage({\n\t\t\t\t\tcontent: msg.content.map((c) => {\n\t\t\t\t\t\tif (c.type === 'image') {\n\t\t\t\t\t\t\treturn { type: 'image_url', image_url: { url: c.url } };\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn c;\n\t\t\t\t\t}) as any,\n\t\t\t\t}),\n\t\t\t);\n\t\t} else if (msg.role === 'ai') {\n\t\t\tif (msg.toolCalls && msg.toolCalls.length > 0) {\n\t\t\t\tpendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);\n\t\t\t\tresult.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: msg.content,\n\t\t\t\t\t\ttool_calls: msg.toolCalls.map((tc, i) => ({\n\t\t\t\t\t\t\tid: pendingToolCallIds[i],\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\t\t\t})),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t} else {\n\t\t\t\tresult.push(new AIMessage(msg.content));\n\t\t\t}\n\t\t} else if (msg.role === 'tool') {\n\t\t\tconst toolCallId = pendingToolCallIds.shift();\n\t\t\tif (!toolCallId)\n\t\t\t\tthrow new Error(`ToolMessage for \"${msg.name}\" without a preceding AiMessage with toolCalls`);\n\t\t\tresult.push(\n\t\t\t\tnew ToolMessage({\n\t\t\t\t\tcontent: msg.output,\n\t\t\t\t\ttool_call_id: toolCallId,\n\t\t\t\t\tname: msg.name,\n\t\t\t\t}),\n\t\t\t);\n\t\t}\n\t}\n\n\treturn result;\n}\n","import * as ls from 'langsmith/vitest';\nimport { createTrajectoryMatchEvaluator } from 'agentevals';\nimport { createLanguageEvaluator } from './evaluators/language';\nimport { createLlmJudgeEvaluator } from './evaluators/llm-judge';\nimport { createResponseContentEvaluator } from './evaluators/response-content';\nimport { createNoToolCallsEvaluator } from './evaluators/no-tool-calls';\nimport { createAnyToolCalledEvaluator } from './evaluators/any-tool-called';\nimport { createToolInputEvaluator, type ToolCallExpectation } from './evaluators/tool-input';\nimport { getEvalConfig } from './config';\n\n// ── Types ────────────────────────────────────────────────────────────\n\ntype EvaluatorFn = (args: { outputs: Record<string, any>; referenceOutputs: Record<string, any> }) => Promise<any>;\n\ninterface ResolvedExpectation {\n\tevaluator: EvaluatorFn;\n\treferenceOutputs: Record<string, unknown>;\n}\n\n/** A factory that receives test context and returns an evaluator + its referenceOutputs. */\nexport type Expectation = (ctx: { message: string }) => ResolvedExpectation;\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nfunction withTrajectoryGuard(evaluator: any, key: string): EvaluatorFn {\n\treturn async ({ outputs, referenceOutputs }) => {\n\t\tif (!referenceOutputs?.referenceTrajectory) {\n\t\t\treturn { key, score: true, comment: 'No referenceTrajectory specified, skipping' };\n\t\t}\n\t\treturn evaluator({ outputs, referenceOutputs: referenceOutputs.referenceTrajectory });\n\t};\n}\n\nfunction buildTrajectory(message: string, toolNames: string[]): Record<string, unknown>[] {\n\tconst trajectory: Record<string, unknown>[] = [];\n\tlet tcIdx = 0;\n\n\ttrajectory.push({ role: 'user', content: message });\n\n\tfor (const name of toolNames) {\n\t\tconst id = `tc${++tcIdx}`;\n\t\ttrajectory.push({\n\t\t\trole: 'assistant',\n\t\t\tcontent: '',\n\t\t\ttool_calls: [{ function: { name, arguments: '{}' }, id, type: 'function' }],\n\t\t});\n\t\ttrajectory.push({ role: 'tool', content: '...', tool_call_id: id });\n\t}\n\n\ttrajectory.push({ role: 'assistant', content: '...' });\n\n\treturn trajectory;\n}\n\n// ── Expectation functions ────────────────────────────────────────────\n\n/** A tool name (string) or an object with a name and input validator. */\nexport type ToolExpectation = string | ToolCallExpectation;\n\n/**\n * Expect the agent to call the listed tools (superset trajectory match).\n * Empty `[]` means the agent should answer directly without calling any tools.\n *\n * Each entry can be a plain tool name or an object with:\n * - `validate` — callback that receives the tool input; at least one call must satisfy it.\n * - `times` — minimum number of times the tool must be called.\n * - Both can be combined.\n *\n * @example\n * toolsCalled([\n * 'list-documents',\n * { name: 'search-tables', validate: (input) => input.query?.includes('Q4') },\n * { name: 'list-documents', times: 2 },\n * ])\n */\nexport function toolsCalled(tools: ToolExpectation[]): Expectation {\n\tconst toolNames = tools.map((t) => (typeof t === 'string' ? t : t.name));\n\tconst validators = tools.filter((t): t is ToolCallExpectation => typeof t !== 'string');\n\n\treturn (ctx) => {\n\t\tconst trajectoryEvaluator = ls.wrapEvaluator(\n\t\t\twithTrajectoryGuard(\n\t\t\t\tcreateTrajectoryMatchEvaluator({ trajectoryMatchMode: 'superset', toolArgsMatchMode: 'ignore' }) as any,\n\t\t\t\t'trajectory_match',\n\t\t\t),\n\t\t);\n\n\t\tif (validators.length === 0) {\n\t\t\treturn {\n\t\t\t\tevaluator: trajectoryEvaluator,\n\t\t\t\treferenceOutputs: { referenceTrajectory: buildTrajectory(ctx.message, toolNames) },\n\t\t\t};\n\t\t}\n\n\t\tconst inputEvaluator = ls.wrapEvaluator(createToolInputEvaluator());\n\n\t\t// Compose both evaluators: trajectory match + tool input validation.\n\t\t// Both must pass for the overall expectation to succeed.\n\t\tconst composedEvaluator: EvaluatorFn = async ({ outputs, referenceOutputs }) => {\n\t\t\tconst trajectoryResult = await trajectoryEvaluator({ outputs, referenceOutputs });\n\t\t\tconst inputResult = await inputEvaluator({\n\t\t\t\toutputs,\n\t\t\t\treferenceOutputs: { ...referenceOutputs, toolInputExpectations: validators },\n\t\t\t});\n\n\t\t\tconst trajectoryPassed = Boolean(trajectoryResult.score);\n\t\t\tconst inputPassed = Boolean(inputResult.score);\n\n\t\t\treturn {\n\t\t\t\tkey: 'tools_called',\n\t\t\t\tscore: trajectoryPassed && inputPassed,\n\t\t\t\tcomment: [trajectoryResult.comment, inputResult.comment].filter(Boolean).join('; '),\n\t\t\t};\n\t\t};\n\n\t\treturn {\n\t\t\tevaluator: composedEvaluator,\n\t\t\treferenceOutputs: { referenceTrajectory: buildTrajectory(ctx.message, toolNames) },\n\t\t};\n\t};\n}\n\n/**\n * Use an LLM to judge the agent's final response against the given criteria.\n * Works independently — does not require `toolsCalled` or any other expectation.\n * Uses the globally configured evaluator model.\n *\n * @param criteria - Human-readable description of what the judge should evaluate.\n */\nexport function llmJudge(criteria: string): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(createLlmJudgeEvaluator(config.modelConfig, model, criteria)),\n\t\t\treferenceOutputs: {},\n\t\t};\n\t};\n}\n\n/**\n * Assert the agent made zero tool calls.\n * Optionally allow specific tools via `except` — calls to those tools\n * are permitted (but not required), while any other tool call fails.\n */\nexport function noTools(options?: { except: string[] }): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createNoToolCallsEvaluator()),\n\t\treferenceOutputs: {\n\t\t\texpectNoToolCalls: true,\n\t\t\t...(options?.except?.length ? { exceptTools: options.except } : {}),\n\t\t},\n\t});\n}\n\n/**\n * Assert the response is in the given language (ISO 639-1 code).\n * Uses the globally configured evaluator model for language detection.\n * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').\n */\nexport function respondsInLanguage(code: string): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(createLanguageEvaluator(config.modelConfig, model)),\n\t\t\treferenceOutputs: { expectedLanguage: code },\n\t\t};\n\t};\n}\n\n/**\n * Assert that at least one tool call was made.\n * When `tools` is provided, at least one of those specific tools must\n * appear in the trajectory. When omitted, any tool call satisfies it.\n */\nexport function anyToolCalled(tools?: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createAnyToolCalledEvaluator()),\n\t\treferenceOutputs: {\n\t\t\texpectAnyToolCall: true,\n\t\t\t...(tools?.length ? { anyToolsExpected: tools } : {}),\n\t\t},\n\t});\n}\n\n/** Assert the response contains all given strings. */\nexport function contains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseContains: strings },\n\t});\n}\n\n/** Assert the response does not contain any of the given strings. */\nexport function notContains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseMustNotContain: strings },\n\t});\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\nimport { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../../runtime/langchain/model-resolver';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * is in the expected language. Uses a cheap LLM call for language detection.\n */\nexport function createLanguageEvaluator(modelConfig: LangchainModelConfig, model: string) {\n\tconst resolver = new LangchainModelResolver(modelConfig);\n\tconst judge = resolver.resolve(model) as BaseChatModel;\n\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst expectedLanguage = referenceOutputs?.expectedLanguage;\n\t\tif (!expectedLanguage) {\n\t\t\treturn { key: 'language_match', score: true, comment: 'No expected language specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'language_match', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content);\n\n\t\t// Use LLM to detect the language\n\t\tconst detection = await judge.invoke([\n\t\t\t{\n\t\t\t\trole: 'system',\n\t\t\t\tcontent: 'You are a language detection tool. Respond with ONLY the ISO 639-1 language code (e.g., \"en\", \"tr\", \"de\", \"fr\") of the text provided. Nothing else.',\n\t\t\t},\n\t\t\t{\n\t\t\t\trole: 'user',\n\t\t\t\tcontent: responseText,\n\t\t\t},\n\t\t]);\n\n\t\tconst detectedLanguage = (typeof detection.content === 'string' ? detection.content : '').trim().toLowerCase();\n\n\t\tconst matches = detectedLanguage === expectedLanguage.toLowerCase();\n\n\t\treturn {\n\t\t\tkey: 'language_match',\n\t\t\tscore: matches,\n\t\t\tcomment: matches\n\t\t\t\t? `Response language matches expected: ${expectedLanguage}`\n\t\t\t\t: `Expected \"${expectedLanguage}\" but detected \"${detectedLanguage}\"`,\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\nimport { createLLMAsJudge } from 'openevals';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../../runtime/langchain/model-resolver';\n\nconst RESPONSE_CRITERIA_PROMPT = `You are an expert evaluator.\nAssess the following AI response based on the given criteria.\n\n<Criteria>\n{criteria}\n</Criteria>\n\n<Response>\n{outputs}\n</Response>\n\nGrade whether the response meets the criteria.`;\n\n/**\n * Creates an evaluator that uses an LLM to judge the agent's final response\n * against caller-provided criteria.\n *\n * Only the last AI message is evaluated — tool calls and intermediate\n * messages are not included.\n *\n * @param modelConfig - LangChain model configuration for credential resolution.\n * @param model - LangChain model identifier (e.g. 'openai:gpt-4o-mini').\n * @param criteria - Human-readable description of what the judge should evaluate.\n */\nexport function createLlmJudgeEvaluator(modelConfig: LangchainModelConfig, model: string, criteria: string) {\n\tconst resolver = new LangchainModelResolver(modelConfig);\n\t// Cast through `any` to work around duplicate @langchain/core type resolution\n\t// between our node_modules and openevals' dependency.\n\tconst judge = resolver.resolve(model) as any;\n\n\tconst llmJudge = createLLMAsJudge({\n\t\tprompt: RESPONSE_CRITERIA_PROMPT,\n\t\tfeedbackKey: 'llm_judge',\n\t\tjudge,\n\t\tuseReasoning: true,\n\t});\n\n\treturn async ({ outputs }: { outputs: Record<string, any>; referenceOutputs?: Record<string, any> }) => {\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'llm_judge', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText =\n\t\t\ttypeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content);\n\n\t\treturn llmJudge({\n\t\t\toutputs: responseText,\n\t\t\tcriteria,\n\t\t});\n\t};\n}\n","import { isBaseMessage } from \"@langchain/core/messages\";\nimport * as openAIImports from \"@langchain/openai\";\nimport { wrapEvaluator, isInTestContext } from \"langsmith/utils/jestlike\";\nimport { traceable } from \"langsmith/traceable\";\nconst { \n// @ts-expect-error Shim for older versions of @langchain/openai\n_convertMessagesToOpenAIParams, convertMessagesToCompletionsMessageParams, } = openAIImports;\nfunction _convertMessagesShim(message) {\n if (typeof _convertMessagesToOpenAIParams === \"function\") {\n return _convertMessagesToOpenAIParams([\n message,\n ])[0];\n }\n return convertMessagesToCompletionsMessageParams({\n messages: [message],\n })[0];\n}\nexport const _convertToOpenAIMessage = (message) => {\n if (isBaseMessage(message)) {\n const converted = _convertMessagesShim(message);\n if (message.id && !converted.id) {\n converted.id = message.id;\n }\n return converted;\n }\n else {\n return message;\n }\n};\nexport const _normalizeToOpenAIMessagesList = (messages) => {\n let messagesList;\n if (!Array.isArray(messages)) {\n if (\"messages\" in messages && Array.isArray(messages.messages)) {\n messagesList = messages.messages;\n }\n else if (\"content\" in messages && \"role\" in messages) {\n messagesList = [messages];\n }\n else {\n throw new Error(`If passing messages as an object, it must contain a \"messages\" key`);\n }\n }\n else {\n messagesList = messages;\n }\n return messagesList.map(_convertToOpenAIMessage);\n};\nexport const processScore = (_, value) => {\n if (typeof value === \"object\") {\n if (value != null && \"score\" in value) {\n return [\n value.score,\n \"reasoning\" in value && typeof value.reasoning === \"string\"\n ? value.reasoning\n : undefined,\n value.metadata,\n value.sourceRunId,\n ];\n }\n else {\n throw new Error(`Expected a dictionary with a \"score\" key, but got \"${JSON.stringify(value, null, 2)}\"`);\n }\n }\n return [value];\n};\nexport async function _runEvaluator(runName, scorer, feedbackKey, extra, ls_framework) {\n return _runEvaluatorUntyped(runName, scorer, feedbackKey, extra, ls_framework, false);\n}\nexport async function _runEvaluatorUntyped(runName, scorer, feedbackKey, extra, ls_framework, returnRawOutputs) {\n const runScorer = async (params) => {\n let score = await scorer(params);\n if (returnRawOutputs) {\n return score;\n }\n let reasoning;\n if (!Array.isArray(score) && typeof score === \"object\") {\n const results = [];\n for (const [key, value] of Object.entries(score)) {\n const [keyScore, reasoning, metadata, sourceRunId] = processScore(key, \n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n value);\n const result = {\n key,\n score: keyScore,\n comment: reasoning,\n metadata,\n };\n if (sourceRunId !== undefined && typeof sourceRunId === \"string\") {\n result.sourceRunId = sourceRunId;\n }\n results.push(result);\n }\n return results;\n }\n else {\n let metadata;\n if (Array.isArray(score)) {\n metadata = score[2];\n reasoning = score[1];\n score = score[0];\n }\n return {\n key: feedbackKey,\n score,\n comment: reasoning,\n metadata,\n };\n }\n };\n if (isInTestContext()) {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const res = await wrapEvaluator(runScorer)(extra ?? {}, {\n name: runName,\n metadata: {\n __ls_framework: ls_framework ?? \"openevals\",\n __ls_evaluator: runName,\n __ls_language: \"js\",\n },\n });\n if (returnRawOutputs) {\n // TODO: Fix LangSmith SDK types\n const rawResults = res;\n return rawResults;\n }\n return res;\n }\n else {\n const traceableRunScorer = traceable(runScorer, {\n name: runName,\n metadata: {\n __ls_framework: ls_framework ?? \"openevals\",\n __ls_evaluator: runName,\n __ls_language: \"js\",\n },\n });\n const res = await traceableRunScorer(extra ?? {});\n return res;\n }\n}\nexport function _normalizeOutputsAsString(outputs) {\n if (typeof outputs === \"string\") {\n return outputs;\n }\n else if (outputs !== null && typeof outputs === \"object\") {\n if (\"content\" in outputs) {\n return outputs.content;\n }\n else if (\"messages\" in outputs &&\n Array.isArray(outputs.messages) &&\n outputs.messages.length > 0) {\n return outputs.messages[outputs.messages.length - 1].content;\n }\n else {\n throw new Error(`Expected a string, dictionary with a 'content' key or a 'messages' key with a list of messages, but got ${JSON.stringify(outputs, null, 2)}`);\n }\n }\n else {\n throw new Error(`Expected string or object, got ${typeof outputs}`);\n }\n}\n","import { getCurrentRunTree } from \"langsmith/traceable\";\nimport { _createLLMAsJudgeScorer } from \"../llm.js\";\nimport { _runEvaluator } from \"../utils.js\";\nconst SYSTEM_PROMPT = `You are an LLM that evaluates the accuracy of structured outputs.\nMake sure to evaluate each key the users ask you to evaluate separately. Assign the score\nfor each key based on its own criteria - DO NOT convolute the scores of different keys.\nAlso only evaluate the output vs. the reference output based on the criteria. DO NOT EVALUATE\nBASED ON ANYTHING ELSE. If the output does not match the reference output in some way that\nis not mentioned in the criteria that is not a problem and you should ignore those discrepancies.\nOnly focus on finding discrepancies based on the criteria. If there is a None value being compared\nto a non-None value, you should assign a score of 0.`;\nconst USER_PROMPT = `Please evaluate the accuracy of the following output keys according to these criteria:\n{rubric}\n<Outputs>\n{outputs}\n</Outputs>\n<Expected Outputs>\n{reference_outputs}\n</Expected Outputs>`;\nfunction _prepareParameters({ outputs, referenceOutputs, rubric, excludeKeys, useReasoning, listMatchMode = \"same_elements\", }) {\n const jsonSchema = {\n type: \"object\",\n title: \"json_match\",\n description: \"Scores measuring the accuracy of structured outputs\",\n properties: {},\n required: [],\n additionalProperties: false,\n };\n const scores = {};\n let formattedRubric = \"\";\n let useListReducer = false;\n let processedOutputs = {};\n let processedReferenceOutputs = {};\n if (Array.isArray(outputs)) {\n useListReducer = true;\n if (!Array.isArray(referenceOutputs)) {\n throw new Error(\"If outputs is a list, referenceOutputs must also be a list\");\n }\n const outputsToUse = {};\n const referenceOutputsToUse = {};\n if (listMatchMode === \"ordered\") {\n outputs.forEach((output, i) => {\n Object.entries(output).forEach(([key, value]) => {\n outputsToUse[`${key}_${i}`] = value;\n });\n });\n referenceOutputs.forEach((refOutput, i) => {\n Object.entries(refOutput).forEach(([key, value]) => {\n referenceOutputsToUse[`${key}_${i}`] = value;\n });\n });\n }\n else if (listMatchMode === \"superset\") {\n const availableOutputs = Array.from(Array(outputs.length).keys());\n const matchedReferences = new Set();\n referenceOutputs.forEach((refItem, i) => {\n let bestMatchScore = -1;\n let bestMatchIdx;\n availableOutputs.forEach((outIdx) => {\n const outputItem = outputs[outIdx];\n let matchScore = 0;\n Object.keys(refItem).forEach((key) => {\n if (key in outputItem &&\n !excludeKeys.includes(key) &&\n !(key in rubric)) {\n matchScore += Number(refItem[key] === outputItem[key]);\n }\n });\n if (matchScore > bestMatchScore) {\n bestMatchScore = matchScore;\n bestMatchIdx = outIdx;\n }\n });\n if (bestMatchIdx !== undefined) {\n Object.entries(outputs[bestMatchIdx]).forEach(([key, value]) => {\n outputsToUse[`${key}_${i}`] = value;\n });\n Object.entries(refItem).forEach(([key, value]) => {\n referenceOutputsToUse[`${key}_${i}`] = value;\n });\n availableOutputs.splice(availableOutputs.indexOf(bestMatchIdx), 1);\n matchedReferences.add(i);\n }\n else {\n Object.entries(refItem).forEach(([key, value]) => {\n referenceOutputsToUse[`${key}_${i}`] = value;\n });\n }\n });\n }\n else {\n const availableReferences = Array.from(Array(referenceOutputs.length).keys());\n const matchedOutputs = new Set();\n outputs.forEach((outputItem, i) => {\n let bestMatchIdx;\n let bestMatchScore = -1;\n availableReferences.forEach((refIdx) => {\n const refItem = referenceOutputs[refIdx];\n let matchScore = 0;\n Object.keys(outputItem).forEach((key) => {\n if (key in refItem &&\n !excludeKeys.includes(key) &&\n !(key in rubric)) {\n matchScore += Number(outputItem[key] === refItem[key]);\n }\n });\n if (matchScore > bestMatchScore) {\n bestMatchScore = matchScore;\n bestMatchIdx = refIdx;\n }\n });\n if (bestMatchIdx !== undefined) {\n Object.entries(outputItem).forEach(([key, value]) => {\n outputsToUse[`${key}_${i}`] = value;\n });\n Object.entries(referenceOutputs[bestMatchIdx]).forEach(([key, value]) => {\n referenceOutputsToUse[`${key}_${i}`] = value;\n });\n availableReferences.splice(availableReferences.indexOf(bestMatchIdx), 1);\n matchedOutputs.add(i);\n }\n else {\n Object.entries(outputItem).forEach(([key, value]) => {\n outputsToUse[`${key}_${i}`] = value;\n });\n }\n });\n if (listMatchMode === \"same_elements\") {\n availableReferences.forEach((refIdx, index) => {\n const refItem = referenceOutputs[refIdx];\n const dummyIdx = outputs.length + index;\n Object.entries(refItem).forEach(([key, value]) => {\n referenceOutputsToUse[`${key}_${dummyIdx}`] = value;\n });\n });\n }\n }\n processedOutputs = outputsToUse;\n processedReferenceOutputs = referenceOutputsToUse;\n }\n else {\n processedOutputs = outputs;\n processedReferenceOutputs = referenceOutputs;\n }\n Object.entries(processedOutputs).forEach(([rawKey, value]) => {\n const key = useListReducer\n ? rawKey.substring(0, rawKey.lastIndexOf(\"_\"))\n : rawKey;\n if (excludeKeys.includes(key)) {\n return;\n }\n if (!(rawKey in processedReferenceOutputs)) {\n scores[rawKey] = 0;\n return;\n }\n if (!(key in rubric) && processedReferenceOutputs[rawKey] === value) {\n scores[rawKey] = 1;\n }\n else if (!(key in rubric)) {\n scores[rawKey] = 0;\n }\n else {\n const keyCriteria = rubric[key];\n formattedRubric += `Key: ${key}, Criteria: ${keyCriteria}\\n`;\n if (!useReasoning) {\n jsonSchema.properties[rawKey] = {\n type: \"boolean\",\n description: `Does the output for key ${key}, follow the criteria? ${keyCriteria}`,\n };\n jsonSchema.required.push(rawKey);\n }\n else {\n jsonSchema.properties[rawKey] = {\n type: \"object\",\n properties: {\n reasoning: {\n type: \"string\",\n description: `Reasoning for the score you assigned to key ${key}`,\n },\n score: {\n type: \"boolean\",\n description: `Does the output for key ${key}, follow the criteria? ${keyCriteria}`,\n },\n },\n required: [\"score\", \"reasoning\"],\n additionalProperties: false,\n };\n jsonSchema.required.push(rawKey);\n }\n }\n });\n Object.entries(processedReferenceOutputs).forEach(([rawKey, _]) => {\n const key = useListReducer\n ? rawKey.substring(0, rawKey.lastIndexOf(\"_\"))\n : rawKey;\n if (!excludeKeys.includes(key) && !(rawKey in processedOutputs)) {\n scores[rawKey] = 0;\n }\n });\n return {\n processedOutputs,\n processedReferenceOutputs,\n jsonSchema,\n scores,\n formattedRubric,\n useListReducer,\n };\n}\nfunction _aggregateResults({ scores, useListReducer, aggregator, listAggregator, scoreKey, }) {\n if (useListReducer) {\n const indexGroupedScores = {};\n Object.entries(scores).forEach(([k, v]) => {\n const index = k.substring(k.lastIndexOf(\"_\") + 1);\n const baseKey = k.substring(0, k.lastIndexOf(\"_\"));\n if (!indexGroupedScores[index]) {\n indexGroupedScores[index] = {};\n }\n indexGroupedScores[index][baseKey] = v;\n });\n let indexScores = {};\n if (aggregator === \"average\") {\n Object.entries(indexGroupedScores).forEach(([index, group]) => {\n if (Object.keys(group).length) {\n const total = Object.values(group).reduce((sum, v) => sum + (typeof v === \"object\" ? Number(v.score) : Number(v)), 0);\n indexScores[index] = total / Object.keys(group).length;\n }\n });\n }\n else if (aggregator === \"all\") {\n Object.entries(indexGroupedScores).forEach(([index, group]) => {\n if (Object.keys(group).length) {\n const hasNonOne = Object.values(group).some((v) => (typeof v === \"object\" ? Number(v.score) : Number(v)) !== 1);\n indexScores[index] = hasNonOne ? 0 : 1;\n }\n });\n }\n else {\n indexScores = indexGroupedScores;\n }\n if (listAggregator === \"average\") {\n if (Object.values(indexScores).every((v) => typeof v === \"number\")) {\n const score = Object.keys(indexScores).length\n ? Object.values(indexScores).reduce((a, b) => Number(a) + Number(b), 0) / Object.keys(indexScores).length\n : 0;\n return { [[scoreKey, aggregator].join(\":\")]: score };\n }\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const scoresAggregatedAcrossList = {};\n Object.values(indexScores).forEach((group) => {\n Object.entries(group).forEach(([key, value]) => {\n if (!scoresAggregatedAcrossList[key]) {\n scoresAggregatedAcrossList[key] = [];\n }\n scoresAggregatedAcrossList[key].push(value);\n });\n });\n const result = {};\n Object.entries(scoresAggregatedAcrossList).forEach(([key, values]) => {\n if (values.length) {\n result[[scoreKey, key].join(\":\")] =\n values.reduce((sum, v) => sum + (typeof v === \"object\" ? Number(v.score) : Number(v)), 0) / values.length;\n }\n });\n return result;\n }\n else if (listAggregator === \"all\") {\n if (Object.values(indexScores).every((v) => typeof v === \"number\")) {\n return {\n [[scoreKey, aggregator].join(\":\")]: Object.values(indexScores).some((v) => v !== 1)\n ? 0\n : 1,\n };\n }\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const scoresAggregatedAcrossList = {};\n Object.values(indexScores).forEach((group) => {\n Object.entries(group).forEach(([key, value]) => {\n if (!scoresAggregatedAcrossList[key]) {\n scoresAggregatedAcrossList[key] = [];\n }\n scoresAggregatedAcrossList[key].push(value);\n });\n });\n const result = {};\n Object.entries(scoresAggregatedAcrossList).forEach(([key, values]) => {\n result[[scoreKey, key].join(\":\")] = values.some((v) => v !== 1) ? 0 : 1;\n });\n return result;\n }\n }\n if (aggregator === \"average\") {\n const score = Object.keys(scores).length\n ? Object.values(scores).reduce((sum, v) => sum + (typeof v === \"object\" ? Number(v.score) : Number(v)), 0) / Object.keys(scores).length\n : 0;\n return { [[scoreKey, aggregator].join(\":\")]: score };\n }\n else if (aggregator === \"all\") {\n const score = Object.values(scores).some((v) => (typeof v === \"object\" ? Number(v.score) : Number(v)) !== 1)\n ? 0\n : 1;\n return { [[scoreKey, aggregator].join(\":\")]: score };\n }\n else {\n const results = {};\n Object.entries(scores).forEach(([key, value]) => {\n if (typeof value === \"object\") {\n results[[scoreKey, key].join(\":\")] = {\n score: Number(value.score),\n reasoning: value.reasoning,\n };\n }\n else {\n results[[scoreKey, key].join(\":\")] = Number(value);\n }\n });\n return results;\n }\n}\n/**\n * Create an evaluator to evaluate the accuracy of structured outputs.\n *\n * @param options The configuration options\n * @param options.aggregator - The aggregation method to use for combining the keys of each structured object.\n * If undefined, will return a single EvaluatorResult for each key that appears in either\n * the outputs or the reference_outputs or both. If \"average\", will return a single EvaluatorResult that\n * is the average of the feedback for each key. If \"all\", will return a single EvaluatorResult that\n * is a combined AND statement of the feedback for each key. If \"all\"/\"average\" the feedback key\n * returned will be called \"json_match\"\n * @param options.listAggregator - The aggregation method to use when evaluating a list of outputs.\n * Defaults to \"all\". If \"all\", the score for a single feedback key will be a combined AND statement\n * of the scores for that key across all elements of the list. If \"average\", the score will be the\n * average of the scores for that key across all elements of the list\n * @param options.rubric - The rubric to use for the judge. Each entry is a key/value pair where the key\n * is the structured output key and the value is the criteria for the LLM to evaluate that key\n * against the reference output\n * @param options.excludeKeys - The keys to exclude from the evaluation. Use this if there are keys in your\n * structured output you don't care about evaluating. Every key not in excludeKeys or rubric will be\n * evaluated for exact match with the reference output\n * @param options.judge - The judge to use for the evaluation\n * @param options.model - The model to use for the evaluation\n * @param options.useReasoning - Whether to use reasoning for the keys in rubric. Defaults to true\n * @param options.listMatchMode - The mode to use for matching list elements. Defaults to \"same_elements\".\n * If \"same_elements\", matches every element of outputs with reference_outputs and vice versa.\n * If \"subset\", matches elements of outputs with reference_outputs.\n * If \"superset\", matches elements of reference_outputs with outputs.\n * If \"ordered\", matches elements by their index position\n * @returns A function that takes outputs and reference_outputs and returns an EvaluatorResult or list of EvaluatorResults\n */\nexport const createJsonMatchEvaluator = ({ aggregator, listAggregator = \"all\", rubric = {}, excludeKeys = [], judge, model, useReasoning = true, listMatchMode = \"same_elements\", }) => {\n if ((judge || model) && Object.keys(rubric).length === 0) {\n throw new Error(\"rubric must be provided if judge or model is provided\");\n }\n else if (!judge && !model && Object.keys(rubric).length !== 0) {\n throw new Error(\"judge or model must be provided if rubric is provided\");\n }\n const wrappedEvaluator = async ({ outputs, referenceOutputs, }) => {\n const { processedOutputs, processedReferenceOutputs, jsonSchema, scores, formattedRubric, useListReducer, } = _prepareParameters({\n outputs,\n referenceOutputs,\n rubric,\n excludeKeys,\n useReasoning,\n listMatchMode,\n });\n // Identify which keys need LLM evaluation\n const llmKeys = Object.keys(formattedRubric ?? {}).length > 0\n ? new Set(Object.keys(jsonSchema.properties))\n : new Set();\n async function aggregateScorer() {\n // Get LLM scores if needed\n if (llmKeys.size > 0) {\n const outputKeys = Object.keys(jsonSchema.properties)\n .map((key) => `${key}: ${processedOutputs[key]}`)\n .join(\"\\n\");\n const expectedOutputKeys = Object.keys(jsonSchema.properties)\n .map((key) => `${key}: ${processedReferenceOutputs[key]}`)\n .join(\"\\n\");\n const scorerFn = _createLLMAsJudgeScorer({\n prompt: USER_PROMPT,\n system: SYSTEM_PROMPT,\n schema: jsonSchema,\n judge,\n model,\n });\n const llmScores = await scorerFn({\n outputs: outputKeys,\n referenceOutputs: expectedOutputKeys,\n rubric: formattedRubric,\n });\n Object.assign(scores, llmScores);\n }\n // Aggregate\n return _aggregateResults({\n scoreKey: \"json_match\",\n scores,\n useListReducer,\n aggregator,\n listAggregator,\n });\n }\n // Special handling when aggregator is specified - aggregate all keys at once\n if (aggregator !== undefined) {\n return _runEvaluator(\"json_match_evaluator\", aggregateScorer, \"json_match\", {\n inputs: outputs,\n referenceOutputs,\n });\n }\n // Group raw keys by their base key for processing\n const rawKeysByBase = {};\n const allRawKeys = new Set([...Object.keys(scores), ...llmKeys]);\n for (const rawKey of allRawKeys) {\n const baseKey = useListReducer && rawKey.includes(\"_\") && rawKey.lastIndexOf(\"_\") > 0\n ? rawKey.substring(0, rawKey.lastIndexOf(\"_\"))\n : rawKey;\n if (!rawKeysByBase[baseKey]) {\n rawKeysByBase[baseKey] = [];\n }\n rawKeysByBase[baseKey].push(rawKey);\n }\n // Process each base key\n const allResults = [];\n for (const baseKey of Object.keys(rawKeysByBase).sort()) {\n const rawKeys = rawKeysByBase[baseKey];\n const needsLlm = rawKeys.some((rk) => llmKeys.has(rk));\n if (needsLlm) {\n // Create scorer that calls LLM for these keys\n // eslint-disable-next-line no-inner-declarations\n async function keyScorer() {\n // Create schema for just these keys\n const keySchema = {\n type: \"object\",\n title: \"structured_match_score\",\n description: \"Scores measuring the accuracy of structured outputs\",\n properties: Object.fromEntries(rawKeys\n .filter((rk) => jsonSchema.properties[rk])\n .map((rk) => [rk, jsonSchema.properties[rk]])),\n required: rawKeys.filter((rk) => jsonSchema.properties[rk]),\n additionalProperties: false,\n };\n // Create LLM scorer\n const scorerFn = _createLLMAsJudgeScorer({\n prompt: USER_PROMPT,\n system: SYSTEM_PROMPT,\n schema: keySchema,\n judge,\n model,\n });\n // Format outputs\n const outputStrs = rawKeys\n .filter((rk) => processedOutputs[rk] !== undefined)\n .map((rk) => `${rk}: ${processedOutputs[rk]}`);\n const expectedStrs = rawKeys\n .filter((rk) => processedReferenceOutputs[rk] !== undefined)\n .map((rk) => `${rk}: ${processedReferenceOutputs[rk]}`);\n const keyCriteria = rubric[baseKey] || \"\";\n const formattedKeyRubric = keyCriteria\n ? `Key: ${baseKey}, Criteria: ${keyCriteria}\\n`\n : \"\";\n // Call LLM\n const llmScores = await scorerFn({\n outputs: outputStrs.join(\"\\n\"),\n referenceOutputs: expectedStrs.join(\"\\n\"),\n rubric: formattedKeyRubric,\n });\n // Combine with non-LLM scores\n const allKeyScores = {\n ...Object.fromEntries(rawKeys\n .filter((rk) => !llmKeys.has(rk))\n .map((rk) => [rk, scores[rk] ?? 0])),\n ...(typeof llmScores === \"object\" && llmScores != null\n ? llmScores\n : {}),\n };\n // Aggregate across list items if needed\n if (useListReducer && rawKeys.length > 1) {\n // Fill in missing indices with 0 scores\n const allIndices = new Set();\n for (const key of Object.keys(scores)) {\n if (key.includes(\"_\")) {\n const idx = key.substring(key.lastIndexOf(\"_\") + 1);\n try {\n allIndices.add(parseInt(idx, 10));\n }\n catch {\n // ignore non-numeric indices\n }\n }\n }\n // Add 0 scores for missing indices\n for (const idx of allIndices) {\n const expectedKey = `${baseKey}_${idx}`;\n if (!(expectedKey in allKeyScores)) {\n allKeyScores[expectedKey] = 0;\n }\n }\n const aggregated = _aggregateResults({\n scoreKey: \"json_match\",\n scores: allKeyScores,\n useListReducer: true,\n aggregator: undefined,\n listAggregator,\n });\n // Convert boolean scores to numbers and preserve reasoning\n const result = {};\n for (const [key, value] of Object.entries(aggregated)) {\n if (typeof value === \"object\" &&\n value != null &&\n \"score\" in value) {\n result[key] = {\n score: Number(value.score),\n reasoning: value.reasoning,\n };\n }\n else {\n result[key] =\n typeof value === \"boolean\" ? Number(value) : value;\n }\n }\n return result;\n }\n else {\n // Single key - return with json_match prefix\n const value = Object.values(allKeyScores)[0];\n const sourceRunId = getCurrentRunTree(true)?.id;\n if (typeof value === \"object\" &&\n value != null &&\n \"score\" in value) {\n return {\n [`json_match:${baseKey}`]: {\n score: Number(value.score),\n reasoning: value.reasoning,\n sourceRunId,\n },\n };\n }\n else {\n return {\n [`json_match:${baseKey}`]: {\n score: typeof value === \"boolean\" ? Number(value) : value,\n sourceRunId,\n },\n };\n }\n }\n }\n const results = await _runEvaluator(\"json_match_evaluator\", keyScorer, `json_match:${baseKey}`, {\n inputs: processedOutputs[baseKey] ?? outputs,\n referenceOutputs: processedReferenceOutputs[baseKey] ?? referenceOutputs,\n });\n if (Array.isArray(results)) {\n allResults.push(...results);\n }\n else {\n allResults.push(results);\n }\n }\n else {\n // Non-LLM keys - just aggregate existing scores\n // eslint-disable-next-line no-inner-declarations\n async function keyScorer() {\n const keyScores = Object.fromEntries(rawKeys.map((rk) => [rk, scores[rk]]));\n // Aggregate across list items if needed\n if (useListReducer && rawKeys.length > 1) {\n // Fill in missing indices with 0 scores\n const allIndices = new Set();\n for (const key of Object.keys(scores)) {\n if (key.includes(\"_\")) {\n const idx = key.substring(key.lastIndexOf(\"_\") + 1);\n try {\n allIndices.add(parseInt(idx, 10));\n }\n catch {\n // ignore non-numeric indices\n }\n }\n }\n // Add 0 scores for missing indices\n for (const idx of allIndices) {\n const expectedKey = `${baseKey}_${idx}`;\n if (!(expectedKey in keyScores)) {\n keyScores[expectedKey] = 0;\n }\n }\n const aggregated = _aggregateResults({\n scoreKey: \"json_match\",\n scores: keyScores,\n useListReducer: true,\n aggregator: undefined,\n listAggregator,\n });\n // Convert boolean scores to numbers\n const result = {};\n for (const [key, value] of Object.entries(aggregated)) {\n result[key] = typeof value === \"boolean\" ? Number(value) : value;\n }\n return result;\n }\n else {\n // Single key - return with json_match prefix\n const value = keyScores[rawKeys[0]];\n return {\n [`json_match:${baseKey}`]: {\n score: typeof value === \"boolean\" ? Number(value) : value,\n sourceRunId: getCurrentRunTree(true)?.id,\n },\n };\n }\n }\n const results = await _runEvaluator(\"json_match_evaluator\", keyScorer, `json_match:${baseKey}`, {\n inputs: processedOutputs[baseKey] ?? outputs,\n referenceOutputs: processedReferenceOutputs[baseKey] ?? referenceOutputs,\n });\n if (Array.isArray(results)) {\n allResults.push(...results);\n }\n else {\n allResults.push(results);\n }\n }\n }\n return allResults;\n };\n return wrappedEvaluator;\n};\n","import { Runnable } from \"@langchain/core/runnables\";\nimport { ChatPromptTemplate } from \"@langchain/core/prompts\";\nimport { isBaseMessage } from \"@langchain/core/messages\";\nimport { toJsonSchema } from \"@langchain/core/utils/json_schema\";\nimport { initChatModel } from \"langchain/chat_models/universal\";\nimport { traceable } from \"langsmith/traceable\";\nimport { _normalizeToOpenAIMessagesList, _convertToOpenAIMessage, _runEvaluatorUntyped, } from \"./utils.js\";\nfunction _isRunnableInterface(prompt) {\n return Runnable.isRunnable(prompt);\n}\nfunction _isStructuredPrompt(prompt) {\n return (_isRunnableInterface(prompt) && \"schema\" in prompt && prompt.schema != null);\n}\nexport function isZodSchema(input) {\n // Check for a characteristic method of Zod schemas\n return typeof input?.parse === \"function\";\n}\nfunction _isBaseChatModel(x) {\n const model = x;\n return (x != null &&\n typeof x === \"object\" &&\n typeof model._modelType === \"function\" &&\n model._modelType() === \"base_chat_model\");\n}\nfunction appendFewShotExamples({ messages, fewShotExamples, }) {\n // Find the last user message to append examples to\n const lastUserMessageIdx = messages\n .slice()\n .reverse()\n .findIndex((msg) => msg.role === \"user\");\n if (lastUserMessageIdx === -1) {\n throw new Error(\"Appending few-shot examples requires a user message in the provided prompt\");\n }\n const actualIdx = messages.length - 1 - lastUserMessageIdx;\n // eslint-disable-next-line no-param-reassign\n messages[actualIdx].content +=\n \"\\n\\n\" +\n fewShotExamples\n .map((example) => {\n let exampleStr = `<example>\\n<input>${JSON.stringify(example.inputs)}</input>\\n<output>${JSON.stringify(example.outputs)}</output>`;\n if (example.reasoning) {\n exampleStr += `\\n<reasoning>${example.reasoning}</reasoning>`;\n }\n if (example.score !== undefined) {\n exampleStr += `\\n<score>${example.score}</score>`;\n }\n exampleStr += \"\\n</example>\";\n return exampleStr;\n })\n .join(\"\\n\");\n return messages;\n}\nfunction constructDefaultOutputJsonSchema({ continuous, choices, useReasoning, }) {\n const jsonSchema = {\n type: \"object\",\n additionalProperties: false,\n };\n let description;\n let scoreSchema;\n if (choices) {\n description =\n \"A number that represents the degree to which the criteria in the prompt are met.\";\n scoreSchema = {\n type: \"number\",\n description,\n enum: choices,\n };\n }\n else if (continuous) {\n description =\n \"A number that represents the degree to which the criteria in the prompt are met, from 0.0 to 1.0. 1.0 means the criteria are met perfectly. 0.0 means none of the criteria are met, 0.5 means exactly half of the criteria are met.\";\n scoreSchema = {\n type: \"number\",\n description,\n };\n }\n else {\n description =\n \"A score that is true if criteria in the prompt are met, and false otherwise.\";\n scoreSchema = {\n type: \"boolean\",\n description,\n };\n }\n if (useReasoning) {\n jsonSchema.properties = {\n reasoning: {\n type: \"string\",\n description: \"A human-readable explanation of the score. You MUST end the reasoning with a sentence that says: Thus, the score should be: SCORE_YOU_ASSIGN.\",\n },\n score: scoreSchema,\n };\n jsonSchema.required = [\"reasoning\", \"score\"];\n }\n else {\n jsonSchema.properties = {\n score: scoreSchema,\n };\n jsonSchema.required = [\"score\"];\n }\n return [jsonSchema, description];\n}\nfunction _stringifyPromptParam(param) {\n if (typeof param === \"string\") {\n return param;\n }\n else if (isBaseMessage(param)) {\n return JSON.stringify(_convertToOpenAIMessage(param));\n }\n else if (typeof param === \"object\" && param !== null) {\n if (Array.isArray(param)) {\n return JSON.stringify(param.map((message) => isBaseMessage(message) ? _convertToOpenAIMessage(message) : message));\n }\n const objParam = param;\n if (\"messages\" in objParam && Array.isArray(objParam.messages)) {\n objParam.messages = objParam.messages.map((message) => isBaseMessage(message) ? _convertToOpenAIMessage(message) : message);\n return JSON.stringify(objParam);\n }\n return JSON.stringify(param);\n }\n return JSON.stringify(param);\n}\nexport const _createLLMAsJudgeScorer = (params) => {\n const { prompt, system, model, continuous, choices, fewShotExamples } = params;\n let schema;\n if (isZodSchema(params.schema)) {\n schema = toJsonSchema(params.schema);\n }\n else {\n schema = params.schema;\n }\n let judge = params.judge;\n const useReasoning = params.useReasoning ?? true;\n const getScore = async (params) => {\n const { inputs, outputs, referenceOutputs, ...rest } = params;\n if (system && typeof prompt !== \"string\") {\n throw new Error(\"`system` is only supported when `prompt` is a string template\");\n }\n let stringifiedInputs = inputs;\n let stringifiedOutputs = outputs;\n let stringifiedReferenceOutputs = referenceOutputs;\n if (inputs) {\n stringifiedInputs = _stringifyPromptParam(inputs);\n }\n if (outputs) {\n stringifiedOutputs = _stringifyPromptParam(outputs);\n }\n if (referenceOutputs) {\n stringifiedReferenceOutputs = _stringifyPromptParam(referenceOutputs);\n }\n const stringifiedRest = Object.fromEntries(Object.entries(rest).map(([key, value]) => [\n key,\n _stringifyPromptParam(value),\n ]));\n let messages = [];\n const promptParams = {\n inputs: stringifiedInputs,\n outputs: stringifiedOutputs,\n reference_outputs: stringifiedReferenceOutputs,\n ...stringifiedRest,\n };\n // Filter out undefined values from promptParams\n const filteredPromptParams = Object.fromEntries(Object.entries(promptParams).filter(([_, value]) => value !== undefined));\n if (_isRunnableInterface(prompt)) {\n const formattedPrompt = await prompt.invoke(filteredPromptParams);\n messages = formattedPrompt.messages;\n if (_isStructuredPrompt(prompt)) {\n schema = prompt.schema;\n }\n }\n else if (typeof prompt === \"string\") {\n const template = ChatPromptTemplate.fromTemplate(prompt);\n const formattedPrompt = await template.invoke(filteredPromptParams);\n messages = formattedPrompt.messages;\n }\n else {\n messages = await prompt({\n inputs,\n outputs,\n reference_outputs: referenceOutputs,\n ...rest,\n });\n }\n if (system) {\n messages = [{ role: \"system\", content: system }, ...messages];\n }\n let normalizedMessages = _normalizeToOpenAIMessagesList(messages);\n if (fewShotExamples) {\n normalizedMessages = appendFewShotExamples({\n messages: normalizedMessages,\n fewShotExamples,\n });\n }\n const [defaultJsonSchema, description] = constructDefaultOutputJsonSchema({\n continuous,\n choices,\n useReasoning,\n });\n if (!judge) {\n if (!model) {\n throw new Error(\"`model` string is required (e.g. 'openai:o3-mini') when `judge` is not provided\");\n }\n judge = await initChatModel(model);\n }\n let response;\n if (_isBaseChatModel(judge)) {\n const judgeWithStructuredOutput = judge.withStructuredOutput(schema ?? {\n title: \"score\",\n description,\n ...defaultJsonSchema,\n });\n response = await judgeWithStructuredOutput.invoke(normalizedMessages);\n if (schema === undefined) {\n if (useReasoning) {\n return [response.score, response.reasoning];\n }\n return response.score;\n }\n else {\n return response;\n }\n }\n else {\n if (!model) {\n throw new Error(\"`model` string is required (e.g. 'openai:o3-mini') when `judge` is an OpenAI client\");\n }\n let openaiJsonSchema = schema ?? defaultJsonSchema;\n if (openaiJsonSchema.name === undefined) {\n openaiJsonSchema = {\n name: \"score\",\n strict: true,\n schema: openaiJsonSchema,\n };\n }\n if (openaiJsonSchema.schema == null ||\n typeof openaiJsonSchema.schema !== \"object\") {\n throw new Error(\"`ouputSchema` must be JSON schema or OpenAI structured output format when using an OpenAI client directly\");\n }\n if (!(\"additionalProperties\" in openaiJsonSchema.schema)) {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n openaiJsonSchema.schema.additionalProperties = false;\n }\n const params = {\n messages: normalizedMessages,\n model: model.startsWith(\"openai:\")\n ? model.slice(\"openai:\".length)\n : model,\n response_format: {\n type: \"json_schema\",\n json_schema: openaiJsonSchema,\n },\n };\n const invokeLlm = traceable(judge.chat.completions.create.bind(judge.chat.completions), {\n metadata: {\n ls_provider: \"openai\",\n ls_model_name: model,\n ls_model_type: \"chat\",\n },\n run_type: \"llm\",\n name: \"OpenAI Chat Completion\",\n });\n const response = await invokeLlm(params);\n const parsed = JSON.parse(response.choices[0].message.content);\n if (schema === undefined) {\n if (useReasoning) {\n return [parsed.score, parsed.reasoning];\n }\n return parsed.score;\n }\n return parsed;\n }\n };\n return getScore;\n};\nexport function createLLMAsJudge({ prompt, feedbackKey = \"score\", model, system, judge, continuous = false, choices, useReasoning = true, fewShotExamples, outputSchema, }) {\n if (outputSchema !== undefined && _isStructuredPrompt(prompt)) {\n throw new Error(\"You may not provide both an `outputSchema` parameter and a LangChain prompt with output schema.\");\n }\n const scorer = _createLLMAsJudgeScorer({\n prompt,\n judge,\n model,\n system,\n continuous,\n choices,\n useReasoning,\n fewShotExamples,\n schema: outputSchema,\n });\n const _wrappedEvaluator = async (inputs) => {\n const runName = feedbackKey !== \"score\" ? \"llm_as_judge\" : `llm_as_${feedbackKey}_judge`;\n return _runEvaluatorUntyped(runName, scorer, feedbackKey, inputs, undefined, outputSchema !== undefined || _isStructuredPrompt(prompt));\n };\n return _wrappedEvaluator;\n}\n","import { initChatModel } from \"langchain/chat_models/universal\";\nimport { ChatPromptTemplate } from \"@langchain/core/prompts\";\nimport { _normalizeOutputsAsString, _runEvaluator } from \"../utils.js\";\nexport const LLM_EXTRACTION_SYSTEM_PROMPT = `\nYou are an expert software auditor.\n\n<Instructions>\n Your job is to extract code from a given text.\n\n - If there is code - extract it into a single script by calling the provided \"ExtractCode\" tool.\n - If there is no code to extract - call \"NoCode\".\n\n If you extract code, your response will be passed DIRECTLY into a code execution sandbox for further testing,\n so make sure to extract all code **without modifications**, even if it contains errors,\n since any modifications will ruin the integrity of the testing process.\n Omit installation instructions and shell commands from any code you extract.\n</Instructions>\n`;\nexport const LLM_EXTRACTION_USER_PROMPT = `\nExtract code from the following:\n\n<text>\n{outputs}\n</text>\n`;\nconst extractCodeToolSchema = {\n type: \"function\",\n function: {\n name: \"ExtractCode\",\n description: \"Tool to call if there is code to extract. Omit installation instructions and shell commands.\",\n parameters: {\n type: \"object\",\n properties: {\n code: {\n type: \"string\",\n description: \"The code to extract.\",\n },\n },\n required: [\"code\"],\n },\n },\n};\nconst noCodeToolSchema = {\n type: \"function\",\n function: {\n name: \"NoCode\",\n description: \"Tool to call to indicate no code was found.\",\n parameters: {\n type: \"object\",\n properties: {\n no_code: {\n type: \"boolean\",\n description: \"Whether no code was found.\",\n },\n },\n required: [\"no_code\"],\n },\n },\n};\n/**\n * Extract code from markdown code blocks in the provided text.\n *\n * Supports both triple backtick code blocks with or without language specifiers.\n *\n * @param text - The text containing markdown code blocks\n * @returns A string containing only the code extracted from code blocks, with blocks\n * separated by newlines\n */\nexport function _extractCodeFromMarkdownCodeBlocks(text) {\n // Pattern to match code blocks with or without language specifier\n const pattern = /^(?<!`)```(\\w*)\\n([\\s\\S]*?)^(?<!`)```$/gm;\n // Find all code blocks\n const excludedLangs = new Set([\n \"bash\",\n \"sh\",\n \"shell\",\n \"zsh\",\n \"fish\",\n \"console\",\n \"terminal\",\n \"json\",\n ]);\n const codeBlocks = [];\n let match = pattern.exec(text);\n while (match !== null) {\n const lang = match[1].trim();\n if (!excludedLangs.has(lang)) {\n codeBlocks.push(match[2]);\n }\n match = pattern.exec(text);\n }\n if (codeBlocks.length === 0) {\n return null; // Return null if no valid code blocks found\n }\n // Join all code blocks with newlines\n return codeBlocks.join(\"\\n\");\n}\nexport function _createBaseCodeEvaluator(config) {\n const { scorer, codeExtractionStrategy, codeExtractor, model, runName, feedbackKey, } = config;\n let client = config.client;\n if (codeExtractor && codeExtractionStrategy !== \"none\") {\n throw new Error(\"`codeExtractor` and `codeExtractionStrategy` cannot both be provided\");\n }\n if (codeExtractionStrategy === \"llm\") {\n if (!model && !client) {\n throw new Error(`You must provide either a \"model\" string or a \"client\"`);\n }\n }\n const _wrappedEvaluator = async (params) => {\n const wrappedScorer = async (params) => {\n let normalizedOutputs = _normalizeOutputsAsString(params.outputs);\n if (codeExtractor) {\n normalizedOutputs = codeExtractor(params.outputs);\n }\n else if (codeExtractionStrategy === \"llm\") {\n if (!client) {\n client = await initChatModel(model);\n }\n const llmUserPrompt = ChatPromptTemplate.fromTemplate(LLM_EXTRACTION_USER_PROMPT);\n if (client.bindTools === undefined) {\n throw new Error(\"You must pass a model that supports tool calling.\");\n }\n const modelWithTools = client.bindTools([\n extractCodeToolSchema,\n noCodeToolSchema,\n ]);\n const res = await modelWithTools.invoke([\n {\n role: \"system\",\n content: LLM_EXTRACTION_SYSTEM_PROMPT,\n },\n {\n role: \"user\",\n content: await llmUserPrompt.format({ outputs: normalizedOutputs }),\n },\n ]);\n if (res.tool_calls?.[0]?.name === \"ExtractCode\") {\n normalizedOutputs = res.tool_calls[0].args.code;\n }\n else {\n return [\n false,\n \"Code extraction failed\",\n { code_extraction_failed: true },\n ];\n }\n }\n else if (codeExtractionStrategy === \"markdown_code_blocks\") {\n const extractedCode = _extractCodeFromMarkdownCodeBlocks(normalizedOutputs);\n if (extractedCode == null) {\n return [\n false,\n \"Code extraction failed\",\n { code_extraction_failed: true },\n ];\n }\n normalizedOutputs = extractedCode;\n }\n return scorer({\n inputs: params.inputs ?? \"\",\n outputs: normalizedOutputs,\n });\n };\n return _runEvaluator(runName, wrappedScorer, feedbackKey, {\n ...params,\n inputs: params.inputs ?? \"\",\n outputs: params.outputs,\n });\n };\n return _wrappedEvaluator;\n}\n","import { v4 as uuidv4 } from \"uuid\";\nimport { traceable } from \"langsmith/traceable\";\nimport { _convertToOpenAIMessage, _normalizeToOpenAIMessagesList, } from \"../utils.js\";\nfunction _wrap(app, runName, threadId) {\n const wrapper = (params) => {\n return app({ ...params, threadId });\n };\n return traceable(wrapper, { name: runName });\n}\nfunction _coerceAndAssignIdToMessage(message) {\n const convertedMessage = _convertToOpenAIMessage(message);\n if (convertedMessage.id === undefined) {\n return {\n ...convertedMessage,\n id: uuidv4(),\n };\n }\n return convertedMessage;\n}\nfunction _trajectoryReducer(currentTrajectory, newUpdate, updateSource, turnCounter) {\n function _combineMessages(left, right) {\n // Coerce to list\n if (!Array.isArray(left)) {\n // eslint-disable-next-line no-param-reassign\n left = [left];\n }\n if (!Array.isArray(right)) {\n // eslint-disable-next-line no-param-reassign\n right = [right];\n }\n // Coerce to message\n const coercedLeft = left.map((msg) => _coerceAndAssignIdToMessage(msg));\n const coercedRight = right.map((msg) => _coerceAndAssignIdToMessage(msg));\n // Merge\n const merged = [...coercedLeft];\n const mergedById = {};\n merged.forEach((m, i) => {\n if (m.id) {\n mergedById[m.id] = i;\n }\n });\n for (const m of coercedRight) {\n if (m.id && mergedById[m.id] === undefined) {\n mergedById[m.id] = merged.length;\n merged.push(m);\n }\n }\n return merged;\n }\n if (currentTrajectory == null) {\n // eslint-disable-next-line no-param-reassign\n currentTrajectory = { trajectory: [] };\n }\n let coercedNewUpdate;\n try {\n coercedNewUpdate = _normalizeToOpenAIMessagesList(newUpdate);\n }\n catch {\n throw new Error(`Received unexpected trajectory update from '${updateSource}': ${JSON.stringify(newUpdate)}. Expected a message, list of messages, or dictionary with a 'messages' key containing messages.`);\n }\n return {\n trajectory: _combineMessages(currentTrajectory?.trajectory, coercedNewUpdate),\n turnCounter,\n };\n}\nfunction _createStaticSimulatedUser(staticResponses) {\n return function _returnNextMessage(params) {\n const turns = params.turnCounter;\n if (turns === undefined || typeof turns !== \"number\") {\n throw new Error(\"Internal error: Turn counter must be an integer in the trajectory.\");\n }\n // First conversation turn is satisfied by the initial input\n if (turns >= staticResponses.length) {\n throw new Error(\"Number of conversation turns is greater than the number of static user responses. Please reduce the number of turns or provide more responses.\");\n }\n const nextResponse = staticResponses[turns];\n if (typeof nextResponse === \"string\") {\n return { role: \"user\", content: nextResponse, id: uuidv4() };\n }\n return _coerceAndAssignIdToMessage(nextResponse);\n };\n}\n/**\n * Run a simulation for multi-turn conversations between an application and a simulated user.\n *\n * This function runs a simulation between an app and\n * either a dynamic user simulator or a list of static user responses. The simulation supports\n * evaluation of conversation trajectories and customizable stopping conditions.\n *\n * Conversation trajectories are represented as lists of message objects with \"role\" and \"content\" keys.\n * The \"app\" param you provide will receive the next message in sequence as an input, and should\n * return a message. Internally, the simulation will dedupe these messages by id and merge them into\n * a complete trajectory.\n *\n * Once \"maxTurns\" is reached or a provided stopping condition is met, the final trajectory\n * will be passed to provided trajectory evaluators, which will receive the final trajectory\n * as an \"outputs\" param.\n *\n * When calling the created simulator, you may also provide a \"referenceOutputs\" param,\n * which will be passed directly through to the provided evaluators.\n *\n * @param {Object} params - Configuration parameters for the simulator\n * @param {(params: {inputs: ChatCompletionMessage, threadId: string}) => ChatCompletionMessage | Promise<ChatCompletionMessage>} params.app - Your application. Can be either a LangChain Runnable or a\n * callable that takes the current conversation trajectory and returns\n * a message.\n * @param {(params: {trajectory: ChatCompletionMessage[], turnCounter: number, threadId: string}) => ChatCompletionMessage | Promise<ChatCompletionMessage> | (string | Messages)[]} params.user - The simulated user. Can be:\n * - A function that takes the current conversation trajectory and returns a message.\n * - A list of strings or Messages representing static user responses\n * @param {number} [params.maxTurns] - Maximum number of conversation turns to simulate\n * @param {SimpleEvaluator[]} [params.trajectoryEvaluators] - Optional list of evaluator functions that assess the conversation\n * trajectory. Each evaluator will receive the final trajectory of the conversation as\n * a param named \"outputs\" and a param named \"referenceOutputs\" if provided.\n * @param {(params: {trajectory: ChatCompletionMessage[], turnCounter: number, threadId: string}) => boolean | Promise<boolean>} [params.stoppingCondition] - Optional callable that determines if the simulation should end early.\n * Takes the current trajectory as input and returns a boolean.\n * @param {unknown} [params.referenceOutputs] - Optional reference outputs for evaluation\n * @param {string} [params.threadId] - Optional thread ID. If not provided, a random one will be generated.\n *\n * @returns Returns a Promise that resolves to a MultiturnSimulationResult containing:\n * - evaluatorResults: List of results from trajectory evaluators\n * - trajectory: The complete conversation trajectory\n *\n * @example\n * ```typescript\n * import { runMultiturnSimulation } from \"openevals\";\n *\n * // Create a simulator with static user responses\n * const result = runMultiturnSimulation({\n * app: myChatApp,\n * user: [\"Hello!\", \"How are you?\", \"Goodbye\"],\n * maxTurns: 3,\n * trajectoryEvaluators: [myEvaluator]\n * });\n * ```\n */\nexport const runMultiturnSimulation = async (params) => {\n // Wrap at runtime to avoid side effects\n const runSimulator = traceable(async (params) => {\n if (params.threadId === undefined) {\n // eslint-disable-next-line no-param-reassign\n params.threadId = uuidv4();\n }\n const { app, user, maxTurns, trajectoryEvaluators, stoppingCondition, referenceOutputs, threadId, } = params;\n if (maxTurns === undefined && stoppingCondition === undefined) {\n throw new Error(\"At least one of maxTurns or stoppingCondition must be provided.\");\n }\n let turnCounter = 0;\n let currentReducedTrajectory = {\n trajectory: [],\n turnCounter: 0,\n };\n const wrappedApp = _wrap(app, \"app\", threadId);\n let wrappedSimulatedUser;\n if (Array.isArray(user)) {\n const staticResponses = user;\n const simulatedUser = _createStaticSimulatedUser(staticResponses);\n wrappedSimulatedUser = _wrap(simulatedUser, \"simulated_user\", threadId);\n }\n else {\n wrappedSimulatedUser = _wrap(user, \"simulated_user\", threadId);\n }\n // eslint-disable-next-line no-constant-condition\n while (true) {\n if (maxTurns !== undefined && turnCounter >= maxTurns) {\n break;\n }\n const rawInputs = await wrappedSimulatedUser({\n trajectory: currentReducedTrajectory.trajectory,\n turnCounter,\n threadId,\n });\n const currentInputs = _coerceAndAssignIdToMessage(rawInputs);\n currentReducedTrajectory = _trajectoryReducer(currentReducedTrajectory, currentInputs, \"user\", turnCounter);\n const rawOutputs = await wrappedApp({\n inputs: currentInputs,\n threadId,\n });\n const currentOutputs = _coerceAndAssignIdToMessage(rawOutputs);\n turnCounter += 1;\n currentReducedTrajectory = _trajectoryReducer(currentReducedTrajectory, currentOutputs, \"app\", turnCounter);\n if (stoppingCondition !== undefined &&\n (await stoppingCondition({\n trajectory: currentReducedTrajectory.trajectory,\n turnCounter,\n threadId,\n }))) {\n break;\n }\n }\n const results = [];\n delete currentReducedTrajectory.turnCounter;\n for (const trajectoryEvaluator of trajectoryEvaluators || []) {\n try {\n const trajectoryEvalResults = await trajectoryEvaluator({\n outputs: currentReducedTrajectory.trajectory,\n referenceOutputs,\n });\n if (Array.isArray(trajectoryEvalResults)) {\n results.push(...trajectoryEvalResults);\n }\n else {\n results.push(trajectoryEvalResults);\n }\n }\n catch (e) {\n console.error(`Error in trajectory evaluator: ${e}`);\n }\n }\n return {\n trajectory: currentReducedTrajectory.trajectory,\n evaluatorResults: results,\n };\n }, { name: \"multiturn_simulation\" });\n return runSimulator(params);\n};\n","import { initChatModel } from \"langchain/chat_models/universal\";\nimport { v4 as uuidv4 } from \"uuid\";\nimport { _convertToOpenAIMessage } from \"../utils.js\";\n// Exported for testing only\nexport function _isInternalMessage(message) {\n return Boolean(message.role !== \"user\" &&\n (message.role !== \"assistant\" || (message.tool_calls ?? []).length > 0));\n}\n/**\n * Creates a simulated user powered by a language model for multi-turn conversations.\n *\n * This function generates a simulator that can be used with the runMultiturnSimulation method to create\n * dynamic, LLM-powered user responses in a conversation. The simulator automatically handles message\n * role conversion to maintain proper conversation flow, where user messages become assistant messages\n * and vice versa when passed to the underlying LLM.\n *\n * @param {Object} params - The parameters for creating the simulated user\n * @param {string} params.system - System prompt that guides the LLM's behavior as a simulated user\n * @param {string} [params.model] - Optional name of the language model to use. Must be provided if client is not.\n * @param {BaseChatModel} [params.client] - Optional LangChain chat model instance. Must be provided if model is not.\n * @param {(string | ChatCompletionMessage)[]} [params.fixedResponses] - Optional list of fixed responses to use for the simulated user.\n * If provided, these responses will be used in sequence based on the turn counter before falling back to LLM generation.\n *\n * @returns A callable simulator function that takes a trajectory and turn counter, and returns a Promise resolving to a ChatCompletionMessage\n *\n * @throws {Error} If neither client nor model is provided\n * @throws {Error} If both client and model are provided\n *\n * @example\n * ```typescript\n * import { runMultiturnSimulation, createLLMSimulatedUser } from \"openevals\";\n *\n * // Create a simulated user with GPT-4.1-mini\n * const simulatedUser = createLLMSimulatedUser({\n * system: \"You are a helpful customer service representative\",\n * model: \"openai:gpt-4.1-mini\"\n * });\n *\n * // Use with runMultiturnSimulation\n * const simulator = runMultiturnSimulation({\n * app: myChatApp,\n * user: simulatedUser,\n * maxTurns: 5\n * });\n * ```\n *\n * Notes:\n * - The simulator automatically converts message roles to maintain proper conversation flow:\n * * User messages become assistant messages when sent to the LLM\n * * Assistant messages (without tool calls) become user messages when sent to the LLM\n * * Messages with tool calls are skipped to maintain conversation coherence\n * - The system prompt is prepended to each conversation to maintain consistent behavior\n * - If no messages exist in the trajectory, an initial query is generated based on the system prompt\n * - Fixed responses are used in sequence based on the turn counter before falling back to LLM generation\n */\nexport function createLLMSimulatedUser({ system, model, client, fixedResponses, }) {\n if (!client && !model) {\n throw new Error(\"Either client or model must be provided\");\n }\n else if (client && model) {\n throw new Error(\"Only one of client or model should be provided\");\n }\n return async function _simulator(params) {\n if (model) {\n // eslint-disable-next-line no-param-reassign\n client = await initChatModel(model);\n }\n if (fixedResponses && params.turnCounter < fixedResponses.length) {\n const res = fixedResponses[params.turnCounter];\n if (typeof res === \"string\") {\n return {\n role: \"user\",\n content: res,\n id: uuidv4(),\n };\n }\n else {\n return res;\n }\n }\n const messages = [];\n for (const msg of params.trajectory) {\n const convertedMessage = _convertToOpenAIMessage(msg);\n if (_isInternalMessage(convertedMessage)) {\n continue;\n }\n if (convertedMessage.role === \"user\") {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n convertedMessage.role = \"assistant\";\n messages.push(convertedMessage);\n }\n else if (convertedMessage.role === \"assistant\" &&\n !convertedMessage.tool_calls) {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n convertedMessage.role = \"user\";\n messages.push(convertedMessage);\n }\n }\n if (messages.length === 0) {\n messages.push({\n role: \"user\",\n content: \"Generate an initial query to start a conversation based on your instructions. Do not respond with other text.\",\n id: uuidv4(),\n });\n }\n if (system) {\n messages.unshift({ role: \"system\", content: system });\n }\n const response = await client.invoke(messages);\n return {\n role: \"user\",\n content: response.content,\n id: response.id,\n };\n };\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * contains expected strings and doesn't contain forbidden strings.\n */\nexport function createResponseContentEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst mustContain: string[] = referenceOutputs?.responseContains || [];\n\t\tconst mustNotContain: string[] = referenceOutputs?.responseMustNotContain || [];\n\n\t\tif (mustContain.length === 0 && mustNotContain.length === 0) {\n\t\t\treturn { key: 'response_content', score: true, comment: 'No content assertions specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'response_content', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = (typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content)).toLowerCase();\n\n\t\tconst failures: string[] = [];\n\n\t\tfor (const expected of mustContain) {\n\t\t\tif (!responseText.includes(expected.toLowerCase())) {\n\t\t\t\tfailures.push(`Missing expected text: \"${expected}\"`);\n\t\t\t}\n\t\t}\n\n\t\tfor (const forbidden of mustNotContain) {\n\t\t\tif (responseText.includes(forbidden.toLowerCase())) {\n\t\t\t\tfailures.push(`Contains forbidden text: \"${forbidden}\"`);\n\t\t\t}\n\t\t}\n\n\t\tconst passed = failures.length === 0;\n\n\t\treturn {\n\t\t\tkey: 'response_content',\n\t\t\tscore: passed,\n\t\t\tcomment: passed ? 'All content assertions passed' : failures.join('; '),\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that asserts the agent made zero tool calls.\n * When `referenceOutputs.exceptTools` is set, calls to those tools are\n * allowed (but not required) — only calls to non-excepted tools cause failure.\n */\nexport function createNoToolCallsEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\t// Only run this evaluator if the reference explicitly expects no tool calls\n\t\tif (referenceOutputs?.maxToolCalls !== 0 && referenceOutputs?.expectNoToolCalls !== true) {\n\t\t\treturn { key: 'no_tool_calls', score: true, comment: 'No tool call restriction specified, skipping' };\n\t\t}\n\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst exceptTools: string[] = referenceOutputs?.exceptTools ?? [];\n\n\t\tconst toolCalls = messages\n\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t.flatMap((m) => (m as AIMessage).tool_calls || []);\n\n\t\tconst disallowedCalls = exceptTools.length > 0 ? toolCalls.filter((tc) => !exceptTools.includes(tc.name)) : toolCalls;\n\n\t\tconst passed = disallowedCalls.length === 0;\n\n\t\tif (exceptTools.length > 0) {\n\t\t\treturn {\n\t\t\t\tkey: 'no_tool_calls',\n\t\t\t\tscore: passed,\n\t\t\t\tcomment: passed\n\t\t\t\t\t? `No disallowed tool calls made (allowed: ${exceptTools.join(', ')})`\n\t\t\t\t\t: `Agent made ${disallowedCalls.length} disallowed tool call(s): ${disallowedCalls.map((tc) => tc.name).join(', ')}`,\n\t\t\t};\n\t\t}\n\n\t\treturn {\n\t\t\tkey: 'no_tool_calls',\n\t\t\tscore: passed,\n\t\t\tcomment: passed\n\t\t\t\t? 'No tool calls made (as expected)'\n\t\t\t\t: `Agent made ${toolCalls.length} tool call(s): ${toolCalls.map((tc) => tc.name).join(', ')}`,\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that asserts at least one tool was called.\n * When `referenceOutputs.anyToolsExpected` contains tool names, at least\n * one of those specific tools must appear. When the list is empty, any\n * tool call satisfies the expectation.\n */\nexport function createAnyToolCalledEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tif (referenceOutputs?.expectAnyToolCall !== true) {\n\t\t\treturn { key: 'any_tool_called', score: true, comment: 'No any-tool-call expectation specified, skipping' };\n\t\t}\n\n\t\tconst expectedTools: string[] = referenceOutputs?.anyToolsExpected ?? [];\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\n\t\tconst calledToolNames = messages\n\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t.flatMap((m) => (m as AIMessage).tool_calls || [])\n\t\t\t.map((tc) => tc.name);\n\n\t\t// No specific tools requested — any tool call satisfies the expectation\n\t\tif (expectedTools.length === 0) {\n\t\t\tconst passed = calledToolNames.length > 0;\n\t\t\treturn {\n\t\t\t\tkey: 'any_tool_called',\n\t\t\t\tscore: passed,\n\t\t\t\tcomment: passed\n\t\t\t\t\t? `Agent called tool(s): ${calledToolNames.join(', ')}`\n\t\t\t\t\t: 'Agent made no tool calls (expected at least one)',\n\t\t\t};\n\t\t}\n\n\t\tconst matchedTools = expectedTools.filter((name) => calledToolNames.includes(name));\n\t\tconst passed = matchedTools.length > 0;\n\n\t\treturn {\n\t\t\tkey: 'any_tool_called',\n\t\t\tscore: passed,\n\t\t\tcomment: passed\n\t\t\t\t? `Called expected tool(s): ${matchedTools.join(', ')}`\n\t\t\t\t: `None of the expected tools were called (expected one of: ${expectedTools.join(', ')}; actual: ${calledToolNames.length > 0 ? calledToolNames.join(', ') : 'none'})`,\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\nexport interface ToolCallExpectation {\n\tname: string;\n\t/** Returns `true` if the tool call input is valid. At least one call must satisfy it. */\n\tvalidate?: (input: Record<string, unknown>) => boolean;\n\t/** Minimum number of times the tool must be called. Defaults to 1. */\n\ttimes?: number;\n}\n\n/**\n * Creates an evaluator that asserts tool call expectations: input validation\n * and/or minimum call count.\n *\n * For each `ToolCallExpectation`:\n * - When `validate` is set, at least one call to the named tool must satisfy it.\n * - When `times` is set, the tool must be called at least that many times.\n * - Both can be combined.\n */\nexport function createToolInputEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst expectations: ToolCallExpectation[] = referenceOutputs?.toolInputExpectations ?? [];\n\n\t\tif (expectations.length === 0) {\n\t\t\treturn { key: 'tool_input', score: true, comment: 'No tool input expectations specified, skipping' };\n\t\t}\n\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\n\t\tconst allToolCalls = messages\n\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t.flatMap((m) => (m as AIMessage).tool_calls || []);\n\n\t\tconst results: { name: string; passed: boolean; comment: string }[] = [];\n\n\t\tfor (const expectation of expectations) {\n\t\t\tconst matchingCalls = allToolCalls.filter((tc) => tc.name === expectation.name);\n\t\t\tconst subChecks: string[] = [];\n\t\t\tlet passed = true;\n\n\t\t\t// Check minimum call count\n\t\t\tif (expectation.times !== undefined) {\n\t\t\t\tconst countOk = matchingCalls.length >= expectation.times;\n\t\t\t\tif (!countOk) {\n\t\t\t\t\tpassed = false;\n\t\t\t\t\tsubChecks.push(\n\t\t\t\t\t\t`expected at least ${expectation.times} call(s), got ${matchingCalls.length}`,\n\t\t\t\t\t);\n\t\t\t\t} else {\n\t\t\t\t\tsubChecks.push(\n\t\t\t\t\t\t`call count OK (${matchingCalls.length} >= ${expectation.times})`,\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Check input validation\n\t\t\tif (expectation.validate) {\n\t\t\t\tif (matchingCalls.length === 0) {\n\t\t\t\t\tpassed = false;\n\t\t\t\t\tsubChecks.push('was never called');\n\t\t\t\t} else {\n\t\t\t\t\tconst anyValid = matchingCalls.some((tc) => {\n\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\treturn expectation.validate!(tc.args as Record<string, unknown>);\n\t\t\t\t\t\t} catch {\n\t\t\t\t\t\t\treturn false;\n\t\t\t\t\t\t}\n\t\t\t\t\t});\n\t\t\t\t\tif (!anyValid) {\n\t\t\t\t\t\tpassed = false;\n\t\t\t\t\t\tsubChecks.push(\n\t\t\t\t\t\t\t`input validation failed for all ${matchingCalls.length} call(s)`,\n\t\t\t\t\t\t);\n\t\t\t\t\t} else {\n\t\t\t\t\t\tsubChecks.push('input validation passed');\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// If neither times nor validate is set, just check the tool was called\n\t\t\tif (expectation.times === undefined && !expectation.validate) {\n\t\t\t\tif (matchingCalls.length === 0) {\n\t\t\t\t\tpassed = false;\n\t\t\t\t\tsubChecks.push('was never called');\n\t\t\t\t} else {\n\t\t\t\t\tsubChecks.push(`called ${matchingCalls.length} time(s)`);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tresults.push({\n\t\t\t\tname: expectation.name,\n\t\t\t\tpassed,\n\t\t\t\tcomment: `\"${expectation.name}\": ${subChecks.join(', ')}`,\n\t\t\t});\n\t\t}\n\n\t\tconst allPassed = results.every((r) => r.passed);\n\n\t\treturn {\n\t\t\tkey: 'tool_input',\n\t\t\tscore: allPassed,\n\t\t\tcomment: results.map((r) => r.comment).join('; '),\n\t\t};\n\t};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC+BA,IAAI,UAA6B;AAE1B,SAAS,eAAe,QAA0B;AACxD,YAAU;AACX;AAEO,SAAS,gBAA4B;AAC3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,wEAAwE;AAAA,EACzF;AACA,SAAO;AACR;;;AC1CA,SAAoB;AACpB,IAAAA,mBAAuC;;;ACAvC,mBAAqB;AACrB,sBAAmE;AACnE,iBAAkB;;;ACFlB,uBAA8B;AAC9B,oBAA4C;AA2BrC,IAAM,yBAAN,MAAM,wBAAuB;AAAA,EACnC,YAAoB,QAA8B;AAA9B;AAAA,EAA+B;AAAA,EAEnD,QAAQ,aAAqB,MAAiB,iBAAsD;AACnG,UAAM,QAAQ,YAAY,MAAM,GAAG;AAEnC,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,kBAAkB,KAAK,uBAAuB,WAAW;AAC/D,aAAO,KAAK,QAAQ,iBAAiB,MAAM,eAAe;AAAA,IAC3D;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,SAAS,IAAI;AAC9B,aAAO,KAAK,kBAAkB,UAAU,WAAW,WAAW,MAAM,eAAe;AAAA,IACpF;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,YAAY,SAAS,IAAI;AAC1C,aAAO,KAAK,kBAAkB,UAAU,YAAY,WAAW,MAAM,eAAe;AAAA,IACrF;AAEA,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAAA,EAEQ,uBAAuB,WAA2B;AACzD,eAAW,CAAC,UAAU,SAAS,KAAK,OAAO,QAAQ,KAAK,MAAM,GAAG;AAChE,UAAI,aAAa,UAAU;AAC1B,YAAI,aAAc,WAAuC;AACxD,iBAAO,UAAU,SAAS;AAAA,QAC3B;AAAA,MACD,WAAW,aAAa,SAAS;AAChC,mBAAW,CAAC,UAAU,MAAM,KAAK,OAAO;AAAA,UACvC;AAAA,QACD,GAAG;AACF,cAAI,OAAO,OAAO,KAAK,CAAC,MAAM,EAAE,UAAU,SAAS,GAAG;AACrD,mBAAO,SAAS,QAAQ,IAAI,SAAS;AAAA,UACtC;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAEA,UAAM,IAAI,MAAM,UAAU,SAAS,6BAA6B;AAAA,EACjE;AAAA,EAEQ,kBACP,UACA,YACA,WACA,MACA,iBACoB;AACpB,YAAQ,UAAU;AAAA,MACjB,KAAK;AACJ,eAAO,KAAK,cAAc,YAAY,WAAW,MAAM,eAAe;AAAA,MACvE,KAAK;AACJ,eAAO,KAAK,aAAa,YAAY,WAAW,MAAM,eAAe;AAAA,MACtE;AACC,cAAM,IAAI,MAAM,+BAA+B,QAAQ,EAAE;AAAA,IAC3D;AAAA,EACD;AAAA,EAEQ,cACP,YACA,WACA,MACA,iBACa;AACb,UAAM,iBAAiB,KAAK,OAAO,SAAS,UAAU;AACtD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,oCAAoC;AAAA,IACjF;AAEA,WAAO,IAAI,yBAAW;AAAA,MACrB,QAAQ,eAAe;AAAA,MACvB;AAAA,MACA;AAAA,MACA,GAAI,mBAAmB;AAAA,QACtB,WAAW;AAAA,UACV,QAAQ;AAAA,UACR,SAAS;AAAA,QACV;AAAA,QACA,iBAAiB;AAAA,MAClB;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,aACP,cACA,WACA,MACA,iBACoB;AACpB,UAAM,WAAW,KAAK,OAAO,QAAQ,YAAY;AACjD,QAAI,CAAC,UAAU;AACd,YAAM,IAAI,MAAM,aAAa,YAAY,mCAAmC;AAAA,IAC7E;AAEA,UAAM,aAAa,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,UAAU,SAAS;AACpE,QAAI,CAAC,YAAY;AAChB,YAAM,IAAI,MAAM,UAAU,SAAS,kCAAkC,YAAY,GAAG;AAAA,IACrF;AAEA,YAAQ,WAAW,UAAU;AAAA,MAC5B,KAAK;AACJ,eAAO,KAAK,sBAAsB,UAAU,YAAY,MAAM,eAAe;AAAA,MAC9E,KAAK;AACJ,eAAO,KAAK,mBAAmB,UAAU,YAAY,MAAM,eAAe;AAAA,IAC5E;AAAA,EACD;AAAA,EAEQ,mBACP,UACA,YACA,MACA,iBACkB;AAKlB,UAAM,WAAW,kBACd,GAAG,WAAW,SAAS,QAAQ,OAAO,EAAE,CAAC,iCAAiC,WAAW,UAAU,KAC/F,WAAW;AAEd,WAAO,IAAI,8BAAgB;AAAA,MAC1B,OAAO,WAAW;AAAA,MAClB,mBAAmB,SAAS;AAAA,MAC5B,qBAAqB;AAAA,MACrB,8BAA8B,WAAW;AAAA,MACzC,uBAAuB,WAAW;AAAA,MAClC;AAAA,MACA,GAAI,mBAAmB;AAAA,QACtB,WAAW;AAAA,UACV,QAAQ;AAAA,UACR,SAAS;AAAA,QACV;AAAA,MACD;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEA,OAAwB,kBAA0C;AAAA,IACjE,SAAS;AAAA,IACT,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,EACR;AAAA,EAEQ,sBACP,UACA,YACA,MACA,iBACgB;AAChB,UAAM,eAAe,kBAAkB,wBAAuB,gBAAgB,eAAe,IAAI;AAEjG,WAAO,IAAI,+BAAc;AAAA,MACxB,OAAO,WAAW;AAAA,MAClB,QAAQ,SAAS;AAAA,MACjB,eAAe,EAAE,SAAS,WAAW,SAAS;AAAA,MAC9C;AAAA,MACA,GAAI,gBAAgB;AAAA,QACnB,WAAW,eAAe;AAAA,QAC1B,UAAU;AAAA,UACT,MAAM;AAAA,UACN,eAAe;AAAA,QAChB;AAAA,MACD;AAAA,IACD,CAAC;AAAA,EACF;AACD;;;ACxMA,uBAAkE;AAE3D,SAAS,2BAA2B,UAAoC;AAC9E,QAAM,SAAwB,CAAC;AAC/B,MAAI,QAAQ;AACZ,MAAI,qBAA+B,CAAC;AAEpC,aAAW,OAAO,UAAU;AAC3B,QAAI,IAAI,SAAS,SAAS;AACzB,aAAO;AAAA,QACN,IAAI,8BAAa;AAAA,UAChB,SAAS,IAAI,QAAQ,IAAI,CAAC,MAAM;AAC/B,gBAAI,EAAE,SAAS,SAAS;AACvB,qBAAO,EAAE,MAAM,aAAa,WAAW,EAAE,KAAK,EAAE,IAAI,EAAE;AAAA,YACvD;AACA,mBAAO;AAAA,UACR,CAAC;AAAA,QACF,CAAC;AAAA,MACF;AAAA,IACD,WAAW,IAAI,SAAS,MAAM;AAC7B,UAAI,IAAI,aAAa,IAAI,UAAU,SAAS,GAAG;AAC9C,6BAAqB,IAAI,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,EAAE;AAC5D,eAAO;AAAA,UACN,IAAI,2BAAU;AAAA,YACb,SAAS,IAAI;AAAA,YACb,YAAY,IAAI,UAAU,IAAI,CAAC,IAAI,OAAO;AAAA,cACzC,IAAI,mBAAmB,CAAC;AAAA,cACxB,MAAM,GAAG;AAAA,cACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,YAC1C,EAAE;AAAA,UACH,CAAC;AAAA,QACF;AAAA,MACD,OAAO;AACN,eAAO,KAAK,IAAI,2BAAU,IAAI,OAAO,CAAC;AAAA,MACvC;AAAA,IACD,WAAW,IAAI,SAAS,QAAQ;AAC/B,YAAM,aAAa,mBAAmB,MAAM;AAC5C,UAAI,CAAC;AACJ,cAAM,IAAI,MAAM,oBAAoB,IAAI,IAAI,gDAAgD;AAC7F,aAAO;AAAA,QACN,IAAI,6BAAY;AAAA,UACf,SAAS,IAAI;AAAA,UACb,cAAc;AAAA,UACd,MAAM,IAAI;AAAA,QACX,CAAC;AAAA,MACF;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;;;AFfA,IAAM,kBAAkB;AAYjB,SAAS,qBAAqB,SAA+B;AACnE,MAAI,CAAC,MAAM,QAAQ,QAAQ,OAAO,EAAG,QAAO;AAE5C,QAAM,WAAW,QAAQ,QAAQ,OAAO,CAAC,UAAe,MAAM,SAAS,eAAe,MAAM,SAAS,UAAU;AAI/G,QAAM,aAAa,SAAS,SAAS,IAAI,WAAW;AAEpD,SAAO,IAAI,0BAAU;AAAA,IACpB,SAAS;AAAA,IACT,YAAY,QAAQ;AAAA,IACpB,IAAI,QAAQ;AAAA,IACZ,mBAAmB,QAAQ;AAAA,IAC3B,gBAAgB,QAAQ;AAAA,EACzB,CAAC;AACF;AAMO,SAAS,iBAAiB,aAAmC,aAAqB;AACxF,SAAO,OAAO,WAAkE;AAC/E,UAAM,WAAW,IAAI,uBAAuB,WAAW;AACvD,UAAM,QAAQ,SAAS,QAAQ,WAAW;AAG1C,UAAM,iBAAyC,CAAC;AAGhD,UAAM,iBAAiB,OAAO,MAAM,IAAI,CAAC,aAAa;AACrD,qBAAe,SAAS,IAAI,IAAI;AAEhC,iBAAO;AAAA,QACN,OAAO,cAAuC;AAC7C,yBAAe,SAAS,IAAI;AAC5B,cAAI,OAAO,SAAS,aAAa,YAAY;AAC5C,mBAAO,SAAS,SAAS,WAAW,eAAe,SAAS,IAAI,CAAC;AAAA,UAClE;AACA,iBAAO,SAAS;AAAA,QACjB;AAAA,QACA;AAAA,UACC,MAAM,SAAS;AAAA,UACf,aAAa,SAAS;AAAA,UACtB,QACC,SAAS,kBAAkB,aAAE,YAC1B,SAAS,SACT,aAAE;AAAA,YACF,OAAO;AAAA,cACN,OAAO,QAAQ,SAAS,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,MAAM;AACnD,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,GAAG,CAAC;AAClE,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,OAAO,GAAG,CAAC,CAAC;AAC1E,uBAAO,CAAC,KAAK,aAAE,IAAI,CAAC;AAAA,cACrB,CAAC;AAAA,YACF;AAAA,UACD;AAAA,QACJ;AAAA,MACD;AAAA,IACD,CAAC;AAED,UAAM,aAAa,eAAe,SAAS,IAAI,MAAM,UAAW,cAAc,IAAI;AAElF,UAAM,WAA0B,CAAC;AAEjC,QAAI,OAAO,cAAc;AACxB,eAAS,KAAK,IAAI,8BAAc,OAAO,YAAY,CAAC;AAAA,IACrD;AAGA,aAAS,KAAK,GAAG,2BAA2B,OAAO,QAAQ,CAAC;AAE5D,UAAM,YAAY,OAAO,eAAe,SAAS,oBAAoB,OAAO,cAAc,QAAQ,CAAC;AACnG,UAAM,YACL,OAAO,eAAe,SAAS,oBAAqB,OAAO,cAAc,SAAS,IAAK;AACxF,UAAM,aAAa,OAAO,eAAe,SAAS;AAClD,QAAI,iBAAiB;AAGrB,QAAI,YAAY;AAChB,WAAO,YAAY,iBAAiB;AACnC;AAEA,YAAM,WAAW,MAAM,WAAW,OAAO,QAAQ;AACjD,eAAS,KAAK,qBAAqB,QAAqB,CAAgB;AAExE,YAAM,YAAY;AAClB,UAAI,CAAC,UAAU,cAAc,UAAU,WAAW,WAAW,GAAG;AAC/D;AAAA,MACD;AAGA,UAAI,aAAa;AACjB,iBAAW,MAAM,UAAU,YAAY;AACtC,cAAM,WAAW,eAAe,KAAK,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI;AAC9D,YAAI,UAAU;AACb,gBAAM,SAAS,MAAM,SAAS,OAAO,GAAG,IAAI;AAC5C,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,OAAO,WAAW,WAAW,SAAS,KAAK,UAAU,MAAM;AAAA,cACpE,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD,OAAO;AACN,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,SAAS,GAAG,IAAI;AAAA,cACzB,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD;AAEA,YAAI,UAAU,SAAS,GAAG,IAAI,GAAG;AAChC;AACA,cAAI,kBAAkB,WAAW;AAChC,yBAAa;AAAA,UACd;AAAA,QACD;AAAA,MACD;AAEA,UAAI,cAAc,YAAY;AAC7B;AAAA,MACD;AAAA,IACD;AAEA,WAAO,EAAE,SAAS;AAAA,EACnB;AACD;AAWO,SAAS,sBAAsB,eAA+B,QAAoC;AAExG,QAAM,WAA0B,2BAA2B,aAAa;AAGxE,MAAI,mBAAkG,CAAC;AAEvG,aAAW,SAAS,OAAO,SAAS;AACnC,QAAI,MAAM,SAAS,aAAa;AAC/B,YAAM,KAAK;AACX,uBAAiB,KAAK;AAAA,QACrB,IAAI,GAAG;AAAA,QACP,MAAM,GAAG;AAAA,QACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,QACzC,QAAQ,GAAG;AAAA,MACZ,CAAC;AAAA,IACF,WAAW,MAAM,SAAS,QAAQ;AAEjC,UAAI,iBAAiB,SAAS,GAAG;AAChC,iBAAS;AAAA,UACR,IAAI,0BAAU;AAAA,YACb,SAAS;AAAA,YACT,YAAY,iBAAiB,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,IAAI,MAAM,GAAG,MAAM,MAAM,GAAG,KAAK,EAAE;AAAA,UACvF,CAAC;AAAA,QACF;AACA,mBAAW,MAAM,kBAAkB;AAClC,mBAAS,KAAK,IAAI,4BAAY,EAAE,SAAS,GAAG,QAAQ,cAAc,GAAG,IAAI,MAAM,GAAG,KAAK,CAAC,CAAC;AAAA,QAC1F;AACA,2BAAmB,CAAC;AAAA,MACrB;AACA,eAAS,KAAK,IAAI,0BAAU,MAAM,MAAM,CAAC;AAAA,IAC1C;AAAA,EACD;AAGA,MAAI,iBAAiB,SAAS,GAAG;AAChC,aAAS;AAAA,MACR,IAAI,0BAAU;AAAA,QACb,SAAS;AAAA,QACT,YAAY,iBAAiB,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,IAAI,MAAM,GAAG,MAAM,MAAM,GAAG,KAAK,EAAE;AAAA,MACvF,CAAC;AAAA,IACF;AACA,eAAW,MAAM,kBAAkB;AAClC,eAAS,KAAK,IAAI,4BAAY,EAAE,SAAS,GAAG,QAAQ,cAAc,GAAG,IAAI,MAAM,GAAG,KAAK,CAAC,CAAC;AAAA,IAC1F;AAAA,EACD;AAEA,SAAO;AACR;AAMO,SAAS,sBAAsB,MAAiD;AACtF,QAAM,aAAqC,CAAC;AAE5C,SAAO,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,GAAG,MAAM;AAChD,eAAW,IAAI,IAAI;AAEnB,WAAO;AAAA,MACN;AAAA,MACA,SAAS;AAAA,MACT,aAAa,IAAI;AAAA,MACjB,aACC,IAAI,kBAAkB,aAAE,YACrB,IAAI,SACJ,aAAE;AAAA,QACF,OAAO;AAAA,UACN,OAAO,QAAQ,IAAI,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,MAAM;AACpD,gBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,GAAG,CAAC;AAClE,mBAAO,CAAC,KAAK,aAAE,IAAI,CAAC;AAAA,UACrB,CAAC;AAAA,QACF;AAAA,MACD;AAAA,MACH,MAAM,OAAO,UAAmC;AAC/C,mBAAW,IAAI;AACf,YAAI,OAAO,IAAI,aAAa,YAAY;AACvC,iBAAQ,IAAI;AAAA,YACX;AAAA,YACA,WAAW,IAAI;AAAA,UAChB;AAAA,QACD;AACA,eAAO,OAAO,IAAI,aAAa,WAAW,IAAI,WAAW,KAAK,UAAU,IAAI,QAAQ;AAAA,MACrF;AAAA,IACD;AAAA,EACD,CAAC;AACF;AAoBA,SAAS,yBACR,MACA,SACA,iBACA,eACmB;AACnB,QAAM,YAAY,cAAc,SAAS,oBAAoB,cAAc,QAAQ,CAAC;AACpF,QAAM,YAAY,cAAc,SAAS,oBAAqB,cAAc,SAAS,IAAK;AAC1F,MAAI,iBAAiB;AAErB,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACzB,GAAG;AAAA,IACH,MAAM,OAAO,UAAmC;AAC/C,YAAM,SAAS,MAAM,IAAI,KAAK,KAAK;AACnC,YAAM,SAAS,OAAO,WAAW,WAAW,SAAS,KAAK,UAAU,MAAM;AAE1E,cAAQ,KAAK;AAAA,QACZ,MAAM,IAAI;AAAA,QACV;AAAA,QACA;AAAA,QACA,YAAY,WAAW,QAAQ,MAAM;AAAA,MACtC,CAAC;AAGD,UAAI,cAAc,SAAS,eAAe;AACzC,wBAAgB,MAAM;AAAA,MACvB,WAAW,UAAU,SAAS,IAAI,IAAI,GAAG;AACxC;AACA,YAAI,kBAAkB,WAAW;AAChC,0BAAgB,MAAM;AAAA,QACvB;AAAA,MACD;AAEA,aAAO;AAAA,IACR;AAAA,EACD,EAAE;AACH;AAOA,SAAS,gCACR,eACA,cACgB;AAChB,QAAM,WAA0B,2BAA2B,aAAa;AAExE,MAAI,aAAa,SAAS,GAAG;AAE5B,aAAS;AAAA,MACR,IAAI,0BAAU;AAAA,QACb,SAAS;AAAA,QACT,YAAY,aAAa,IAAI,CAAC,QAAQ;AAAA,UACrC,IAAI,GAAG;AAAA,UACP,MAAM,GAAG;AAAA,UACT,MAAM,GAAG;AAAA,QACV,EAAE;AAAA,MACH,CAAC;AAAA,IACF;AACA,eAAW,MAAM,cAAc;AAC9B,eAAS;AAAA,QACR,IAAI,4BAAY;AAAA,UACf,SAAS,GAAG;AAAA,UACZ,cAAc,GAAG;AAAA,UACjB,MAAM,GAAG;AAAA,QACV,CAAC;AAAA,MACF;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;AAUA,eAAsB,eACrB,cACA,OACA,cACA,eACA,eACuC;AACvC,QAAM,aAAa,OAAO,KAAK,aAAa,EAAE,SAAS,IAAI,sBAAsB,aAAa,IAAI,CAAC;AACnG,QAAM,UAA6B,CAAC;AACpC,MAAI;AAEJ,MAAI,eAAe;AAClB,sBAAkB,IAAI,gBAAgB;AAAA,EACvC;AAMA,QAAM,YACL,iBAAiB,kBACd,CAAC,UAA4B,yBAAyB,OAAO,SAAS,iBAAiB,aAAa,IACpG;AAEJ,QAAM,QAAQ,MAAM,aAAa,OAAO,YAAY,SAAS;AAC7D,QAAM,SAAS,iBAAiB;AAEhC,MAAI;AAMH,UAAM,eAAe,MAAM,IAAI;AAAA,MAC9B,UAAU,QAAQ,KAAK,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,CAAC,CAAC;AAAA,MACnE,UAAU;AAAA,MACV;AAAA,IACD,CAAC;AAED,QAAI,iBAAiB;AACpB,YAAM,eAAe,IAAI,QAAe,CAAC,GAAG,WAAW;AACtD,cAAM,UAAU,MAAM,OAAO,IAAI,aAAa,0BAA0B,YAAY,CAAC;AACrF,YAAI,OAAQ,SAAS;AACpB,kBAAQ;AACR;AAAA,QACD;AACA,eAAQ,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAAA,MAC1D,CAAC;AAED,YAAMC,UAAS,MAAM,QAAQ,KAAK,CAAC,cAAc,YAAY,CAAC;AAC9D,aAAO,EAAE,UAAU,sBAAsB,cAAcA,OAAM,EAAE;AAAA,IAChE;AAEA,UAAM,SAAS,MAAM;AACrB,WAAO,EAAE,UAAU,sBAAsB,cAAc,MAAM,EAAE;AAAA,EAChE,SAAS,OAAY;AAGpB,QAAI,MAAM,SAAS,gBAAgB,QAAQ,SAAS;AACnD,aAAO,EAAE,UAAU,gCAAgC,cAAc,OAAO,EAAE;AAAA,IAC3E;AACA,UAAM;AAAA,EACP;AACD;;;AD1aO,SAAS,MAAM,SAA+B;AACpD,SAAO,EAAE,MAAM,SAAS,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,QAAQ,CAAC,EAAE;AACpE;AAEO,SAAS,GAAG,SAAiB,WAAiC;AACpE,SAAO,EAAE,MAAM,MAAM,SAAS,GAAI,YAAY,EAAE,WAAW,UAAU,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE,EAAE,IAAI,CAAC,EAAG;AACxG;AAEO,SAAS,WAAW,MAAc,QAA6B;AACrE,SAAO,EAAE,MAAM,QAAQ,MAAM,OAAO;AACrC;AA0DO,SAAS,cACf,OACA,YAAiD,CAAC,GACxB;AAC1B,SAAO,OAAO;AAAA,IACb,MAAM,IAAI,CAAC,SAAS;AAAA,MACnB,KAAK;AAAA,MACL;AAAA,QACC,aAAa,KAAK;AAAA,QAClB,QAAQ,KAAK;AAAA,QACb,UAAU,UAAU,KAAK,IAAI,KAAK;AAAA,MACnC;AAAA,IACD,CAAC;AAAA,EACF;AACD;AAEA,SAAS,YAAY,MAA8C;AAClE,SAAO,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,GAAG,OAAO;AAAA,IACjD;AAAA,IACA,aAAa,IAAI;AAAA,IACjB,QAAQ,IAAI,UAAU,CAAC;AAAA,IACvB,UACC,OAAO,IAAI,aAAa,aACpB,IAAI,WACL,OAAO,IAAI,aAAa,WACvB,IAAI,WACJ,KAAK,UAAU,IAAI,QAAQ;AAAA,EACjC,EAAE;AACH;AAEA,SAAS,iBAAiB,UAA6B;AACtD,WAAS,IAAI,SAAS,SAAS,GAAG,KAAK,GAAG,KAAK;AAC9C,UAAM,MAAM,SAAS,CAAC;AACtB,QAAI,IAAI,SAAS,SAAS;AACzB,YAAM,YAAY,IAAI,QAAQ,KAAK,CAAC,MAAM,EAAE,SAAS,MAAM;AAC3D,aAAO,YAAY,UAAU,OAAO;AAAA,IACrC;AAAA,EACD;AACA,SAAO;AACR;AAEA,SAAS,mBAAmB,QAAqB,OAAyB;AACzE,MAAI,OAAO,OAAO,WAAW,WAAY,QAAO,OAAO;AACvD,QAAM,aAAa,cAAc;AACjC,QAAM,cAAc,OAAO,OAAO,WAAW,WAAW,OAAO,SAAS;AACxE,SAAO,iBAAiB,WAAW,aAAa,WAAW;AAC5D;AAEA,SAAS,oBAAoB,QAAiD;AAC7E,SAAO,OAAO,gBAAgB,cAAc,EAAE;AAC/C;AASA,IAAM,UAA6B,CAAC;AAO7B,SAAS,YAAY,MAAc,QAA2B;AACpE,UAAQ,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B;AAYO,SAAS,WAAiB;AAChC,QAAM,aAAa,cAAc;AAEjC,EAAG,YAAS,WAAW,gBAAgB,MAAM;AAC5C,eAAW,gBAAgB,WAAW,QAAQ;AAC7C,iBAAW,EAAE,MAAM,WAAW,OAAO,KAAK,SAAS;AAClD,cAAM,aAAa,OAAO,SAAS,CAAC;AACpC,cAAM,eAAe,OAAO,SAAS,SAAY,oBAAoB,MAAM;AAE3E,cAAM,gBAAgB,UAAU,OAAO,CAAC,EAAE,YAAY,IAAI,UAAU,MAAM,CAAC;AAC3E,cAAM,QAAQ,OAAO,OAAO,WAAW,WAAW,OAAO,SAAS;AAElE,mBAAW,MAAM,OAAO,OAAO;AAC9B,gBAAM,WAAW,GAAG,QAAQ,iBAAiB,GAAG,QAAQ;AACxD,gBAAM,eAAe,GAAG,SAAS;AACjC,gBAAM,QAAQ,YAAY,YAAY;AACtC,gBAAM,MAAM,EAAE,SAAS,iBAAiB,GAAG,QAAQ,EAAE;AAErD,gBAAM,WAAW,GAAG,OAAO,IAAI,CAAC,QAAQ,IAAI,GAAG,CAAC;AAChD,gBAAM,aAAa,SAAS,IAAI,CAAC,MAAM,EAAE,SAAS;AAClD,gBAAM,mBAAmB,OAAO,OAAO,CAAC,GAAG,GAAG,SAAS,IAAI,CAAC,MAAM,EAAE,gBAAgB,CAAC;AAErF,gBAAM,eAAe,IAAI,aAAa,OAAO,QAAQ;AAErD,UAAG;AAAA,YACF,GAAG,YAAY,KAAK,KAAK;AAAA,YACzB;AAAA,cACC,QAAQ;AAAA,gBACP,MAAM;AAAA,gBACN,UAAU;AAAA,gBACV;AAAA,gBACA,UAAU,GAAG;AAAA,cACd;AAAA,cACA;AAAA,YACD;AAAA,YACA,OAAO,EAAE,kBAAkB,OAAO,MAAM;AACvC,kBAAI;AAGJ,oBAAM,kBACL,GAAG,mBAAmB,OAAO,mBAAmB,cAAc,EAAE;AACjE,oBAAM,mBAAmB,kBAAkB,MAAM,gBAAgB,GAAG,QAAQ,IAAI,GAAG;AAEnF,kBAAI,cAAc;AACjB,yBAAS,MAAM;AAAA,kBACd;AAAA,kBACA;AAAA,kBACA;AAAA,kBACA;AAAA,kBACA,GAAG;AAAA,gBACJ;AAAA,cACD,OAAO;AACN,sBAAM,SAAS,mBAAmB,QAAQ,YAAY;AACtD,sBAAM,eAAe,cAAc,EAAE;AACrC,sBAAM,eAAe,GAAG,gBAAgB,OAAO,gBAAgB;AAC/D,yBAAS,MAAM,OAAO;AAAA,kBACrB,UAAU;AAAA,kBACV;AAAA,kBACA,eAAe,GAAG;AAAA,kBAClB,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC;AAAA,gBACxC,CAAC;AAAA,cACF;AAEA,oBAAM,cAAc,OAAO,SACzB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC,EAChD,IAAI,CAACC,QAAOA,IAAG,IAAI;AAErB,cAAG,cAAW;AAAA,gBACb,cAAc,YAAY,SAAS,IAAI,YAAY,KAAK,KAAK,IAAI;AAAA,cAClE,CAAC;AACD,yBAAW,aAAa,YAAY;AACnC,sBAAM,UAAU,EAAE,SAAS,QAAQ,kBAAkB,UAAU,CAAC,EAAE,CAAC;AAAA,cACpE;AAAA,YACD;AAAA,UACD;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC;AACF;;;AInPA,IAAAC,MAAoB;AACpB,wBAA+C;;;ACD/C,IAAAC,mBAAuC;AAQhC,SAAS,wBAAwB,aAAmC,OAAe;AACzF,QAAM,WAAW,IAAI,uBAAuB,WAAW;AACvD,QAAM,QAAQ,SAAS,QAAQ,KAAK;AAEpC,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,mBAAmB,kBAAkB;AAC3C,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,KAAK,kBAAkB,OAAO,MAAM,SAAS,2CAA2C;AAAA,IAClG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,kBAAkB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC5F;AAEA,UAAM,eAAe,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO;AAG7H,UAAM,YAAY,MAAM,MAAM,OAAO;AAAA,MACpC;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,MACA;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,IACD,CAAC;AAED,UAAM,oBAAoB,OAAO,UAAU,YAAY,WAAW,UAAU,UAAU,IAAI,KAAK,EAAE,YAAY;AAE7G,UAAM,UAAU,qBAAqB,iBAAiB,YAAY;AAElE,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,UACN,uCAAuC,gBAAgB,KACvD,aAAa,gBAAgB,mBAAmB,gBAAgB;AAAA,IACpE;AAAA,EACD;AACD;;;AC1DA,IAAAC,mBAAuC;;;ACAvC,IAAAC,mBAA8B;AAC9B,oBAA+B;AAC/B,sBAA+C;AAC/C,uBAA0B;AAC1B,IAAM;AAAA;AAAA,EAEN;AAAA,EAAgC;AAA2C,IAAI;AAC/E,SAAS,qBAAqB,SAAS;AACnC,MAAI,OAAO,mCAAmC,YAAY;AACtD,WAAO,+BAA+B;AAAA,MAClC;AAAA,IACJ,CAAC,EAAE,CAAC;AAAA,EACR;AACA,SAAO,0CAA0C;AAAA,IAC7C,UAAU,CAAC,OAAO;AAAA,EACtB,CAAC,EAAE,CAAC;AACR;AACO,IAAM,0BAA0B,CAAC,YAAY;AAChD,UAAI,gCAAc,OAAO,GAAG;AACxB,UAAM,YAAY,qBAAqB,OAAO;AAC9C,QAAI,QAAQ,MAAM,CAAC,UAAU,IAAI;AAC7B,gBAAU,KAAK,QAAQ;AAAA,IAC3B;AACA,WAAO;AAAA,EACX,OACK;AACD,WAAO;AAAA,EACX;AACJ;AACO,IAAM,iCAAiC,CAAC,aAAa;AACxD,MAAI;AACJ,MAAI,CAAC,MAAM,QAAQ,QAAQ,GAAG;AAC1B,QAAI,cAAc,YAAY,MAAM,QAAQ,SAAS,QAAQ,GAAG;AAC5D,qBAAe,SAAS;AAAA,IAC5B,WACS,aAAa,YAAY,UAAU,UAAU;AAClD,qBAAe,CAAC,QAAQ;AAAA,IAC5B,OACK;AACD,YAAM,IAAI,MAAM,oEAAoE;AAAA,IACxF;AAAA,EACJ,OACK;AACD,mBAAe;AAAA,EACnB;AACA,SAAO,aAAa,IAAI,uBAAuB;AACnD;AACO,IAAM,eAAe,CAAC,GAAG,UAAU;AACtC,MAAI,OAAO,UAAU,UAAU;AAC3B,QAAI,SAAS,QAAQ,WAAW,OAAO;AACnC,aAAO;AAAA,QACH,MAAM;AAAA,QACN,eAAe,SAAS,OAAO,MAAM,cAAc,WAC7C,MAAM,YACN;AAAA,QACN,MAAM;AAAA,QACN,MAAM;AAAA,MACV;AAAA,IACJ,OACK;AACD,YAAM,IAAI,MAAM,sDAAsD,KAAK,UAAU,OAAO,MAAM,CAAC,CAAC,GAAG;AAAA,IAC3G;AAAA,EACJ;AACA,SAAO,CAAC,KAAK;AACjB;AAIA,eAAsB,qBAAqB,SAAS,QAAQ,aAAa,OAAO,cAAc,kBAAkB;AAC5G,QAAM,YAAY,OAAO,WAAW;AAChC,QAAI,QAAQ,MAAM,OAAO,MAAM;AAC/B,QAAI,kBAAkB;AAClB,aAAO;AAAA,IACX;AACA,QAAI;AACJ,QAAI,CAAC,MAAM,QAAQ,KAAK,KAAK,OAAO,UAAU,UAAU;AACpD,YAAM,UAAU,CAAC;AACjB,iBAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAC9C,cAAM,CAAC,UAAUC,YAAW,UAAU,WAAW,IAAI;AAAA,UAAa;AAAA;AAAA,UAElE;AAAA,QAAK;AACL,cAAM,SAAS;AAAA,UACX;AAAA,UACA,OAAO;AAAA,UACP,SAASA;AAAA,UACT;AAAA,QACJ;AACA,YAAI,gBAAgB,UAAa,OAAO,gBAAgB,UAAU;AAC9D,iBAAO,cAAc;AAAA,QACzB;AACA,gBAAQ,KAAK,MAAM;AAAA,MACvB;AACA,aAAO;AAAA,IACX,OACK;AACD,UAAI;AACJ,UAAI,MAAM,QAAQ,KAAK,GAAG;AACtB,mBAAW,MAAM,CAAC;AAClB,oBAAY,MAAM,CAAC;AACnB,gBAAQ,MAAM,CAAC;AAAA,MACnB;AACA,aAAO;AAAA,QACH,KAAK;AAAA,QACL;AAAA,QACA,SAAS;AAAA,QACT;AAAA,MACJ;AAAA,IACJ;AAAA,EACJ;AACA,UAAI,iCAAgB,GAAG;AAEnB,UAAM,MAAM,UAAM,+BAAc,SAAS,EAAE,SAAS,CAAC,GAAG;AAAA,MACpD,MAAM;AAAA,MACN,UAAU;AAAA,QACN,gBAAgB,gBAAgB;AAAA,QAChC,gBAAgB;AAAA,QAChB,eAAe;AAAA,MACnB;AAAA,IACJ,CAAC;AACD,QAAI,kBAAkB;AAElB,YAAM,aAAa;AACnB,aAAO;AAAA,IACX;AACA,WAAO;AAAA,EACX,OACK;AACD,UAAM,yBAAqB,4BAAU,WAAW;AAAA,MAC5C,MAAM;AAAA,MACN,UAAU;AAAA,QACN,gBAAgB,gBAAgB;AAAA,QAChC,gBAAgB;AAAA,QAChB,eAAe;AAAA,MACnB;AAAA,IACJ,CAAC;AACD,UAAM,MAAM,MAAM,mBAAmB,SAAS,CAAC,CAAC;AAChD,WAAO;AAAA,EACX;AACJ;;;AC1IA,IAAAC,oBAAkC;;;ACAlC,uBAAyB;AACzB,qBAAmC;AACnC,IAAAC,mBAA8B;AAC9B,yBAA6B;AAC7B,uBAA8B;AAC9B,IAAAC,oBAA0B;AAE1B,SAAS,qBAAqB,QAAQ;AAClC,SAAO,0BAAS,WAAW,MAAM;AACrC;AACA,SAAS,oBAAoB,QAAQ;AACjC,SAAQ,qBAAqB,MAAM,KAAK,YAAY,UAAU,OAAO,UAAU;AACnF;AACO,SAAS,YAAY,OAAO;AAE/B,SAAO,OAAO,OAAO,UAAU;AACnC;AACA,SAAS,iBAAiB,GAAG;AACzB,QAAM,QAAQ;AACd,SAAQ,KAAK,QACT,OAAO,MAAM,YACb,OAAO,MAAM,eAAe,cAC5B,MAAM,WAAW,MAAM;AAC/B;AACA,SAAS,sBAAsB,EAAE,UAAU,gBAAiB,GAAG;AAE3D,QAAM,qBAAqB,SACtB,MAAM,EACN,QAAQ,EACR,UAAU,CAAC,QAAQ,IAAI,SAAS,MAAM;AAC3C,MAAI,uBAAuB,IAAI;AAC3B,UAAM,IAAI,MAAM,4EAA4E;AAAA,EAChG;AACA,QAAM,YAAY,SAAS,SAAS,IAAI;AAExC,WAAS,SAAS,EAAE,WAChB,SACI,gBACK,IAAI,CAAC,YAAY;AAClB,QAAI,aAAa;AAAA,SAAqB,KAAK,UAAU,QAAQ,MAAM,CAAC;AAAA,UAAqB,KAAK,UAAU,QAAQ,OAAO,CAAC;AACxH,QAAI,QAAQ,WAAW;AACnB,oBAAc;AAAA,aAAgB,QAAQ,SAAS;AAAA,IACnD;AACA,QAAI,QAAQ,UAAU,QAAW;AAC7B,oBAAc;AAAA,SAAY,QAAQ,KAAK;AAAA,IAC3C;AACA,kBAAc;AACd,WAAO;AAAA,EACX,CAAC,EACI,KAAK,IAAI;AACtB,SAAO;AACX;AACA,SAAS,iCAAiC,EAAE,YAAY,SAAS,aAAc,GAAG;AAC9E,QAAM,aAAa;AAAA,IACf,MAAM;AAAA,IACN,sBAAsB;AAAA,EAC1B;AACA,MAAI;AACJ,MAAI;AACJ,MAAI,SAAS;AACT,kBACI;AACJ,kBAAc;AAAA,MACV,MAAM;AAAA,MACN;AAAA,MACA,MAAM;AAAA,IACV;AAAA,EACJ,WACS,YAAY;AACjB,kBACI;AACJ,kBAAc;AAAA,MACV,MAAM;AAAA,MACN;AAAA,IACJ;AAAA,EACJ,OACK;AACD,kBACI;AACJ,kBAAc;AAAA,MACV,MAAM;AAAA,MACN;AAAA,IACJ;AAAA,EACJ;AACA,MAAI,cAAc;AACd,eAAW,aAAa;AAAA,MACpB,WAAW;AAAA,QACP,MAAM;AAAA,QACN,aAAa;AAAA,MACjB;AAAA,MACA,OAAO;AAAA,IACX;AACA,eAAW,WAAW,CAAC,aAAa,OAAO;AAAA,EAC/C,OACK;AACD,eAAW,aAAa;AAAA,MACpB,OAAO;AAAA,IACX;AACA,eAAW,WAAW,CAAC,OAAO;AAAA,EAClC;AACA,SAAO,CAAC,YAAY,WAAW;AACnC;AACA,SAAS,sBAAsB,OAAO;AAClC,MAAI,OAAO,UAAU,UAAU;AAC3B,WAAO;AAAA,EACX,eACS,gCAAc,KAAK,GAAG;AAC3B,WAAO,KAAK,UAAU,wBAAwB,KAAK,CAAC;AAAA,EACxD,WACS,OAAO,UAAU,YAAY,UAAU,MAAM;AAClD,QAAI,MAAM,QAAQ,KAAK,GAAG;AACtB,aAAO,KAAK,UAAU,MAAM,IAAI,CAAC,gBAAY,gCAAc,OAAO,IAAI,wBAAwB,OAAO,IAAI,OAAO,CAAC;AAAA,IACrH;AACA,UAAM,WAAW;AACjB,QAAI,cAAc,YAAY,MAAM,QAAQ,SAAS,QAAQ,GAAG;AAC5D,eAAS,WAAW,SAAS,SAAS,IAAI,CAAC,gBAAY,gCAAc,OAAO,IAAI,wBAAwB,OAAO,IAAI,OAAO;AAC1H,aAAO,KAAK,UAAU,QAAQ;AAAA,IAClC;AACA,WAAO,KAAK,UAAU,KAAK;AAAA,EAC/B;AACA,SAAO,KAAK,UAAU,KAAK;AAC/B;AACO,IAAM,0BAA0B,CAAC,WAAW;AAC/C,QAAM,EAAE,QAAQ,QAAQ,OAAO,YAAY,SAAS,gBAAgB,IAAI;AACxE,MAAI;AACJ,MAAI,YAAY,OAAO,MAAM,GAAG;AAC5B,iBAAS,iCAAa,OAAO,MAAM;AAAA,EACvC,OACK;AACD,aAAS,OAAO;AAAA,EACpB;AACA,MAAI,QAAQ,OAAO;AACnB,QAAM,eAAe,OAAO,gBAAgB;AAC5C,QAAM,WAAW,OAAOC,YAAW;AAC/B,UAAM,EAAE,QAAQ,SAAS,kBAAkB,GAAG,KAAK,IAAIA;AACvD,QAAI,UAAU,OAAO,WAAW,UAAU;AACtC,YAAM,IAAI,MAAM,+DAA+D;AAAA,IACnF;AACA,QAAI,oBAAoB;AACxB,QAAI,qBAAqB;AACzB,QAAI,8BAA8B;AAClC,QAAI,QAAQ;AACR,0BAAoB,sBAAsB,MAAM;AAAA,IACpD;AACA,QAAI,SAAS;AACT,2BAAqB,sBAAsB,OAAO;AAAA,IACtD;AACA,QAAI,kBAAkB;AAClB,oCAA8B,sBAAsB,gBAAgB;AAAA,IACxE;AACA,UAAM,kBAAkB,OAAO,YAAY,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM;AAAA,MAClF;AAAA,MACA,sBAAsB,KAAK;AAAA,IAC/B,CAAC,CAAC;AACF,QAAI,WAAW,CAAC;AAChB,UAAM,eAAe;AAAA,MACjB,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,mBAAmB;AAAA,MACnB,GAAG;AAAA,IACP;AAEA,UAAM,uBAAuB,OAAO,YAAY,OAAO,QAAQ,YAAY,EAAE,OAAO,CAAC,CAAC,GAAG,KAAK,MAAM,UAAU,MAAS,CAAC;AACxH,QAAI,qBAAqB,MAAM,GAAG;AAC9B,YAAM,kBAAkB,MAAM,OAAO,OAAO,oBAAoB;AAChE,iBAAW,gBAAgB;AAC3B,UAAI,oBAAoB,MAAM,GAAG;AAC7B,iBAAS,OAAO;AAAA,MACpB;AAAA,IACJ,WACS,OAAO,WAAW,UAAU;AACjC,YAAM,WAAW,kCAAmB,aAAa,MAAM;AACvD,YAAM,kBAAkB,MAAM,SAAS,OAAO,oBAAoB;AAClE,iBAAW,gBAAgB;AAAA,IAC/B,OACK;AACD,iBAAW,MAAM,OAAO;AAAA,QACpB;AAAA,QACA;AAAA,QACA,mBAAmB;AAAA,QACnB,GAAG;AAAA,MACP,CAAC;AAAA,IACL;AACA,QAAI,QAAQ;AACR,iBAAW,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,GAAG,GAAG,QAAQ;AAAA,IAChE;AACA,QAAI,qBAAqB,+BAA+B,QAAQ;AAChE,QAAI,iBAAiB;AACjB,2BAAqB,sBAAsB;AAAA,QACvC,UAAU;AAAA,QACV;AAAA,MACJ,CAAC;AAAA,IACL;AACA,UAAM,CAAC,mBAAmB,WAAW,IAAI,iCAAiC;AAAA,MACtE;AAAA,MACA;AAAA,MACA;AAAA,IACJ,CAAC;AACD,QAAI,CAAC,OAAO;AACR,UAAI,CAAC,OAAO;AACR,cAAM,IAAI,MAAM,iFAAiF;AAAA,MACrG;AACA,cAAQ,UAAM,gCAAc,KAAK;AAAA,IACrC;AACA,QAAI;AACJ,QAAI,iBAAiB,KAAK,GAAG;AACzB,YAAM,4BAA4B,MAAM,qBAAqB,UAAU;AAAA,QACnE,OAAO;AAAA,QACP;AAAA,QACA,GAAG;AAAA,MACP,CAAC;AACD,iBAAW,MAAM,0BAA0B,OAAO,kBAAkB;AACpE,UAAI,WAAW,QAAW;AACtB,YAAI,cAAc;AACd,iBAAO,CAAC,SAAS,OAAO,SAAS,SAAS;AAAA,QAC9C;AACA,eAAO,SAAS;AAAA,MACpB,OACK;AACD,eAAO;AAAA,MACX;AAAA,IACJ,OACK;AACD,UAAI,CAAC,OAAO;AACR,cAAM,IAAI,MAAM,qFAAqF;AAAA,MACzG;AACA,UAAI,mBAAmB,UAAU;AACjC,UAAI,iBAAiB,SAAS,QAAW;AACrC,2BAAmB;AAAA,UACf,MAAM;AAAA,UACN,QAAQ;AAAA,UACR,QAAQ;AAAA,QACZ;AAAA,MACJ;AACA,UAAI,iBAAiB,UAAU,QAC3B,OAAO,iBAAiB,WAAW,UAAU;AAC7C,cAAM,IAAI,MAAM,2GAA2G;AAAA,MAC/H;AACA,UAAI,EAAE,0BAA0B,iBAAiB,SAAS;AAEtD,yBAAiB,OAAO,uBAAuB;AAAA,MACnD;AACA,YAAMA,UAAS;AAAA,QACX,UAAU;AAAA,QACV,OAAO,MAAM,WAAW,SAAS,IAC3B,MAAM,MAAM,UAAU,MAAM,IAC5B;AAAA,QACN,iBAAiB;AAAA,UACb,MAAM;AAAA,UACN,aAAa;AAAA,QACjB;AAAA,MACJ;AACA,YAAM,gBAAY,6BAAU,MAAM,KAAK,YAAY,OAAO,KAAK,MAAM,KAAK,WAAW,GAAG;AAAA,QACpF,UAAU;AAAA,UACN,aAAa;AAAA,UACb,eAAe;AAAA,UACf,eAAe;AAAA,QACnB;AAAA,QACA,UAAU;AAAA,QACV,MAAM;AAAA,MACV,CAAC;AACD,YAAMC,YAAW,MAAM,UAAUD,OAAM;AACvC,YAAM,SAAS,KAAK,MAAMC,UAAS,QAAQ,CAAC,EAAE,QAAQ,OAAO;AAC7D,UAAI,WAAW,QAAW;AACtB,YAAI,cAAc;AACd,iBAAO,CAAC,OAAO,OAAO,OAAO,SAAS;AAAA,QAC1C;AACA,eAAO,OAAO;AAAA,MAClB;AACA,aAAO;AAAA,IACX;AAAA,EACJ;AACA,SAAO;AACX;AACO,SAAS,iBAAiB,EAAE,QAAQ,cAAc,SAAS,OAAO,QAAQ,OAAO,aAAa,OAAO,SAAS,eAAe,MAAM,iBAAiB,aAAc,GAAG;AACxK,MAAI,iBAAiB,UAAa,oBAAoB,MAAM,GAAG;AAC3D,UAAM,IAAI,MAAM,iGAAiG;AAAA,EACrH;AACA,QAAM,SAAS,wBAAwB;AAAA,IACnC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,EACZ,CAAC;AACD,QAAM,oBAAoB,OAAO,WAAW;AACxC,UAAM,UAAU,gBAAgB,UAAU,iBAAiB,UAAU,WAAW;AAChF,WAAO,qBAAqB,SAAS,QAAQ,aAAa,QAAQ,QAAW,iBAAiB,UAAa,oBAAoB,MAAM,CAAC;AAAA,EAC1I;AACA,SAAO;AACX;;;ACtSA,IAAAC,oBAA8B;AAC9B,IAAAC,kBAAmC;;;ACAnC,IAAAC,oBAA0B;;;ACD1B,IAAAC,oBAA8B;;;ANI9B,IAAM,2BAA2B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAwB1B,SAAS,wBAAwB,aAAmC,OAAe,UAAkB;AAC3G,QAAM,WAAW,IAAI,uBAAuB,WAAW;AAGvD,QAAM,QAAQ,SAAS,QAAQ,KAAK;AAEpC,QAAMC,YAAW,iBAAiB;AAAA,IACjC,QAAQ;AAAA,IACR,aAAa;AAAA,IACb;AAAA,IACA,cAAc;AAAA,EACf,CAAC;AAED,SAAO,OAAO,EAAE,QAAQ,MAAgF;AACvG,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,aAAa,OAAO,OAAO,SAAS,oCAAoC;AAAA,IACvF;AAEA,UAAM,eACL,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO;AAEzG,WAAOA,UAAS;AAAA,MACf,SAAS;AAAA,MACT;AAAA,IACD,CAAC;AAAA,EACF;AACD;;;AOzDA,IAAAC,mBAAuC;AAMhC,SAAS,iCAAiC;AAChD,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,cAAwB,kBAAkB,oBAAoB,CAAC;AACrE,UAAM,iBAA2B,kBAAkB,0BAA0B,CAAC;AAE9E,QAAI,YAAY,WAAW,KAAK,eAAe,WAAW,GAAG;AAC5D,aAAO,EAAE,KAAK,oBAAoB,OAAO,MAAM,SAAS,4CAA4C;AAAA,IACrG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,oBAAoB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC9F;AAEA,UAAM,gBAAgB,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO,GAAG,YAAY;AAE7I,UAAM,WAAqB,CAAC;AAE5B,eAAW,YAAY,aAAa;AACnC,UAAI,CAAC,aAAa,SAAS,SAAS,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,2BAA2B,QAAQ,GAAG;AAAA,MACrD;AAAA,IACD;AAEA,eAAW,aAAa,gBAAgB;AACvC,UAAI,aAAa,SAAS,UAAU,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,6BAA6B,SAAS,GAAG;AAAA,MACxD;AAAA,IACD;AAEA,UAAM,SAAS,SAAS,WAAW;AAEnC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SAAS,kCAAkC,SAAS,KAAK,IAAI;AAAA,IACvE;AAAA,EACD;AACD;;;ACrDA,IAAAC,mBAAuC;AAOhC,SAAS,6BAA6B;AAC5C,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AAEL,QAAI,kBAAkB,iBAAiB,KAAK,kBAAkB,sBAAsB,MAAM;AACzF,aAAO,EAAE,KAAK,iBAAiB,OAAO,MAAM,SAAS,+CAA+C;AAAA,IACrG;AAEA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,cAAwB,kBAAkB,eAAe,CAAC;AAEhE,UAAM,YAAY,SAChB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC;AAElD,UAAM,kBAAkB,YAAY,SAAS,IAAI,UAAU,OAAO,CAAC,OAAO,CAAC,YAAY,SAAS,GAAG,IAAI,CAAC,IAAI;AAE5G,UAAM,SAAS,gBAAgB,WAAW;AAE1C,QAAI,YAAY,SAAS,GAAG;AAC3B,aAAO;AAAA,QACN,KAAK;AAAA,QACL,OAAO;AAAA,QACP,SAAS,SACN,2CAA2C,YAAY,KAAK,IAAI,CAAC,MACjE,cAAc,gBAAgB,MAAM,6BAA6B,gBAAgB,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,MACpH;AAAA,IACD;AAEA,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SACN,qCACA,cAAc,UAAU,MAAM,kBAAkB,UAAU,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,IAC7F;AAAA,EACD;AACD;;;ACjDA,IAAAC,mBAAuC;AAQhC,SAAS,+BAA+B;AAC9C,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,QAAI,kBAAkB,sBAAsB,MAAM;AACjD,aAAO,EAAE,KAAK,mBAAmB,OAAO,MAAM,SAAS,mDAAmD;AAAA,IAC3G;AAEA,UAAM,gBAA0B,kBAAkB,oBAAoB,CAAC;AACvE,UAAM,WAA0B,QAAQ,YAAY,CAAC;AAErD,UAAM,kBAAkB,SACtB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC,EAChD,IAAI,CAAC,OAAO,GAAG,IAAI;AAGrB,QAAI,cAAc,WAAW,GAAG;AAC/B,YAAMC,UAAS,gBAAgB,SAAS;AACxC,aAAO;AAAA,QACN,KAAK;AAAA,QACL,OAAOA;AAAA,QACP,SAASA,UACN,yBAAyB,gBAAgB,KAAK,IAAI,CAAC,KACnD;AAAA,MACJ;AAAA,IACD;AAEA,UAAM,eAAe,cAAc,OAAO,CAAC,SAAS,gBAAgB,SAAS,IAAI,CAAC;AAClF,UAAM,SAAS,aAAa,SAAS;AAErC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SACN,4BAA4B,aAAa,KAAK,IAAI,CAAC,KACnD,4DAA4D,cAAc,KAAK,IAAI,CAAC,aAAa,gBAAgB,SAAS,IAAI,gBAAgB,KAAK,IAAI,IAAI,MAAM;AAAA,IACrK;AAAA,EACD;AACD;;;ACnDA,IAAAC,oBAAuC;AAmBhC,SAAS,2BAA2B;AAC1C,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,eAAsC,kBAAkB,yBAAyB,CAAC;AAExF,QAAI,aAAa,WAAW,GAAG;AAC9B,aAAO,EAAE,KAAK,cAAc,OAAO,MAAM,SAAS,iDAAiD;AAAA,IACpG;AAEA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AAErD,UAAM,eAAe,SACnB,OAAO,CAAC,MAAM,aAAa,2BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC;AAElD,UAAM,UAAgE,CAAC;AAEvE,eAAW,eAAe,cAAc;AACvC,YAAM,gBAAgB,aAAa,OAAO,CAAC,OAAO,GAAG,SAAS,YAAY,IAAI;AAC9E,YAAM,YAAsB,CAAC;AAC7B,UAAI,SAAS;AAGb,UAAI,YAAY,UAAU,QAAW;AACpC,cAAM,UAAU,cAAc,UAAU,YAAY;AACpD,YAAI,CAAC,SAAS;AACb,mBAAS;AACT,oBAAU;AAAA,YACT,qBAAqB,YAAY,KAAK,iBAAiB,cAAc,MAAM;AAAA,UAC5E;AAAA,QACD,OAAO;AACN,oBAAU;AAAA,YACT,kBAAkB,cAAc,MAAM,OAAO,YAAY,KAAK;AAAA,UAC/D;AAAA,QACD;AAAA,MACD;AAGA,UAAI,YAAY,UAAU;AACzB,YAAI,cAAc,WAAW,GAAG;AAC/B,mBAAS;AACT,oBAAU,KAAK,kBAAkB;AAAA,QAClC,OAAO;AACN,gBAAM,WAAW,cAAc,KAAK,CAAC,OAAO;AAC3C,gBAAI;AACH,qBAAO,YAAY,SAAU,GAAG,IAA+B;AAAA,YAChE,QAAQ;AACP,qBAAO;AAAA,YACR;AAAA,UACD,CAAC;AACD,cAAI,CAAC,UAAU;AACd,qBAAS;AACT,sBAAU;AAAA,cACT,mCAAmC,cAAc,MAAM;AAAA,YACxD;AAAA,UACD,OAAO;AACN,sBAAU,KAAK,yBAAyB;AAAA,UACzC;AAAA,QACD;AAAA,MACD;AAGA,UAAI,YAAY,UAAU,UAAa,CAAC,YAAY,UAAU;AAC7D,YAAI,cAAc,WAAW,GAAG;AAC/B,mBAAS;AACT,oBAAU,KAAK,kBAAkB;AAAA,QAClC,OAAO;AACN,oBAAU,KAAK,UAAU,cAAc,MAAM,UAAU;AAAA,QACxD;AAAA,MACD;AAEA,cAAQ,KAAK;AAAA,QACZ,MAAM,YAAY;AAAA,QAClB;AAAA,QACA,SAAS,IAAI,YAAY,IAAI,MAAM,UAAU,KAAK,IAAI,CAAC;AAAA,MACxD,CAAC;AAAA,IACF;AAEA,UAAM,YAAY,QAAQ,MAAM,CAAC,MAAM,EAAE,MAAM;AAE/C,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,IAAI;AAAA,IACjD;AAAA,EACD;AACD;;;AZtFA,SAAS,oBAAoB,WAAgB,KAA0B;AACtE,SAAO,OAAO,EAAE,SAAS,iBAAiB,MAAM;AAC/C,QAAI,CAAC,kBAAkB,qBAAqB;AAC3C,aAAO,EAAE,KAAK,OAAO,MAAM,SAAS,6CAA6C;AAAA,IAClF;AACA,WAAO,UAAU,EAAE,SAAS,kBAAkB,iBAAiB,oBAAoB,CAAC;AAAA,EACrF;AACD;AAEA,SAAS,gBAAgB,SAAiB,WAAgD;AACzF,QAAM,aAAwC,CAAC;AAC/C,MAAI,QAAQ;AAEZ,aAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,QAAQ,CAAC;AAElD,aAAW,QAAQ,WAAW;AAC7B,UAAM,KAAK,KAAK,EAAE,KAAK;AACvB,eAAW,KAAK;AAAA,MACf,MAAM;AAAA,MACN,SAAS;AAAA,MACT,YAAY,CAAC,EAAE,UAAU,EAAE,MAAM,WAAW,KAAK,GAAG,IAAI,MAAM,WAAW,CAAC;AAAA,IAC3E,CAAC;AACD,eAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,OAAO,cAAc,GAAG,CAAC;AAAA,EACnE;AAEA,aAAW,KAAK,EAAE,MAAM,aAAa,SAAS,MAAM,CAAC;AAErD,SAAO;AACR;AAuBO,SAAS,YAAY,OAAuC;AAClE,QAAM,YAAY,MAAM,IAAI,CAAC,MAAO,OAAO,MAAM,WAAW,IAAI,EAAE,IAAK;AACvE,QAAM,aAAa,MAAM,OAAO,CAAC,MAAgC,OAAO,MAAM,QAAQ;AAEtF,SAAO,CAAC,QAAQ;AACf,UAAM,sBAAyB;AAAA,MAC9B;AAAA,YACC,kDAA+B,EAAE,qBAAqB,YAAY,mBAAmB,SAAS,CAAC;AAAA,QAC/F;AAAA,MACD;AAAA,IACD;AAEA,QAAI,WAAW,WAAW,GAAG;AAC5B,aAAO;AAAA,QACN,WAAW;AAAA,QACX,kBAAkB,EAAE,qBAAqB,gBAAgB,IAAI,SAAS,SAAS,EAAE;AAAA,MAClF;AAAA,IACD;AAEA,UAAM,iBAAoB,kBAAc,yBAAyB,CAAC;AAIlE,UAAM,oBAAiC,OAAO,EAAE,SAAS,iBAAiB,MAAM;AAC/E,YAAM,mBAAmB,MAAM,oBAAoB,EAAE,SAAS,iBAAiB,CAAC;AAChF,YAAM,cAAc,MAAM,eAAe;AAAA,QACxC;AAAA,QACA,kBAAkB,EAAE,GAAG,kBAAkB,uBAAuB,WAAW;AAAA,MAC5E,CAAC;AAED,YAAM,mBAAmB,QAAQ,iBAAiB,KAAK;AACvD,YAAM,cAAc,QAAQ,YAAY,KAAK;AAE7C,aAAO;AAAA,QACN,KAAK;AAAA,QACL,OAAO,oBAAoB;AAAA,QAC3B,SAAS,CAAC,iBAAiB,SAAS,YAAY,OAAO,EAAE,OAAO,OAAO,EAAE,KAAK,IAAI;AAAA,MACnF;AAAA,IACD;AAEA,WAAO;AAAA,MACN,WAAW;AAAA,MACX,kBAAkB,EAAE,qBAAqB,gBAAgB,IAAI,SAAS,SAAS,EAAE;AAAA,IAClF;AAAA,EACD;AACD;AASO,SAAS,SAAS,UAA+B;AACvD,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO;AACrB,WAAO;AAAA,MACN,WAAc,kBAAc,wBAAwB,OAAO,aAAa,OAAO,QAAQ,CAAC;AAAA,MACxF,kBAAkB,CAAC;AAAA,IACpB;AAAA,EACD;AACD;AAOO,SAAS,QAAQ,SAA6C;AACpE,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,2BAA2B,CAAC;AAAA,IACxD,kBAAkB;AAAA,MACjB,mBAAmB;AAAA,MACnB,GAAI,SAAS,QAAQ,SAAS,EAAE,aAAa,QAAQ,OAAO,IAAI,CAAC;AAAA,IAClE;AAAA,EACD;AACD;AAOO,SAAS,mBAAmB,MAA2B;AAC7D,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO;AACrB,WAAO;AAAA,MACN,WAAc,kBAAc,wBAAwB,OAAO,aAAa,KAAK,CAAC;AAAA,MAC9E,kBAAkB,EAAE,kBAAkB,KAAK;AAAA,IAC5C;AAAA,EACD;AACD;AAOO,SAAS,cAAc,OAA+B;AAC5D,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,6BAA6B,CAAC;AAAA,IAC1D,kBAAkB;AAAA,MACjB,mBAAmB;AAAA,MACnB,GAAI,OAAO,SAAS,EAAE,kBAAkB,MAAM,IAAI,CAAC;AAAA,IACpD;AAAA,EACD;AACD;AAGO,SAAS,SAAS,SAAgC;AACxD,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,kBAAkB,QAAQ;AAAA,EAC/C;AACD;AAGO,SAAS,YAAY,SAAgC;AAC3D,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,wBAAwB,QAAQ;AAAA,EACrD;AACD;","names":["import_messages","result","tc","ls","import_messages","import_messages","import_messages","reasoning","import_traceable","import_messages","import_traceable","params","response","import_universal","import_prompts","import_traceable","import_universal","llmJudge","import_messages","import_messages","import_messages","passed","import_messages"]}
|