@dvina/agents 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/eval/index.ts","../../src/eval/config.ts","../../src/eval/suite.ts","../../src/eval/target.ts","../../src/runtime/langchain/model-resolver.ts","../../src/eval/expectations.ts","../../src/eval/evaluators/language.ts","../../src/eval/evaluators/response-content.ts","../../src/eval/evaluators/no-tool-calls.ts"],"sourcesContent":["// ── Configuration ───────────────────────────────────────────────────\nexport { configureEvals } from './config';\nexport type { EvalConfig } from './config';\n\n// ── Suite API ────────────────────────────────────────────────────────\nexport { defineSuite, human, ai, toolResult } from './suite';\nexport type { SuiteConfig, TestCase, ToolDef } from './suite';\n\n// ── Expectations ─────────────────────────────────────────────────────\nexport { toolsCalled, llmJudge, noTools, respondsInLanguage, contains, notContains } from './expectations';\nexport type { Expectation } from './expectations';\n","import type { LangchainModelConfig } from '../runtime/langchain/model-resolver';\n\nexport interface EvalConfig {\n\tmodelConfig: LangchainModelConfig;\n\tmodel: string;\n\t/** Model for evaluators needing LLM calls (language detection, LLM-as-judge). Defaults to `model`. */\n\tevaluatorModel?: string;\n\t/** System prompt prepended to every eval invocation. Can be overridden per-suite or per-case. */\n\tsystemPrompt?: string;\n}\n\nlet _config: EvalConfig | null = null;\n\nexport function configureEvals(config: EvalConfig): void {\n\t_config = config;\n}\n\nexport function getEvalConfig(): EvalConfig {\n\tif (!_config) {\n\t\tthrow new Error('Evals not configured. Call configureEvals() in your vitest setupFiles.');\n\t}\n\treturn _config;\n}\n","import * as ls from 'langsmith/vitest';\nimport { BaseMessage } from '@langchain/core/messages';\nimport { createEvalTarget, type MockToolDef } from './target';\nimport { type Expectation } from './expectations';\nimport { getEvalConfig } from './config';\n\n// ── Message builders ─────────────────────────────────────────────────\n\nexport function human(content: string) {\n\treturn { role: 'human' as const, content };\n}\n\nexport function ai(content: string, toolCalls?: string[]) {\n\treturn { role: 'ai' as const, content, ...(toolCalls ? { toolCalls } : {}) };\n}\n\nexport function toolResult(content: string) {\n\treturn { role: 'tool' as const, content };\n}\n\n// ── Public types ─────────────────────────────────────────────────────\n\ntype Message = ReturnType<typeof human> | ReturnType<typeof ai> | ReturnType<typeof toolResult>;\n\nexport interface ToolDef {\n\tdescription: string;\n\tschema?: Record<string, string>;\n\t/** Auto-stringified if not a string or function. */\n\tresponse: unknown | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\nexport interface TestCase {\n\t/** Test name. Defaults to the last human message content if omitted. */\n\tname?: string;\n\tmessages: Message[];\n\tsystemPrompt?: string;\n\t/** Override suite-level tools for this case. */\n\ttools?: Record<string, ToolDef>;\n\texpect: Expectation[];\n}\n\ntype TargetFn = (inputs: { systemPrompt?: string; messages: Message[]; tools: MockToolDef[] }) => Promise<{ messages: BaseMessage[] }>;\n\nexport interface SuiteConfig {\n\t/** Custom target function, or model string override. Auto-created from global config if omitted. */\n\ttarget?: TargetFn | string;\n\t/** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */\n\tsystemPrompt?: string;\n\ttools: Record<string, ToolDef>;\n\tcases: TestCase[];\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nfunction toMockTools(defs: Record<string, ToolDef>): MockToolDef[] {\n\treturn Object.entries(defs).map(([name, def]) => ({\n\t\tname,\n\t\tdescription: def.description,\n\t\tschema: def.schema ?? {},\n\t\tresponse:\n\t\t\ttypeof def.response === 'function'\n\t\t\t\t? (def.response as MockToolDef['response'])\n\t\t\t\t: typeof def.response === 'string'\n\t\t\t\t\t? def.response\n\t\t\t\t\t: JSON.stringify(def.response),\n\t}));\n}\n\n/** Strip function responses so the object is JSON-serialisable for langsmith hashing. */\nfunction toSerializableTools(tools: MockToolDef[]): Record<string, unknown>[] {\n\treturn tools.map((t) => ({\n\t\t...t,\n\t\tresponse: typeof t.response === 'function' ? '<function>' : t.response,\n\t}));\n}\n\nfunction lastHumanContent(messages: Message[]): string {\n\tfor (let i = messages.length - 1; i >= 0; i--) {\n\t\tif (messages[i].role === 'human') return messages[i].content;\n\t}\n\treturn messages[0]?.content ?? '';\n}\n\n// ── Main entry point ─────────────────────────────────────────────────\n\n/**\n * Defines an eval suite. Internally registers `ls.describe` / `ls.test`\n * so vitest discovers the tests — eval files only need to call this function.\n */\nfunction resolveTarget(config: SuiteConfig): TargetFn {\n\tif (typeof config.target === 'function') return config.target;\n\tconst evalConfig = getEvalConfig();\n\tconst model = typeof config.target === 'string' ? config.target : evalConfig.model;\n\treturn createEvalTarget(evalConfig.modelConfig, model);\n}\n\nexport function defineSuite(name: string, config: SuiteConfig): void {\n\tconst target = resolveTarget(config);\n\tconst suiteTools = config.tools;\n\tconst globalPrompt = getEvalConfig().systemPrompt;\n\n\tls.describe(name, () => {\n\t\tfor (const tc of config.cases) {\n\t\t\tconst testName = tc.name ?? lastHumanContent(tc.messages);\n\t\t\tconst tools = toMockTools(tc.tools ?? suiteTools);\n\t\t\tconst ctx = { message: lastHumanContent(tc.messages) };\n\n\t\t\tconst resolved = tc.expect.map((exp) => exp(ctx));\n\t\t\tconst evaluators = resolved.map((r) => r.evaluator);\n\t\t\tconst referenceOutputs = Object.assign({}, ...resolved.map((r) => r.referenceOutputs));\n\n\t\t\t// Precedence: case > suite > global\n\t\t\tconst systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;\n\n\t\t\tconst targetInputs = {\n\t\t\t\tmessages: tc.messages,\n\t\t\t\ttools,\n\t\t\t\t...(systemPrompt ? { systemPrompt } : {}),\n\t\t\t};\n\n\t\t\tls.test(\n\t\t\t\ttestName,\n\t\t\t\t{\n\t\t\t\t\tinputs: {\n\t\t\t\t\t\tmessages: tc.messages,\n\t\t\t\t\t\ttools: toSerializableTools(tools),\n\t\t\t\t\t\t...(systemPrompt ? { systemPrompt } : {}),\n\t\t\t\t\t},\n\t\t\t\t\treferenceOutputs,\n\t\t\t\t},\n\t\t\t\tasync ({ referenceOutputs: refOut }) => {\n\t\t\t\t\tconst output = await target(targetInputs);\n\t\t\t\t\tls.logOutputs(output);\n\t\t\t\t\tfor (const evaluator of evaluators) {\n\t\t\t\t\t\tawait evaluator({ outputs: output, referenceOutputs: refOut ?? {} });\n\t\t\t\t\t}\n\t\t\t\t},\n\t\t\t);\n\t\t}\n\t});\n}\n","import { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { tool } from '@langchain/core/tools';\nimport { AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage } from '@langchain/core/messages';\nimport { z } from 'zod';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../runtime/langchain/model-resolver';\nimport { getEvalConfig } from './config';\n\nexport interface MockToolDef {\n\tname: string;\n\tdescription: string;\n\tschema: Record<string, unknown>;\n\t/**\n\t * Canned response the mock tool returns.\n\t * Can be a static string, or a function that receives input and returns a response.\n\t * If a function is provided, it receives the full invocation count as a second arg\n\t * to support scenarios like \"first call fails, second call succeeds\".\n\t */\n\tresponse: string | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\nexport interface EvalTargetInput {\n\tsystemPrompt?: string;\n\tmessages: Array<{ role: string; content: string; toolCalls?: string[] }>;\n\t/** @deprecated Use `messages` with `human()` builders instead. */\n\tuserMessages?: string[];\n\ttools: MockToolDef[];\n}\n\nconst MAX_AGENT_LOOPS = 10;\n\n/**\n * Converts the simple message format into LangChain BaseMessage instances.\n * For `ai` messages with `toolCalls`, generates tool_call IDs and wires up\n * the following `tool` messages as ToolMessages with matching IDs.\n */\nfunction convertMessages(msgs: EvalTargetInput['messages']): BaseMessage[] {\n\tconst result: BaseMessage[] = [];\n\tlet tcIdx = 0;\n\tlet pendingToolCalls: Array<{ id: string; name: string }> = [];\n\n\tfor (const msg of msgs) {\n\t\tif (msg.role === 'human') {\n\t\t\tresult.push(new HumanMessage(msg.content));\n\t\t} else if (msg.role === 'ai') {\n\t\t\tif (msg.toolCalls && msg.toolCalls.length > 0) {\n\t\t\t\tpendingToolCalls = msg.toolCalls.map((name) => ({\n\t\t\t\t\tid: `hist_tc${++tcIdx}`,\n\t\t\t\t\tname,\n\t\t\t\t}));\n\t\t\t\tresult.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: msg.content,\n\t\t\t\t\t\ttool_calls: pendingToolCalls.map((tc) => ({\n\t\t\t\t\t\t\tid: tc.id,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t\targs: {},\n\t\t\t\t\t\t})),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t} else {\n\t\t\t\tresult.push(new AIMessage(msg.content));\n\t\t\t}\n\t\t} else if (msg.role === 'tool') {\n\t\t\tconst tc = pendingToolCalls.shift();\n\t\t\tif (!tc) throw new Error('toolResult() without a preceding ai() with toolCalls');\n\t\t\tresult.push(\n\t\t\t\tnew ToolMessage({\n\t\t\t\t\tcontent: msg.content,\n\t\t\t\t\ttool_call_id: tc.id,\n\t\t\t\t\tname: tc.name,\n\t\t\t\t}),\n\t\t\t);\n\t\t}\n\t}\n\n\treturn result;\n}\n\n/**\n * Creates a LangSmith-compatible target function that runs an agentic loop\n * with mock tools and returns the full message trajectory.\n */\nexport function createEvalTarget(modelConfig?: LangchainModelConfig, modelString?: string) {\n\treturn async (inputs: EvalTargetInput): Promise<{ messages: BaseMessage[] }> => {\n\t\tconst config = modelConfig && modelString ? { modelConfig, model: modelString } : getEvalConfig();\n\t\tconst resolver = new LangchainModelResolver(config.modelConfig);\n\t\tconst model = resolver.resolve(config.model) as BaseChatModel;\n\n\t\t// Track invocation counts per tool for stateful mock responses\n\t\tconst toolCallCounts: Record<string, number> = {};\n\n\t\t// Create langchain tools from mock definitions\n\t\tconst langchainTools = inputs.tools.map((mockTool) => {\n\t\t\ttoolCallCounts[mockTool.name] = 0;\n\n\t\t\treturn tool(\n\t\t\t\tasync (toolInput: Record<string, unknown>) => {\n\t\t\t\t\ttoolCallCounts[mockTool.name]++;\n\t\t\t\t\tif (typeof mockTool.response === 'function') {\n\t\t\t\t\t\treturn mockTool.response(toolInput, toolCallCounts[mockTool.name]);\n\t\t\t\t\t}\n\t\t\t\t\treturn mockTool.response;\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\tname: mockTool.name,\n\t\t\t\t\tdescription: mockTool.description,\n\t\t\t\t\tschema: z.object(\n\t\t\t\t\t\tObject.fromEntries(\n\t\t\t\t\t\t\tObject.entries(mockTool.schema).map(([key, val]) => {\n\t\t\t\t\t\t\t\tif (typeof val === 'string') return [key, z.string().describe(val)];\n\t\t\t\t\t\t\t\tif (typeof val === 'number') return [key, z.number().describe(String(val))];\n\t\t\t\t\t\t\t\treturn [key, z.any()];\n\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t),\n\t\t\t\t\t),\n\t\t\t\t},\n\t\t\t);\n\t\t});\n\n\t\tconst boundModel = langchainTools.length > 0 ? model.bindTools!(langchainTools) : model;\n\n\t\tconst messages: BaseMessage[] = [];\n\n\t\tif (inputs.systemPrompt) {\n\t\t\tmessages.push(new SystemMessage(inputs.systemPrompt));\n\t\t}\n\n\t\t// Normalize: support legacy userMessages format from existing datasets\n\t\tconst inputMessages =\n\t\t\tinputs.messages ?? (inputs.userMessages ?? []).map((content: string) => ({ role: 'human', content }));\n\n\t\t// Convert and push all messages (history + final human)\n\t\tmessages.push(...convertMessages(inputMessages));\n\n\t\t// Agentic loop: keep calling model until it stops making tool calls\n\t\tlet loopCount = 0;\n\t\twhile (loopCount < MAX_AGENT_LOOPS) {\n\t\t\tloopCount++;\n\n\t\t\tconst response = await boundModel.invoke(messages);\n\t\t\tmessages.push(response as BaseMessage);\n\n\t\t\tconst aiMessage = response as AIMessage;\n\t\t\tif (!aiMessage.tool_calls || aiMessage.tool_calls.length === 0) {\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t// Execute tool calls and add results\n\t\t\tfor (const tc of aiMessage.tool_calls) {\n\t\t\t\tconst mockTool = langchainTools.find((t) => t.name === tc.name);\n\t\t\t\tif (mockTool) {\n\t\t\t\t\tconst result = await mockTool.invoke(tc.args);\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: typeof result === 'string' ? result : JSON.stringify(result),\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t} else {\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: `Tool \"${tc.name}\" not found`,\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\treturn { messages };\n\t};\n}\n","import { BaseLanguageModel } from '@langchain/core/language_models/base';\nimport { AzureChatOpenAI, ChatOpenAI } from '@langchain/openai';\n\nexport type LangchainOpenAIConfig = {\n\tapiKey: string;\n};\n\nexport type LangchainAzureConfig = {\n\tmodel: string;\n\tapiKey: string;\n\tendpoint: string;\n\tapiVersion: string;\n};\n\nexport type LangchainModelConfig = {\n\topenai?: Record<string, LangchainOpenAIConfig>;\n\tazure?: Record<string, LangchainAzureConfig>;\n};\n\nexport class LangchainModelResolver {\n\tconstructor(private config: LangchainModelConfig) {}\n\n\tresolve(modelString: string, tags?: string[]): BaseLanguageModel {\n\t\tconst parts = modelString.split(':');\n\n\t\tif (parts.length === 2) {\n\t\t\tconst [provider, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, 'default', modelName, tags);\n\t\t}\n\n\t\tif (parts.length === 3) {\n\t\t\tconst [provider, configName, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, configName, modelName, tags);\n\t\t}\n\n\t\tthrow new Error(\n\t\t\t'Model string must follow format \"provider:modelName\" (uses \"default\" config) or \"provider:configName:modelName\"',\n\t\t);\n\t}\n\n\tprivate resolveByProvider(\n\t\tprovider: string,\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t): BaseLanguageModel {\n\t\tswitch (provider) {\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveOpenAI(configName, modelName, tags);\n\t\t\tcase 'azure':\n\t\t\t\treturn this.resolveAzure(configName, modelName, tags);\n\t\t\tdefault:\n\t\t\t\tthrow new Error(`Unsupported model provider: ${provider}`);\n\t\t}\n\t}\n\n\tprivate resolveOpenAI(configName: string, modelName: string, tags?: string[]): ChatOpenAI {\n\t\tconst providerConfig = this.config.openai?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"openai\" is missing`);\n\t\t}\n\n\t\treturn new ChatOpenAI({\n\t\t\tapiKey: providerConfig.apiKey,\n\t\t\tmodelName: modelName,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate resolveAzure(configName: string, deploymentName: string, tags?: string[]): AzureChatOpenAI {\n\t\tconst providerConfig = this.config.azure?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"azure\" is missing`);\n\t\t}\n\n\t\treturn new AzureChatOpenAI({\n\t\t\tmodel: providerConfig.model, // shows (perhaps even uses) 3.5-turbo when not specifid\n\t\t\tazureOpenAIApiKey: providerConfig.apiKey,\n\t\t\tazureOpenAIApiInstanceName: this.extractInstanceName(providerConfig.endpoint),\n\t\t\tazureOpenAIApiDeploymentName: deploymentName,\n\t\t\tazureOpenAIApiVersion: providerConfig.apiVersion,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate extractInstanceName(endpoint: string): string {\n\t\ttry {\n\t\t\tconst url = new URL(endpoint);\n\t\t\treturn url.hostname.split('.')[0];\n\t\t} catch (e) {\n\t\t\treturn endpoint;\n\t\t}\n\t}\n}\n","import * as ls from 'langsmith/vitest';\nimport {\n\tcreateTrajectoryMatchEvaluator,\n\tcreateTrajectoryLLMAsJudge,\n\tTRAJECTORY_ACCURACY_PROMPT,\n} from 'agentevals';\nimport { createLanguageEvaluator } from './evaluators/language';\nimport { createResponseContentEvaluator } from './evaluators/response-content';\nimport { createNoToolCallsEvaluator } from './evaluators/no-tool-calls';\nimport { getEvalConfig } from './config';\n\n// ── Types ────────────────────────────────────────────────────────────\n\ntype EvaluatorFn = (args: {\n\toutputs: Record<string, any>;\n\treferenceOutputs: Record<string, any>;\n}) => Promise<any>;\n\ninterface ResolvedExpectation {\n\tevaluator: EvaluatorFn;\n\treferenceOutputs: Record<string, unknown>;\n}\n\n/** A factory that receives test context and returns an evaluator + its referenceOutputs. */\nexport type Expectation = (ctx: { message: string }) => ResolvedExpectation;\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nfunction withTrajectoryGuard(evaluator: any, key: string): EvaluatorFn {\n\treturn async ({ outputs, referenceOutputs }) => {\n\t\tif (!referenceOutputs?.referenceTrajectory) {\n\t\t\treturn { key, score: true, comment: 'No referenceTrajectory specified, skipping' };\n\t\t}\n\t\treturn evaluator({ outputs, referenceOutputs: referenceOutputs.referenceTrajectory });\n\t};\n}\n\nfunction buildTrajectory(message: string, toolNames: string[]): Record<string, unknown>[] {\n\tconst trajectory: Record<string, unknown>[] = [];\n\tlet tcIdx = 0;\n\n\ttrajectory.push({ role: 'user', content: message });\n\n\tfor (const name of toolNames) {\n\t\tconst id = `tc${++tcIdx}`;\n\t\ttrajectory.push({\n\t\t\trole: 'assistant',\n\t\t\tcontent: '',\n\t\t\ttool_calls: [{ function: { name, arguments: '{}' }, id, type: 'function' }],\n\t\t});\n\t\ttrajectory.push({ role: 'tool', content: '...', tool_call_id: id });\n\t}\n\n\ttrajectory.push({ role: 'assistant', content: '...' });\n\n\treturn trajectory;\n}\n\n// ── Expectation functions ────────────────────────────────────────────\n\n/**\n * Expect the agent to call tools in order (superset trajectory match).\n * Empty `[]` means the agent should answer directly without calling any tools.\n */\nexport function toolsCalled(tools: string[]): Expectation {\n\treturn (ctx) => ({\n\t\tevaluator: ls.wrapEvaluator(\n\t\t\twithTrajectoryGuard(\n\t\t\t\tcreateTrajectoryMatchEvaluator({ trajectoryMatchMode: 'superset', toolArgsMatchMode: 'ignore' }) as any,\n\t\t\t\t'trajectory_match',\n\t\t\t),\n\t\t),\n\t\treferenceOutputs: { referenceTrajectory: buildTrajectory(ctx.message, tools) },\n\t});\n}\n\n/**\n * Run an LLM-as-judge evaluator on the trajectory.\n * Requires `toolsCalled` in the same expect array.\n * Uses the globally configured evaluator model.\n */\nexport function llmJudge(): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel ?? config.model;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(\n\t\t\t\twithTrajectoryGuard(\n\t\t\t\t\tcreateTrajectoryLLMAsJudge({ prompt: TRAJECTORY_ACCURACY_PROMPT, model }) as any,\n\t\t\t\t\t'trajectory_llm_judge',\n\t\t\t\t),\n\t\t\t),\n\t\t\treferenceOutputs: {},\n\t\t};\n\t};\n}\n\n/** Assert the agent made zero tool calls. */\nexport function noTools(): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createNoToolCallsEvaluator()),\n\t\treferenceOutputs: { expectNoToolCalls: true },\n\t});\n}\n\n/**\n * Assert the response is in the given language (ISO 639-1 code).\n * Uses the globally configured evaluator model for language detection.\n * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').\n */\nexport function respondsInLanguage(code: string): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel ?? config.model;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(createLanguageEvaluator(config.modelConfig, model)),\n\t\t\treferenceOutputs: { expectedLanguage: code },\n\t\t};\n\t};\n}\n\n/** Assert the response contains all given strings. */\nexport function contains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseContains: strings },\n\t});\n}\n\n/** Assert the response does not contain any of the given strings. */\nexport function notContains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseMustNotContain: strings },\n\t});\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\nimport { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../../runtime/langchain/model-resolver';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * is in the expected language. Uses a cheap LLM call for language detection.\n */\nexport function createLanguageEvaluator(modelConfig: LangchainModelConfig, model: string) {\n\tconst resolver = new LangchainModelResolver(modelConfig);\n\tconst judge = resolver.resolve(model) as BaseChatModel;\n\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst expectedLanguage = referenceOutputs?.expectedLanguage;\n\t\tif (!expectedLanguage) {\n\t\t\treturn { key: 'language_match', score: true, comment: 'No expected language specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'language_match', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content);\n\n\t\t// Use LLM to detect the language\n\t\tconst detection = await judge.invoke([\n\t\t\t{\n\t\t\t\trole: 'system',\n\t\t\t\tcontent: 'You are a language detection tool. Respond with ONLY the ISO 639-1 language code (e.g., \"en\", \"tr\", \"de\", \"fr\") of the text provided. Nothing else.',\n\t\t\t},\n\t\t\t{\n\t\t\t\trole: 'user',\n\t\t\t\tcontent: responseText,\n\t\t\t},\n\t\t]);\n\n\t\tconst detectedLanguage = (typeof detection.content === 'string' ? detection.content : '').trim().toLowerCase();\n\n\t\tconst matches = detectedLanguage === expectedLanguage.toLowerCase();\n\n\t\treturn {\n\t\t\tkey: 'language_match',\n\t\t\tscore: matches,\n\t\t\tcomment: matches\n\t\t\t\t? `Response language matches expected: ${expectedLanguage}`\n\t\t\t\t: `Expected \"${expectedLanguage}\" but detected \"${detectedLanguage}\"`,\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * contains expected strings and doesn't contain forbidden strings.\n */\nexport function createResponseContentEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst mustContain: string[] = referenceOutputs?.responseContains || [];\n\t\tconst mustNotContain: string[] = referenceOutputs?.responseMustNotContain || [];\n\n\t\tif (mustContain.length === 0 && mustNotContain.length === 0) {\n\t\t\treturn { key: 'response_content', score: true, comment: 'No content assertions specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'response_content', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = (typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content)).toLowerCase();\n\n\t\tconst failures: string[] = [];\n\n\t\tfor (const expected of mustContain) {\n\t\t\tif (!responseText.includes(expected.toLowerCase())) {\n\t\t\t\tfailures.push(`Missing expected text: \"${expected}\"`);\n\t\t\t}\n\t\t}\n\n\t\tfor (const forbidden of mustNotContain) {\n\t\t\tif (responseText.includes(forbidden.toLowerCase())) {\n\t\t\t\tfailures.push(`Contains forbidden text: \"${forbidden}\"`);\n\t\t\t}\n\t\t}\n\n\t\tconst passed = failures.length === 0;\n\n\t\treturn {\n\t\t\tkey: 'response_content',\n\t\t\tscore: passed,\n\t\t\tcomment: passed ? 'All content assertions passed' : failures.join('; '),\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that asserts the agent made zero tool calls.\n * Useful for scenarios like greetings where the agent should just respond with text.\n */\nexport function createNoToolCallsEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\t// Only run this evaluator if the reference explicitly expects no tool calls\n\t\tif (referenceOutputs?.maxToolCalls !== 0 && referenceOutputs?.expectNoToolCalls !== true) {\n\t\t\treturn { key: 'no_tool_calls', score: true, comment: 'No tool call restriction specified, skipping' };\n\t\t}\n\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\n\t\tconst toolCalls = messages\n\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t.flatMap((m) => (m as AIMessage).tool_calls || []);\n\n\t\tconst passed = toolCalls.length === 0;\n\n\t\treturn {\n\t\t\tkey: 'no_tool_calls',\n\t\t\tscore: passed,\n\t\t\tcomment: passed\n\t\t\t\t? 'No tool calls made (as expected)'\n\t\t\t\t: `Agent made ${toolCalls.length} tool call(s): ${toolCalls.map((tc) => tc.name).join(', ')}`,\n\t\t};\n\t};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACWA,IAAI,UAA6B;AAE1B,SAAS,eAAe,QAA0B;AACxD,YAAU;AACX;AAEO,SAAS,gBAA4B;AAC3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,wEAAwE;AAAA,EACzF;AACA,SAAO;AACR;;;ACtBA,SAAoB;;;ACCpB,mBAAqB;AACrB,sBAAiF;AACjF,iBAAkB;;;ACFlB,oBAA4C;AAkBrC,IAAM,yBAAN,MAA6B;AAAA,EACnC,YAAoB,QAA8B;AAA9B;AAAA,EAA+B;AAAA,EAEnD,QAAQ,aAAqB,MAAoC;AAChE,UAAM,QAAQ,YAAY,MAAM,GAAG;AAEnC,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,SAAS,IAAI;AAC9B,aAAO,KAAK,kBAAkB,UAAU,WAAW,WAAW,IAAI;AAAA,IACnE;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,YAAY,SAAS,IAAI;AAC1C,aAAO,KAAK,kBAAkB,UAAU,YAAY,WAAW,IAAI;AAAA,IACpE;AAEA,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAAA,EAEQ,kBACP,UACA,YACA,WACA,MACoB;AACpB,YAAQ,UAAU;AAAA,MACjB,KAAK;AACJ,eAAO,KAAK,cAAc,YAAY,WAAW,IAAI;AAAA,MACtD,KAAK;AACJ,eAAO,KAAK,aAAa,YAAY,WAAW,IAAI;AAAA,MACrD;AACC,cAAM,IAAI,MAAM,+BAA+B,QAAQ,EAAE;AAAA,IAC3D;AAAA,EACD;AAAA,EAEQ,cAAc,YAAoB,WAAmB,MAA6B;AACzF,UAAM,iBAAiB,KAAK,OAAO,SAAS,UAAU;AACtD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,oCAAoC;AAAA,IACjF;AAEA,WAAO,IAAI,yBAAW;AAAA,MACrB,QAAQ,eAAe;AAAA,MACvB;AAAA,MACA;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,aAAa,YAAoB,gBAAwB,MAAkC;AAClG,UAAM,iBAAiB,KAAK,OAAO,QAAQ,UAAU;AACrD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,mCAAmC;AAAA,IAChF;AAEA,WAAO,IAAI,8BAAgB;AAAA,MAC1B,OAAO,eAAe;AAAA;AAAA,MACtB,mBAAmB,eAAe;AAAA,MAClC,4BAA4B,KAAK,oBAAoB,eAAe,QAAQ;AAAA,MAC5E,8BAA8B;AAAA,MAC9B,uBAAuB,eAAe;AAAA,MACtC;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,oBAAoB,UAA0B;AACrD,QAAI;AACH,YAAM,MAAM,IAAI,IAAI,QAAQ;AAC5B,aAAO,IAAI,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,IACjC,SAAS,GAAG;AACX,aAAO;AAAA,IACR;AAAA,EACD;AACD;;;ADjEA,IAAM,kBAAkB;AAOxB,SAAS,gBAAgB,MAAkD;AAC1E,QAAM,SAAwB,CAAC;AAC/B,MAAI,QAAQ;AACZ,MAAI,mBAAwD,CAAC;AAE7D,aAAW,OAAO,MAAM;AACvB,QAAI,IAAI,SAAS,SAAS;AACzB,aAAO,KAAK,IAAI,6BAAa,IAAI,OAAO,CAAC;AAAA,IAC1C,WAAW,IAAI,SAAS,MAAM;AAC7B,UAAI,IAAI,aAAa,IAAI,UAAU,SAAS,GAAG;AAC9C,2BAAmB,IAAI,UAAU,IAAI,CAAC,UAAU;AAAA,UAC/C,IAAI,UAAU,EAAE,KAAK;AAAA,UACrB;AAAA,QACD,EAAE;AACF,eAAO;AAAA,UACN,IAAI,0BAAU;AAAA,YACb,SAAS,IAAI;AAAA,YACb,YAAY,iBAAiB,IAAI,CAAC,QAAQ;AAAA,cACzC,IAAI,GAAG;AAAA,cACP,MAAM,GAAG;AAAA,cACT,MAAM,CAAC;AAAA,YACR,EAAE;AAAA,UACH,CAAC;AAAA,QACF;AAAA,MACD,OAAO;AACN,eAAO,KAAK,IAAI,0BAAU,IAAI,OAAO,CAAC;AAAA,MACvC;AAAA,IACD,WAAW,IAAI,SAAS,QAAQ;AAC/B,YAAM,KAAK,iBAAiB,MAAM;AAClC,UAAI,CAAC,GAAI,OAAM,IAAI,MAAM,sDAAsD;AAC/E,aAAO;AAAA,QACN,IAAI,4BAAY;AAAA,UACf,SAAS,IAAI;AAAA,UACb,cAAc,GAAG;AAAA,UACjB,MAAM,GAAG;AAAA,QACV,CAAC;AAAA,MACF;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;AAMO,SAAS,iBAAiB,aAAoC,aAAsB;AAC1F,SAAO,OAAO,WAAkE;AAC/E,UAAM,SAAS,eAAe,cAAc,EAAE,aAAa,OAAO,YAAY,IAAI,cAAc;AAChG,UAAM,WAAW,IAAI,uBAAuB,OAAO,WAAW;AAC9D,UAAM,QAAQ,SAAS,QAAQ,OAAO,KAAK;AAG3C,UAAM,iBAAyC,CAAC;AAGhD,UAAM,iBAAiB,OAAO,MAAM,IAAI,CAAC,aAAa;AACrD,qBAAe,SAAS,IAAI,IAAI;AAEhC,iBAAO;AAAA,QACN,OAAO,cAAuC;AAC7C,yBAAe,SAAS,IAAI;AAC5B,cAAI,OAAO,SAAS,aAAa,YAAY;AAC5C,mBAAO,SAAS,SAAS,WAAW,eAAe,SAAS,IAAI,CAAC;AAAA,UAClE;AACA,iBAAO,SAAS;AAAA,QACjB;AAAA,QACA;AAAA,UACC,MAAM,SAAS;AAAA,UACf,aAAa,SAAS;AAAA,UACtB,QAAQ,aAAE;AAAA,YACT,OAAO;AAAA,cACN,OAAO,QAAQ,SAAS,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,MAAM;AACnD,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,GAAG,CAAC;AAClE,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,OAAO,GAAG,CAAC,CAAC;AAC1E,uBAAO,CAAC,KAAK,aAAE,IAAI,CAAC;AAAA,cACrB,CAAC;AAAA,YACF;AAAA,UACD;AAAA,QACD;AAAA,MACD;AAAA,IACD,CAAC;AAED,UAAM,aAAa,eAAe,SAAS,IAAI,MAAM,UAAW,cAAc,IAAI;AAElF,UAAM,WAA0B,CAAC;AAEjC,QAAI,OAAO,cAAc;AACxB,eAAS,KAAK,IAAI,8BAAc,OAAO,YAAY,CAAC;AAAA,IACrD;AAGA,UAAM,gBACL,OAAO,aAAa,OAAO,gBAAgB,CAAC,GAAG,IAAI,CAAC,aAAqB,EAAE,MAAM,SAAS,QAAQ,EAAE;AAGrG,aAAS,KAAK,GAAG,gBAAgB,aAAa,CAAC;AAG/C,QAAI,YAAY;AAChB,WAAO,YAAY,iBAAiB;AACnC;AAEA,YAAM,WAAW,MAAM,WAAW,OAAO,QAAQ;AACjD,eAAS,KAAK,QAAuB;AAErC,YAAM,YAAY;AAClB,UAAI,CAAC,UAAU,cAAc,UAAU,WAAW,WAAW,GAAG;AAC/D;AAAA,MACD;AAGA,iBAAW,MAAM,UAAU,YAAY;AACtC,cAAM,WAAW,eAAe,KAAK,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI;AAC9D,YAAI,UAAU;AACb,gBAAM,SAAS,MAAM,SAAS,OAAO,GAAG,IAAI;AAC5C,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,OAAO,WAAW,WAAW,SAAS,KAAK,UAAU,MAAM;AAAA,cACpE,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD,OAAO;AACN,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,SAAS,GAAG,IAAI;AAAA,cACzB,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAEA,WAAO,EAAE,SAAS;AAAA,EACnB;AACD;;;ADrKO,SAAS,MAAM,SAAiB;AACtC,SAAO,EAAE,MAAM,SAAkB,QAAQ;AAC1C;AAEO,SAAS,GAAG,SAAiB,WAAsB;AACzD,SAAO,EAAE,MAAM,MAAe,SAAS,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC,EAAG;AAC5E;AAEO,SAAS,WAAW,SAAiB;AAC3C,SAAO,EAAE,MAAM,QAAiB,QAAQ;AACzC;AAoCA,SAAS,YAAY,MAA8C;AAClE,SAAO,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,GAAG,OAAO;AAAA,IACjD;AAAA,IACA,aAAa,IAAI;AAAA,IACjB,QAAQ,IAAI,UAAU,CAAC;AAAA,IACvB,UACC,OAAO,IAAI,aAAa,aACpB,IAAI,WACL,OAAO,IAAI,aAAa,WACvB,IAAI,WACJ,KAAK,UAAU,IAAI,QAAQ;AAAA,EACjC,EAAE;AACH;AAGA,SAAS,oBAAoB,OAAiD;AAC7E,SAAO,MAAM,IAAI,CAAC,OAAO;AAAA,IACxB,GAAG;AAAA,IACH,UAAU,OAAO,EAAE,aAAa,aAAa,eAAe,EAAE;AAAA,EAC/D,EAAE;AACH;AAEA,SAAS,iBAAiB,UAA6B;AACtD,WAAS,IAAI,SAAS,SAAS,GAAG,KAAK,GAAG,KAAK;AAC9C,QAAI,SAAS,CAAC,EAAE,SAAS,QAAS,QAAO,SAAS,CAAC,EAAE;AAAA,EACtD;AACA,SAAO,SAAS,CAAC,GAAG,WAAW;AAChC;AAQA,SAAS,cAAc,QAA+B;AACrD,MAAI,OAAO,OAAO,WAAW,WAAY,QAAO,OAAO;AACvD,QAAM,aAAa,cAAc;AACjC,QAAM,QAAQ,OAAO,OAAO,WAAW,WAAW,OAAO,SAAS,WAAW;AAC7E,SAAO,iBAAiB,WAAW,aAAa,KAAK;AACtD;AAEO,SAAS,YAAY,MAAc,QAA2B;AACpE,QAAM,SAAS,cAAc,MAAM;AACnC,QAAM,aAAa,OAAO;AAC1B,QAAM,eAAe,cAAc,EAAE;AAErC,EAAG,YAAS,MAAM,MAAM;AACvB,eAAW,MAAM,OAAO,OAAO;AAC9B,YAAM,WAAW,GAAG,QAAQ,iBAAiB,GAAG,QAAQ;AACxD,YAAM,QAAQ,YAAY,GAAG,SAAS,UAAU;AAChD,YAAM,MAAM,EAAE,SAAS,iBAAiB,GAAG,QAAQ,EAAE;AAErD,YAAM,WAAW,GAAG,OAAO,IAAI,CAAC,QAAQ,IAAI,GAAG,CAAC;AAChD,YAAM,aAAa,SAAS,IAAI,CAAC,MAAM,EAAE,SAAS;AAClD,YAAM,mBAAmB,OAAO,OAAO,CAAC,GAAG,GAAG,SAAS,IAAI,CAAC,MAAM,EAAE,gBAAgB,CAAC;AAGrF,YAAM,eAAe,GAAG,gBAAgB,OAAO,gBAAgB;AAE/D,YAAM,eAAe;AAAA,QACpB,UAAU,GAAG;AAAA,QACb;AAAA,QACA,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC;AAAA,MACxC;AAEA,MAAG;AAAA,QACF;AAAA,QACA;AAAA,UACC,QAAQ;AAAA,YACP,UAAU,GAAG;AAAA,YACb,OAAO,oBAAoB,KAAK;AAAA,YAChC,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC;AAAA,UACxC;AAAA,UACA;AAAA,QACD;AAAA,QACA,OAAO,EAAE,kBAAkB,OAAO,MAAM;AACvC,gBAAM,SAAS,MAAM,OAAO,YAAY;AACxC,UAAG,cAAW,MAAM;AACpB,qBAAW,aAAa,YAAY;AACnC,kBAAM,UAAU,EAAE,SAAS,QAAQ,kBAAkB,UAAU,CAAC,EAAE,CAAC;AAAA,UACpE;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC;AACF;;;AG5IA,IAAAA,MAAoB;AACpB,wBAIO;;;ACLP,IAAAC,mBAAuC;AAQhC,SAAS,wBAAwB,aAAmC,OAAe;AACzF,QAAM,WAAW,IAAI,uBAAuB,WAAW;AACvD,QAAM,QAAQ,SAAS,QAAQ,KAAK;AAEpC,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,mBAAmB,kBAAkB;AAC3C,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,KAAK,kBAAkB,OAAO,MAAM,SAAS,2CAA2C;AAAA,IAClG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,kBAAkB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC5F;AAEA,UAAM,eAAe,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO;AAG7H,UAAM,YAAY,MAAM,MAAM,OAAO;AAAA,MACpC;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,MACA;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,IACD,CAAC;AAED,UAAM,oBAAoB,OAAO,UAAU,YAAY,WAAW,UAAU,UAAU,IAAI,KAAK,EAAE,YAAY;AAE7G,UAAM,UAAU,qBAAqB,iBAAiB,YAAY;AAElE,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,UACN,uCAAuC,gBAAgB,KACvD,aAAa,gBAAgB,mBAAmB,gBAAgB;AAAA,IACpE;AAAA,EACD;AACD;;;AC1DA,IAAAC,mBAAuC;AAMhC,SAAS,iCAAiC;AAChD,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,cAAwB,kBAAkB,oBAAoB,CAAC;AACrE,UAAM,iBAA2B,kBAAkB,0BAA0B,CAAC;AAE9E,QAAI,YAAY,WAAW,KAAK,eAAe,WAAW,GAAG;AAC5D,aAAO,EAAE,KAAK,oBAAoB,OAAO,MAAM,SAAS,4CAA4C;AAAA,IACrG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,oBAAoB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC9F;AAEA,UAAM,gBAAgB,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO,GAAG,YAAY;AAE7I,UAAM,WAAqB,CAAC;AAE5B,eAAW,YAAY,aAAa;AACnC,UAAI,CAAC,aAAa,SAAS,SAAS,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,2BAA2B,QAAQ,GAAG;AAAA,MACrD;AAAA,IACD;AAEA,eAAW,aAAa,gBAAgB;AACvC,UAAI,aAAa,SAAS,UAAU,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,6BAA6B,SAAS,GAAG;AAAA,MACxD;AAAA,IACD;AAEA,UAAM,SAAS,SAAS,WAAW;AAEnC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SAAS,kCAAkC,SAAS,KAAK,IAAI;AAAA,IACvE;AAAA,EACD;AACD;;;ACrDA,IAAAC,mBAAuC;AAMhC,SAAS,6BAA6B;AAC5C,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AAEL,QAAI,kBAAkB,iBAAiB,KAAK,kBAAkB,sBAAsB,MAAM;AACzF,aAAO,EAAE,KAAK,iBAAiB,OAAO,MAAM,SAAS,+CAA+C;AAAA,IACrG;AAEA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AAErD,UAAM,YAAY,SAChB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC;AAElD,UAAM,SAAS,UAAU,WAAW;AAEpC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SACN,qCACA,cAAc,UAAU,MAAM,kBAAkB,UAAU,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,IAC7F;AAAA,EACD;AACD;;;AHPA,SAAS,oBAAoB,WAAgB,KAA0B;AACtE,SAAO,OAAO,EAAE,SAAS,iBAAiB,MAAM;AAC/C,QAAI,CAAC,kBAAkB,qBAAqB;AAC3C,aAAO,EAAE,KAAK,OAAO,MAAM,SAAS,6CAA6C;AAAA,IAClF;AACA,WAAO,UAAU,EAAE,SAAS,kBAAkB,iBAAiB,oBAAoB,CAAC;AAAA,EACrF;AACD;AAEA,SAAS,gBAAgB,SAAiB,WAAgD;AACzF,QAAM,aAAwC,CAAC;AAC/C,MAAI,QAAQ;AAEZ,aAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,QAAQ,CAAC;AAElD,aAAW,QAAQ,WAAW;AAC7B,UAAM,KAAK,KAAK,EAAE,KAAK;AACvB,eAAW,KAAK;AAAA,MACf,MAAM;AAAA,MACN,SAAS;AAAA,MACT,YAAY,CAAC,EAAE,UAAU,EAAE,MAAM,WAAW,KAAK,GAAG,IAAI,MAAM,WAAW,CAAC;AAAA,IAC3E,CAAC;AACD,eAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,OAAO,cAAc,GAAG,CAAC;AAAA,EACnE;AAEA,aAAW,KAAK,EAAE,MAAM,aAAa,SAAS,MAAM,CAAC;AAErD,SAAO;AACR;AAQO,SAAS,YAAY,OAA8B;AACzD,SAAO,CAAC,SAAS;AAAA,IAChB,WAAc;AAAA,MACb;AAAA,YACC,kDAA+B,EAAE,qBAAqB,YAAY,mBAAmB,SAAS,CAAC;AAAA,QAC/F;AAAA,MACD;AAAA,IACD;AAAA,IACA,kBAAkB,EAAE,qBAAqB,gBAAgB,IAAI,SAAS,KAAK,EAAE;AAAA,EAC9E;AACD;AAOO,SAAS,WAAwB;AACvC,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO,kBAAkB,OAAO;AAC9C,WAAO;AAAA,MACN,WAAc;AAAA,QACb;AAAA,cACC,8CAA2B,EAAE,QAAQ,8CAA4B,MAAM,CAAC;AAAA,UACxE;AAAA,QACD;AAAA,MACD;AAAA,MACA,kBAAkB,CAAC;AAAA,IACpB;AAAA,EACD;AACD;AAGO,SAAS,UAAuB;AACtC,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,2BAA2B,CAAC;AAAA,IACxD,kBAAkB,EAAE,mBAAmB,KAAK;AAAA,EAC7C;AACD;AAOO,SAAS,mBAAmB,MAA2B;AAC7D,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO,kBAAkB,OAAO;AAC9C,WAAO;AAAA,MACN,WAAc,kBAAc,wBAAwB,OAAO,aAAa,KAAK,CAAC;AAAA,MAC9E,kBAAkB,EAAE,kBAAkB,KAAK;AAAA,IAC5C;AAAA,EACD;AACD;AAGO,SAAS,SAAS,SAAgC;AACxD,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,kBAAkB,QAAQ;AAAA,EAC/C;AACD;AAGO,SAAS,YAAY,SAAgC;AAC3D,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,wBAAwB,QAAQ;AAAA,EACrD;AACD;","names":["ls","import_messages","import_messages","import_messages"]}
1
+ {"version":3,"sources":["../../src/eval/index.ts","../../src/eval/config.ts","../../src/eval/suite.ts","../../src/eval/target.ts","../../src/runtime/langchain/model-resolver.ts","../../src/runtime/langchain/utils.ts","../../src/eval/expectations.ts","../../src/eval/evaluators/language.ts","../../src/eval/evaluators/response-content.ts","../../src/eval/evaluators/no-tool-calls.ts"],"sourcesContent":["// ── Configuration ───────────────────────────────────────────────────\nexport { configureEvals } from './config';\nexport type { EvalConfig, CreateTargetFn } from './config';\n\n// ── Suite API ────────────────────────────────────────────────────────\nexport { defineSuite, human, ai, toolResult, fromToolSpecs } from './suite';\nexport type { SuiteConfig, TestCase, ToolDef } from './suite';\n\n// ── Expectations ─────────────────────────────────────────────────────\nexport { toolsCalled, llmJudge, noTools, respondsInLanguage, contains, notContains } from './expectations';\nexport type { Expectation } from './expectations';\n","import type { Agent, ToolDefinition } from '../core/agent.interface';\nimport type { LangchainModelConfig } from '../runtime/langchain/model-resolver';\n\n/** Factory that creates a fresh Agent per test case. Receives extra suite-level tools as ToolDefinition[]. */\nexport type CreateTargetFn = (extraTools: ToolDefinition[]) => Agent | Promise<Agent>;\n\nexport interface EvalConfig {\n\t/** Required for model-based target and LLM evaluators (respondsInLanguage, llmJudge). */\n\tmodelConfig: LangchainModelConfig;\n\t/** Required for model-based target. Also used as fallback for evaluatorModel. */\n\tmodel?: string;\n\t/** Model for evaluators needing LLM calls (language detection, LLM-as-judge). */\n\tevaluatorModel: string;\n\t/** System prompt for model-based target. Ignored when createTarget is used. Can be overridden per-suite or per-case. */\n\tsystemPrompt?: string;\n\t/** Factory that creates a fresh Agent per test case. When set, this is the default target. */\n\tcreateTarget?: CreateTargetFn;\n}\n\nlet _config: EvalConfig | null = null;\n\nexport function configureEvals(config: EvalConfig): void {\n\t_config = config;\n}\n\nexport function getEvalConfig(): EvalConfig {\n\tif (!_config) {\n\t\tthrow new Error('Evals not configured. Call configureEvals() in your vitest setupFiles.');\n\t}\n\treturn _config;\n}\n\n","import * as ls from 'langsmith/vitest';\nimport { BaseMessage } from '@langchain/core/messages';\nimport { createEvalTarget, runAgentTarget, type MockToolDef } from './target';\nimport { type Expectation } from './expectations';\nimport { getEvalConfig, type CreateTargetFn } from './config';\nimport {\n\ttype Message,\n\ttype HumanMessage,\n\ttype AiMessage,\n\ttype ToolMessage,\n\ttype ToolSpec,\n} from '../core/agent.interface';\n\n// ── Message builders ─────────────────────────────────────────────────\n\nexport function human(content: string): HumanMessage {\n\treturn { role: 'human', content: [{ type: 'text', text: content }] };\n}\n\nexport function ai(content: string, toolCalls?: string[]): AiMessage {\n\treturn { role: 'ai', content, ...(toolCalls ? { toolCalls: toolCalls.map((name) => ({ name })) } : {}) };\n}\n\nexport function toolResult(name: string, output: string): ToolMessage {\n\treturn { role: 'tool', name, output };\n}\n\nexport interface ToolDef {\n\tdescription: string;\n\t/** A plain key→description record, or a ZodObject passed through from a ToolSpec. */\n\tschema?: Record<string, string> | import('zod').ZodObject<any>;\n\t/** Auto-stringified if not a string or function. */\n\tresponse: unknown | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\nexport interface TestCase {\n\t/** Test name. Defaults to the last human message content if omitted. */\n\tname?: string;\n\tmessages: Message[];\n\tsystemPrompt?: string;\n\t/** Override suite-level tools for this case. */\n\ttools?: Record<string, ToolDef>;\n\texpect: Expectation[];\n}\n\ntype TargetFn = (inputs: { systemPrompt?: string; messages: Message[]; tools: MockToolDef[] }) => Promise<{ messages: BaseMessage[] }>;\n\nexport interface SuiteConfig {\n\t/** Custom target function, or model string override. Auto-created from global config if omitted. */\n\ttarget?: TargetFn | string;\n\t/** Factory that creates a fresh Agent per test case. Overrides global createTarget. */\n\tcreateTarget?: CreateTargetFn;\n\t/** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */\n\tsystemPrompt?: string;\n\ttools?: Record<string, ToolDef>;\n\tcases: TestCase[];\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\n/**\n * Converts a `ToolSpec[]` (from a real tool provider) into the\n * `Record<string, ToolDef>` that `defineSuite` expects.\n *\n * `responses` maps tool names to canned mock responses. Tools without an\n * entry in `responses` default to `''`.\n */\nexport function fromToolSpecs(\n\tspecs: ToolSpec[],\n\tresponses: Record<string, ToolDef['response']> = {},\n): Record<string, ToolDef> {\n\treturn Object.fromEntries(\n\t\tspecs.map((spec) => [\n\t\t\tspec.name,\n\t\t\t{\n\t\t\t\tdescription: spec.description,\n\t\t\t\tschema: spec.inputSchema,\n\t\t\t\tresponse: responses[spec.name] ?? '',\n\t\t\t} satisfies ToolDef,\n\t\t]),\n\t);\n}\n\nfunction toMockTools(defs: Record<string, ToolDef>): MockToolDef[] {\n\treturn Object.entries(defs).map(([name, def]) => ({\n\t\tname,\n\t\tdescription: def.description,\n\t\tschema: def.schema ?? {},\n\t\tresponse:\n\t\t\ttypeof def.response === 'function'\n\t\t\t\t? (def.response as MockToolDef['response'])\n\t\t\t\t: typeof def.response === 'string'\n\t\t\t\t\t? def.response\n\t\t\t\t\t: JSON.stringify(def.response),\n\t}));\n}\n\n/** Strip function responses and ZodObjects so the object is JSON-serialisable for langsmith hashing. */\nfunction toSerializableTools(tools: MockToolDef[]): Record<string, unknown>[] {\n\treturn tools.map((t) => ({\n\t\t...t,\n\t\tschema: t.schema instanceof Object && 'shape' in t.schema ? '<ZodObject>' : t.schema,\n\t\tresponse: typeof t.response === 'function' ? '<function>' : t.response,\n\t}));\n}\n\nfunction lastHumanContent(messages: Message[]): string {\n\tfor (let i = messages.length - 1; i >= 0; i--) {\n\t\tconst msg = messages[i];\n\t\tif (msg.role === 'human') {\n\t\t\tconst textBlock = msg.content.find((c) => c.type === 'text');\n\t\t\treturn textBlock ? textBlock.text : '';\n\t\t}\n\t}\n\treturn '';\n}\n\n// ── Main entry point ─────────────────────────────────────────────────\n\n/**\n * Defines an eval suite. Internally registers `ls.describe` / `ls.test`\n * so vitest discovers the tests — eval files only need to call this function.\n */\nfunction resolveModelTarget(config: SuiteConfig): TargetFn {\n\tif (typeof config.target === 'function') return config.target;\n\tconst evalConfig = getEvalConfig();\n\tif (!evalConfig.model && typeof config.target !== 'string') {\n\t\tthrow new Error('model is required for model-based target. Add it to your configureEvals() call.');\n\t}\n\tconst model = typeof config.target === 'string' ? config.target : evalConfig.model!;\n\treturn createEvalTarget(evalConfig.modelConfig, model);\n}\n\nfunction resolveCreateTarget(config: SuiteConfig): CreateTargetFn | undefined {\n\treturn config.createTarget ?? getEvalConfig().createTarget;\n}\n\nexport function defineSuite(name: string, config: SuiteConfig): void {\n\tconst suiteTools = config.tools ?? {};\n\tconst createTarget = config.target ? undefined : resolveCreateTarget(config);\n\n\tls.describe(name, () => {\n\t\tfor (const tc of config.cases) {\n\t\t\tconst testName = tc.name ?? lastHumanContent(tc.messages);\n\t\t\tconst caseToolDefs = tc.tools ?? suiteTools;\n\t\t\tconst tools = toMockTools(caseToolDefs);\n\t\t\tconst ctx = { message: lastHumanContent(tc.messages) };\n\n\t\t\tconst resolved = tc.expect.map((exp) => exp(ctx));\n\t\t\tconst evaluators = resolved.map((r) => r.evaluator);\n\t\t\tconst referenceOutputs = Object.assign({}, ...resolved.map((r) => r.referenceOutputs));\n\n\t\t\tls.test(\n\t\t\t\ttestName,\n\t\t\t\t{\n\t\t\t\t\tinputs: {\n\t\t\t\t\t\tmessages: tc.messages,\n\t\t\t\t\t\ttools: toSerializableTools(tools),\n\t\t\t\t\t},\n\t\t\t\t\treferenceOutputs,\n\t\t\t\t},\n\t\t\t\tasync ({ referenceOutputs: refOut }) => {\n\t\t\t\t\tlet output: { messages: BaseMessage[] };\n\n\t\t\t\t\tif (createTarget) {\n\t\t\t\t\t\t// Agent mode: create a fresh agent per test, run it, convert result\n\t\t\t\t\t\toutput = await runAgentTarget(createTarget, tc.messages, caseToolDefs);\n\t\t\t\t\t} else {\n\t\t\t\t\t\t// Model mode: use model-based target with mock tools\n\t\t\t\t\t\tconst target = resolveModelTarget(config);\n\t\t\t\t\t\tconst globalPrompt = getEvalConfig().systemPrompt;\n\t\t\t\t\t\tconst systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;\n\t\t\t\t\t\toutput = await target({\n\t\t\t\t\t\t\tmessages: tc.messages,\n\t\t\t\t\t\t\ttools,\n\t\t\t\t\t\t\t...(systemPrompt ? { systemPrompt } : {}),\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\n\t\t\t\t\tls.logOutputs(output);\n\t\t\t\t\tfor (const evaluator of evaluators) {\n\t\t\t\t\t\tawait evaluator({ outputs: output, referenceOutputs: refOut ?? {} });\n\t\t\t\t\t}\n\t\t\t\t},\n\t\t\t);\n\t\t}\n\t});\n}\n","import { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { tool } from '@langchain/core/tools';\nimport { AIMessage, BaseMessage, SystemMessage, ToolMessage } from '@langchain/core/messages';\nimport { z } from 'zod';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../runtime/langchain/model-resolver';\nimport { type CreateTargetFn, getEvalConfig } from './config';\nimport type { AgentResult, Message as AgentMessage, ToolCallContentBlock, ToolDefinition } from '../core/agent.interface';\nimport { convertToLangchainMessages } from '../runtime/langchain/utils';\nimport type { ToolDef } from './suite';\n\nexport interface MockToolDef {\n\tname: string;\n\tdescription: string;\n\tschema: z.ZodObject<any> | Record<string, unknown>;\n\t/**\n\t * Canned response the mock tool returns.\n\t * Can be a static string, or a function that receives input and returns a response.\n\t * If a function is provided, it receives the full invocation count as a second arg\n\t * to support scenarios like \"first call fails, second call succeeds\".\n\t */\n\tresponse: string | ((input: Record<string, unknown>, callCount: number) => string);\n}\n\nexport interface EvalTargetInput {\n\tsystemPrompt?: string;\n\tmessages: AgentMessage[];\n\ttools: MockToolDef[];\n}\n\nconst MAX_AGENT_LOOPS = 10;\n\n/**\n * Creates a LangSmith-compatible target function that runs an agentic loop\n * with mock tools and returns the full message trajectory.\n */\nexport function createEvalTarget(modelConfig?: LangchainModelConfig, modelString?: string) {\n\treturn async (inputs: EvalTargetInput): Promise<{ messages: BaseMessage[] }> => {\n\t\tconst config = modelConfig && modelString ? { modelConfig, model: modelString } : getEvalConfig();\n\t\tif (!config.model) {\n\t\t\tthrow new Error('model is required for model-based target. Add it to your configureEvals() call.');\n\t\t}\n\t\tconst resolver = new LangchainModelResolver(config.modelConfig);\n\t\tconst model = resolver.resolve(config.model) as BaseChatModel;\n\n\t\t// Track invocation counts per tool for stateful mock responses\n\t\tconst toolCallCounts: Record<string, number> = {};\n\n\t\t// Create langchain tools from mock definitions\n\t\tconst langchainTools = inputs.tools.map((mockTool) => {\n\t\t\ttoolCallCounts[mockTool.name] = 0;\n\n\t\t\treturn tool(\n\t\t\t\tasync (toolInput: Record<string, unknown>) => {\n\t\t\t\t\ttoolCallCounts[mockTool.name]++;\n\t\t\t\t\tif (typeof mockTool.response === 'function') {\n\t\t\t\t\t\treturn mockTool.response(toolInput, toolCallCounts[mockTool.name]);\n\t\t\t\t\t}\n\t\t\t\t\treturn mockTool.response;\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\tname: mockTool.name,\n\t\t\t\t\tdescription: mockTool.description,\n\t\t\t\t\tschema:\n\t\t\t\t\t\tmockTool.schema instanceof z.ZodObject\n\t\t\t\t\t\t\t? mockTool.schema\n\t\t\t\t\t\t\t: z.object(\n\t\t\t\t\t\t\t\t\tObject.fromEntries(\n\t\t\t\t\t\t\t\t\t\tObject.entries(mockTool.schema).map(([key, val]) => {\n\t\t\t\t\t\t\t\t\t\t\tif (typeof val === 'string') return [key, z.string().describe(val)];\n\t\t\t\t\t\t\t\t\t\t\tif (typeof val === 'number') return [key, z.number().describe(String(val))];\n\t\t\t\t\t\t\t\t\t\t\treturn [key, z.any()];\n\t\t\t\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t\t\t\t),\n\t\t\t\t\t\t\t\t),\n\t\t\t\t},\n\t\t\t);\n\t\t});\n\n\t\tconst boundModel = langchainTools.length > 0 ? model.bindTools!(langchainTools) : model;\n\n\t\tconst messages: BaseMessage[] = [];\n\n\t\tif (inputs.systemPrompt) {\n\t\t\tmessages.push(new SystemMessage(inputs.systemPrompt));\n\t\t}\n\n\t\t// Convert and push all messages (history + final human)\n\t\tmessages.push(...convertToLangchainMessages(inputs.messages));\n\n\t\t// Agentic loop: keep calling model until it stops making tool calls\n\t\tlet loopCount = 0;\n\t\twhile (loopCount < MAX_AGENT_LOOPS) {\n\t\t\tloopCount++;\n\n\t\t\tconst response = await boundModel.invoke(messages);\n\t\t\tmessages.push(response as BaseMessage);\n\n\t\t\tconst aiMessage = response as AIMessage;\n\t\t\tif (!aiMessage.tool_calls || aiMessage.tool_calls.length === 0) {\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t// Execute tool calls and add results\n\t\t\tfor (const tc of aiMessage.tool_calls) {\n\t\t\t\tconst mockTool = langchainTools.find((t) => t.name === tc.name);\n\t\t\t\tif (mockTool) {\n\t\t\t\t\tconst result = await mockTool.invoke(tc.args);\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: typeof result === 'string' ? result : JSON.stringify(result),\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t} else {\n\t\t\t\t\tmessages.push(\n\t\t\t\t\t\tnew ToolMessage({\n\t\t\t\t\t\t\tcontent: `Tool \"${tc.name}\" not found`,\n\t\t\t\t\t\t\ttool_call_id: tc.id!,\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t}),\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\treturn { messages };\n\t};\n}\n\n// ── Agent-based target ──────────────────────────────────────────────\n\n/**\n * Converts an `AgentResult` (from `Agent.run()`) into LangChain `BaseMessage[]`\n * so existing evaluators (trajectory match, no-tool-calls, response-content, language) work unchanged.\n *\n * Consecutive `tool_call` content blocks are grouped into a single `AIMessage` with `tool_calls`,\n * followed by one `ToolMessage` per call.\n */\nexport function agentResultToMessages(inputMessages: AgentMessage[], result: AgentResult): BaseMessage[] {\n\t// Include input messages for trajectory context\n\tconst messages: BaseMessage[] = convertToLangchainMessages(inputMessages);\n\n\t// Group content blocks into BaseMessages\n\tlet pendingToolCalls: { id: string; name: string; args: Record<string, unknown>; output: string }[] = [];\n\n\tfor (const block of result.content) {\n\t\tif (block.type === 'tool_call') {\n\t\t\tconst tc = block as ToolCallContentBlock;\n\t\t\tpendingToolCalls.push({\n\t\t\t\tid: tc.toolCallId,\n\t\t\t\tname: tc.name,\n\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\toutput: tc.output,\n\t\t\t});\n\t\t} else if (block.type === 'text') {\n\t\t\t// Flush any pending tool calls before the text block\n\t\t\tif (pendingToolCalls.length > 0) {\n\t\t\t\tmessages.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: '',\n\t\t\t\t\t\ttool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args })),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t\tfor (const tc of pendingToolCalls) {\n\t\t\t\t\tmessages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));\n\t\t\t\t}\n\t\t\t\tpendingToolCalls = [];\n\t\t\t}\n\t\t\tmessages.push(new AIMessage(block.output));\n\t\t}\n\t}\n\n\t// Flush remaining tool calls (agent ended mid-tool-use, unlikely but safe)\n\tif (pendingToolCalls.length > 0) {\n\t\tmessages.push(\n\t\t\tnew AIMessage({\n\t\t\t\tcontent: '',\n\t\t\t\ttool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args })),\n\t\t\t}),\n\t\t);\n\t\tfor (const tc of pendingToolCalls) {\n\t\t\tmessages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));\n\t\t}\n\t}\n\n\treturn messages;\n}\n\n/**\n * Converts eval `Record<string, ToolDef>` into `ToolDefinition[]` with mock `exec` functions,\n * suitable for passing to an `AgentFactory.createAgent()` call.\n */\nexport function toolDefsToDefinitions(defs: Record<string, ToolDef>): ToolDefinition[] {\n\tconst callCounts: Record<string, number> = {};\n\n\treturn Object.entries(defs).map(([name, def]) => {\n\t\tcallCounts[name] = 0;\n\n\t\treturn {\n\t\t\tname,\n\t\t\ttoolKit: 'eval-mock',\n\t\t\tdescription: def.description,\n\t\t\tinputSchema:\n\t\t\t\tdef.schema instanceof z.ZodObject\n\t\t\t\t\t? def.schema\n\t\t\t\t\t: z.object(\n\t\t\t\t\t\t\tObject.fromEntries(\n\t\t\t\t\t\t\t\tObject.entries(def.schema ?? {}).map(([key, val]) => {\n\t\t\t\t\t\t\t\t\tif (typeof val === 'string') return [key, z.string().describe(val)];\n\t\t\t\t\t\t\t\t\treturn [key, z.any()];\n\t\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t\t),\n\t\t\t\t\t\t),\n\t\t\texec: async (input: Record<string, unknown>) => {\n\t\t\t\tcallCounts[name]++;\n\t\t\t\tif (typeof def.response === 'function') {\n\t\t\t\t\treturn (def.response as (input: Record<string, unknown>, callCount: number) => string)(\n\t\t\t\t\t\tinput,\n\t\t\t\t\t\tcallCounts[name],\n\t\t\t\t\t);\n\t\t\t\t}\n\t\t\t\treturn typeof def.response === 'string' ? def.response : JSON.stringify(def.response);\n\t\t\t},\n\t\t} satisfies ToolDefinition;\n\t});\n}\n\n/**\n * Runs a real `Agent` as the eval target. Creates a fresh agent per invocation via the factory,\n * sends human messages, and converts the `AgentResult` to `{ messages: BaseMessage[] }`.\n */\nexport async function runAgentTarget(\n\tcreateTarget: CreateTargetFn,\n\tevalMessages: AgentMessage[],\n\textraToolDefs: Record<string, ToolDef>,\n): Promise<{ messages: BaseMessage[] }> {\n\tconst extraTools = Object.keys(extraToolDefs).length > 0 ? toolDefsToDefinitions(extraToolDefs) : [];\n\tconst agent = await createTarget(extraTools);\n\n\tconst result = await agent.run({\n\t\tthreadId: `eval_${Date.now()}_${Math.random().toString(36).slice(2)}`,\n\t\tmessages: evalMessages,\n\t});\n\n\treturn { messages: agentResultToMessages(evalMessages, result) };\n}\n","import { BaseLanguageModel } from '@langchain/core/language_models/base';\nimport { AzureChatOpenAI, ChatOpenAI } from '@langchain/openai';\n\nexport type LangchainOpenAIConfig = {\n\tapiKey: string;\n};\n\nexport type LangchainAzureResourceConfig = {\n\tapiKey: string;\n\tendpoint: string;\n\tmodels: {\n\t\tmodel: string;\n\t\tapiVersion: string;\n\t\tdeploymentName: string;\n\t}[];\n};\n\nexport type ResourceName = string;\n\nexport type LangchainModelConfig = {\n\topenai?: Record<string, LangchainOpenAIConfig>;\n\tazure?: Record<ResourceName, LangchainAzureResourceConfig>;\n};\n\nexport class LangchainModelResolver {\n\tconstructor(private config: LangchainModelConfig) {}\n\n\tresolve(modelString: string, tags?: string[]): BaseLanguageModel {\n\t\tconst parts = modelString.split(':');\n\n\t\tif (parts.length === 2) {\n\t\t\tconst [provider, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, 'default', modelName, tags);\n\t\t}\n\n\t\tif (parts.length === 3) {\n\t\t\tconst [provider, configName, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, configName, modelName, tags);\n\t\t}\n\n\t\tthrow new Error(\n\t\t\t'Model string must follow format \"provider:modelName\" (uses \"default\" config) or \"provider:configName:modelName\"',\n\t\t);\n\t}\n\n\tprivate resolveByProvider(\n\t\tprovider: string,\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t): BaseLanguageModel {\n\t\tswitch (provider) {\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveOpenAI(configName, modelName, tags);\n\t\t\tcase 'azure':\n\t\t\t\treturn this.resolveAzure(configName, modelName, tags);\n\t\t\tdefault:\n\t\t\t\tthrow new Error(`Unsupported model provider: ${provider}`);\n\t\t}\n\t}\n\n\tprivate resolveOpenAI(configName: string, modelName: string, tags?: string[]): ChatOpenAI {\n\t\tconst providerConfig = this.config.openai?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"openai\" is missing`);\n\t\t}\n\n\t\treturn new ChatOpenAI({\n\t\t\tapiKey: providerConfig.apiKey,\n\t\t\tmodelName: modelName,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate resolveAzure(resourceName: string, modelName: string, tags?: string[]): AzureChatOpenAI {\n\t\tconst resource = this.config.azure?.[resourceName];\n\t\tif (!resource) {\n\t\t\tthrow new Error(`Resource \"${resourceName}\" for provider \"azure\" is missing`);\n\t\t}\n\n\t\tconst modelEntry = resource.models.find((m) => m.model === modelName);\n\t\tif (!modelEntry) {\n\t\t\tthrow new Error(`Model \"${modelName}\" not found in Azure resource \"${resourceName}\"`);\n\t\t}\n\n\t\treturn new AzureChatOpenAI({\n\t\t\tmodel: modelEntry.model,\n\t\t\tazureOpenAIApiKey: resource.apiKey,\n\t\t\tazureOpenAIApiInstanceName: this.extractInstanceName(resource.endpoint),\n\t\t\tazureOpenAIApiDeploymentName: modelEntry.deploymentName,\n\t\t\tazureOpenAIApiVersion: modelEntry.apiVersion,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate extractInstanceName(endpoint: string): string {\n\t\ttry {\n\t\t\tconst url = new URL(endpoint);\n\t\t\treturn url.hostname.split('.')[0];\n\t\t} catch (e) {\n\t\t\treturn endpoint;\n\t\t}\n\t}\n}\n","import { Message } from '@core/agent.interface';\nimport { AIMessage, BaseMessage, HumanMessage, ToolMessage } from 'langchain';\n\nexport function convertToLangchainMessages(messages: Message[]): BaseMessage[] {\n\tconst result: BaseMessage[] = [];\n\tlet tcIdx = 0;\n\tlet pendingToolCallIds: string[] = [];\n\n\tfor (const msg of messages) {\n\t\tif (msg.role === 'human') {\n\t\t\tresult.push(\n\t\t\t\tnew HumanMessage({\n\t\t\t\t\tcontent: msg.content.map((c) => {\n\t\t\t\t\t\tif (c.type === 'image') {\n\t\t\t\t\t\t\treturn { type: 'image_url', image_url: { url: c.url } };\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn c;\n\t\t\t\t\t}) as any,\n\t\t\t\t}),\n\t\t\t);\n\t\t} else if (msg.role === 'ai') {\n\t\t\tif (msg.toolCalls && msg.toolCalls.length > 0) {\n\t\t\t\tpendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);\n\t\t\t\tresult.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: msg.content,\n\t\t\t\t\t\ttool_calls: msg.toolCalls.map((tc, i) => ({\n\t\t\t\t\t\t\tid: pendingToolCallIds[i],\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\t\t\t})),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t} else {\n\t\t\t\tresult.push(new AIMessage(msg.content));\n\t\t\t}\n\t\t} else if (msg.role === 'tool') {\n\t\t\tconst toolCallId = pendingToolCallIds.shift();\n\t\t\tif (!toolCallId)\n\t\t\t\tthrow new Error(`ToolMessage for \"${msg.name}\" without a preceding AiMessage with toolCalls`);\n\t\t\tresult.push(\n\t\t\t\tnew ToolMessage({\n\t\t\t\t\tcontent: msg.output,\n\t\t\t\t\ttool_call_id: toolCallId,\n\t\t\t\t\tname: msg.name,\n\t\t\t\t}),\n\t\t\t);\n\t\t}\n\t}\n\n\treturn result;\n}\n","import * as ls from 'langsmith/vitest';\nimport {\n\tcreateTrajectoryMatchEvaluator,\n\tcreateTrajectoryLLMAsJudge,\n\tTRAJECTORY_ACCURACY_PROMPT,\n} from 'agentevals';\nimport { createLanguageEvaluator } from './evaluators/language';\nimport { createResponseContentEvaluator } from './evaluators/response-content';\nimport { createNoToolCallsEvaluator } from './evaluators/no-tool-calls';\nimport { getEvalConfig } from './config';\n\n// ── Types ────────────────────────────────────────────────────────────\n\ntype EvaluatorFn = (args: {\n\toutputs: Record<string, any>;\n\treferenceOutputs: Record<string, any>;\n}) => Promise<any>;\n\ninterface ResolvedExpectation {\n\tevaluator: EvaluatorFn;\n\treferenceOutputs: Record<string, unknown>;\n}\n\n/** A factory that receives test context and returns an evaluator + its referenceOutputs. */\nexport type Expectation = (ctx: { message: string }) => ResolvedExpectation;\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nfunction withTrajectoryGuard(evaluator: any, key: string): EvaluatorFn {\n\treturn async ({ outputs, referenceOutputs }) => {\n\t\tif (!referenceOutputs?.referenceTrajectory) {\n\t\t\treturn { key, score: true, comment: 'No referenceTrajectory specified, skipping' };\n\t\t}\n\t\treturn evaluator({ outputs, referenceOutputs: referenceOutputs.referenceTrajectory });\n\t};\n}\n\nfunction buildTrajectory(message: string, toolNames: string[]): Record<string, unknown>[] {\n\tconst trajectory: Record<string, unknown>[] = [];\n\tlet tcIdx = 0;\n\n\ttrajectory.push({ role: 'user', content: message });\n\n\tfor (const name of toolNames) {\n\t\tconst id = `tc${++tcIdx}`;\n\t\ttrajectory.push({\n\t\t\trole: 'assistant',\n\t\t\tcontent: '',\n\t\t\ttool_calls: [{ function: { name, arguments: '{}' }, id, type: 'function' }],\n\t\t});\n\t\ttrajectory.push({ role: 'tool', content: '...', tool_call_id: id });\n\t}\n\n\ttrajectory.push({ role: 'assistant', content: '...' });\n\n\treturn trajectory;\n}\n\n// ── Expectation functions ────────────────────────────────────────────\n\n/**\n * Expect the agent to call tools in order (superset trajectory match).\n * Empty `[]` means the agent should answer directly without calling any tools.\n */\nexport function toolsCalled(tools: string[]): Expectation {\n\treturn (ctx) => ({\n\t\tevaluator: ls.wrapEvaluator(\n\t\t\twithTrajectoryGuard(\n\t\t\t\tcreateTrajectoryMatchEvaluator({ trajectoryMatchMode: 'superset', toolArgsMatchMode: 'ignore' }) as any,\n\t\t\t\t'trajectory_match',\n\t\t\t),\n\t\t),\n\t\treferenceOutputs: { referenceTrajectory: buildTrajectory(ctx.message, tools) },\n\t});\n}\n\n/**\n * Run an LLM-as-judge evaluator on the trajectory.\n * Requires `toolsCalled` in the same expect array.\n * Uses the globally configured evaluator model.\n */\nexport function llmJudge(): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(\n\t\t\t\twithTrajectoryGuard(\n\t\t\t\t\tcreateTrajectoryLLMAsJudge({ prompt: TRAJECTORY_ACCURACY_PROMPT, model }) as any,\n\t\t\t\t\t'trajectory_llm_judge',\n\t\t\t\t),\n\t\t\t),\n\t\t\treferenceOutputs: {},\n\t\t};\n\t};\n}\n\n/** Assert the agent made zero tool calls. */\nexport function noTools(): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createNoToolCallsEvaluator()),\n\t\treferenceOutputs: { expectNoToolCalls: true },\n\t});\n}\n\n/**\n * Assert the response is in the given language (ISO 639-1 code).\n * Uses the globally configured evaluator model for language detection.\n * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').\n */\nexport function respondsInLanguage(code: string): Expectation {\n\treturn () => {\n\t\tconst config = getEvalConfig();\n\t\tconst model = config.evaluatorModel;\n\t\treturn {\n\t\t\tevaluator: ls.wrapEvaluator(createLanguageEvaluator(config.modelConfig, model)),\n\t\t\treferenceOutputs: { expectedLanguage: code },\n\t\t};\n\t};\n}\n\n/** Assert the response contains all given strings. */\nexport function contains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseContains: strings },\n\t});\n}\n\n/** Assert the response does not contain any of the given strings. */\nexport function notContains(strings: string[]): Expectation {\n\treturn () => ({\n\t\tevaluator: ls.wrapEvaluator(createResponseContentEvaluator()),\n\t\treferenceOutputs: { responseMustNotContain: strings },\n\t});\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\nimport { BaseChatModel } from '@langchain/core/language_models/chat_models';\nimport { LangchainModelResolver, type LangchainModelConfig } from '../../runtime/langchain/model-resolver';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * is in the expected language. Uses a cheap LLM call for language detection.\n */\nexport function createLanguageEvaluator(modelConfig: LangchainModelConfig, model: string) {\n\tconst resolver = new LangchainModelResolver(modelConfig);\n\tconst judge = resolver.resolve(model) as BaseChatModel;\n\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst expectedLanguage = referenceOutputs?.expectedLanguage;\n\t\tif (!expectedLanguage) {\n\t\t\treturn { key: 'language_match', score: true, comment: 'No expected language specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'language_match', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content);\n\n\t\t// Use LLM to detect the language\n\t\tconst detection = await judge.invoke([\n\t\t\t{\n\t\t\t\trole: 'system',\n\t\t\t\tcontent: 'You are a language detection tool. Respond with ONLY the ISO 639-1 language code (e.g., \"en\", \"tr\", \"de\", \"fr\") of the text provided. Nothing else.',\n\t\t\t},\n\t\t\t{\n\t\t\t\trole: 'user',\n\t\t\t\tcontent: responseText,\n\t\t\t},\n\t\t]);\n\n\t\tconst detectedLanguage = (typeof detection.content === 'string' ? detection.content : '').trim().toLowerCase();\n\n\t\tconst matches = detectedLanguage === expectedLanguage.toLowerCase();\n\n\t\treturn {\n\t\t\tkey: 'language_match',\n\t\t\tscore: matches,\n\t\t\tcomment: matches\n\t\t\t\t? `Response language matches expected: ${expectedLanguage}`\n\t\t\t\t: `Expected \"${expectedLanguage}\" but detected \"${detectedLanguage}\"`,\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that checks whether the agent's final response\n * contains expected strings and doesn't contain forbidden strings.\n */\nexport function createResponseContentEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\tconst mustContain: string[] = referenceOutputs?.responseContains || [];\n\t\tconst mustNotContain: string[] = referenceOutputs?.responseMustNotContain || [];\n\n\t\tif (mustContain.length === 0 && mustNotContain.length === 0) {\n\t\t\treturn { key: 'response_content', score: true, comment: 'No content assertions specified, skipping' };\n\t\t}\n\n\t\t// Extract the last AI message text from the trajectory\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\t\tconst lastAiMessage = [...messages].reverse().find((m) => m instanceof AIMessage);\n\n\t\tif (!lastAiMessage) {\n\t\t\treturn { key: 'response_content', score: false, comment: 'No AI message found in trajectory' };\n\t\t}\n\n\t\tconst responseText = (typeof lastAiMessage.content === 'string' ? lastAiMessage.content : JSON.stringify(lastAiMessage.content)).toLowerCase();\n\n\t\tconst failures: string[] = [];\n\n\t\tfor (const expected of mustContain) {\n\t\t\tif (!responseText.includes(expected.toLowerCase())) {\n\t\t\t\tfailures.push(`Missing expected text: \"${expected}\"`);\n\t\t\t}\n\t\t}\n\n\t\tfor (const forbidden of mustNotContain) {\n\t\t\tif (responseText.includes(forbidden.toLowerCase())) {\n\t\t\t\tfailures.push(`Contains forbidden text: \"${forbidden}\"`);\n\t\t\t}\n\t\t}\n\n\t\tconst passed = failures.length === 0;\n\n\t\treturn {\n\t\t\tkey: 'response_content',\n\t\t\tscore: passed,\n\t\t\tcomment: passed ? 'All content assertions passed' : failures.join('; '),\n\t\t};\n\t};\n}\n","import { AIMessage, BaseMessage } from '@langchain/core/messages';\n\n/**\n * Creates a custom evaluator that asserts the agent made zero tool calls.\n * Useful for scenarios like greetings where the agent should just respond with text.\n */\nexport function createNoToolCallsEvaluator() {\n\treturn async ({\n\t\toutputs,\n\t\treferenceOutputs,\n\t}: {\n\t\toutputs: Record<string, any>;\n\t\treferenceOutputs?: Record<string, any>;\n\t}) => {\n\t\t// Only run this evaluator if the reference explicitly expects no tool calls\n\t\tif (referenceOutputs?.maxToolCalls !== 0 && referenceOutputs?.expectNoToolCalls !== true) {\n\t\t\treturn { key: 'no_tool_calls', score: true, comment: 'No tool call restriction specified, skipping' };\n\t\t}\n\n\t\tconst messages: BaseMessage[] = outputs.messages || [];\n\n\t\tconst toolCalls = messages\n\t\t\t.filter((m) => m instanceof AIMessage)\n\t\t\t.flatMap((m) => (m as AIMessage).tool_calls || []);\n\n\t\tconst passed = toolCalls.length === 0;\n\n\t\treturn {\n\t\t\tkey: 'no_tool_calls',\n\t\t\tscore: passed,\n\t\t\tcomment: passed\n\t\t\t\t? 'No tool calls made (as expected)'\n\t\t\t\t: `Agent made ${toolCalls.length} tool call(s): ${toolCalls.map((tc) => tc.name).join(', ')}`,\n\t\t};\n\t};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACmBA,IAAI,UAA6B;AAE1B,SAAS,eAAe,QAA0B;AACxD,YAAU;AACX;AAEO,SAAS,gBAA4B;AAC3C,MAAI,CAAC,SAAS;AACb,UAAM,IAAI,MAAM,wEAAwE;AAAA,EACzF;AACA,SAAO;AACR;;;AC9BA,SAAoB;;;ACCpB,mBAAqB;AACrB,sBAAmE;AACnE,iBAAkB;;;ACFlB,oBAA4C;AAuBrC,IAAM,yBAAN,MAA6B;AAAA,EACnC,YAAoB,QAA8B;AAA9B;AAAA,EAA+B;AAAA,EAEnD,QAAQ,aAAqB,MAAoC;AAChE,UAAM,QAAQ,YAAY,MAAM,GAAG;AAEnC,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,SAAS,IAAI;AAC9B,aAAO,KAAK,kBAAkB,UAAU,WAAW,WAAW,IAAI;AAAA,IACnE;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,YAAY,SAAS,IAAI;AAC1C,aAAO,KAAK,kBAAkB,UAAU,YAAY,WAAW,IAAI;AAAA,IACpE;AAEA,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAAA,EAEQ,kBACP,UACA,YACA,WACA,MACoB;AACpB,YAAQ,UAAU;AAAA,MACjB,KAAK;AACJ,eAAO,KAAK,cAAc,YAAY,WAAW,IAAI;AAAA,MACtD,KAAK;AACJ,eAAO,KAAK,aAAa,YAAY,WAAW,IAAI;AAAA,MACrD;AACC,cAAM,IAAI,MAAM,+BAA+B,QAAQ,EAAE;AAAA,IAC3D;AAAA,EACD;AAAA,EAEQ,cAAc,YAAoB,WAAmB,MAA6B;AACzF,UAAM,iBAAiB,KAAK,OAAO,SAAS,UAAU;AACtD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,oCAAoC;AAAA,IACjF;AAEA,WAAO,IAAI,yBAAW;AAAA,MACrB,QAAQ,eAAe;AAAA,MACvB;AAAA,MACA;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,aAAa,cAAsB,WAAmB,MAAkC;AAC/F,UAAM,WAAW,KAAK,OAAO,QAAQ,YAAY;AACjD,QAAI,CAAC,UAAU;AACd,YAAM,IAAI,MAAM,aAAa,YAAY,mCAAmC;AAAA,IAC7E;AAEA,UAAM,aAAa,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,UAAU,SAAS;AACpE,QAAI,CAAC,YAAY;AAChB,YAAM,IAAI,MAAM,UAAU,SAAS,kCAAkC,YAAY,GAAG;AAAA,IACrF;AAEA,WAAO,IAAI,8BAAgB;AAAA,MAC1B,OAAO,WAAW;AAAA,MAClB,mBAAmB,SAAS;AAAA,MAC5B,4BAA4B,KAAK,oBAAoB,SAAS,QAAQ;AAAA,MACtE,8BAA8B,WAAW;AAAA,MACzC,uBAAuB,WAAW;AAAA,MAClC;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,oBAAoB,UAA0B;AACrD,QAAI;AACH,YAAM,MAAM,IAAI,IAAI,QAAQ;AAC5B,aAAO,IAAI,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,IACjC,SAAS,GAAG;AACX,aAAO;AAAA,IACR;AAAA,EACD;AACD;;;ACtGA,uBAAkE;AAE3D,SAAS,2BAA2B,UAAoC;AAC9E,QAAM,SAAwB,CAAC;AAC/B,MAAI,QAAQ;AACZ,MAAI,qBAA+B,CAAC;AAEpC,aAAW,OAAO,UAAU;AAC3B,QAAI,IAAI,SAAS,SAAS;AACzB,aAAO;AAAA,QACN,IAAI,8BAAa;AAAA,UAChB,SAAS,IAAI,QAAQ,IAAI,CAAC,MAAM;AAC/B,gBAAI,EAAE,SAAS,SAAS;AACvB,qBAAO,EAAE,MAAM,aAAa,WAAW,EAAE,KAAK,EAAE,IAAI,EAAE;AAAA,YACvD;AACA,mBAAO;AAAA,UACR,CAAC;AAAA,QACF,CAAC;AAAA,MACF;AAAA,IACD,WAAW,IAAI,SAAS,MAAM;AAC7B,UAAI,IAAI,aAAa,IAAI,UAAU,SAAS,GAAG;AAC9C,6BAAqB,IAAI,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,EAAE;AAC5D,eAAO;AAAA,UACN,IAAI,2BAAU;AAAA,YACb,SAAS,IAAI;AAAA,YACb,YAAY,IAAI,UAAU,IAAI,CAAC,IAAI,OAAO;AAAA,cACzC,IAAI,mBAAmB,CAAC;AAAA,cACxB,MAAM,GAAG;AAAA,cACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,YAC1C,EAAE;AAAA,UACH,CAAC;AAAA,QACF;AAAA,MACD,OAAO;AACN,eAAO,KAAK,IAAI,2BAAU,IAAI,OAAO,CAAC;AAAA,MACvC;AAAA,IACD,WAAW,IAAI,SAAS,QAAQ;AAC/B,YAAM,aAAa,mBAAmB,MAAM;AAC5C,UAAI,CAAC;AACJ,cAAM,IAAI,MAAM,oBAAoB,IAAI,IAAI,gDAAgD;AAC7F,aAAO;AAAA,QACN,IAAI,6BAAY;AAAA,UACf,SAAS,IAAI;AAAA,UACb,cAAc;AAAA,UACd,MAAM,IAAI;AAAA,QACX,CAAC;AAAA,MACF;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;;;AFtBA,IAAM,kBAAkB;AAMjB,SAAS,iBAAiB,aAAoC,aAAsB;AAC1F,SAAO,OAAO,WAAkE;AAC/E,UAAM,SAAS,eAAe,cAAc,EAAE,aAAa,OAAO,YAAY,IAAI,cAAc;AAChG,QAAI,CAAC,OAAO,OAAO;AAClB,YAAM,IAAI,MAAM,iFAAiF;AAAA,IAClG;AACA,UAAM,WAAW,IAAI,uBAAuB,OAAO,WAAW;AAC9D,UAAM,QAAQ,SAAS,QAAQ,OAAO,KAAK;AAG3C,UAAM,iBAAyC,CAAC;AAGhD,UAAM,iBAAiB,OAAO,MAAM,IAAI,CAAC,aAAa;AACrD,qBAAe,SAAS,IAAI,IAAI;AAEhC,iBAAO;AAAA,QACN,OAAO,cAAuC;AAC7C,yBAAe,SAAS,IAAI;AAC5B,cAAI,OAAO,SAAS,aAAa,YAAY;AAC5C,mBAAO,SAAS,SAAS,WAAW,eAAe,SAAS,IAAI,CAAC;AAAA,UAClE;AACA,iBAAO,SAAS;AAAA,QACjB;AAAA,QACA;AAAA,UACC,MAAM,SAAS;AAAA,UACf,aAAa,SAAS;AAAA,UACtB,QACC,SAAS,kBAAkB,aAAE,YAC1B,SAAS,SACT,aAAE;AAAA,YACF,OAAO;AAAA,cACN,OAAO,QAAQ,SAAS,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,MAAM;AACnD,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,GAAG,CAAC;AAClE,oBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,OAAO,GAAG,CAAC,CAAC;AAC1E,uBAAO,CAAC,KAAK,aAAE,IAAI,CAAC;AAAA,cACrB,CAAC;AAAA,YACF;AAAA,UACD;AAAA,QACJ;AAAA,MACD;AAAA,IACD,CAAC;AAED,UAAM,aAAa,eAAe,SAAS,IAAI,MAAM,UAAW,cAAc,IAAI;AAElF,UAAM,WAA0B,CAAC;AAEjC,QAAI,OAAO,cAAc;AACxB,eAAS,KAAK,IAAI,8BAAc,OAAO,YAAY,CAAC;AAAA,IACrD;AAGA,aAAS,KAAK,GAAG,2BAA2B,OAAO,QAAQ,CAAC;AAG5D,QAAI,YAAY;AAChB,WAAO,YAAY,iBAAiB;AACnC;AAEA,YAAM,WAAW,MAAM,WAAW,OAAO,QAAQ;AACjD,eAAS,KAAK,QAAuB;AAErC,YAAM,YAAY;AAClB,UAAI,CAAC,UAAU,cAAc,UAAU,WAAW,WAAW,GAAG;AAC/D;AAAA,MACD;AAGA,iBAAW,MAAM,UAAU,YAAY;AACtC,cAAM,WAAW,eAAe,KAAK,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI;AAC9D,YAAI,UAAU;AACb,gBAAM,SAAS,MAAM,SAAS,OAAO,GAAG,IAAI;AAC5C,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,OAAO,WAAW,WAAW,SAAS,KAAK,UAAU,MAAM;AAAA,cACpE,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD,OAAO;AACN,mBAAS;AAAA,YACR,IAAI,4BAAY;AAAA,cACf,SAAS,SAAS,GAAG,IAAI;AAAA,cACzB,cAAc,GAAG;AAAA,cACjB,MAAM,GAAG;AAAA,YACV,CAAC;AAAA,UACF;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAEA,WAAO,EAAE,SAAS;AAAA,EACnB;AACD;AAWO,SAAS,sBAAsB,eAA+B,QAAoC;AAExG,QAAM,WAA0B,2BAA2B,aAAa;AAGxE,MAAI,mBAAkG,CAAC;AAEvG,aAAW,SAAS,OAAO,SAAS;AACnC,QAAI,MAAM,SAAS,aAAa;AAC/B,YAAM,KAAK;AACX,uBAAiB,KAAK;AAAA,QACrB,IAAI,GAAG;AAAA,QACP,MAAM,GAAG;AAAA,QACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,QACzC,QAAQ,GAAG;AAAA,MACZ,CAAC;AAAA,IACF,WAAW,MAAM,SAAS,QAAQ;AAEjC,UAAI,iBAAiB,SAAS,GAAG;AAChC,iBAAS;AAAA,UACR,IAAI,0BAAU;AAAA,YACb,SAAS;AAAA,YACT,YAAY,iBAAiB,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,IAAI,MAAM,GAAG,MAAM,MAAM,GAAG,KAAK,EAAE;AAAA,UACvF,CAAC;AAAA,QACF;AACA,mBAAW,MAAM,kBAAkB;AAClC,mBAAS,KAAK,IAAI,4BAAY,EAAE,SAAS,GAAG,QAAQ,cAAc,GAAG,IAAI,MAAM,GAAG,KAAK,CAAC,CAAC;AAAA,QAC1F;AACA,2BAAmB,CAAC;AAAA,MACrB;AACA,eAAS,KAAK,IAAI,0BAAU,MAAM,MAAM,CAAC;AAAA,IAC1C;AAAA,EACD;AAGA,MAAI,iBAAiB,SAAS,GAAG;AAChC,aAAS;AAAA,MACR,IAAI,0BAAU;AAAA,QACb,SAAS;AAAA,QACT,YAAY,iBAAiB,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,IAAI,MAAM,GAAG,MAAM,MAAM,GAAG,KAAK,EAAE;AAAA,MACvF,CAAC;AAAA,IACF;AACA,eAAW,MAAM,kBAAkB;AAClC,eAAS,KAAK,IAAI,4BAAY,EAAE,SAAS,GAAG,QAAQ,cAAc,GAAG,IAAI,MAAM,GAAG,KAAK,CAAC,CAAC;AAAA,IAC1F;AAAA,EACD;AAEA,SAAO;AACR;AAMO,SAAS,sBAAsB,MAAiD;AACtF,QAAM,aAAqC,CAAC;AAE5C,SAAO,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,GAAG,MAAM;AAChD,eAAW,IAAI,IAAI;AAEnB,WAAO;AAAA,MACN;AAAA,MACA,SAAS;AAAA,MACT,aAAa,IAAI;AAAA,MACjB,aACC,IAAI,kBAAkB,aAAE,YACrB,IAAI,SACJ,aAAE;AAAA,QACF,OAAO;AAAA,UACN,OAAO,QAAQ,IAAI,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,MAAM;AACpD,gBAAI,OAAO,QAAQ,SAAU,QAAO,CAAC,KAAK,aAAE,OAAO,EAAE,SAAS,GAAG,CAAC;AAClE,mBAAO,CAAC,KAAK,aAAE,IAAI,CAAC;AAAA,UACrB,CAAC;AAAA,QACF;AAAA,MACD;AAAA,MACH,MAAM,OAAO,UAAmC;AAC/C,mBAAW,IAAI;AACf,YAAI,OAAO,IAAI,aAAa,YAAY;AACvC,iBAAQ,IAAI;AAAA,YACX;AAAA,YACA,WAAW,IAAI;AAAA,UAChB;AAAA,QACD;AACA,eAAO,OAAO,IAAI,aAAa,WAAW,IAAI,WAAW,KAAK,UAAU,IAAI,QAAQ;AAAA,MACrF;AAAA,IACD;AAAA,EACD,CAAC;AACF;AAMA,eAAsB,eACrB,cACA,cACA,eACuC;AACvC,QAAM,aAAa,OAAO,KAAK,aAAa,EAAE,SAAS,IAAI,sBAAsB,aAAa,IAAI,CAAC;AACnG,QAAM,QAAQ,MAAM,aAAa,UAAU;AAE3C,QAAM,SAAS,MAAM,MAAM,IAAI;AAAA,IAC9B,UAAU,QAAQ,KAAK,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,CAAC,CAAC;AAAA,IACnE,UAAU;AAAA,EACX,CAAC;AAED,SAAO,EAAE,UAAU,sBAAsB,cAAc,MAAM,EAAE;AAChE;;;ADvOO,SAAS,MAAM,SAA+B;AACpD,SAAO,EAAE,MAAM,SAAS,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,QAAQ,CAAC,EAAE;AACpE;AAEO,SAAS,GAAG,SAAiB,WAAiC;AACpE,SAAO,EAAE,MAAM,MAAM,SAAS,GAAI,YAAY,EAAE,WAAW,UAAU,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE,EAAE,IAAI,CAAC,EAAG;AACxG;AAEO,SAAS,WAAW,MAAc,QAA6B;AACrE,SAAO,EAAE,MAAM,QAAQ,MAAM,OAAO;AACrC;AA0CO,SAAS,cACf,OACA,YAAiD,CAAC,GACxB;AAC1B,SAAO,OAAO;AAAA,IACb,MAAM,IAAI,CAAC,SAAS;AAAA,MACnB,KAAK;AAAA,MACL;AAAA,QACC,aAAa,KAAK;AAAA,QAClB,QAAQ,KAAK;AAAA,QACb,UAAU,UAAU,KAAK,IAAI,KAAK;AAAA,MACnC;AAAA,IACD,CAAC;AAAA,EACF;AACD;AAEA,SAAS,YAAY,MAA8C;AAClE,SAAO,OAAO,QAAQ,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,GAAG,OAAO;AAAA,IACjD;AAAA,IACA,aAAa,IAAI;AAAA,IACjB,QAAQ,IAAI,UAAU,CAAC;AAAA,IACvB,UACC,OAAO,IAAI,aAAa,aACpB,IAAI,WACL,OAAO,IAAI,aAAa,WACvB,IAAI,WACJ,KAAK,UAAU,IAAI,QAAQ;AAAA,EACjC,EAAE;AACH;AAGA,SAAS,oBAAoB,OAAiD;AAC7E,SAAO,MAAM,IAAI,CAAC,OAAO;AAAA,IACxB,GAAG;AAAA,IACH,QAAQ,EAAE,kBAAkB,UAAU,WAAW,EAAE,SAAS,gBAAgB,EAAE;AAAA,IAC9E,UAAU,OAAO,EAAE,aAAa,aAAa,eAAe,EAAE;AAAA,EAC/D,EAAE;AACH;AAEA,SAAS,iBAAiB,UAA6B;AACtD,WAAS,IAAI,SAAS,SAAS,GAAG,KAAK,GAAG,KAAK;AAC9C,UAAM,MAAM,SAAS,CAAC;AACtB,QAAI,IAAI,SAAS,SAAS;AACzB,YAAM,YAAY,IAAI,QAAQ,KAAK,CAAC,MAAM,EAAE,SAAS,MAAM;AAC3D,aAAO,YAAY,UAAU,OAAO;AAAA,IACrC;AAAA,EACD;AACA,SAAO;AACR;AAQA,SAAS,mBAAmB,QAA+B;AAC1D,MAAI,OAAO,OAAO,WAAW,WAAY,QAAO,OAAO;AACvD,QAAM,aAAa,cAAc;AACjC,MAAI,CAAC,WAAW,SAAS,OAAO,OAAO,WAAW,UAAU;AAC3D,UAAM,IAAI,MAAM,iFAAiF;AAAA,EAClG;AACA,QAAM,QAAQ,OAAO,OAAO,WAAW,WAAW,OAAO,SAAS,WAAW;AAC7E,SAAO,iBAAiB,WAAW,aAAa,KAAK;AACtD;AAEA,SAAS,oBAAoB,QAAiD;AAC7E,SAAO,OAAO,gBAAgB,cAAc,EAAE;AAC/C;AAEO,SAAS,YAAY,MAAc,QAA2B;AACpE,QAAM,aAAa,OAAO,SAAS,CAAC;AACpC,QAAM,eAAe,OAAO,SAAS,SAAY,oBAAoB,MAAM;AAE3E,EAAG,YAAS,MAAM,MAAM;AACvB,eAAW,MAAM,OAAO,OAAO;AAC9B,YAAM,WAAW,GAAG,QAAQ,iBAAiB,GAAG,QAAQ;AACxD,YAAM,eAAe,GAAG,SAAS;AACjC,YAAM,QAAQ,YAAY,YAAY;AACtC,YAAM,MAAM,EAAE,SAAS,iBAAiB,GAAG,QAAQ,EAAE;AAErD,YAAM,WAAW,GAAG,OAAO,IAAI,CAAC,QAAQ,IAAI,GAAG,CAAC;AAChD,YAAM,aAAa,SAAS,IAAI,CAAC,MAAM,EAAE,SAAS;AAClD,YAAM,mBAAmB,OAAO,OAAO,CAAC,GAAG,GAAG,SAAS,IAAI,CAAC,MAAM,EAAE,gBAAgB,CAAC;AAErF,MAAG;AAAA,QACF;AAAA,QACA;AAAA,UACC,QAAQ;AAAA,YACP,UAAU,GAAG;AAAA,YACb,OAAO,oBAAoB,KAAK;AAAA,UACjC;AAAA,UACA;AAAA,QACD;AAAA,QACA,OAAO,EAAE,kBAAkB,OAAO,MAAM;AACvC,cAAI;AAEJ,cAAI,cAAc;AAEjB,qBAAS,MAAM,eAAe,cAAc,GAAG,UAAU,YAAY;AAAA,UACtE,OAAO;AAEN,kBAAM,SAAS,mBAAmB,MAAM;AACxC,kBAAM,eAAe,cAAc,EAAE;AACrC,kBAAM,eAAe,GAAG,gBAAgB,OAAO,gBAAgB;AAC/D,qBAAS,MAAM,OAAO;AAAA,cACrB,UAAU,GAAG;AAAA,cACb;AAAA,cACA,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC;AAAA,YACxC,CAAC;AAAA,UACF;AAEA,UAAG,cAAW,MAAM;AACpB,qBAAW,aAAa,YAAY;AACnC,kBAAM,UAAU,EAAE,SAAS,QAAQ,kBAAkB,UAAU,CAAC,EAAE,CAAC;AAAA,UACpE;AAAA,QACD;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC;AACF;;;AI3LA,IAAAA,MAAoB;AACpB,wBAIO;;;ACLP,IAAAC,mBAAuC;AAQhC,SAAS,wBAAwB,aAAmC,OAAe;AACzF,QAAM,WAAW,IAAI,uBAAuB,WAAW;AACvD,QAAM,QAAQ,SAAS,QAAQ,KAAK;AAEpC,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,mBAAmB,kBAAkB;AAC3C,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,KAAK,kBAAkB,OAAO,MAAM,SAAS,2CAA2C;AAAA,IAClG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,kBAAkB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC5F;AAEA,UAAM,eAAe,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO;AAG7H,UAAM,YAAY,MAAM,MAAM,OAAO;AAAA,MACpC;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,MACA;AAAA,QACC,MAAM;AAAA,QACN,SAAS;AAAA,MACV;AAAA,IACD,CAAC;AAED,UAAM,oBAAoB,OAAO,UAAU,YAAY,WAAW,UAAU,UAAU,IAAI,KAAK,EAAE,YAAY;AAE7G,UAAM,UAAU,qBAAqB,iBAAiB,YAAY;AAElE,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,UACN,uCAAuC,gBAAgB,KACvD,aAAa,gBAAgB,mBAAmB,gBAAgB;AAAA,IACpE;AAAA,EACD;AACD;;;AC1DA,IAAAC,mBAAuC;AAMhC,SAAS,iCAAiC;AAChD,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AACL,UAAM,cAAwB,kBAAkB,oBAAoB,CAAC;AACrE,UAAM,iBAA2B,kBAAkB,0BAA0B,CAAC;AAE9E,QAAI,YAAY,WAAW,KAAK,eAAe,WAAW,GAAG;AAC5D,aAAO,EAAE,KAAK,oBAAoB,OAAO,MAAM,SAAS,4CAA4C;AAAA,IACrG;AAGA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AACrD,UAAM,gBAAgB,CAAC,GAAG,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,MAAM,aAAa,0BAAS;AAEhF,QAAI,CAAC,eAAe;AACnB,aAAO,EAAE,KAAK,oBAAoB,OAAO,OAAO,SAAS,oCAAoC;AAAA,IAC9F;AAEA,UAAM,gBAAgB,OAAO,cAAc,YAAY,WAAW,cAAc,UAAU,KAAK,UAAU,cAAc,OAAO,GAAG,YAAY;AAE7I,UAAM,WAAqB,CAAC;AAE5B,eAAW,YAAY,aAAa;AACnC,UAAI,CAAC,aAAa,SAAS,SAAS,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,2BAA2B,QAAQ,GAAG;AAAA,MACrD;AAAA,IACD;AAEA,eAAW,aAAa,gBAAgB;AACvC,UAAI,aAAa,SAAS,UAAU,YAAY,CAAC,GAAG;AACnD,iBAAS,KAAK,6BAA6B,SAAS,GAAG;AAAA,MACxD;AAAA,IACD;AAEA,UAAM,SAAS,SAAS,WAAW;AAEnC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SAAS,kCAAkC,SAAS,KAAK,IAAI;AAAA,IACvE;AAAA,EACD;AACD;;;ACrDA,IAAAC,mBAAuC;AAMhC,SAAS,6BAA6B;AAC5C,SAAO,OAAO;AAAA,IACb;AAAA,IACA;AAAA,EACD,MAGM;AAEL,QAAI,kBAAkB,iBAAiB,KAAK,kBAAkB,sBAAsB,MAAM;AACzF,aAAO,EAAE,KAAK,iBAAiB,OAAO,MAAM,SAAS,+CAA+C;AAAA,IACrG;AAEA,UAAM,WAA0B,QAAQ,YAAY,CAAC;AAErD,UAAM,YAAY,SAChB,OAAO,CAAC,MAAM,aAAa,0BAAS,EACpC,QAAQ,CAAC,MAAO,EAAgB,cAAc,CAAC,CAAC;AAElD,UAAM,SAAS,UAAU,WAAW;AAEpC,WAAO;AAAA,MACN,KAAK;AAAA,MACL,OAAO;AAAA,MACP,SAAS,SACN,qCACA,cAAc,UAAU,MAAM,kBAAkB,UAAU,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,IAC7F;AAAA,EACD;AACD;;;AHPA,SAAS,oBAAoB,WAAgB,KAA0B;AACtE,SAAO,OAAO,EAAE,SAAS,iBAAiB,MAAM;AAC/C,QAAI,CAAC,kBAAkB,qBAAqB;AAC3C,aAAO,EAAE,KAAK,OAAO,MAAM,SAAS,6CAA6C;AAAA,IAClF;AACA,WAAO,UAAU,EAAE,SAAS,kBAAkB,iBAAiB,oBAAoB,CAAC;AAAA,EACrF;AACD;AAEA,SAAS,gBAAgB,SAAiB,WAAgD;AACzF,QAAM,aAAwC,CAAC;AAC/C,MAAI,QAAQ;AAEZ,aAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,QAAQ,CAAC;AAElD,aAAW,QAAQ,WAAW;AAC7B,UAAM,KAAK,KAAK,EAAE,KAAK;AACvB,eAAW,KAAK;AAAA,MACf,MAAM;AAAA,MACN,SAAS;AAAA,MACT,YAAY,CAAC,EAAE,UAAU,EAAE,MAAM,WAAW,KAAK,GAAG,IAAI,MAAM,WAAW,CAAC;AAAA,IAC3E,CAAC;AACD,eAAW,KAAK,EAAE,MAAM,QAAQ,SAAS,OAAO,cAAc,GAAG,CAAC;AAAA,EACnE;AAEA,aAAW,KAAK,EAAE,MAAM,aAAa,SAAS,MAAM,CAAC;AAErD,SAAO;AACR;AAQO,SAAS,YAAY,OAA8B;AACzD,SAAO,CAAC,SAAS;AAAA,IAChB,WAAc;AAAA,MACb;AAAA,YACC,kDAA+B,EAAE,qBAAqB,YAAY,mBAAmB,SAAS,CAAC;AAAA,QAC/F;AAAA,MACD;AAAA,IACD;AAAA,IACA,kBAAkB,EAAE,qBAAqB,gBAAgB,IAAI,SAAS,KAAK,EAAE;AAAA,EAC9E;AACD;AAOO,SAAS,WAAwB;AACvC,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO;AACrB,WAAO;AAAA,MACN,WAAc;AAAA,QACb;AAAA,cACC,8CAA2B,EAAE,QAAQ,8CAA4B,MAAM,CAAC;AAAA,UACxE;AAAA,QACD;AAAA,MACD;AAAA,MACA,kBAAkB,CAAC;AAAA,IACpB;AAAA,EACD;AACD;AAGO,SAAS,UAAuB;AACtC,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,2BAA2B,CAAC;AAAA,IACxD,kBAAkB,EAAE,mBAAmB,KAAK;AAAA,EAC7C;AACD;AAOO,SAAS,mBAAmB,MAA2B;AAC7D,SAAO,MAAM;AACZ,UAAM,SAAS,cAAc;AAC7B,UAAM,QAAQ,OAAO;AACrB,WAAO;AAAA,MACN,WAAc,kBAAc,wBAAwB,OAAO,aAAa,KAAK,CAAC;AAAA,MAC9E,kBAAkB,EAAE,kBAAkB,KAAK;AAAA,IAC5C;AAAA,EACD;AACD;AAGO,SAAS,SAAS,SAAgC;AACxD,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,kBAAkB,QAAQ;AAAA,EAC/C;AACD;AAGO,SAAS,YAAY,SAAgC;AAC3D,SAAO,OAAO;AAAA,IACb,WAAc,kBAAc,+BAA+B,CAAC;AAAA,IAC5D,kBAAkB,EAAE,wBAAwB,QAAQ;AAAA,EACrD;AACD;","names":["ls","import_messages","import_messages","import_messages"]}
@@ -1,6 +1,7 @@
1
1
  import {
2
- LangchainModelResolver
3
- } from "../chunk-LEEZCLZM.mjs";
2
+ LangchainModelResolver,
3
+ convertToLangchainMessages
4
+ } from "../chunk-NHWEEBN2.mjs";
4
5
 
5
6
  // src/eval/config.ts
6
7
  var _config = null;
@@ -19,52 +20,15 @@ import * as ls from "langsmith/vitest";
19
20
 
20
21
  // src/eval/target.ts
21
22
  import { tool } from "@langchain/core/tools";
22
- import { AIMessage, HumanMessage, SystemMessage, ToolMessage } from "@langchain/core/messages";
23
+ import { AIMessage, SystemMessage, ToolMessage } from "@langchain/core/messages";
23
24
  import { z } from "zod";
24
25
  var MAX_AGENT_LOOPS = 10;
25
- function convertMessages(msgs) {
26
- const result = [];
27
- let tcIdx = 0;
28
- let pendingToolCalls = [];
29
- for (const msg of msgs) {
30
- if (msg.role === "human") {
31
- result.push(new HumanMessage(msg.content));
32
- } else if (msg.role === "ai") {
33
- if (msg.toolCalls && msg.toolCalls.length > 0) {
34
- pendingToolCalls = msg.toolCalls.map((name) => ({
35
- id: `hist_tc${++tcIdx}`,
36
- name
37
- }));
38
- result.push(
39
- new AIMessage({
40
- content: msg.content,
41
- tool_calls: pendingToolCalls.map((tc) => ({
42
- id: tc.id,
43
- name: tc.name,
44
- args: {}
45
- }))
46
- })
47
- );
48
- } else {
49
- result.push(new AIMessage(msg.content));
50
- }
51
- } else if (msg.role === "tool") {
52
- const tc = pendingToolCalls.shift();
53
- if (!tc) throw new Error("toolResult() without a preceding ai() with toolCalls");
54
- result.push(
55
- new ToolMessage({
56
- content: msg.content,
57
- tool_call_id: tc.id,
58
- name: tc.name
59
- })
60
- );
61
- }
62
- }
63
- return result;
64
- }
65
26
  function createEvalTarget(modelConfig, modelString) {
66
27
  return async (inputs) => {
67
28
  const config = modelConfig && modelString ? { modelConfig, model: modelString } : getEvalConfig();
29
+ if (!config.model) {
30
+ throw new Error("model is required for model-based target. Add it to your configureEvals() call.");
31
+ }
68
32
  const resolver = new LangchainModelResolver(config.modelConfig);
69
33
  const model = resolver.resolve(config.model);
70
34
  const toolCallCounts = {};
@@ -81,7 +45,7 @@ function createEvalTarget(modelConfig, modelString) {
81
45
  {
82
46
  name: mockTool.name,
83
47
  description: mockTool.description,
84
- schema: z.object(
48
+ schema: mockTool.schema instanceof z.ZodObject ? mockTool.schema : z.object(
85
49
  Object.fromEntries(
86
50
  Object.entries(mockTool.schema).map(([key, val]) => {
87
51
  if (typeof val === "string") return [key, z.string().describe(val)];
@@ -98,8 +62,7 @@ function createEvalTarget(modelConfig, modelString) {
98
62
  if (inputs.systemPrompt) {
99
63
  messages.push(new SystemMessage(inputs.systemPrompt));
100
64
  }
101
- const inputMessages = inputs.messages ?? (inputs.userMessages ?? []).map((content) => ({ role: "human", content }));
102
- messages.push(...convertMessages(inputMessages));
65
+ messages.push(...convertToLangchainMessages(inputs.messages));
103
66
  let loopCount = 0;
104
67
  while (loopCount < MAX_AGENT_LOOPS) {
105
68
  loopCount++;
@@ -134,16 +97,107 @@ function createEvalTarget(modelConfig, modelString) {
134
97
  return { messages };
135
98
  };
136
99
  }
100
+ function agentResultToMessages(inputMessages, result) {
101
+ const messages = convertToLangchainMessages(inputMessages);
102
+ let pendingToolCalls = [];
103
+ for (const block of result.content) {
104
+ if (block.type === "tool_call") {
105
+ const tc = block;
106
+ pendingToolCalls.push({
107
+ id: tc.toolCallId,
108
+ name: tc.name,
109
+ args: tc.input ? JSON.parse(tc.input) : {},
110
+ output: tc.output
111
+ });
112
+ } else if (block.type === "text") {
113
+ if (pendingToolCalls.length > 0) {
114
+ messages.push(
115
+ new AIMessage({
116
+ content: "",
117
+ tool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args }))
118
+ })
119
+ );
120
+ for (const tc of pendingToolCalls) {
121
+ messages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));
122
+ }
123
+ pendingToolCalls = [];
124
+ }
125
+ messages.push(new AIMessage(block.output));
126
+ }
127
+ }
128
+ if (pendingToolCalls.length > 0) {
129
+ messages.push(
130
+ new AIMessage({
131
+ content: "",
132
+ tool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args }))
133
+ })
134
+ );
135
+ for (const tc of pendingToolCalls) {
136
+ messages.push(new ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));
137
+ }
138
+ }
139
+ return messages;
140
+ }
141
+ function toolDefsToDefinitions(defs) {
142
+ const callCounts = {};
143
+ return Object.entries(defs).map(([name, def]) => {
144
+ callCounts[name] = 0;
145
+ return {
146
+ name,
147
+ toolKit: "eval-mock",
148
+ description: def.description,
149
+ inputSchema: def.schema instanceof z.ZodObject ? def.schema : z.object(
150
+ Object.fromEntries(
151
+ Object.entries(def.schema ?? {}).map(([key, val]) => {
152
+ if (typeof val === "string") return [key, z.string().describe(val)];
153
+ return [key, z.any()];
154
+ })
155
+ )
156
+ ),
157
+ exec: async (input) => {
158
+ callCounts[name]++;
159
+ if (typeof def.response === "function") {
160
+ return def.response(
161
+ input,
162
+ callCounts[name]
163
+ );
164
+ }
165
+ return typeof def.response === "string" ? def.response : JSON.stringify(def.response);
166
+ }
167
+ };
168
+ });
169
+ }
170
+ async function runAgentTarget(createTarget, evalMessages, extraToolDefs) {
171
+ const extraTools = Object.keys(extraToolDefs).length > 0 ? toolDefsToDefinitions(extraToolDefs) : [];
172
+ const agent = await createTarget(extraTools);
173
+ const result = await agent.run({
174
+ threadId: `eval_${Date.now()}_${Math.random().toString(36).slice(2)}`,
175
+ messages: evalMessages
176
+ });
177
+ return { messages: agentResultToMessages(evalMessages, result) };
178
+ }
137
179
 
138
180
  // src/eval/suite.ts
139
181
  function human(content) {
140
- return { role: "human", content };
182
+ return { role: "human", content: [{ type: "text", text: content }] };
141
183
  }
142
184
  function ai(content, toolCalls) {
143
- return { role: "ai", content, ...toolCalls ? { toolCalls } : {} };
185
+ return { role: "ai", content, ...toolCalls ? { toolCalls: toolCalls.map((name) => ({ name })) } : {} };
186
+ }
187
+ function toolResult(name, output) {
188
+ return { role: "tool", name, output };
144
189
  }
145
- function toolResult(content) {
146
- return { role: "tool", content };
190
+ function fromToolSpecs(specs, responses = {}) {
191
+ return Object.fromEntries(
192
+ specs.map((spec) => [
193
+ spec.name,
194
+ {
195
+ description: spec.description,
196
+ schema: spec.inputSchema,
197
+ response: responses[spec.name] ?? ""
198
+ }
199
+ ])
200
+ );
147
201
  }
148
202
  function toMockTools(defs) {
149
203
  return Object.entries(defs).map(([name, def]) => ({
@@ -156,51 +210,67 @@ function toMockTools(defs) {
156
210
  function toSerializableTools(tools) {
157
211
  return tools.map((t) => ({
158
212
  ...t,
213
+ schema: t.schema instanceof Object && "shape" in t.schema ? "<ZodObject>" : t.schema,
159
214
  response: typeof t.response === "function" ? "<function>" : t.response
160
215
  }));
161
216
  }
162
217
  function lastHumanContent(messages) {
163
218
  for (let i = messages.length - 1; i >= 0; i--) {
164
- if (messages[i].role === "human") return messages[i].content;
219
+ const msg = messages[i];
220
+ if (msg.role === "human") {
221
+ const textBlock = msg.content.find((c) => c.type === "text");
222
+ return textBlock ? textBlock.text : "";
223
+ }
165
224
  }
166
- return messages[0]?.content ?? "";
225
+ return "";
167
226
  }
168
- function resolveTarget(config) {
227
+ function resolveModelTarget(config) {
169
228
  if (typeof config.target === "function") return config.target;
170
229
  const evalConfig = getEvalConfig();
230
+ if (!evalConfig.model && typeof config.target !== "string") {
231
+ throw new Error("model is required for model-based target. Add it to your configureEvals() call.");
232
+ }
171
233
  const model = typeof config.target === "string" ? config.target : evalConfig.model;
172
234
  return createEvalTarget(evalConfig.modelConfig, model);
173
235
  }
236
+ function resolveCreateTarget(config) {
237
+ return config.createTarget ?? getEvalConfig().createTarget;
238
+ }
174
239
  function defineSuite(name, config) {
175
- const target = resolveTarget(config);
176
- const suiteTools = config.tools;
177
- const globalPrompt = getEvalConfig().systemPrompt;
240
+ const suiteTools = config.tools ?? {};
241
+ const createTarget = config.target ? void 0 : resolveCreateTarget(config);
178
242
  ls.describe(name, () => {
179
243
  for (const tc of config.cases) {
180
244
  const testName = tc.name ?? lastHumanContent(tc.messages);
181
- const tools = toMockTools(tc.tools ?? suiteTools);
245
+ const caseToolDefs = tc.tools ?? suiteTools;
246
+ const tools = toMockTools(caseToolDefs);
182
247
  const ctx = { message: lastHumanContent(tc.messages) };
183
248
  const resolved = tc.expect.map((exp) => exp(ctx));
184
249
  const evaluators = resolved.map((r) => r.evaluator);
185
250
  const referenceOutputs = Object.assign({}, ...resolved.map((r) => r.referenceOutputs));
186
- const systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;
187
- const targetInputs = {
188
- messages: tc.messages,
189
- tools,
190
- ...systemPrompt ? { systemPrompt } : {}
191
- };
192
251
  ls.test(
193
252
  testName,
194
253
  {
195
254
  inputs: {
196
255
  messages: tc.messages,
197
- tools: toSerializableTools(tools),
198
- ...systemPrompt ? { systemPrompt } : {}
256
+ tools: toSerializableTools(tools)
199
257
  },
200
258
  referenceOutputs
201
259
  },
202
260
  async ({ referenceOutputs: refOut }) => {
203
- const output = await target(targetInputs);
261
+ let output;
262
+ if (createTarget) {
263
+ output = await runAgentTarget(createTarget, tc.messages, caseToolDefs);
264
+ } else {
265
+ const target = resolveModelTarget(config);
266
+ const globalPrompt = getEvalConfig().systemPrompt;
267
+ const systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;
268
+ output = await target({
269
+ messages: tc.messages,
270
+ tools,
271
+ ...systemPrompt ? { systemPrompt } : {}
272
+ });
273
+ }
204
274
  ls.logOutputs(output);
205
275
  for (const evaluator of evaluators) {
206
276
  await evaluator({ outputs: output, referenceOutputs: refOut ?? {} });
@@ -356,7 +426,7 @@ function toolsCalled(tools) {
356
426
  function llmJudge() {
357
427
  return () => {
358
428
  const config = getEvalConfig();
359
- const model = config.evaluatorModel ?? config.model;
429
+ const model = config.evaluatorModel;
360
430
  return {
361
431
  evaluator: ls2.wrapEvaluator(
362
432
  withTrajectoryGuard(
@@ -377,7 +447,7 @@ function noTools() {
377
447
  function respondsInLanguage(code) {
378
448
  return () => {
379
449
  const config = getEvalConfig();
380
- const model = config.evaluatorModel ?? config.model;
450
+ const model = config.evaluatorModel;
381
451
  return {
382
452
  evaluator: ls2.wrapEvaluator(createLanguageEvaluator(config.modelConfig, model)),
383
453
  referenceOutputs: { expectedLanguage: code }
@@ -401,6 +471,7 @@ export {
401
471
  configureEvals,
402
472
  contains,
403
473
  defineSuite,
474
+ fromToolSpecs,
404
475
  human,
405
476
  llmJudge,
406
477
  noTools,