@dvina/agents 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,18 +71,21 @@ var LangchainModelResolver = class {
71
71
  tags
72
72
  });
73
73
  }
74
- resolveAzure(configName, deploymentName, tags) {
75
- const providerConfig = this.config.azure?.[configName];
76
- if (!providerConfig) {
77
- throw new Error(`Configuration "${configName}" for provider "azure" is missing`);
74
+ resolveAzure(resourceName, modelName, tags) {
75
+ const resource = this.config.azure?.[resourceName];
76
+ if (!resource) {
77
+ throw new Error(`Resource "${resourceName}" for provider "azure" is missing`);
78
+ }
79
+ const modelEntry = resource.models.find((m) => m.model === modelName);
80
+ if (!modelEntry) {
81
+ throw new Error(`Model "${modelName}" not found in Azure resource "${resourceName}"`);
78
82
  }
79
83
  return new AzureChatOpenAI({
80
- model: providerConfig.model,
81
- // shows (perhaps even uses) 3.5-turbo when not specifid
82
- azureOpenAIApiKey: providerConfig.apiKey,
83
- azureOpenAIApiInstanceName: this.extractInstanceName(providerConfig.endpoint),
84
- azureOpenAIApiDeploymentName: deploymentName,
85
- azureOpenAIApiVersion: providerConfig.apiVersion,
84
+ model: modelEntry.model,
85
+ azureOpenAIApiKey: resource.apiKey,
86
+ azureOpenAIApiInstanceName: this.extractInstanceName(resource.endpoint),
87
+ azureOpenAIApiDeploymentName: modelEntry.deploymentName,
88
+ azureOpenAIApiVersion: modelEntry.apiVersion,
86
89
  tags
87
90
  });
88
91
  }
@@ -96,10 +99,61 @@ var LangchainModelResolver = class {
96
99
  }
97
100
  };
98
101
 
102
+ // src/runtime/langchain/utils.ts
103
+ import { AIMessage, HumanMessage, ToolMessage } from "langchain";
104
+ function convertToLangchainMessages(messages) {
105
+ const result = [];
106
+ let tcIdx = 0;
107
+ let pendingToolCallIds = [];
108
+ for (const msg of messages) {
109
+ if (msg.role === "human") {
110
+ result.push(
111
+ new HumanMessage({
112
+ content: msg.content.map((c) => {
113
+ if (c.type === "image") {
114
+ return { type: "image_url", image_url: { url: c.url } };
115
+ }
116
+ return c;
117
+ })
118
+ })
119
+ );
120
+ } else if (msg.role === "ai") {
121
+ if (msg.toolCalls && msg.toolCalls.length > 0) {
122
+ pendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);
123
+ result.push(
124
+ new AIMessage({
125
+ content: msg.content,
126
+ tool_calls: msg.toolCalls.map((tc, i) => ({
127
+ id: pendingToolCallIds[i],
128
+ name: tc.name,
129
+ args: tc.input ? JSON.parse(tc.input) : {}
130
+ }))
131
+ })
132
+ );
133
+ } else {
134
+ result.push(new AIMessage(msg.content));
135
+ }
136
+ } else if (msg.role === "tool") {
137
+ const toolCallId = pendingToolCallIds.shift();
138
+ if (!toolCallId)
139
+ throw new Error(`ToolMessage for "${msg.name}" without a preceding AiMessage with toolCalls`);
140
+ result.push(
141
+ new ToolMessage({
142
+ content: msg.output,
143
+ tool_call_id: toolCallId,
144
+ name: msg.name
145
+ })
146
+ );
147
+ }
148
+ }
149
+ return result;
150
+ }
151
+
99
152
  export {
100
153
  __require,
101
154
  __commonJS,
102
155
  __toESM,
156
+ convertToLangchainMessages,
103
157
  LangchainModelResolver
104
158
  };
105
- //# sourceMappingURL=chunk-LEEZCLZM.mjs.map
159
+ //# sourceMappingURL=chunk-NHWEEBN2.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/runtime/langchain/model-resolver.ts","../src/runtime/langchain/utils.ts"],"sourcesContent":["import { BaseLanguageModel } from '@langchain/core/language_models/base';\nimport { AzureChatOpenAI, ChatOpenAI } from '@langchain/openai';\n\nexport type LangchainOpenAIConfig = {\n\tapiKey: string;\n};\n\nexport type LangchainAzureResourceConfig = {\n\tapiKey: string;\n\tendpoint: string;\n\tmodels: {\n\t\tmodel: string;\n\t\tapiVersion: string;\n\t\tdeploymentName: string;\n\t}[];\n};\n\nexport type ResourceName = string;\n\nexport type LangchainModelConfig = {\n\topenai?: Record<string, LangchainOpenAIConfig>;\n\tazure?: Record<ResourceName, LangchainAzureResourceConfig>;\n};\n\nexport class LangchainModelResolver {\n\tconstructor(private config: LangchainModelConfig) {}\n\n\tresolve(modelString: string, tags?: string[]): BaseLanguageModel {\n\t\tconst parts = modelString.split(':');\n\n\t\tif (parts.length === 2) {\n\t\t\tconst [provider, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, 'default', modelName, tags);\n\t\t}\n\n\t\tif (parts.length === 3) {\n\t\t\tconst [provider, configName, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, configName, modelName, tags);\n\t\t}\n\n\t\tthrow new Error(\n\t\t\t'Model string must follow format \"provider:modelName\" (uses \"default\" config) or \"provider:configName:modelName\"',\n\t\t);\n\t}\n\n\tprivate resolveByProvider(\n\t\tprovider: string,\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t): BaseLanguageModel {\n\t\tswitch (provider) {\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveOpenAI(configName, modelName, tags);\n\t\t\tcase 'azure':\n\t\t\t\treturn this.resolveAzure(configName, modelName, tags);\n\t\t\tdefault:\n\t\t\t\tthrow new Error(`Unsupported model provider: ${provider}`);\n\t\t}\n\t}\n\n\tprivate resolveOpenAI(configName: string, modelName: string, tags?: string[]): ChatOpenAI {\n\t\tconst providerConfig = 
this.config.openai?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"openai\" is missing`);\n\t\t}\n\n\t\treturn new ChatOpenAI({\n\t\t\tapiKey: providerConfig.apiKey,\n\t\t\tmodelName: modelName,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate resolveAzure(resourceName: string, modelName: string, tags?: string[]): AzureChatOpenAI {\n\t\tconst resource = this.config.azure?.[resourceName];\n\t\tif (!resource) {\n\t\t\tthrow new Error(`Resource \"${resourceName}\" for provider \"azure\" is missing`);\n\t\t}\n\n\t\tconst modelEntry = resource.models.find((m) => m.model === modelName);\n\t\tif (!modelEntry) {\n\t\t\tthrow new Error(`Model \"${modelName}\" not found in Azure resource \"${resourceName}\"`);\n\t\t}\n\n\t\treturn new AzureChatOpenAI({\n\t\t\tmodel: modelEntry.model,\n\t\t\tazureOpenAIApiKey: resource.apiKey,\n\t\t\tazureOpenAIApiInstanceName: this.extractInstanceName(resource.endpoint),\n\t\t\tazureOpenAIApiDeploymentName: modelEntry.deploymentName,\n\t\t\tazureOpenAIApiVersion: modelEntry.apiVersion,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate extractInstanceName(endpoint: string): string {\n\t\ttry {\n\t\t\tconst url = new URL(endpoint);\n\t\t\treturn url.hostname.split('.')[0];\n\t\t} catch (e) {\n\t\t\treturn endpoint;\n\t\t}\n\t}\n}\n","import { Message } from '@core/agent.interface';\nimport { AIMessage, BaseMessage, HumanMessage, ToolMessage } from 'langchain';\n\nexport function convertToLangchainMessages(messages: Message[]): BaseMessage[] {\n\tconst result: BaseMessage[] = [];\n\tlet tcIdx = 0;\n\tlet pendingToolCallIds: string[] = [];\n\n\tfor (const msg of messages) {\n\t\tif (msg.role === 'human') {\n\t\t\tresult.push(\n\t\t\t\tnew HumanMessage({\n\t\t\t\t\tcontent: msg.content.map((c) => {\n\t\t\t\t\t\tif (c.type === 'image') {\n\t\t\t\t\t\t\treturn { type: 'image_url', image_url: { url: c.url } };\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn c;\n\t\t\t\t\t}) as 
any,\n\t\t\t\t}),\n\t\t\t);\n\t\t} else if (msg.role === 'ai') {\n\t\t\tif (msg.toolCalls && msg.toolCalls.length > 0) {\n\t\t\t\tpendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);\n\t\t\t\tresult.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: msg.content,\n\t\t\t\t\t\ttool_calls: msg.toolCalls.map((tc, i) => ({\n\t\t\t\t\t\t\tid: pendingToolCallIds[i],\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\t\t\t})),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t} else {\n\t\t\t\tresult.push(new AIMessage(msg.content));\n\t\t\t}\n\t\t} else if (msg.role === 'tool') {\n\t\t\tconst toolCallId = pendingToolCallIds.shift();\n\t\t\tif (!toolCallId)\n\t\t\t\tthrow new Error(`ToolMessage for \"${msg.name}\" without a preceding AiMessage with toolCalls`);\n\t\t\tresult.push(\n\t\t\t\tnew ToolMessage({\n\t\t\t\t\tcontent: msg.output,\n\t\t\t\t\ttool_call_id: toolCallId,\n\t\t\t\t\tname: msg.name,\n\t\t\t\t}),\n\t\t\t);\n\t\t}\n\t}\n\n\treturn result;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,SAAS,iBAAiB,kBAAkB;AAuBrC,IAAM,yBAAN,MAA6B;AAAA,EACnC,YAAoB,QAA8B;AAA9B;AAAA,EAA+B;AAAA,EAEnD,QAAQ,aAAqB,MAAoC;AAChE,UAAM,QAAQ,YAAY,MAAM,GAAG;AAEnC,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,SAAS,IAAI;AAC9B,aAAO,KAAK,kBAAkB,UAAU,WAAW,WAAW,IAAI;AAAA,IACnE;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,YAAY,SAAS,IAAI;AAC1C,aAAO,KAAK,kBAAkB,UAAU,YAAY,WAAW,IAAI;AAAA,IACpE;AAEA,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAAA,EAEQ,kBACP,UACA,YACA,WACA,MACoB;AACpB,YAAQ,UAAU;AAAA,MACjB,KAAK;AACJ,eAAO,KAAK,cAAc,YAAY,WAAW,IAAI;AAAA,MACtD,KAAK;AACJ,eAAO,KAAK,aAAa,YAAY,WAAW,IAAI;AAAA,MACrD;AACC,cAAM,IAAI,MAAM,+BAA+B,QAAQ,EAAE;AAAA,IAC3D;AAAA,EACD;AAAA,EAEQ,cAAc,YAAoB,WAAmB,MAA6B;AACzF,UAAM,iBAAiB,KAAK,OAAO,SAAS,UAAU;AACtD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,oCAAoC;AAAA,IACjF;AAEA,WAAO,IAAI,WAAW;AAAA,MACrB,QAAQ,eAAe;AAAA,MACvB;AAAA,MACA;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,aAAa,cAAsB,WAAmB,MAAkC;AAC/F,UAAM,WAAW,KAAK,OAAO,QAAQ,YAAY
;AACjD,QAAI,CAAC,UAAU;AACd,YAAM,IAAI,MAAM,aAAa,YAAY,mCAAmC;AAAA,IAC7E;AAEA,UAAM,aAAa,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,UAAU,SAAS;AACpE,QAAI,CAAC,YAAY;AAChB,YAAM,IAAI,MAAM,UAAU,SAAS,kCAAkC,YAAY,GAAG;AAAA,IACrF;AAEA,WAAO,IAAI,gBAAgB;AAAA,MAC1B,OAAO,WAAW;AAAA,MAClB,mBAAmB,SAAS;AAAA,MAC5B,4BAA4B,KAAK,oBAAoB,SAAS,QAAQ;AAAA,MACtE,8BAA8B,WAAW;AAAA,MACzC,uBAAuB,WAAW;AAAA,MAClC;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,oBAAoB,UAA0B;AACrD,QAAI;AACH,YAAM,MAAM,IAAI,IAAI,QAAQ;AAC5B,aAAO,IAAI,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,IACjC,SAAS,GAAG;AACX,aAAO;AAAA,IACR;AAAA,EACD;AACD;;;ACtGA,SAAS,WAAwB,cAAc,mBAAmB;AAE3D,SAAS,2BAA2B,UAAoC;AAC9E,QAAM,SAAwB,CAAC;AAC/B,MAAI,QAAQ;AACZ,MAAI,qBAA+B,CAAC;AAEpC,aAAW,OAAO,UAAU;AAC3B,QAAI,IAAI,SAAS,SAAS;AACzB,aAAO;AAAA,QACN,IAAI,aAAa;AAAA,UAChB,SAAS,IAAI,QAAQ,IAAI,CAAC,MAAM;AAC/B,gBAAI,EAAE,SAAS,SAAS;AACvB,qBAAO,EAAE,MAAM,aAAa,WAAW,EAAE,KAAK,EAAE,IAAI,EAAE;AAAA,YACvD;AACA,mBAAO;AAAA,UACR,CAAC;AAAA,QACF,CAAC;AAAA,MACF;AAAA,IACD,WAAW,IAAI,SAAS,MAAM;AAC7B,UAAI,IAAI,aAAa,IAAI,UAAU,SAAS,GAAG;AAC9C,6BAAqB,IAAI,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,EAAE;AAC5D,eAAO;AAAA,UACN,IAAI,UAAU;AAAA,YACb,SAAS,IAAI;AAAA,YACb,YAAY,IAAI,UAAU,IAAI,CAAC,IAAI,OAAO;AAAA,cACzC,IAAI,mBAAmB,CAAC;AAAA,cACxB,MAAM,GAAG;AAAA,cACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,YAC1C,EAAE;AAAA,UACH,CAAC;AAAA,QACF;AAAA,MACD,OAAO;AACN,eAAO,KAAK,IAAI,UAAU,IAAI,OAAO,CAAC;AAAA,MACvC;AAAA,IACD,WAAW,IAAI,SAAS,QAAQ;AAC/B,YAAM,aAAa,mBAAmB,MAAM;AAC5C,UAAI,CAAC;AACJ,cAAM,IAAI,MAAM,oBAAoB,IAAI,IAAI,gDAAgD;AAC7F,aAAO;AAAA,QACN,IAAI,YAAY;AAAA,UACf,SAAS,IAAI;AAAA,UACb,cAAc;AAAA,UACd,MAAM,IAAI;AAAA,QACX,CAAC;AAAA,MACF;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;","names":[]}
@@ -1,20 +1,28 @@
1
- import { L as LangchainModelConfig } from '../model-resolver-BRAaBV9n.mjs';
1
+ import { T as ToolDefinition, A as Agent, L as LangchainModelConfig, M as Message, d as AiMessage, a as ToolSpec, H as HumanMessage, f as ToolMessage } from '../model-resolver-U0J9x1a6.mjs';
2
+ import * as zod from 'zod';
3
+ import { z } from 'zod';
2
4
  import { BaseMessage } from '@langchain/core/messages';
3
5
 
6
+ /** Factory that creates a fresh Agent per test case. Receives extra suite-level tools as ToolDefinition[]. */
7
+ type CreateTargetFn = (extraTools: ToolDefinition[]) => Agent | Promise<Agent>;
4
8
  interface EvalConfig {
9
+ /** Required for model-based target and LLM evaluators (respondsInLanguage, llmJudge). */
5
10
  modelConfig: LangchainModelConfig;
6
- model: string;
7
- /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). Defaults to `model`. */
8
- evaluatorModel?: string;
9
- /** System prompt prepended to every eval invocation. Can be overridden per-suite or per-case. */
11
+ /** Required for model-based target. Not used as a fallback for evaluatorModel, which must be set explicitly. */
12
+ model?: string;
13
+ /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). */
14
+ evaluatorModel: string;
15
+ /** System prompt for model-based target. Ignored when createTarget is used. Can be overridden per-suite or per-case. */
10
16
  systemPrompt?: string;
17
+ /** Factory that creates a fresh Agent per test case. When set, this is the default target. */
18
+ createTarget?: CreateTargetFn;
11
19
  }
12
20
  declare function configureEvals(config: EvalConfig): void;
13
21
 
14
22
  interface MockToolDef {
15
23
  name: string;
16
24
  description: string;
17
- schema: Record<string, unknown>;
25
+ schema: z.ZodObject<any> | Record<string, unknown>;
18
26
  /**
19
27
  * Canned response the mock tool returns.
20
28
  * Can be a static string, or a function that receives input and returns a response.
@@ -60,23 +68,13 @@ declare function contains(strings: string[]): Expectation;
60
68
  /** Assert the response does not contain any of the given strings. */
61
69
  declare function notContains(strings: string[]): Expectation;
62
70
 
63
- declare function human(content: string): {
64
- role: "human";
65
- content: string;
66
- };
67
- declare function ai(content: string, toolCalls?: string[]): {
68
- toolCalls?: string[] | undefined;
69
- role: "ai";
70
- content: string;
71
- };
72
- declare function toolResult(content: string): {
73
- role: "tool";
74
- content: string;
75
- };
76
- type Message = ReturnType<typeof human> | ReturnType<typeof ai> | ReturnType<typeof toolResult>;
71
+ declare function human(content: string): HumanMessage;
72
+ declare function ai(content: string, toolCalls?: string[]): AiMessage;
73
+ declare function toolResult(name: string, output: string): ToolMessage;
77
74
  interface ToolDef {
78
75
  description: string;
79
- schema?: Record<string, string>;
76
+ /** A plain key→description record, or a ZodObject passed through from a ToolSpec. */
77
+ schema?: Record<string, string> | zod.ZodObject<any>;
80
78
  /** Auto-stringified if not a string or function. */
81
79
  response: unknown | ((input: Record<string, unknown>, callCount: number) => string);
82
80
  }
@@ -99,11 +97,21 @@ type TargetFn = (inputs: {
99
97
  interface SuiteConfig {
100
98
  /** Custom target function, or model string override. Auto-created from global config if omitted. */
101
99
  target?: TargetFn | string;
100
+ /** Factory that creates a fresh Agent per test case. Overrides global createTarget. */
101
+ createTarget?: CreateTargetFn;
102
102
  /** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */
103
103
  systemPrompt?: string;
104
- tools: Record<string, ToolDef>;
104
+ tools?: Record<string, ToolDef>;
105
105
  cases: TestCase[];
106
106
  }
107
+ /**
108
+ * Converts a `ToolSpec[]` (from a real tool provider) into the
109
+ * `Record<string, ToolDef>` that `defineSuite` expects.
110
+ *
111
+ * `responses` maps tool names to canned mock responses. Tools without an
112
+ * entry in `responses` default to `''`.
113
+ */
114
+ declare function fromToolSpecs(specs: ToolSpec[], responses?: Record<string, ToolDef['response']>): Record<string, ToolDef>;
107
115
  declare function defineSuite(name: string, config: SuiteConfig): void;
108
116
 
109
- export { type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };
117
+ export { type CreateTargetFn, type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, fromToolSpecs, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };
@@ -1,20 +1,28 @@
1
- import { L as LangchainModelConfig } from '../model-resolver-BRAaBV9n.js';
1
+ import { T as ToolDefinition, A as Agent, L as LangchainModelConfig, M as Message, d as AiMessage, a as ToolSpec, H as HumanMessage, f as ToolMessage } from '../model-resolver-U0J9x1a6.js';
2
+ import * as zod from 'zod';
3
+ import { z } from 'zod';
2
4
  import { BaseMessage } from '@langchain/core/messages';
3
5
 
6
+ /** Factory that creates a fresh Agent per test case. Receives extra suite-level tools as ToolDefinition[]. */
7
+ type CreateTargetFn = (extraTools: ToolDefinition[]) => Agent | Promise<Agent>;
4
8
  interface EvalConfig {
9
+ /** Required for model-based target and LLM evaluators (respondsInLanguage, llmJudge). */
5
10
  modelConfig: LangchainModelConfig;
6
- model: string;
7
- /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). Defaults to `model`. */
8
- evaluatorModel?: string;
9
- /** System prompt prepended to every eval invocation. Can be overridden per-suite or per-case. */
11
+ /** Required for model-based target. Not used as a fallback for evaluatorModel, which must be set explicitly. */
12
+ model?: string;
13
+ /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). */
14
+ evaluatorModel: string;
15
+ /** System prompt for model-based target. Ignored when createTarget is used. Can be overridden per-suite or per-case. */
10
16
  systemPrompt?: string;
17
+ /** Factory that creates a fresh Agent per test case. When set, this is the default target. */
18
+ createTarget?: CreateTargetFn;
11
19
  }
12
20
  declare function configureEvals(config: EvalConfig): void;
13
21
 
14
22
  interface MockToolDef {
15
23
  name: string;
16
24
  description: string;
17
- schema: Record<string, unknown>;
25
+ schema: z.ZodObject<any> | Record<string, unknown>;
18
26
  /**
19
27
  * Canned response the mock tool returns.
20
28
  * Can be a static string, or a function that receives input and returns a response.
@@ -60,23 +68,13 @@ declare function contains(strings: string[]): Expectation;
60
68
  /** Assert the response does not contain any of the given strings. */
61
69
  declare function notContains(strings: string[]): Expectation;
62
70
 
63
- declare function human(content: string): {
64
- role: "human";
65
- content: string;
66
- };
67
- declare function ai(content: string, toolCalls?: string[]): {
68
- toolCalls?: string[] | undefined;
69
- role: "ai";
70
- content: string;
71
- };
72
- declare function toolResult(content: string): {
73
- role: "tool";
74
- content: string;
75
- };
76
- type Message = ReturnType<typeof human> | ReturnType<typeof ai> | ReturnType<typeof toolResult>;
71
+ declare function human(content: string): HumanMessage;
72
+ declare function ai(content: string, toolCalls?: string[]): AiMessage;
73
+ declare function toolResult(name: string, output: string): ToolMessage;
77
74
  interface ToolDef {
78
75
  description: string;
79
- schema?: Record<string, string>;
76
+ /** A plain key→description record, or a ZodObject passed through from a ToolSpec. */
77
+ schema?: Record<string, string> | zod.ZodObject<any>;
80
78
  /** Auto-stringified if not a string or function. */
81
79
  response: unknown | ((input: Record<string, unknown>, callCount: number) => string);
82
80
  }
@@ -99,11 +97,21 @@ type TargetFn = (inputs: {
99
97
  interface SuiteConfig {
100
98
  /** Custom target function, or model string override. Auto-created from global config if omitted. */
101
99
  target?: TargetFn | string;
100
+ /** Factory that creates a fresh Agent per test case. Overrides global createTarget. */
101
+ createTarget?: CreateTargetFn;
102
102
  /** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */
103
103
  systemPrompt?: string;
104
- tools: Record<string, ToolDef>;
104
+ tools?: Record<string, ToolDef>;
105
105
  cases: TestCase[];
106
106
  }
107
+ /**
108
+ * Converts a `ToolSpec[]` (from a real tool provider) into the
109
+ * `Record<string, ToolDef>` that `defineSuite` expects.
110
+ *
111
+ * `responses` maps tool names to canned mock responses. Tools without an
112
+ * entry in `responses` default to `''`.
113
+ */
114
+ declare function fromToolSpecs(specs: ToolSpec[], responses?: Record<string, ToolDef['response']>): Record<string, ToolDef>;
107
115
  declare function defineSuite(name: string, config: SuiteConfig): void;
108
116
 
109
- export { type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };
117
+ export { type CreateTargetFn, type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, fromToolSpecs, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };
@@ -34,6 +34,7 @@ __export(eval_exports, {
34
34
  configureEvals: () => configureEvals,
35
35
  contains: () => contains,
36
36
  defineSuite: () => defineSuite,
37
+ fromToolSpecs: () => fromToolSpecs,
37
38
  human: () => human,
38
39
  llmJudge: () => llmJudge,
39
40
  noTools: () => noTools,
@@ -105,18 +106,21 @@ var LangchainModelResolver = class {
105
106
  tags
106
107
  });
107
108
  }
108
- resolveAzure(configName, deploymentName, tags) {
109
- const providerConfig = this.config.azure?.[configName];
110
- if (!providerConfig) {
111
- throw new Error(`Configuration "${configName}" for provider "azure" is missing`);
109
+ resolveAzure(resourceName, modelName, tags) {
110
+ const resource = this.config.azure?.[resourceName];
111
+ if (!resource) {
112
+ throw new Error(`Resource "${resourceName}" for provider "azure" is missing`);
113
+ }
114
+ const modelEntry = resource.models.find((m) => m.model === modelName);
115
+ if (!modelEntry) {
116
+ throw new Error(`Model "${modelName}" not found in Azure resource "${resourceName}"`);
112
117
  }
113
118
  return new import_openai.AzureChatOpenAI({
114
- model: providerConfig.model,
115
- // shows (perhaps even uses) 3.5-turbo when not specifid
116
- azureOpenAIApiKey: providerConfig.apiKey,
117
- azureOpenAIApiInstanceName: this.extractInstanceName(providerConfig.endpoint),
118
- azureOpenAIApiDeploymentName: deploymentName,
119
- azureOpenAIApiVersion: providerConfig.apiVersion,
119
+ model: modelEntry.model,
120
+ azureOpenAIApiKey: resource.apiKey,
121
+ azureOpenAIApiInstanceName: this.extractInstanceName(resource.endpoint),
122
+ azureOpenAIApiDeploymentName: modelEntry.deploymentName,
123
+ azureOpenAIApiVersion: modelEntry.apiVersion,
120
124
  tags
121
125
  });
122
126
  }
@@ -130,51 +134,64 @@ var LangchainModelResolver = class {
130
134
  }
131
135
  };
132
136
 
133
- // src/eval/target.ts
134
- var MAX_AGENT_LOOPS = 10;
135
- function convertMessages(msgs) {
137
+ // src/runtime/langchain/utils.ts
138
+ var import_langchain = require("langchain");
139
+ function convertToLangchainMessages(messages) {
136
140
  const result = [];
137
141
  let tcIdx = 0;
138
- let pendingToolCalls = [];
139
- for (const msg of msgs) {
142
+ let pendingToolCallIds = [];
143
+ for (const msg of messages) {
140
144
  if (msg.role === "human") {
141
- result.push(new import_messages.HumanMessage(msg.content));
145
+ result.push(
146
+ new import_langchain.HumanMessage({
147
+ content: msg.content.map((c) => {
148
+ if (c.type === "image") {
149
+ return { type: "image_url", image_url: { url: c.url } };
150
+ }
151
+ return c;
152
+ })
153
+ })
154
+ );
142
155
  } else if (msg.role === "ai") {
143
156
  if (msg.toolCalls && msg.toolCalls.length > 0) {
144
- pendingToolCalls = msg.toolCalls.map((name) => ({
145
- id: `hist_tc${++tcIdx}`,
146
- name
147
- }));
157
+ pendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);
148
158
  result.push(
149
- new import_messages.AIMessage({
159
+ new import_langchain.AIMessage({
150
160
  content: msg.content,
151
- tool_calls: pendingToolCalls.map((tc) => ({
152
- id: tc.id,
161
+ tool_calls: msg.toolCalls.map((tc, i) => ({
162
+ id: pendingToolCallIds[i],
153
163
  name: tc.name,
154
- args: {}
164
+ args: tc.input ? JSON.parse(tc.input) : {}
155
165
  }))
156
166
  })
157
167
  );
158
168
  } else {
159
- result.push(new import_messages.AIMessage(msg.content));
169
+ result.push(new import_langchain.AIMessage(msg.content));
160
170
  }
161
171
  } else if (msg.role === "tool") {
162
- const tc = pendingToolCalls.shift();
163
- if (!tc) throw new Error("toolResult() without a preceding ai() with toolCalls");
172
+ const toolCallId = pendingToolCallIds.shift();
173
+ if (!toolCallId)
174
+ throw new Error(`ToolMessage for "${msg.name}" without a preceding AiMessage with toolCalls`);
164
175
  result.push(
165
- new import_messages.ToolMessage({
166
- content: msg.content,
167
- tool_call_id: tc.id,
168
- name: tc.name
176
+ new import_langchain.ToolMessage({
177
+ content: msg.output,
178
+ tool_call_id: toolCallId,
179
+ name: msg.name
169
180
  })
170
181
  );
171
182
  }
172
183
  }
173
184
  return result;
174
185
  }
186
+
187
+ // src/eval/target.ts
188
+ var MAX_AGENT_LOOPS = 10;
175
189
  function createEvalTarget(modelConfig, modelString) {
176
190
  return async (inputs) => {
177
191
  const config = modelConfig && modelString ? { modelConfig, model: modelString } : getEvalConfig();
192
+ if (!config.model) {
193
+ throw new Error("model is required for model-based target. Add it to your configureEvals() call.");
194
+ }
178
195
  const resolver = new LangchainModelResolver(config.modelConfig);
179
196
  const model = resolver.resolve(config.model);
180
197
  const toolCallCounts = {};
@@ -191,7 +208,7 @@ function createEvalTarget(modelConfig, modelString) {
191
208
  {
192
209
  name: mockTool.name,
193
210
  description: mockTool.description,
194
- schema: import_zod.z.object(
211
+ schema: mockTool.schema instanceof import_zod.z.ZodObject ? mockTool.schema : import_zod.z.object(
195
212
  Object.fromEntries(
196
213
  Object.entries(mockTool.schema).map(([key, val]) => {
197
214
  if (typeof val === "string") return [key, import_zod.z.string().describe(val)];
@@ -208,8 +225,7 @@ function createEvalTarget(modelConfig, modelString) {
208
225
  if (inputs.systemPrompt) {
209
226
  messages.push(new import_messages.SystemMessage(inputs.systemPrompt));
210
227
  }
211
- const inputMessages = inputs.messages ?? (inputs.userMessages ?? []).map((content) => ({ role: "human", content }));
212
- messages.push(...convertMessages(inputMessages));
228
+ messages.push(...convertToLangchainMessages(inputs.messages));
213
229
  let loopCount = 0;
214
230
  while (loopCount < MAX_AGENT_LOOPS) {
215
231
  loopCount++;
@@ -244,16 +260,107 @@ function createEvalTarget(modelConfig, modelString) {
244
260
  return { messages };
245
261
  };
246
262
  }
263
+ function agentResultToMessages(inputMessages, result) {
264
+ const messages = convertToLangchainMessages(inputMessages);
265
+ let pendingToolCalls = [];
266
+ for (const block of result.content) {
267
+ if (block.type === "tool_call") {
268
+ const tc = block;
269
+ pendingToolCalls.push({
270
+ id: tc.toolCallId,
271
+ name: tc.name,
272
+ args: tc.input ? JSON.parse(tc.input) : {},
273
+ output: tc.output
274
+ });
275
+ } else if (block.type === "text") {
276
+ if (pendingToolCalls.length > 0) {
277
+ messages.push(
278
+ new import_messages.AIMessage({
279
+ content: "",
280
+ tool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args }))
281
+ })
282
+ );
283
+ for (const tc of pendingToolCalls) {
284
+ messages.push(new import_messages.ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));
285
+ }
286
+ pendingToolCalls = [];
287
+ }
288
+ messages.push(new import_messages.AIMessage(block.output));
289
+ }
290
+ }
291
+ if (pendingToolCalls.length > 0) {
292
+ messages.push(
293
+ new import_messages.AIMessage({
294
+ content: "",
295
+ tool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args }))
296
+ })
297
+ );
298
+ for (const tc of pendingToolCalls) {
299
+ messages.push(new import_messages.ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));
300
+ }
301
+ }
302
+ return messages;
303
+ }
304
/**
 * Turns a name → tool-def record into a list of tool definitions backed by
 * canned responses.
 *
 * Each definition is tagged with toolKit "eval-mock". A ZodObject schema is
 * used as-is; otherwise a Zod object is built from the record, mapping string
 * values to described string fields and everything else to `any`. `exec`
 * counts invocations per tool and passes the running count to function
 * responses; non-function responses are returned as-is when they are strings
 * and JSON-stringified otherwise.
 *
 * @param defs Record mapping tool names to their mock definitions.
 * @returns One tool definition per entry of `defs`.
 */
function toolDefsToDefinitions(defs) {
  const invocationTotals = {};
  const definitions = [];
  for (const [name, def] of Object.entries(defs)) {
    invocationTotals[name] = 0;

    let inputSchema;
    if (def.schema instanceof import_zod.z.ZodObject) {
      inputSchema = def.schema;
    } else {
      const fieldPairs = Object.entries(def.schema ?? {}).map(([key, val]) => {
        const field = typeof val === "string" ? import_zod.z.string().describe(val) : import_zod.z.any();
        return [key, field];
      });
      inputSchema = import_zod.z.object(Object.fromEntries(fieldPairs));
    }

    definitions.push({
      name,
      toolKit: "eval-mock",
      description: def.description,
      inputSchema,
      exec: async (input) => {
        invocationTotals[name]++;
        if (typeof def.response !== "function") {
          return typeof def.response === "string" ? def.response : JSON.stringify(def.response);
        }
        return def.response(input, invocationTotals[name]);
      }
    });
  }
  return definitions;
}
333
/**
 * Runs one eval case against an agent produced by `createTarget`.
 *
 * Suite/case tool defs are converted to tool definitions (skipped when the
 * record is empty) and handed to the factory; the agent is then run on a
 * freshly generated thread id and its result is converted to messages.
 *
 * @param createTarget Factory that receives the extra tools and returns an agent (or a promise of one).
 * @param evalMessages Messages of the eval case, passed to the agent unchanged.
 * @param extraToolDefs Record of mock tool defs for this case.
 * @returns An object whose `messages` holds the input and agent output messages.
 */
async function runAgentTarget(createTarget, evalMessages, extraToolDefs) {
  let extraTools = [];
  if (Object.keys(extraToolDefs).length > 0) {
    extraTools = toolDefsToDefinitions(extraToolDefs);
  }
  const agent = await createTarget(extraTools);
  const threadId = `eval_${Date.now()}_${Math.random().toString(36).slice(2)}`;
  const result = await agent.run({ threadId, messages: evalMessages });
  const messages = agentResultToMessages(evalMessages, result);
  return { messages };
}
247
342
 
248
343
  // src/eval/suite.ts
249
344
  function human(content) {
250
- return { role: "human", content };
345
+ return { role: "human", content: [{ type: "text", text: content }] };
251
346
  }
252
347
  function ai(content, toolCalls) {
253
- return { role: "ai", content, ...toolCalls ? { toolCalls } : {} };
348
+ return { role: "ai", content, ...toolCalls ? { toolCalls: toolCalls.map((name) => ({ name })) } : {} };
254
349
  }
255
- function toolResult(content) {
256
- return { role: "tool", content };
350
+ function toolResult(name, output) {
351
+ return { role: "tool", name, output };
352
+ }
353
+ function fromToolSpecs(specs, responses = {}) {
354
+ return Object.fromEntries(
355
+ specs.map((spec) => [
356
+ spec.name,
357
+ {
358
+ description: spec.description,
359
+ schema: spec.inputSchema,
360
+ response: responses[spec.name] ?? ""
361
+ }
362
+ ])
363
+ );
257
364
  }
258
365
  function toMockTools(defs) {
259
366
  return Object.entries(defs).map(([name, def]) => ({
@@ -266,51 +373,67 @@ function toMockTools(defs) {
266
373
  function toSerializableTools(tools) {
267
374
  return tools.map((t) => ({
268
375
  ...t,
376
+ schema: t.schema instanceof Object && "shape" in t.schema ? "<ZodObject>" : t.schema,
269
377
  response: typeof t.response === "function" ? "<function>" : t.response
270
378
  }));
271
379
  }
272
380
  function lastHumanContent(messages) {
273
381
  for (let i = messages.length - 1; i >= 0; i--) {
274
- if (messages[i].role === "human") return messages[i].content;
382
+ const msg = messages[i];
383
+ if (msg.role === "human") {
384
+ const textBlock = msg.content.find((c) => c.type === "text");
385
+ return textBlock ? textBlock.text : "";
386
+ }
275
387
  }
276
- return messages[0]?.content ?? "";
388
+ return "";
277
389
  }
278
- function resolveTarget(config) {
390
+ function resolveModelTarget(config) {
279
391
  if (typeof config.target === "function") return config.target;
280
392
  const evalConfig = getEvalConfig();
393
+ if (!evalConfig.model && typeof config.target !== "string") {
394
+ throw new Error("model is required for model-based target. Add it to your configureEvals() call.");
395
+ }
281
396
  const model = typeof config.target === "string" ? config.target : evalConfig.model;
282
397
  return createEvalTarget(evalConfig.modelConfig, model);
283
398
  }
399
/**
 * Picks the agent factory for a suite: the suite's own `createTarget` when it
 * is set, otherwise the one from the global eval config (which may be unset).
 *
 * @param config Suite configuration.
 * @returns The selected factory, or the global config's value when the suite has none.
 */
function resolveCreateTarget(config) {
  const suiteFactory = config.createTarget;
  if (suiteFactory !== null && suiteFactory !== void 0) {
    return suiteFactory;
  }
  return getEvalConfig().createTarget;
}
284
402
  function defineSuite(name, config) {
285
- const target = resolveTarget(config);
286
- const suiteTools = config.tools;
287
- const globalPrompt = getEvalConfig().systemPrompt;
403
+ const suiteTools = config.tools ?? {};
404
+ const createTarget = config.target ? void 0 : resolveCreateTarget(config);
288
405
  ls.describe(name, () => {
289
406
  for (const tc of config.cases) {
290
407
  const testName = tc.name ?? lastHumanContent(tc.messages);
291
- const tools = toMockTools(tc.tools ?? suiteTools);
408
+ const caseToolDefs = tc.tools ?? suiteTools;
409
+ const tools = toMockTools(caseToolDefs);
292
410
  const ctx = { message: lastHumanContent(tc.messages) };
293
411
  const resolved = tc.expect.map((exp) => exp(ctx));
294
412
  const evaluators = resolved.map((r) => r.evaluator);
295
413
  const referenceOutputs = Object.assign({}, ...resolved.map((r) => r.referenceOutputs));
296
- const systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;
297
- const targetInputs = {
298
- messages: tc.messages,
299
- tools,
300
- ...systemPrompt ? { systemPrompt } : {}
301
- };
302
414
  ls.test(
303
415
  testName,
304
416
  {
305
417
  inputs: {
306
418
  messages: tc.messages,
307
- tools: toSerializableTools(tools),
308
- ...systemPrompt ? { systemPrompt } : {}
419
+ tools: toSerializableTools(tools)
309
420
  },
310
421
  referenceOutputs
311
422
  },
312
423
  async ({ referenceOutputs: refOut }) => {
313
- const output = await target(targetInputs);
424
+ let output;
425
+ if (createTarget) {
426
+ output = await runAgentTarget(createTarget, tc.messages, caseToolDefs);
427
+ } else {
428
+ const target = resolveModelTarget(config);
429
+ const globalPrompt = getEvalConfig().systemPrompt;
430
+ const systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;
431
+ output = await target({
432
+ messages: tc.messages,
433
+ tools,
434
+ ...systemPrompt ? { systemPrompt } : {}
435
+ });
436
+ }
314
437
  ls.logOutputs(output);
315
438
  for (const evaluator of evaluators) {
316
439
  await evaluator({ outputs: output, referenceOutputs: refOut ?? {} });
@@ -462,7 +585,7 @@ function toolsCalled(tools) {
462
585
  function llmJudge() {
463
586
  return () => {
464
587
  const config = getEvalConfig();
465
- const model = config.evaluatorModel ?? config.model;
588
+ const model = config.evaluatorModel;
466
589
  return {
467
590
  evaluator: ls2.wrapEvaluator(
468
591
  withTrajectoryGuard(
@@ -483,7 +606,7 @@ function noTools() {
483
606
  function respondsInLanguage(code) {
484
607
  return () => {
485
608
  const config = getEvalConfig();
486
- const model = config.evaluatorModel ?? config.model;
609
+ const model = config.evaluatorModel;
487
610
  return {
488
611
  evaluator: ls2.wrapEvaluator(createLanguageEvaluator(config.modelConfig, model)),
489
612
  referenceOutputs: { expectedLanguage: code }
@@ -508,6 +631,7 @@ function notContains(strings) {
508
631
  configureEvals,
509
632
  contains,
510
633
  defineSuite,
634
+ fromToolSpecs,
511
635
  human,
512
636
  llmJudge,
513
637
  noTools,