@dvina/agents 0.3.7 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-NHWEEBN2.mjs +159 -0
- package/dist/chunk-NHWEEBN2.mjs.map +1 -0
- package/dist/eval/index.d.mts +117 -0
- package/dist/eval/index.d.ts +117 -0
- package/dist/eval/index.js +643 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/index.mjs +483 -0
- package/dist/eval/index.mjs.map +1 -0
- package/dist/index.d.mts +4 -138
- package/dist/index.d.ts +4 -138
- package/dist/index.js +74 -42
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +11 -125
- package/dist/index.mjs.map +1 -1
- package/dist/model-resolver-U0J9x1a6.d.mts +158 -0
- package/dist/model-resolver-U0J9x1a6.d.ts +158 -0
- package/package.json +33 -4
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
// Bundler-generated runtime helpers (the naming matches esbuild's output —
// NOTE(review): confirm the bundler). These are short aliases for the Object
// built-ins used by the CommonJS/ESM interop shims that follow in this chunk.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
8
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
9
|
+
}) : x)(function(x) {
|
|
10
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
11
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
12
|
+
});
|
|
13
|
+
var __commonJS = (cb, mod) => function __require2() {
|
|
14
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
15
|
+
};
|
|
16
|
+
var __copyProps = (to, from, except, desc) => {
|
|
17
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
18
|
+
for (let key of __getOwnPropNames(from))
|
|
19
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
20
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
21
|
+
}
|
|
22
|
+
return to;
|
|
23
|
+
};
|
|
24
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
25
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
26
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
27
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
28
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
29
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
30
|
+
mod
|
|
31
|
+
));
|
|
32
|
+
|
|
33
|
+
// src/runtime/langchain/model-resolver.ts
|
|
34
|
+
import { AzureChatOpenAI, ChatOpenAI } from "@langchain/openai";
|
|
35
|
+
var LangchainModelResolver = class {
|
|
36
|
+
constructor(config) {
|
|
37
|
+
this.config = config;
|
|
38
|
+
}
|
|
39
|
+
resolve(modelString, tags) {
|
|
40
|
+
const parts = modelString.split(":");
|
|
41
|
+
if (parts.length === 2) {
|
|
42
|
+
const [provider, modelName] = parts;
|
|
43
|
+
return this.resolveByProvider(provider, "default", modelName, tags);
|
|
44
|
+
}
|
|
45
|
+
if (parts.length === 3) {
|
|
46
|
+
const [provider, configName, modelName] = parts;
|
|
47
|
+
return this.resolveByProvider(provider, configName, modelName, tags);
|
|
48
|
+
}
|
|
49
|
+
throw new Error(
|
|
50
|
+
'Model string must follow format "provider:modelName" (uses "default" config) or "provider:configName:modelName"'
|
|
51
|
+
);
|
|
52
|
+
}
|
|
53
|
+
resolveByProvider(provider, configName, modelName, tags) {
|
|
54
|
+
switch (provider) {
|
|
55
|
+
case "openai":
|
|
56
|
+
return this.resolveOpenAI(configName, modelName, tags);
|
|
57
|
+
case "azure":
|
|
58
|
+
return this.resolveAzure(configName, modelName, tags);
|
|
59
|
+
default:
|
|
60
|
+
throw new Error(`Unsupported model provider: ${provider}`);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
resolveOpenAI(configName, modelName, tags) {
|
|
64
|
+
const providerConfig = this.config.openai?.[configName];
|
|
65
|
+
if (!providerConfig) {
|
|
66
|
+
throw new Error(`Configuration "${configName}" for provider "openai" is missing`);
|
|
67
|
+
}
|
|
68
|
+
return new ChatOpenAI({
|
|
69
|
+
apiKey: providerConfig.apiKey,
|
|
70
|
+
modelName,
|
|
71
|
+
tags
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
resolveAzure(resourceName, modelName, tags) {
|
|
75
|
+
const resource = this.config.azure?.[resourceName];
|
|
76
|
+
if (!resource) {
|
|
77
|
+
throw new Error(`Resource "${resourceName}" for provider "azure" is missing`);
|
|
78
|
+
}
|
|
79
|
+
const modelEntry = resource.models.find((m) => m.model === modelName);
|
|
80
|
+
if (!modelEntry) {
|
|
81
|
+
throw new Error(`Model "${modelName}" not found in Azure resource "${resourceName}"`);
|
|
82
|
+
}
|
|
83
|
+
return new AzureChatOpenAI({
|
|
84
|
+
model: modelEntry.model,
|
|
85
|
+
azureOpenAIApiKey: resource.apiKey,
|
|
86
|
+
azureOpenAIApiInstanceName: this.extractInstanceName(resource.endpoint),
|
|
87
|
+
azureOpenAIApiDeploymentName: modelEntry.deploymentName,
|
|
88
|
+
azureOpenAIApiVersion: modelEntry.apiVersion,
|
|
89
|
+
tags
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
extractInstanceName(endpoint) {
|
|
93
|
+
try {
|
|
94
|
+
const url = new URL(endpoint);
|
|
95
|
+
return url.hostname.split(".")[0];
|
|
96
|
+
} catch (e) {
|
|
97
|
+
return endpoint;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
// src/runtime/langchain/utils.ts
|
|
103
|
+
import { AIMessage, HumanMessage, ToolMessage } from "langchain";
|
|
104
|
+
function convertToLangchainMessages(messages) {
|
|
105
|
+
const result = [];
|
|
106
|
+
let tcIdx = 0;
|
|
107
|
+
let pendingToolCallIds = [];
|
|
108
|
+
for (const msg of messages) {
|
|
109
|
+
if (msg.role === "human") {
|
|
110
|
+
result.push(
|
|
111
|
+
new HumanMessage({
|
|
112
|
+
content: msg.content.map((c) => {
|
|
113
|
+
if (c.type === "image") {
|
|
114
|
+
return { type: "image_url", image_url: { url: c.url } };
|
|
115
|
+
}
|
|
116
|
+
return c;
|
|
117
|
+
})
|
|
118
|
+
})
|
|
119
|
+
);
|
|
120
|
+
} else if (msg.role === "ai") {
|
|
121
|
+
if (msg.toolCalls && msg.toolCalls.length > 0) {
|
|
122
|
+
pendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);
|
|
123
|
+
result.push(
|
|
124
|
+
new AIMessage({
|
|
125
|
+
content: msg.content,
|
|
126
|
+
tool_calls: msg.toolCalls.map((tc, i) => ({
|
|
127
|
+
id: pendingToolCallIds[i],
|
|
128
|
+
name: tc.name,
|
|
129
|
+
args: tc.input ? JSON.parse(tc.input) : {}
|
|
130
|
+
}))
|
|
131
|
+
})
|
|
132
|
+
);
|
|
133
|
+
} else {
|
|
134
|
+
result.push(new AIMessage(msg.content));
|
|
135
|
+
}
|
|
136
|
+
} else if (msg.role === "tool") {
|
|
137
|
+
const toolCallId = pendingToolCallIds.shift();
|
|
138
|
+
if (!toolCallId)
|
|
139
|
+
throw new Error(`ToolMessage for "${msg.name}" without a preceding AiMessage with toolCalls`);
|
|
140
|
+
result.push(
|
|
141
|
+
new ToolMessage({
|
|
142
|
+
content: msg.output,
|
|
143
|
+
tool_call_id: toolCallId,
|
|
144
|
+
name: msg.name
|
|
145
|
+
})
|
|
146
|
+
);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
return result;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
export {
|
|
153
|
+
__require,
|
|
154
|
+
__commonJS,
|
|
155
|
+
__toESM,
|
|
156
|
+
convertToLangchainMessages,
|
|
157
|
+
LangchainModelResolver
|
|
158
|
+
};
|
|
159
|
+
//# sourceMappingURL=chunk-NHWEEBN2.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/runtime/langchain/model-resolver.ts","../src/runtime/langchain/utils.ts"],"sourcesContent":["import { BaseLanguageModel } from '@langchain/core/language_models/base';\nimport { AzureChatOpenAI, ChatOpenAI } from '@langchain/openai';\n\nexport type LangchainOpenAIConfig = {\n\tapiKey: string;\n};\n\nexport type LangchainAzureResourceConfig = {\n\tapiKey: string;\n\tendpoint: string;\n\tmodels: {\n\t\tmodel: string;\n\t\tapiVersion: string;\n\t\tdeploymentName: string;\n\t}[];\n};\n\nexport type ResourceName = string;\n\nexport type LangchainModelConfig = {\n\topenai?: Record<string, LangchainOpenAIConfig>;\n\tazure?: Record<ResourceName, LangchainAzureResourceConfig>;\n};\n\nexport class LangchainModelResolver {\n\tconstructor(private config: LangchainModelConfig) {}\n\n\tresolve(modelString: string, tags?: string[]): BaseLanguageModel {\n\t\tconst parts = modelString.split(':');\n\n\t\tif (parts.length === 2) {\n\t\t\tconst [provider, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, 'default', modelName, tags);\n\t\t}\n\n\t\tif (parts.length === 3) {\n\t\t\tconst [provider, configName, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, configName, modelName, tags);\n\t\t}\n\n\t\tthrow new Error(\n\t\t\t'Model string must follow format \"provider:modelName\" (uses \"default\" config) or \"provider:configName:modelName\"',\n\t\t);\n\t}\n\n\tprivate resolveByProvider(\n\t\tprovider: string,\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t): BaseLanguageModel {\n\t\tswitch (provider) {\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveOpenAI(configName, modelName, tags);\n\t\t\tcase 'azure':\n\t\t\t\treturn this.resolveAzure(configName, modelName, tags);\n\t\t\tdefault:\n\t\t\t\tthrow new Error(`Unsupported model provider: ${provider}`);\n\t\t}\n\t}\n\n\tprivate resolveOpenAI(configName: string, modelName: string, tags?: string[]): ChatOpenAI {\n\t\tconst providerConfig = 
this.config.openai?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"openai\" is missing`);\n\t\t}\n\n\t\treturn new ChatOpenAI({\n\t\t\tapiKey: providerConfig.apiKey,\n\t\t\tmodelName: modelName,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate resolveAzure(resourceName: string, modelName: string, tags?: string[]): AzureChatOpenAI {\n\t\tconst resource = this.config.azure?.[resourceName];\n\t\tif (!resource) {\n\t\t\tthrow new Error(`Resource \"${resourceName}\" for provider \"azure\" is missing`);\n\t\t}\n\n\t\tconst modelEntry = resource.models.find((m) => m.model === modelName);\n\t\tif (!modelEntry) {\n\t\t\tthrow new Error(`Model \"${modelName}\" not found in Azure resource \"${resourceName}\"`);\n\t\t}\n\n\t\treturn new AzureChatOpenAI({\n\t\t\tmodel: modelEntry.model,\n\t\t\tazureOpenAIApiKey: resource.apiKey,\n\t\t\tazureOpenAIApiInstanceName: this.extractInstanceName(resource.endpoint),\n\t\t\tazureOpenAIApiDeploymentName: modelEntry.deploymentName,\n\t\t\tazureOpenAIApiVersion: modelEntry.apiVersion,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate extractInstanceName(endpoint: string): string {\n\t\ttry {\n\t\t\tconst url = new URL(endpoint);\n\t\t\treturn url.hostname.split('.')[0];\n\t\t} catch (e) {\n\t\t\treturn endpoint;\n\t\t}\n\t}\n}\n","import { Message } from '@core/agent.interface';\nimport { AIMessage, BaseMessage, HumanMessage, ToolMessage } from 'langchain';\n\nexport function convertToLangchainMessages(messages: Message[]): BaseMessage[] {\n\tconst result: BaseMessage[] = [];\n\tlet tcIdx = 0;\n\tlet pendingToolCallIds: string[] = [];\n\n\tfor (const msg of messages) {\n\t\tif (msg.role === 'human') {\n\t\t\tresult.push(\n\t\t\t\tnew HumanMessage({\n\t\t\t\t\tcontent: msg.content.map((c) => {\n\t\t\t\t\t\tif (c.type === 'image') {\n\t\t\t\t\t\t\treturn { type: 'image_url', image_url: { url: c.url } };\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn c;\n\t\t\t\t\t}) as 
any,\n\t\t\t\t}),\n\t\t\t);\n\t\t} else if (msg.role === 'ai') {\n\t\t\tif (msg.toolCalls && msg.toolCalls.length > 0) {\n\t\t\t\tpendingToolCallIds = msg.toolCalls.map(() => `tc_${++tcIdx}`);\n\t\t\t\tresult.push(\n\t\t\t\t\tnew AIMessage({\n\t\t\t\t\t\tcontent: msg.content,\n\t\t\t\t\t\ttool_calls: msg.toolCalls.map((tc, i) => ({\n\t\t\t\t\t\t\tid: pendingToolCallIds[i],\n\t\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\t\targs: tc.input ? JSON.parse(tc.input) : {},\n\t\t\t\t\t\t})),\n\t\t\t\t\t}),\n\t\t\t\t);\n\t\t\t} else {\n\t\t\t\tresult.push(new AIMessage(msg.content));\n\t\t\t}\n\t\t} else if (msg.role === 'tool') {\n\t\t\tconst toolCallId = pendingToolCallIds.shift();\n\t\t\tif (!toolCallId)\n\t\t\t\tthrow new Error(`ToolMessage for \"${msg.name}\" without a preceding AiMessage with toolCalls`);\n\t\t\tresult.push(\n\t\t\t\tnew ToolMessage({\n\t\t\t\t\tcontent: msg.output,\n\t\t\t\t\ttool_call_id: toolCallId,\n\t\t\t\t\tname: msg.name,\n\t\t\t\t}),\n\t\t\t);\n\t\t}\n\t}\n\n\treturn result;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,SAAS,iBAAiB,kBAAkB;AAuBrC,IAAM,yBAAN,MAA6B;AAAA,EACnC,YAAoB,QAA8B;AAA9B;AAAA,EAA+B;AAAA,EAEnD,QAAQ,aAAqB,MAAoC;AAChE,UAAM,QAAQ,YAAY,MAAM,GAAG;AAEnC,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,SAAS,IAAI;AAC9B,aAAO,KAAK,kBAAkB,UAAU,WAAW,WAAW,IAAI;AAAA,IACnE;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,YAAY,SAAS,IAAI;AAC1C,aAAO,KAAK,kBAAkB,UAAU,YAAY,WAAW,IAAI;AAAA,IACpE;AAEA,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAAA,EAEQ,kBACP,UACA,YACA,WACA,MACoB;AACpB,YAAQ,UAAU;AAAA,MACjB,KAAK;AACJ,eAAO,KAAK,cAAc,YAAY,WAAW,IAAI;AAAA,MACtD,KAAK;AACJ,eAAO,KAAK,aAAa,YAAY,WAAW,IAAI;AAAA,MACrD;AACC,cAAM,IAAI,MAAM,+BAA+B,QAAQ,EAAE;AAAA,IAC3D;AAAA,EACD;AAAA,EAEQ,cAAc,YAAoB,WAAmB,MAA6B;AACzF,UAAM,iBAAiB,KAAK,OAAO,SAAS,UAAU;AACtD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,oCAAoC;AAAA,IACjF;AAEA,WAAO,IAAI,WAAW;AAAA,MACrB,QAAQ,eAAe;AAAA,MACvB;AAAA,MACA;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,aAAa,cAAsB,WAAmB,MAAkC;AAC/F,UAAM,WAAW,KAAK,OAAO,QAAQ,YAAY
;AACjD,QAAI,CAAC,UAAU;AACd,YAAM,IAAI,MAAM,aAAa,YAAY,mCAAmC;AAAA,IAC7E;AAEA,UAAM,aAAa,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,UAAU,SAAS;AACpE,QAAI,CAAC,YAAY;AAChB,YAAM,IAAI,MAAM,UAAU,SAAS,kCAAkC,YAAY,GAAG;AAAA,IACrF;AAEA,WAAO,IAAI,gBAAgB;AAAA,MAC1B,OAAO,WAAW;AAAA,MAClB,mBAAmB,SAAS;AAAA,MAC5B,4BAA4B,KAAK,oBAAoB,SAAS,QAAQ;AAAA,MACtE,8BAA8B,WAAW;AAAA,MACzC,uBAAuB,WAAW;AAAA,MAClC;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,oBAAoB,UAA0B;AACrD,QAAI;AACH,YAAM,MAAM,IAAI,IAAI,QAAQ;AAC5B,aAAO,IAAI,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,IACjC,SAAS,GAAG;AACX,aAAO;AAAA,IACR;AAAA,EACD;AACD;;;ACtGA,SAAS,WAAwB,cAAc,mBAAmB;AAE3D,SAAS,2BAA2B,UAAoC;AAC9E,QAAM,SAAwB,CAAC;AAC/B,MAAI,QAAQ;AACZ,MAAI,qBAA+B,CAAC;AAEpC,aAAW,OAAO,UAAU;AAC3B,QAAI,IAAI,SAAS,SAAS;AACzB,aAAO;AAAA,QACN,IAAI,aAAa;AAAA,UAChB,SAAS,IAAI,QAAQ,IAAI,CAAC,MAAM;AAC/B,gBAAI,EAAE,SAAS,SAAS;AACvB,qBAAO,EAAE,MAAM,aAAa,WAAW,EAAE,KAAK,EAAE,IAAI,EAAE;AAAA,YACvD;AACA,mBAAO;AAAA,UACR,CAAC;AAAA,QACF,CAAC;AAAA,MACF;AAAA,IACD,WAAW,IAAI,SAAS,MAAM;AAC7B,UAAI,IAAI,aAAa,IAAI,UAAU,SAAS,GAAG;AAC9C,6BAAqB,IAAI,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,EAAE;AAC5D,eAAO;AAAA,UACN,IAAI,UAAU;AAAA,YACb,SAAS,IAAI;AAAA,YACb,YAAY,IAAI,UAAU,IAAI,CAAC,IAAI,OAAO;AAAA,cACzC,IAAI,mBAAmB,CAAC;AAAA,cACxB,MAAM,GAAG;AAAA,cACT,MAAM,GAAG,QAAQ,KAAK,MAAM,GAAG,KAAK,IAAI,CAAC;AAAA,YAC1C,EAAE;AAAA,UACH,CAAC;AAAA,QACF;AAAA,MACD,OAAO;AACN,eAAO,KAAK,IAAI,UAAU,IAAI,OAAO,CAAC;AAAA,MACvC;AAAA,IACD,WAAW,IAAI,SAAS,QAAQ;AAC/B,YAAM,aAAa,mBAAmB,MAAM;AAC5C,UAAI,CAAC;AACJ,cAAM,IAAI,MAAM,oBAAoB,IAAI,IAAI,gDAAgD;AAC7F,aAAO;AAAA,QACN,IAAI,YAAY;AAAA,UACf,SAAS,IAAI;AAAA,UACb,cAAc;AAAA,UACd,MAAM,IAAI;AAAA,QACX,CAAC;AAAA,MACF;AAAA,IACD;AAAA,EACD;AAEA,SAAO;AACR;","names":[]}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { T as ToolDefinition, A as Agent, L as LangchainModelConfig, M as Message, d as AiMessage, a as ToolSpec, H as HumanMessage, f as ToolMessage } from '../model-resolver-U0J9x1a6.mjs';
|
|
2
|
+
import * as zod from 'zod';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { BaseMessage } from '@langchain/core/messages';
|
|
5
|
+
|
|
6
|
+
/** Factory that creates a fresh Agent per test case. Receives extra suite-level tools as ToolDefinition[]. */
type CreateTargetFn = (extraTools: ToolDefinition[]) => Agent | Promise<Agent>;
/** Global evaluation settings passed to `configureEvals`. */
interface EvalConfig {
    /** Required for model-based target and LLM evaluators (respondsInLanguage, llmJudge). */
    modelConfig: LangchainModelConfig;
    /**
     * Required for model-based target. Also used as fallback for evaluatorModel.
     * NOTE(review): `evaluatorModel` is declared as required below, so for typed
     * callers the fallback never applies — confirm which of the two is intended.
     */
    model?: string;
    /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). */
    evaluatorModel: string;
    /** System prompt for model-based target. Ignored when createTarget is used. Can be overridden per-suite or per-case. */
    systemPrompt?: string;
    /** Factory that creates a fresh Agent per test case. When set, this is the default target. */
    createTarget?: CreateTargetFn;
}
/** Registers the global evaluation configuration. */
declare function configureEvals(config: EvalConfig): void;
|
|
21
|
+
|
|
22
|
+
/** A mock tool handed to a `TargetFn`: name, description, input schema and canned response. */
interface MockToolDef {
    name: string;
    description: string;
    schema: z.ZodObject<any> | Record<string, unknown>;
    /**
     * Canned response the mock tool returns.
     * Can be a static string, or a function that receives the tool input and returns a response.
     * A function also receives the invocation count as its second argument,
     * to support scenarios like "first call fails, second call succeeds".
     */
    response: string | ((input: Record<string, unknown>, callCount: number) => string);
}
|
|
34
|
+
|
|
35
|
+
/** Async evaluator invoked with the run's outputs and the expectation's reference outputs. */
type EvaluatorFn = (args: {
    outputs: Record<string, any>;
    referenceOutputs: Record<string, any>;
}) => Promise<any>;
/** An evaluator paired with the reference outputs it should be called with. */
interface ResolvedExpectation {
    evaluator: EvaluatorFn;
    referenceOutputs: Record<string, unknown>;
}
/** A factory that receives test context and returns an evaluator + its referenceOutputs. */
type Expectation = (ctx: {
    message: string;
}) => ResolvedExpectation;
/**
 * Expect the agent to call tools in order (superset trajectory match).
 * Empty `[]` means the agent should answer directly without calling any tools.
 */
declare function toolsCalled(tools: string[]): Expectation;
/**
 * Run an LLM-as-judge evaluator on the trajectory.
 * Requires `toolsCalled` in the same expect array.
 * Uses the globally configured evaluator model.
 */
declare function llmJudge(): Expectation;
/** Assert the agent made zero tool calls. */
declare function noTools(): Expectation;
/**
 * Assert the response is in the given language (ISO 639-1 code).
 * Uses the globally configured evaluator model for language detection.
 * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').
 */
declare function respondsInLanguage(code: string): Expectation;
/** Assert the response contains all given strings. */
declare function contains(strings: string[]): Expectation;
/** Assert the response does not contain any of the given strings. */
declare function notContains(strings: string[]): Expectation;
|
|
70
|
+
|
|
71
|
+
/** Builds a `HumanMessage` from plain text content. */
declare function human(content: string): HumanMessage;
/**
 * Builds an `AiMessage` from text content and an optional list of tool calls.
 * NOTE(review): `toolCalls` is presumably a list of tool names — confirm against the implementation.
 */
declare function ai(content: string, toolCalls?: string[]): AiMessage;
/** Builds a `ToolMessage` carrying `output` for the tool called `name`. */
declare function toolResult(name: string, output: string): ToolMessage;
|
|
74
|
+
/** Suite- or case-level mock tool definition, keyed by tool name in `tools` records. */
interface ToolDef {
    description: string;
    /** A plain key→description record, or a ZodObject passed through from a ToolSpec. */
    schema?: Record<string, string> | zod.ZodObject<any>;
    /**
     * Auto-stringified if not a string or function.
     * NOTE: the union `unknown | (...) => string` collapses to `unknown` for the
     * type checker, so function responses are not checked against this signature.
     */
    response: unknown | ((input: Record<string, unknown>, callCount: number) => string);
}
/** A single evaluation case: input messages plus the expectations to check. */
interface TestCase {
    /** Test name. Defaults to the last human message content if omitted. */
    name?: string;
    messages: Message[];
    systemPrompt?: string;
    /** Override suite-level tools for this case. */
    tools?: Record<string, ToolDef>;
    expect: Expectation[];
}
/** Custom target: receives the prompt, messages and mock tools, and resolves to the produced messages. */
type TargetFn = (inputs: {
    systemPrompt?: string;
    messages: Message[];
    tools: MockToolDef[];
}) => Promise<{
    messages: BaseMessage[];
}>;
/** Configuration for one suite passed to `defineSuite`. */
interface SuiteConfig {
    /** Custom target function, or model string override. Auto-created from global config if omitted. */
    target?: TargetFn | string;
    /** Factory that creates a fresh Agent per test case. Overrides global createTarget. */
    createTarget?: CreateTargetFn;
    /** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */
    systemPrompt?: string;
    tools?: Record<string, ToolDef>;
    cases: TestCase[];
}
/**
 * Converts a `ToolSpec[]` (from a real tool provider) into the
 * `Record<string, ToolDef>` that `defineSuite` expects.
 *
 * `responses` maps tool names to canned mock responses. Tools without an
 * entry in `responses` default to `''`.
 */
declare function fromToolSpecs(specs: ToolSpec[], responses?: Record<string, ToolDef['response']>): Record<string, ToolDef>;
/** Defines an evaluation suite called `name` from the given configuration. */
declare function defineSuite(name: string, config: SuiteConfig): void;
|
|
116
|
+
|
|
117
|
+
export { type CreateTargetFn, type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, fromToolSpecs, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { T as ToolDefinition, A as Agent, L as LangchainModelConfig, M as Message, d as AiMessage, a as ToolSpec, H as HumanMessage, f as ToolMessage } from '../model-resolver-U0J9x1a6.js';
|
|
2
|
+
import * as zod from 'zod';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { BaseMessage } from '@langchain/core/messages';
|
|
5
|
+
|
|
6
|
+
/** Factory that creates a fresh Agent per test case. Receives extra suite-level tools as ToolDefinition[]. */
type CreateTargetFn = (extraTools: ToolDefinition[]) => Agent | Promise<Agent>;
/** Global evaluation settings passed to `configureEvals`. */
interface EvalConfig {
    /** Required for model-based target and LLM evaluators (respondsInLanguage, llmJudge). */
    modelConfig: LangchainModelConfig;
    /**
     * Required for model-based target. Also used as fallback for evaluatorModel.
     * NOTE(review): `evaluatorModel` is declared as required below, so for typed
     * callers the fallback never applies — confirm which of the two is intended.
     */
    model?: string;
    /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). */
    evaluatorModel: string;
    /** System prompt for model-based target. Ignored when createTarget is used. Can be overridden per-suite or per-case. */
    systemPrompt?: string;
    /** Factory that creates a fresh Agent per test case. When set, this is the default target. */
    createTarget?: CreateTargetFn;
}
/** Registers the global evaluation configuration. */
declare function configureEvals(config: EvalConfig): void;
|
|
21
|
+
|
|
22
|
+
/** A mock tool handed to a `TargetFn`: name, description, input schema and canned response. */
interface MockToolDef {
    name: string;
    description: string;
    schema: z.ZodObject<any> | Record<string, unknown>;
    /**
     * Canned response the mock tool returns.
     * Can be a static string, or a function that receives the tool input and returns a response.
     * A function also receives the invocation count as its second argument,
     * to support scenarios like "first call fails, second call succeeds".
     */
    response: string | ((input: Record<string, unknown>, callCount: number) => string);
}
|
|
34
|
+
|
|
35
|
+
/** Async evaluator invoked with the run's outputs and the expectation's reference outputs. */
type EvaluatorFn = (args: {
    outputs: Record<string, any>;
    referenceOutputs: Record<string, any>;
}) => Promise<any>;
/** An evaluator paired with the reference outputs it should be called with. */
interface ResolvedExpectation {
    evaluator: EvaluatorFn;
    referenceOutputs: Record<string, unknown>;
}
/** A factory that receives test context and returns an evaluator + its referenceOutputs. */
type Expectation = (ctx: {
    message: string;
}) => ResolvedExpectation;
/**
 * Expect the agent to call tools in order (superset trajectory match).
 * Empty `[]` means the agent should answer directly without calling any tools.
 */
declare function toolsCalled(tools: string[]): Expectation;
/**
 * Run an LLM-as-judge evaluator on the trajectory.
 * Requires `toolsCalled` in the same expect array.
 * Uses the globally configured evaluator model.
 */
declare function llmJudge(): Expectation;
/** Assert the agent made zero tool calls. */
declare function noTools(): Expectation;
/**
 * Assert the response is in the given language (ISO 639-1 code).
 * Uses the globally configured evaluator model for language detection.
 * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').
 */
declare function respondsInLanguage(code: string): Expectation;
/** Assert the response contains all given strings. */
declare function contains(strings: string[]): Expectation;
/** Assert the response does not contain any of the given strings. */
declare function notContains(strings: string[]): Expectation;
|
|
70
|
+
|
|
71
|
+
/** Builds a `HumanMessage` from plain text content. */
declare function human(content: string): HumanMessage;
/**
 * Builds an `AiMessage` from text content and an optional list of tool calls.
 * NOTE(review): `toolCalls` is presumably a list of tool names — confirm against the implementation.
 */
declare function ai(content: string, toolCalls?: string[]): AiMessage;
/** Builds a `ToolMessage` carrying `output` for the tool called `name`. */
declare function toolResult(name: string, output: string): ToolMessage;
|
|
74
|
+
/** Suite- or case-level mock tool definition, keyed by tool name in `tools` records. */
interface ToolDef {
    description: string;
    /** A plain key→description record, or a ZodObject passed through from a ToolSpec. */
    schema?: Record<string, string> | zod.ZodObject<any>;
    /**
     * Auto-stringified if not a string or function.
     * NOTE: the union `unknown | (...) => string` collapses to `unknown` for the
     * type checker, so function responses are not checked against this signature.
     */
    response: unknown | ((input: Record<string, unknown>, callCount: number) => string);
}
/** A single evaluation case: input messages plus the expectations to check. */
interface TestCase {
    /** Test name. Defaults to the last human message content if omitted. */
    name?: string;
    messages: Message[];
    systemPrompt?: string;
    /** Override suite-level tools for this case. */
    tools?: Record<string, ToolDef>;
    expect: Expectation[];
}
/** Custom target: receives the prompt, messages and mock tools, and resolves to the produced messages. */
type TargetFn = (inputs: {
    systemPrompt?: string;
    messages: Message[];
    tools: MockToolDef[];
}) => Promise<{
    messages: BaseMessage[];
}>;
/** Configuration for one suite passed to `defineSuite`. */
interface SuiteConfig {
    /** Custom target function, or model string override. Auto-created from global config if omitted. */
    target?: TargetFn | string;
    /** Factory that creates a fresh Agent per test case. Overrides global createTarget. */
    createTarget?: CreateTargetFn;
    /** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */
    systemPrompt?: string;
    tools?: Record<string, ToolDef>;
    cases: TestCase[];
}
/**
 * Converts a `ToolSpec[]` (from a real tool provider) into the
 * `Record<string, ToolDef>` that `defineSuite` expects.
 *
 * `responses` maps tool names to canned mock responses. Tools without an
 * entry in `responses` default to `''`.
 */
declare function fromToolSpecs(specs: ToolSpec[], responses?: Record<string, ToolDef['response']>): Record<string, ToolDef>;
/** Defines an evaluation suite called `name` from the given configuration. */
declare function defineSuite(name: string, config: SuiteConfig): void;
|
|
116
|
+
|
|
117
|
+
export { type CreateTargetFn, type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, fromToolSpecs, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };
|