@sanity/ailf 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/models.ts +15 -3
- package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
- package/dist/_vendor/ailf-core/config-helpers.js +22 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
- package/dist/adapters/task-sources/index.d.ts +2 -2
- package/dist/adapters/task-sources/index.js +2 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
- package/dist/adapters/task-sources/task-file-loader.js +2 -2
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +73 -41
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/steps/fetch-docs-step.js +2 -3
- package/dist/orchestration/steps/generate-configs-step.js +28 -12
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
- package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
- package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/generate-configs.js +1 -1
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
- package/dist/pipeline/mirror-repo-tasks.js +9 -9
- package/dist/pipeline/plan.js +1 -1
- package/package.json +11 -3
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP server task compilation — core compiler logic.
|
|
3
|
+
*
|
|
4
|
+
* Produces Promptfoo configuration from MCP server task definitions:
|
|
5
|
+
* 1. A provider config pointing to the MCP server
|
|
6
|
+
* 2. Test cases with tool-call assertions
|
|
7
|
+
* 3. Appropriate prompts for the evaluation
|
|
8
|
+
*/
|
|
9
|
+
import { buildMCPAssertions } from "./assertions.js";
|
|
10
|
+
import { buildMCPProvider } from "./provider-config.js";
|
|
11
|
+
import { validateMCPTask } from "./validation.js";
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Public API
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/**
 * Compile a single MCP server task definition into Promptfoo configuration.
 *
 * The result bundles everything the MCP mode handler needs for one task:
 *   - `providers`: built by `buildMCPProvider` from the task and the supplied models
 *   - `tests`:     built by `buildMCPTestCases` (test cases with their assertions)
 *   - `prompts`:   built by `buildMCPPrompts`
 *   - `warnings`:  every non-fatal problem collected along the way
 *
 * Validation problems never abort compilation: each error reported by
 * `validateMCPTask` is turned into a warning string and compilation continues.
 *
 * @param task - MCP server task definition to compile.
 * @param options - Optional compile options; `options.models` defaults to an
 *   empty list, and the whole object is forwarded to test-case assembly.
 * @returns `{ providers, tests, prompts, warnings }`.
 */
export function compileMCPTask(task, options) {
    // Seed the warning list with validation findings so they appear first,
    // ahead of anything the provider/test builders append later.
    const warnings = validateMCPTask(task).map(({ field, message }) => `MCP task "${task.id}": ${field} — ${message}`);
    // One LLM provider per model, each carrying the MCP server config.
    const registryModels = options?.models ?? [];
    const providers = buildMCPProvider(task, registryModels, warnings);
    const prompts = buildMCPPrompts(task);
    const tests = buildMCPTestCases(task, options, warnings);
    return { providers, tests, prompts, warnings };
}
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Prompt assembly
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
/**
 * Build the prompt list for an MCP task. MCP mode always yields exactly one
 * prompt whose text is the first non-nullish value among:
 * `task.prompt.text`, `task.prompt.vars.task`, `task.description`, and finally
 * a generic "Test MCP server: <title>" fallback.
 *
 * Only null/undefined fall through; an empty string is kept as the prompt.
 *
 * @param task - MCP server task definition.
 * @returns A one-element array of `{ id, label, raw }`.
 */
function buildMCPPrompts(task) {
    const explicitText = task.prompt?.text ?? task.prompt?.vars?.task;
    const promptText = explicitText ?? task.description ?? `Test MCP server: ${task.title}`;
    const prompt = {
        id: "mcp-test",
        label: `MCP: ${task.title}`,
        // Coerce so a non-string value still produces a string prompt body.
        raw: String(promptText),
    };
    return [prompt];
}
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// Test case assembly
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
/**
 * Build the Promptfoo test cases for an MCP task.
 *
 * Always emits one primary test case. When `task.multiTurn.turns` is a
 * non-empty list, a second "[multi-turn]" case is emitted that carries the
 * turns in the `__multiTurn` var. Both cases share the same compiled
 * assertions; the `assert` key is omitted entirely when there are none.
 *
 * @param task - MCP server task definition.
 * @param options - Optional compile options; only `graderProvider` is read here.
 * @param warnings - Mutable warning list; assertion-compilation warnings are appended.
 * @returns Array of one or two test-case objects.
 */
function buildMCPTestCases(task, options, warnings) {
    // Compile the task's assertions (if any) into Promptfoo assertions.
    const assertions = [];
    if (task.assertions) {
        const compiled = buildMCPAssertions(task.assertions, {
            capabilities: task.capabilities ?? [],
            graderProvider: options?.graderProvider,
            taskId: task.id,
        });
        assertions.push(...compiled.assertions);
        warnings.push(...compiled.warnings);
    }
    const assertField = assertions.length > 0 ? { assert: assertions } : {};
    // `task` gets a computed default first; explicit prompt vars are spread
    // afterwards so they win over the default.
    const promptVars = task.prompt?.vars ?? {};
    const vars = {
        task: promptVars.task ?? task.description ?? `Test: ${task.title}`,
        ...promptVars,
    };
    const baseDescription = `${task.id} — ${task.title}`;
    const tests = [{ description: baseDescription, vars, ...assertField }];
    const turns = task.multiTurn?.turns;
    if (turns && turns.length > 0) {
        tests.push({
            description: `${baseDescription} [multi-turn]`,
            vars: { ...vars, __multiTurn: turns },
            ...assertField,
        });
    }
    return tests;
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP Server mode handler — directory barrel.
|
|
3
|
+
*
|
|
4
|
+
* MCPServerModeHandler — compilation rules for `mcp-server` evaluation mode.
|
|
5
|
+
*
|
|
6
|
+
* This is the first non-literacy mode handler, proving the compiler
|
|
7
|
+
* architecture works end-to-end. It translates MCP server task definitions
|
|
8
|
+
* into Promptfoo configuration with:
|
|
9
|
+
*
|
|
10
|
+
* - An MCP provider that wraps the server under test
|
|
11
|
+
* - Tool-call assertions compiled to Promptfoo `javascript` assertions
|
|
12
|
+
* - Server lifecycle management via Promptfoo provider hooks
|
|
13
|
+
* - Multi-turn conversation support via Promptfoo's `steps` syntax
|
|
14
|
+
*
|
|
15
|
+
* @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
|
|
16
|
+
* @see packages/core/src/types/eval-mode-config.ts — MCPServerModeConfig
|
|
17
|
+
* @see packages/core/src/types/generalized-task.ts — MCPServerTaskDefinition
|
|
18
|
+
*/
|
|
19
|
+
import type { ModeHandler } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
20
|
+
/** ModeHandler-conformant export for the mcp-server evaluation mode. */
|
|
21
|
+
export declare const handler: ModeHandler;
|
|
22
|
+
export type { MCPAssertionContext, MCPCompileOptions, MCPCompileResult, MCPValidationError, } from "./types.js";
|
|
23
|
+
export { buildMCPAssertions } from "./assertions.js";
|
|
24
|
+
export { compileMCPTask } from "./compiler.js";
|
|
25
|
+
export { validateMCPTask } from "./validation.js";
|
|
26
|
+
export { MCP_PROMPT_TEMPLATES } from "./prompts.js";
|
|
27
|
+
export { DEFAULT_MAX_TOOL_ROUNDS, MCP_PROVIDER_PATH, } from "./provider-config.js";
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP Server mode handler — directory barrel.
|
|
3
|
+
*
|
|
4
|
+
* MCPServerModeHandler — compilation rules for `mcp-server` evaluation mode.
|
|
5
|
+
*
|
|
6
|
+
* This is the first non-literacy mode handler, proving the compiler
|
|
7
|
+
* architecture works end-to-end. It translates MCP server task definitions
|
|
8
|
+
* into Promptfoo configuration with:
|
|
9
|
+
*
|
|
10
|
+
* - An MCP provider that wraps the server under test
|
|
11
|
+
* - Tool-call assertions compiled to Promptfoo `javascript` assertions
|
|
12
|
+
* - Server lifecycle management via Promptfoo provider hooks
|
|
13
|
+
* - Multi-turn conversation support via Promptfoo's `steps` syntax
|
|
14
|
+
*
|
|
15
|
+
* @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
|
|
16
|
+
* @see packages/core/src/types/eval-mode-config.ts — MCPServerModeConfig
|
|
17
|
+
* @see packages/core/src/types/generalized-task.ts — MCPServerTaskDefinition
|
|
18
|
+
*/
|
|
19
|
+
import { compileMCPTask } from "./compiler.js";
|
|
20
|
+
import { MCP_PROMPT_TEMPLATES } from "./prompts.js";
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// ModeHandler adapter
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * ModeHandler-conformant export for the mcp-server evaluation mode.
 *
 * - `getPrompts()` returns the handler-owned prompt templates.
 * - `compileTask(task, ctx)` checks that the task is an `mcp-server` task and
 *   delegates to `compileMCPTask`, forwarding `ctx.graderProvider` and
 *   `ctx.models`. It returns `{ providers, tests, prompts, warnings }`.
 *
 * `compileTask` throws an `Error` naming the received mode whenever the task
 * is not an `mcp-server` task — including when the task is null, undefined or
 * not an object at all.
 */
export const handler = {
    getPrompts() {
        return MCP_PROMPT_TEMPLATES;
    },
    compileTask(task, ctx) {
        // Optional chaining instead of `"mode" in task`: the `in` operator
        // raises a bare TypeError for null/primitive input, which would hide
        // the descriptive error below.
        const mode = task?.mode;
        if (mode !== "mcp-server") {
            throw new Error(`MCP server handler received task with mode "${mode ?? "undefined"}" — expected "mcp-server"`);
        }
        const result = compileMCPTask(task, {
            graderProvider: ctx.graderProvider,
            models: ctx.models,
        });
        return {
            providers: result.providers,
            tests: result.tests,
            prompts: result.prompts,
            warnings: result.warnings,
        };
    },
};
|
|
45
|
+
// Assertions
|
|
46
|
+
export { buildMCPAssertions } from "./assertions.js";
|
|
47
|
+
// Compilation
|
|
48
|
+
export { compileMCPTask } from "./compiler.js";
|
|
49
|
+
// Validation
|
|
50
|
+
export { validateMCPTask } from "./validation.js";
|
|
51
|
+
// Prompts
|
|
52
|
+
export { MCP_PROMPT_TEMPLATES } from "./prompts.js";
|
|
53
|
+
// Provider config
|
|
54
|
+
export { DEFAULT_MAX_TOOL_ROUNDS, MCP_PROVIDER_PATH, } from "./provider-config.js";
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical MCP server prompt templates.
|
|
3
|
+
*
|
|
4
|
+
* Handler-owned prompts for MCP server evaluations. Instructs the model to
|
|
5
|
+
* interact with MCP tools rather than writing standalone code.
|
|
6
|
+
*/
|
|
7
|
+
import type { PromptTemplate } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
export declare const MCP_PROMPT_TEMPLATES: Record<string, PromptTemplate>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical MCP server prompt templates.
|
|
3
|
+
*
|
|
4
|
+
* Handler-owned prompts for MCP server evaluations. Instructs the model to
|
|
5
|
+
* interact with MCP tools rather than writing standalone code.
|
|
6
|
+
*/
|
|
7
|
+
export const MCP_PROMPT_TEMPLATES = {
|
|
8
|
+
"mcp-server": {
|
|
9
|
+
id: "mcp-server",
|
|
10
|
+
label: "MCP Server Tool Use",
|
|
11
|
+
template: `You are an AI assistant with access to an MCP (Model Context Protocol) server that provides tools for interacting with a Sanity content backend.
|
|
12
|
+
|
|
13
|
+
## Task
|
|
14
|
+
{{task}}
|
|
15
|
+
|
|
16
|
+
## Instructions
|
|
17
|
+
|
|
18
|
+
1. Use the available MCP tools to complete the task
|
|
19
|
+
2. Call tools with the correct parameters as described in their schemas
|
|
20
|
+
3. Interpret tool responses and use the results to accomplish the goal
|
|
21
|
+
4. If a tool returns an error, explain the issue clearly
|
|
22
|
+
5. Prefer using specific tools over broad queries when possible
|
|
23
|
+
|
|
24
|
+
Complete the task using the MCP tools provided:
|
|
25
|
+
`,
|
|
26
|
+
variables: ["task"],
|
|
27
|
+
},
|
|
28
|
+
};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP server provider assembly — builds Promptfoo provider configs.
|
|
3
|
+
*/
|
|
4
|
+
import type { MCPServerTaskDefinition, ModeProviderEntry } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
5
|
+
import type { PromptfooProvider } from "../../promptfoo-compiler.js";
|
|
6
|
+
/** Default max tool rounds for MCP multi-turn execution */
|
|
7
|
+
export declare const DEFAULT_MAX_TOOL_ROUNDS = 5;
|
|
8
|
+
/** Provider path relative to eval package dist */
|
|
9
|
+
export declare const MCP_PROVIDER_PATH = "file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js";
|
|
10
|
+
/**
|
|
11
|
+
* Build custom MCP tool provider configs — one per model.
|
|
12
|
+
*
|
|
13
|
+
* Each provider uses the custom mcp-tool-provider.ts which implements a
|
|
14
|
+
* multi-turn tool execution loop. The LLM receives a prompt, discovers
|
|
15
|
+
* MCP tools, calls them, gets results, and continues until it produces
|
|
16
|
+
* a final text answer or exhausts maxToolRounds.
|
|
17
|
+
*
|
|
18
|
+
* Config shape passed to the custom provider:
|
|
19
|
+
* { model, mcpServer: { url, auth, name }, mcpTools, maxToolRounds, temperature, ... }
|
|
20
|
+
*/
|
|
21
|
+
export declare function buildMCPProvider(task: MCPServerTaskDefinition, models: ModeProviderEntry[], warnings: string[]): PromptfooProvider[];
|
|
22
|
+
/**
|
|
23
|
+
* Build the MCP server connection config for the custom provider.
|
|
24
|
+
*
|
|
25
|
+
* Shape: { url?, command?, name?, auth? }
|
|
26
|
+
* The custom mcp-tool-provider.ts uses this to connect to the MCP server.
|
|
27
|
+
*/
|
|
28
|
+
export declare function buildMCPServerConfig(task: MCPServerTaskDefinition, warnings: string[]): Record<string, unknown>;
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP server provider assembly — builds Promptfoo provider configs.
|
|
3
|
+
*/
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// Constants
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
/** Default max tool rounds for MCP multi-turn execution */
|
|
8
|
+
export const DEFAULT_MAX_TOOL_ROUNDS = 5;
|
|
9
|
+
/** Provider path relative to eval package dist */
|
|
10
|
+
export const MCP_PROVIDER_PATH = "file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js";
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Provider assembly
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/**
 * Build custom MCP tool provider configs — one per model.
 *
 * Every entry points at the custom MCP tool provider (`MCP_PROVIDER_PATH`) and
 * carries a config of the shape:
 *   { model, mcpServer, mcpTools?, maxToolRounds, ...modelConfig }
 * `mcpTools` is only present when the task declares capabilities, and
 * `maxToolRounds` falls back to `DEFAULT_MAX_TOOL_ROUNDS`.
 *
 * Model selection, in priority order:
 *   1. `task.models` (non-empty) — task-level override, label equals the model id
 *   2. `models` — registry entries, using each entry's id, label and config
 *   3. neither — a warning is recorded and a single default Claude Sonnet 4
 *      provider is returned
 *
 * @param task - MCP server task definition.
 * @param models - Registry model entries (`{ id, label, config }`).
 * @param warnings - Mutable warning list; may be appended to.
 * @returns Array of provider entries `{ id, label, config }`.
 */
export function buildMCPProvider(task, models, warnings) {
    // Server config is resolved once and shared by every provider entry.
    const mcpServer = buildMCPServerConfig(task, warnings);
    const toolFilter = task.capabilities ? { mcpTools: task.capabilities } : {};
    const maxToolRounds = task.maxToolRounds ?? DEFAULT_MAX_TOOL_ROUNDS;
    const toProvider = (modelId, label, modelConfig) => ({
        id: MCP_PROVIDER_PATH,
        label: `${label} + MCP`,
        config: {
            model: modelId,
            mcpServer,
            ...toolFilter,
            maxToolRounds,
            // Spread last: per-model config values take precedence.
            ...(modelConfig ?? {}),
        },
    });
    // Task-level model override takes precedence over registry models.
    const taskModels = task.models;
    if (taskModels && taskModels.length > 0) {
        return taskModels.map((modelId) => toProvider(modelId, modelId));
    }
    if (models.length > 0) {
        return models.map(({ id, label, config }) => toProvider(id, label, config));
    }
    // Nothing configured anywhere: warn and fall back to a single default model.
    warnings.push(`MCP task "${task.id}": no models available. Add "mcp-server" to a ` +
        "model's modes array in config/models.ts, or set models on the task.");
    return [toProvider("anthropic:messages:claude-sonnet-4-20250514", "Claude Sonnet 4")];
}
|
|
59
|
+
/**
 * Derive a bearer-auth config from the first env entry whose key looks like a
 * credential (matches /token|auth|key/i).
 *
 * The entry's value must name an environment variable, either bare (`FOO`) or
 * wrapped as `$env(FOO)`. On success the token is emitted as the template
 * `{{env.FOO}}`. When the value is not a string or not a valid identifier, a
 * warning is recorded and no auth is produced. The warning names the env key
 * but never echoes the value, since it may be a literal secret.
 *
 * @returns `{ type: "bearer", token }`, or `undefined` when no auth can be derived.
 */
function deriveEnvBearerAuth(taskId, env, warnings) {
    if (!env) {
        return undefined;
    }
    const tokenKey = Object.keys(env).find((k) => /token|auth|key/i.test(k));
    if (!tokenKey) {
        return undefined;
    }
    const val = env[tokenKey];
    // Non-string values (numbers, null, nested objects) cannot name a variable.
    const envVar = typeof val !== "string"
        ? undefined
        : val.startsWith("$env(") && val.endsWith(")")
            ? val.slice(5, -1)
            : val;
    if (!envVar || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(envVar)) {
        warnings.push(`MCP task "${taskId}": env var name configured in serverConfig.env.${tokenKey} ` +
            "is not a valid identifier — skipping auth config");
        return undefined;
    }
    return { type: "bearer", token: `{{env.${envVar}}}` };
}
/**
 * Build the MCP server connection config for the custom provider.
 *
 * Shape: { name, command? | url?, auth? }
 *   - `name` is always the task id.
 *   - `command` is copied for the "stdio" transport; every other transport
 *     gets `url`.
 *   - `auth` is `serverConfig.auth` when set; otherwise it is derived from a
 *     credential-looking entry in `serverConfig.env`, if one exists.
 *
 * When the task has no `serverConfig`, a warning is recorded and a
 * placeholder `{ name }` is returned.
 *
 * @param task - MCP server task definition.
 * @param warnings - Mutable warning list; may be appended to.
 * @returns The connection config object.
 */
export function buildMCPServerConfig(task, warnings) {
    const config = task.serverConfig;
    if (!config) {
        warnings.push(`MCP task "${task.id}": no serverConfig — using placeholder. ` +
            "Set serverConfig.command or serverConfig.url to point to your MCP server.");
        return { name: task.id };
    }
    const serverConfig = { name: task.id };
    if (config.transport === "stdio") {
        serverConfig.command = config.command;
    }
    else {
        serverConfig.url = config.url;
    }
    // Explicit auth wins; env-derived auth is only a fallback.
    const auth = config.auth || deriveEnvBearerAuth(task.id, config.env, warnings);
    if (auth) {
        serverConfig.auth = auth;
    }
    return serverConfig;
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared types for the MCP server mode handler.
|
|
3
|
+
*/
|
|
4
|
+
import type { ModeProviderEntry } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
5
|
+
import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../../promptfoo-compiler.js";
|
|
6
|
+
/** Options for compiling an MCP server task */
|
|
7
|
+
export interface MCPCompileOptions {
|
|
8
|
+
/** Grader provider for LLM-graded assertions */
|
|
9
|
+
graderProvider?: string;
|
|
10
|
+
/** Model providers to evaluate with (from registry, filtered by mcp-server mode) */
|
|
11
|
+
models?: ModeProviderEntry[];
|
|
12
|
+
}
|
|
13
|
+
/** Result of compiling a single MCP task */
|
|
14
|
+
export interface MCPCompileResult {
|
|
15
|
+
/** Promptfoo provider config for the MCP server */
|
|
16
|
+
providers: PromptfooProvider[];
|
|
17
|
+
/** Compiled test cases */
|
|
18
|
+
tests: PromptfooTestCase[];
|
|
19
|
+
/** Prompts for MCP evaluation */
|
|
20
|
+
prompts: PromptfooPrompt[];
|
|
21
|
+
/** Warnings generated during compilation */
|
|
22
|
+
warnings: string[];
|
|
23
|
+
}
|
|
24
|
+
/** Validation errors for MCP task definitions */
|
|
25
|
+
export interface MCPValidationError {
|
|
26
|
+
field: string;
|
|
27
|
+
message: string;
|
|
28
|
+
}
|
|
29
|
+
/** Context for building MCP assertions */
|
|
30
|
+
export interface MCPAssertionContext {
|
|
31
|
+
/** Task ID (for error messages) */
|
|
32
|
+
taskId: string;
|
|
33
|
+
/** Expected server capabilities */
|
|
34
|
+
capabilities: string[];
|
|
35
|
+
/** Grader provider for LLM-graded assertions */
|
|
36
|
+
graderProvider?: string;
|
|
37
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validation for MCP server task definitions.
|
|
3
|
+
*/
|
|
4
|
+
import type { MCPServerTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
5
|
+
import type { MCPValidationError } from "./types.js";
|
|
6
|
+
/**
|
|
7
|
+
* Validate that an MCP task definition has all required fields.
|
|
8
|
+
*/
|
|
9
|
+
export declare function validateMCPTask(task: MCPServerTaskDefinition): MCPValidationError[];
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validation for MCP server task definitions.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Validate that an MCP task definition has all required fields.
 *
 * Checks performed:
 *   - `id` and `title` are present
 *   - when `serverConfig` is given: "stdio" transport needs `command`; every
 *     other transport needs `url`. This mirrors how the server config is
 *     built: only "stdio" uses the command, anything else is URL-based.
 *   - every `tool-called` assertion has a truthy `value` (the tool name)
 *
 * Null/undefined entries in `assertions` are ignored rather than crashing.
 *
 * @param task - MCP server task definition.
 * @returns Array of `{ field, message }` errors; empty when the task is valid.
 */
export function validateMCPTask(task) {
    const errors = [];
    if (!task.id) {
        errors.push({ field: "id", message: "Task ID is required" });
    }
    if (!task.title) {
        errors.push({ field: "title", message: "Task title is required" });
    }
    if (task.serverConfig) {
        const { transport, command, url } = task.serverConfig;
        if (transport === "stdio") {
            if (!command) {
                errors.push({
                    field: "serverConfig.command",
                    message: "Server command is required for stdio transport (e.g., 'node dist/server.js')",
                });
            }
        }
        else if (!url) {
            // Missing or unrecognised transports are also URL-based downstream,
            // so they must not slip through without a URL.
            const isKnownUrlTransport = transport === "sse" || transport === "streamable-http";
            errors.push({
                field: "serverConfig.url",
                message: isKnownUrlTransport
                    ? `Server URL is required for ${transport} transport`
                    : 'Server URL is required unless transport is "stdio"',
            });
        }
    }
    // Assertions should reference MCP-compatible types
    if (task.assertions) {
        for (const assertion of task.assertions) {
            if (assertion?.type === "tool-called" && !assertion.value) {
                errors.push({
                    field: "assertions",
                    message: 'tool-called assertion requires a "value" specifying the tool name',
                });
            }
        }
    }
    return errors;
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCPToolProvider — Custom Promptfoo provider for MCP tool-use evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates the MCP evaluation flow:
|
|
5
|
+
* 1. Connects to the MCP server and discovers available tools
|
|
6
|
+
* 2. Selects the appropriate LLM backend based on model ID prefix
|
|
7
|
+
* 3. Delegates the multi-turn tool loop to the backend
|
|
8
|
+
* 4. Formats the result for Promptfoo (including tool call summary)
|
|
9
|
+
*
|
|
10
|
+
* Promptfoo config usage:
|
|
11
|
+
*
|
|
12
|
+
* providers:
|
|
13
|
+
* - id: file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js
|
|
14
|
+
* label: "Claude Opus 4.6 + MCP"
|
|
15
|
+
* config:
|
|
16
|
+
* model: anthropic:messages:claude-opus-4-6
|
|
17
|
+
* maxToolRounds: 5
|
|
18
|
+
* temperature: 0.2
|
|
19
|
+
* max_tokens: 4096
|
|
20
|
+
* mcpServer:
|
|
21
|
+
* url: https://mcp.sanity.io
|
|
22
|
+
* auth: { type: bearer, token: "{{env.SANITY_API_TOKEN}}" }
|
|
23
|
+
* name: mcp-live-query-documents
|
|
24
|
+
* mcpTools: [query_documents, get_schema]
|
|
25
|
+
*/
|
|
26
|
+
import type { CallApiContextParams, ProviderOptions, ProviderResponse } from "./types.js";
|
|
27
|
+
/**
 * Provider class for MCP tool-use evaluation (this module's default export).
 */
export default class MCPToolProvider {
    /** Provider configuration, held as a free-form key/value record. */
    config: Record<string, unknown>;
    /** Identifier string for this provider instance. */
    private providerId;
    /**
     * @param options - Optional provider options.
     */
    constructor(options?: ProviderOptions);
    /** @returns This provider's identifier string. */
    id(): string;
    /**
     * Run one provider call for a prompt.
     *
     * @param prompt - The prompt text.
     * @param _context - Optional call context.
     *   NOTE(review): the underscore prefix suggests it is unused — confirm
     *   against the implementation.
     * @returns A promise for the provider response.
     */
    callApi(prompt: string, _context?: CallApiContextParams): Promise<ProviderResponse>;
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCPToolProvider — Custom Promptfoo provider for MCP tool-use evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates the MCP evaluation flow:
|
|
5
|
+
* 1. Connects to the MCP server and discovers available tools
|
|
6
|
+
* 2. Selects the appropriate LLM backend based on model ID prefix
|
|
7
|
+
* 3. Delegates the multi-turn tool loop to the backend
|
|
8
|
+
* 4. Formats the result for Promptfoo (including tool call summary)
|
|
9
|
+
*
|
|
10
|
+
* Promptfoo config usage:
|
|
11
|
+
*
|
|
12
|
+
* providers:
|
|
13
|
+
* - id: file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js
|
|
14
|
+
* label: "Claude Opus 4.6 + MCP"
|
|
15
|
+
* config:
|
|
16
|
+
* model: anthropic:messages:claude-opus-4-6
|
|
17
|
+
* maxToolRounds: 5
|
|
18
|
+
* temperature: 0.2
|
|
19
|
+
* max_tokens: 4096
|
|
20
|
+
* mcpServer:
|
|
21
|
+
* url: https://mcp.sanity.io
|
|
22
|
+
* auth: { type: bearer, token: "{{env.SANITY_API_TOKEN}}" }
|
|
23
|
+
* name: mcp-live-query-documents
|
|
24
|
+
* mcpTools: [query_documents, get_schema]
|
|
25
|
+
*/
|
|
26
|
+
import { config as loadDotenv } from "dotenv";
|
|
27
|
+
import { connectMCP } from "./mcp-connection.js";
|
|
28
|
+
import { runAnthropicToolLoop } from "./tool-loop-anthropic.js";
|
|
29
|
+
import { runOpenAIToolLoop } from "./tool-loop-openai.js";
|
|
30
|
+
// Load environment variables from a `.env` file as a module-load side effect.
// The path is resolved relative to this module's URL: five directory levels
// above the directory containing this file.
// NOTE(review): `override: true` presumably lets values from `.env` replace
// variables already set in process.env — confirm that is intended (e.g. in CI).
// NOTE(review): `URL#pathname` is percent-encoded, so a checkout path that
// contains spaces or non-ASCII characters may not resolve to the real file;
// `fileURLToPath` from `node:url` is the usual conversion — verify before
// changing.
loadDotenv({
    override: true,
    path: new URL("../../../../../.env", import.meta.url).pathname,
});
// ---------------------------------------------------------------------------
// Backend registry — maps model ID prefixes to tool loop implementations
// ---------------------------------------------------------------------------
// Keys are the first `:`-separated segment of a model ID (as extracted by
// resolveBackend); values are the imported tool-loop functions.
const BACKENDS = {
    anthropic: runAnthropicToolLoop,
    openai: runOpenAIToolLoop,
};
|
|
41
|
+
/**
 * Resolve the LLM backend and the API model name from a model ID.
 *
 * Model IDs follow the pattern `provider:type:model-name` (e.g.,
 * `anthropic:messages:claude-opus-4-6`). The first segment determines
 * which backend handles the tool loop.
 *
 * @param {string} modelId - Full model ID, e.g. `anthropic:messages:claude-opus-4-6`.
 * @returns {{ backend: Function, modelName: string }} The tool-loop function
 *   registered for the prefix, and the model name to send to the API:
 *   everything after the second `:` when the ID has three or more segments,
 *   otherwise the last segment.
 * @throws {Error} When the prefix is not a registered backend.
 */
function resolveBackend(modelId) {
    const parts = modelId.split(":");
    const prefix = parts[0];
    // Own-property check: a plain `BACKENDS[prefix]` lookup also matches
    // inherited members such as "constructor" or "toString", which would be
    // returned as if they were backends instead of raising the error below.
    const backend = Object.prototype.hasOwnProperty.call(BACKENDS, prefix)
        ? BACKENDS[prefix]
        : undefined;
    if (!backend) {
        const supported = Object.keys(BACKENDS).join(", ");
        throw new Error(`No backend for model "${modelId}". Supported prefixes: ${supported}`);
    }
    // Extract the model name for the API (e.g., "claude-opus-4-6" from "anthropic:messages:claude-opus-4-6")
    const modelName = parts.length > 2 ? parts.slice(2).join(":") : parts[parts.length - 1];
    return { backend, modelName };
}
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// Helpers
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
/**
 * Append a machine-readable tool call summary for assertion detection.
 *
 * The summary is an HTML comment of the form
 * `<!-- MCP_TOOLS_CALLED: ["tool_a","tool_b"] -->`, separated from the text
 * by a blank line, listing the called tool names in call order.
 *
 * @param {string | null | undefined} text - The model's final text output.
 * @param {Array<{name: string}> | null | undefined} log - Tool calls made.
 * @returns {string | null | undefined} `text` unchanged when no tools were
 *   called; otherwise the text followed by the summary comment.
 */
function appendToolSummary(text, log) {
    // A missing or empty log means there is nothing to summarize.
    if (!log?.length) {
        return text;
    }
    const names = JSON.stringify(log.map((tc) => tc.name));
    // Treat a missing text as empty so the output never contains the literal
    // string "undefined" or "null" ahead of the summary.
    return `${text ?? ""}\n\n<!-- MCP_TOOLS_CALLED: ${names} -->`;
}
|
|
70
|
+
/**
 * Look up the API key to use for a model prefix.
 *
 * An explicit `config.apiKey` wins; otherwise the key is read from the
 * environment variable associated with the prefix.
 *
 * @param {string} prefix - Provider prefix of the model ID (e.g. "anthropic").
 * @param {object} config - Provider config; may carry an `apiKey` value.
 * @returns {string | undefined} The key, or `undefined` when no key is
 *   configured or the prefix has no associated environment variable.
 */
function resolveApiKey(prefix, config) {
    const { apiKey: explicitKey } = config;
    if (explicitKey) {
        return String(explicitKey);
    }
    // Pick the environment variable name registered for this prefix, if any.
    const { [prefix]: envVarName } = {
        anthropic: "ANTHROPIC_API_KEY",
        openai: "OPENAI_API_KEY",
    };
    if (!envVarName) {
        return undefined;
    }
    return process.env[envVarName];
}
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// Provider class
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
/**
 * Promptfoo provider that runs a prompt through an LLM tool loop backed by
 * the tools of an MCP server.
 */
export default class MCPToolProvider {
    /** Provider config (model, mcpServer, mcpTools, maxToolRounds, ...). */
    config;
    /** Identifier returned by `id()`. */
    providerId;
    /**
     * @param {{ id?: string, config?: object }} [options] - Provider options.
     *   `config` defaults to `{}` and `id` to `"mcp-tool-provider"`.
     */
    constructor(options = {}) {
        this.config = options.config || {};
        this.providerId = options.id || "mcp-tool-provider";
    }
    /** @returns {string} This provider's identifier. */
    id() {
        return this.providerId;
    }
    /**
     * Run one evaluation: connect to the MCP server, pick the backend for the
     * configured model, run the tool loop, and format the result.
     *
     * Configuration, API key, connection, and empty-tool-list problems are
     * reported as `{ error, output: undefined }` rather than thrown. Errors
     * raised by the backend tool loop itself are not caught here; they
     * propagate to the caller after the MCP client has been cleaned up.
     *
     * @param {string} prompt - Prompt passed to the backend tool loop.
     * @param {object} [_context] - Call context; not used by this method.
     * @returns {Promise<object>} On success: `cost` (always 0), `metadata`
     *   (toolRounds, toolCallLog, exhaustedRounds, latencyMs), `output` (the
     *   backend output with the tool summary appended), and `tokenUsage`.
     */
    async callApi(prompt, _context) {
        const mcpServerConfig = this.config.mcpServer;
        if (!mcpServerConfig) {
            return { error: "mcpServer config is required", output: undefined };
        }
        // Resolve model and backend
        const modelId = this.config.model || "anthropic:messages:claude-opus-4-6";
        let backend;
        let modelName;
        try {
            const resolved = resolveBackend(modelId);
            backend = resolved.backend;
            modelName = resolved.modelName;
        }
        catch (err) {
            return {
                error: err instanceof Error ? err.message : String(err),
                output: undefined,
            };
        }
        // Resolve API key
        const prefix = modelId.split(":")[0];
        const apiKey = resolveApiKey(prefix, this.config);
        if (!apiKey) {
            return {
                error: `API key not found for ${prefix}. Set ${prefix.toUpperCase()}_API_KEY in env or config.apiKey.`,
                output: undefined,
            };
        }
        // Connect to MCP server
        let mcpClient;
        try {
            mcpClient = await connectMCP(mcpServerConfig);
        }
        catch (err) {
            return {
                error: `Failed to connect to MCP server: ${err instanceof Error ? err.message : String(err)}`,
                output: undefined,
            };
        }
        try {
            // Restrict the tools to the configured allow-list, if one is set.
            const allTools = mcpClient.getAllTools();
            const toolFilter = this.config.mcpTools;
            const tools = toolFilter
                ? allTools.filter((t) => toolFilter.includes(t.name))
                : allTools;
            if (tools.length === 0) {
                return {
                    error: "No MCP tools available after filtering. Check mcpTools config and server capabilities.",
                    output: undefined,
                };
            }
            // Run the tool loop
            const result = await backend({
                prompt,
                tools,
                // Wrap instead of passing the method reference directly, so
                // the client keeps its `this` when the backend invokes it.
                callTool: (...args) => mcpClient.callTool(...args),
                maxToolRounds: this.config.maxToolRounds || 5,
                model: modelName,
                temperature: this.config.temperature ?? 0.2,
                maxTokens: this.config.max_tokens || 4096,
                apiKey,
            });
            return {
                cost: 0,
                metadata: {
                    toolRounds: result.toolRounds,
                    toolCallLog: result.toolCallLog,
                    exhaustedRounds: result.exhaustedRounds,
                    latencyMs: result.latencyMs,
                },
                output: appendToolSummary(result.output, result.toolCallLog),
                tokenUsage: result.tokenUsage,
            };
        }
        finally {
            // Best-effort teardown: a cleanup failure must not mask the
            // result or the error of the evaluation itself.
            await mcpClient.cleanup().catch(() => { });
        }
    }
}
|