@sanity/ailf 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/LICENSE +21 -0
  2. package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
  3. package/dist/_vendor/ailf-core/examples/index.js +66 -1
  4. package/dist/agent-harness/assertions-runtime.d.ts +49 -0
  5. package/dist/agent-harness/assertions-runtime.js +138 -0
  6. package/dist/agent-harness/provider.d.ts +58 -0
  7. package/dist/agent-harness/provider.js +104 -0
  8. package/dist/cli.js +0 -0
  9. package/dist/commands/init.js +3 -0
  10. package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
  11. package/dist/orchestration/steps/generate-configs-step.js +35 -2
  12. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +39 -25
  13. package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
  14. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +9 -0
  15. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +28 -85
  16. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
  17. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
  18. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
  19. package/package.json +25 -24
  20. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  21. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  22. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  23. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  24. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  25. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  26. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  27. package/dist/_vendor/ailf-tasks/index.js +0 -16
  28. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  29. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  30. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  31. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  32. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  33. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  34. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  35. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  36. package/dist/agent-observer/test-imports.d.ts +0 -7
  37. package/dist/agent-observer/test-imports.js +0 -185
  38. package/dist/commands/update-quality-scores.d.ts +0 -5
  39. package/dist/commands/update-quality-scores.js +0 -20
  40. package/dist/lib/agent-behavior-report.d.ts +0 -8
  41. package/dist/lib/agent-behavior-report.js +0 -185
  42. package/dist/lib/baseline.d.ts +0 -19
  43. package/dist/lib/baseline.js +0 -153
  44. package/dist/lib/calculate-scores.d.ts +0 -23
  45. package/dist/lib/calculate-scores.js +0 -42
  46. package/dist/lib/compare.d.ts +0 -18
  47. package/dist/lib/compare.js +0 -170
  48. package/dist/lib/coverage-audit.d.ts +0 -4
  49. package/dist/lib/coverage-audit.js +0 -42
  50. package/dist/lib/discovery-report.d.ts +0 -13
  51. package/dist/lib/discovery-report.js +0 -57
  52. package/dist/lib/fetch-docs.d.ts +0 -30
  53. package/dist/lib/fetch-docs.js +0 -171
  54. package/dist/lib/generate-configs.d.ts +0 -25
  55. package/dist/lib/generate-configs.js +0 -42
  56. package/dist/lib/grader-api.d.ts +0 -21
  57. package/dist/lib/grader-api.js +0 -34
  58. package/dist/lib/grader-compare.d.ts +0 -19
  59. package/dist/lib/grader-compare.js +0 -91
  60. package/dist/lib/grader-consistency.d.ts +0 -27
  61. package/dist/lib/grader-consistency.js +0 -79
  62. package/dist/lib/grader-sensitivity.d.ts +0 -19
  63. package/dist/lib/grader-sensitivity.js +0 -75
  64. package/dist/lib/grader-validate.d.ts +0 -19
  65. package/dist/lib/grader-validate.js +0 -78
  66. package/dist/lib/measure-retrieval.d.ts +0 -14
  67. package/dist/lib/measure-retrieval.js +0 -71
  68. package/dist/lib/pr-comment.d.ts +0 -16
  69. package/dist/lib/pr-comment.js +0 -28
  70. package/dist/lib/readiness-report.d.ts +0 -13
  71. package/dist/lib/readiness-report.js +0 -108
  72. package/dist/lib/webhook-server.d.ts +0 -11
  73. package/dist/lib/webhook-server.js +0 -24
  74. package/dist/lib/weekly-digest.d.ts +0 -24
  75. package/dist/lib/weekly-digest.js +0 -148
  76. package/dist/orchestration/env-bridge.d.ts +0 -21
  77. package/dist/orchestration/env-bridge.js +0 -66
  78. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  79. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  80. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  81. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  82. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  83. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  86. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  87. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  88. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  89. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  90. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  91. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  92. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  93. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  94. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  95. package/dist/pipeline/compiler/task-bridge.js +0 -92
  96. package/dist/pipeline/expand-tasks.d.ts +0 -232
  97. package/dist/pipeline/expand-tasks.js +0 -467
  98. package/dist/pipeline/generate-configs.d.ts +0 -92
  99. package/dist/pipeline/generate-configs.js +0 -445
  100. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  101. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  102. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  103. package/dist/pipeline/steps/compare-step.js +0 -90
  104. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  105. package/dist/pipeline/steps/eval-step.js +0 -347
  106. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  107. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  108. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  109. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  110. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  111. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  112. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  113. package/dist/pipeline/steps/publish-report-step.js +0 -243
  114. package/dist/pipeline/steps/report-step.d.ts +0 -13
  115. package/dist/pipeline/steps/report-step.js +0 -56
  116. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  117. package/dist/pipeline/steps/update-scores-step.js +0 -42
  118. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  119. package/dist/scripts/agent-behavior-report.js +0 -315
  120. package/dist/scripts/baseline.d.ts +0 -43
  121. package/dist/scripts/baseline.js +0 -267
  122. package/dist/scripts/calculate-scores.d.ts +0 -166
  123. package/dist/scripts/calculate-scores.js +0 -1296
  124. package/dist/scripts/compare.d.ts +0 -22
  125. package/dist/scripts/compare.js +0 -334
  126. package/dist/scripts/coverage-audit.d.ts +0 -44
  127. package/dist/scripts/coverage-audit.js +0 -209
  128. package/dist/scripts/debug-eval.d.ts +0 -19
  129. package/dist/scripts/debug-eval.js +0 -73
  130. package/dist/scripts/discovery-report.d.ts +0 -58
  131. package/dist/scripts/discovery-report.js +0 -250
  132. package/dist/scripts/fetch-docs.d.ts +0 -35
  133. package/dist/scripts/fetch-docs.js +0 -472
  134. package/dist/scripts/generate-configs.d.ts +0 -66
  135. package/dist/scripts/generate-configs.js +0 -459
  136. package/dist/scripts/grader-api.d.ts +0 -27
  137. package/dist/scripts/grader-api.js +0 -206
  138. package/dist/scripts/grader-compare.d.ts +0 -22
  139. package/dist/scripts/grader-compare.js +0 -368
  140. package/dist/scripts/grader-consistency.d.ts +0 -20
  141. package/dist/scripts/grader-consistency.js +0 -313
  142. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  143. package/dist/scripts/grader-sensitivity.js +0 -354
  144. package/dist/scripts/grader-validate.d.ts +0 -19
  145. package/dist/scripts/grader-validate.js +0 -267
  146. package/dist/scripts/measure-retrieval.d.ts +0 -10
  147. package/dist/scripts/measure-retrieval.js +0 -145
  148. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  149. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  150. package/dist/scripts/pipeline.d.ts +0 -76
  151. package/dist/scripts/pipeline.js +0 -1031
  152. package/dist/scripts/pr-comment.d.ts +0 -10
  153. package/dist/scripts/pr-comment.js +0 -510
  154. package/dist/scripts/readiness-report.d.ts +0 -88
  155. package/dist/scripts/readiness-report.js +0 -342
  156. package/dist/scripts/update-quality-scores.d.ts +0 -15
  157. package/dist/scripts/update-quality-scores.js +0 -184
  158. package/dist/scripts/validate-task-sources.d.ts +0 -21
  159. package/dist/scripts/validate-task-sources.js +0 -210
  160. package/dist/scripts/validate.d.ts +0 -13
  161. package/dist/scripts/validate.js +0 -79
  162. package/dist/scripts/webhook-server.d.ts +0 -26
  163. package/dist/scripts/webhook-server.js +0 -147
  164. package/dist/scripts/weekly-digest.d.ts +0 -24
  165. package/dist/scripts/weekly-digest.js +0 -144
  166. package/dist/sinks/format-slack.d.ts +0 -64
  167. package/dist/sinks/format-slack.js +0 -306
  168. package/dist/sinks/slack-sink.d.ts +0 -27
  169. package/dist/sinks/slack-sink.js +0 -78
  170. package/dist/sinks/webhook-sink.d.ts +0 -19
  171. package/dist/sinks/webhook-sink.js +0 -50
  172. package/tasks/.expanded.agentic.yaml +0 -280
  173. package/tasks/.expanded.yaml +0 -565
@@ -1,368 +0,0 @@
1
- /**
2
- * mcp-tool-provider.ts — Custom Promptfoo provider for MCP tool-use evaluation.
3
- *
4
- * Implements a multi-turn tool execution loop: the LLM receives a prompt,
5
- * discovers MCP tools, calls them, gets results, and continues until it
6
- * produces a final text answer or exhausts maxToolRounds.
7
- *
8
- * Promptfoo's built-in Anthropic/OpenAI providers with config.mcp only do
9
- * single-turn tool calls. This provider fills that gap by managing the
10
- * full conversation loop, similar to the agentic-provider.ts pattern.
11
- *
12
- * Promptfoo config usage:
13
- *
14
- * providers:
15
- * - id: file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js
16
- * label: "Claude Opus 4.6 + MCP"
17
- * config:
18
- * model: anthropic:messages:claude-opus-4-6
19
- * maxToolRounds: 5
20
- * temperature: 0.2
21
- * max_tokens: 4096
22
- * mcpServer:
23
- * url: https://mcp.sanity.io
24
- * auth: { type: bearer, token: "{{env.SANITY_API_TOKEN}}" }
25
- * name: mcp-live-query-documents
26
- * mcpTools: [query_documents, get_schema]
27
- */
28
- import { config as loadDotenv } from "dotenv";
29
- loadDotenv({
30
- override: true,
31
- path: new URL("../../../../.env", import.meta.url).pathname,
32
- });
33
- // ---------------------------------------------------------------------------
34
- // Provider
35
- // ---------------------------------------------------------------------------
36
- export default class MCPToolProvider {
37
- config;
38
- providerId;
39
- constructor(options = {}) {
40
- this.config = options.config || {};
41
- this.providerId = options.id || "mcp-tool-provider";
42
- }
43
- id() {
44
- return this.providerId;
45
- }
46
- async callApi(prompt, _context) {
47
- const mcpServerConfig = this.config.mcpServer;
48
- if (!mcpServerConfig) {
49
- return { error: "mcpServer config is required", output: undefined };
50
- }
51
- // Resolve model provider
52
- const modelId = this.config.model ||
53
- "anthropic:messages:claude-sonnet-4-20250514";
54
- if (modelId.startsWith("anthropic:")) {
55
- return this.runAnthropicLoop(prompt, mcpServerConfig, modelId);
56
- }
57
- // For now, only Anthropic is supported. OpenAI support can be added later.
58
- return {
59
- error: `MCP tool provider only supports Anthropic models for now. Got: ${modelId}`,
60
- output: undefined,
61
- };
62
- }
63
- // -------------------------------------------------------------------------
64
- // Anthropic multi-turn MCP tool loop
65
- // -------------------------------------------------------------------------
66
- async runAnthropicLoop(prompt, mcpServerConfig, modelId) {
67
- // Parse model name from provider ID (e.g., "anthropic:messages:claude-opus-4-6" → "claude-opus-4-6")
68
- const modelParts = modelId.split(":");
69
- const model = modelParts.length > 2
70
- ? modelParts.slice(2).join(":")
71
- : modelParts[modelParts.length - 1];
72
- const temperature = this.config.temperature ?? 0.2;
73
- const maxTokens = this.config.max_tokens || 4096;
74
- const maxToolRounds = this.config.maxToolRounds || 5;
75
- const apiKey = this.config.apiKey || process.env.ANTHROPIC_API_KEY;
76
- if (!apiKey) {
77
- return {
78
- error: "ANTHROPIC_API_KEY not set. Configure it in env or provider config.",
79
- output: undefined,
80
- };
81
- }
82
- // Connect to MCP server and discover tools
83
- let mcpClient;
84
- try {
85
- mcpClient = await this.connectMCP(mcpServerConfig);
86
- }
87
- catch (err) {
88
- return {
89
- error: `Failed to connect to MCP server: ${err instanceof Error ? err.message : String(err)}`,
90
- output: undefined,
91
- };
92
- }
93
- try {
94
- // Get available tools and convert to Anthropic format
95
- const mcpTools = mcpClient.getAllTools();
96
- const toolFilter = this.config.mcpTools;
97
- const filteredTools = toolFilter
98
- ? mcpTools.filter((t) => toolFilter.includes(t.name))
99
- : mcpTools;
100
- const tools = filteredTools.map((t) => ({
101
- name: t.name,
102
- description: t.description || `MCP tool: ${t.name}`,
103
- input_schema: t.inputSchema || { type: "object", properties: {} },
104
- }));
105
- if (tools.length === 0) {
106
- return {
107
- error: "No MCP tools available after filtering. Check mcpTools config and server capabilities.",
108
- output: undefined,
109
- };
110
- }
111
- /** Append a machine-readable tool call summary to output for assertion detection */
112
- function appendToolSummary(text, log) {
113
- if (log.length === 0)
114
- return text;
115
- const names = JSON.stringify(log.map((tc) => tc.name));
116
- return `${text}\n\n<!-- MCP_TOOLS_CALLED: ${names} -->`;
117
- }
118
- const systemPrompt = "You are an AI assistant with access to tools provided by an MCP server. " +
119
- "Use the available tools to complete the task. Call tools with correct parameters, " +
120
- "interpret responses, and provide a complete answer.";
121
- const messages = [{ content: prompt, role: "user" }];
122
- let inputTokens = 0;
123
- let outputTokens = 0;
124
- const startTime = Date.now();
125
- const toolCallLog = [];
126
- for (let round = 0; round <= maxToolRounds; round++) {
127
- const isLastRound = round === maxToolRounds;
128
- // On last round, omit tools to force a final text response
129
- if (isLastRound) {
130
- const lastMsg = messages[messages.length - 1];
131
- const synthesisText = "You've used the tools available. Based on the information gathered, " +
132
- "provide your complete, final answer now.";
133
- if (lastMsg?.role === "user" && Array.isArray(lastMsg.content)) {
134
- ;
135
- lastMsg.content.push({
136
- type: "text",
137
- text: synthesisText,
138
- });
139
- }
140
- else {
141
- messages.push({ content: synthesisText, role: "user" });
142
- }
143
- }
144
- const body = {
145
- max_tokens: maxTokens,
146
- messages,
147
- model,
148
- system: systemPrompt,
149
- temperature,
150
- };
151
- if (!isLastRound) {
152
- body.tools = tools;
153
- }
154
- const response = await fetch("https://api.anthropic.com/v1/messages", {
155
- body: JSON.stringify(body),
156
- headers: {
157
- "anthropic-version": "2023-06-01",
158
- "Content-Type": "application/json",
159
- "x-api-key": apiKey,
160
- },
161
- method: "POST",
162
- });
163
- const data = (await response.json());
164
- if (data.error) {
165
- return {
166
- error: data.error.message ??
167
- `Anthropic API error: ${JSON.stringify(data.error)}`,
168
- output: undefined,
169
- };
170
- }
171
- inputTokens += data.usage?.input_tokens ?? 0;
172
- outputTokens += data.usage?.output_tokens ?? 0;
173
- if (!data.content?.length) {
174
- return {
175
- cost: 0,
176
- metadata: { toolRounds: round, toolCallLog },
177
- output: "",
178
- tokenUsage: {
179
- completion: outputTokens,
180
- prompt: inputTokens,
181
- total: inputTokens + outputTokens,
182
- },
183
- };
184
- }
185
- // Add assistant response to history
186
- messages.push({ content: data.content, role: "assistant" });
187
- // Check if model wants to use tools
188
- const toolUseBlocks = data.content.filter((b) => b.type === "tool_use");
189
- if (data.stop_reason !== "tool_use" || toolUseBlocks.length === 0) {
190
- // Model is done — extract text
191
- const textBlocks = data.content.filter((b) => b.type === "text");
192
- const rawOutput = textBlocks.map((b) => b.text || "").join("\n") || "";
193
- return {
194
- cost: 0,
195
- metadata: {
196
- toolRounds: round,
197
- toolCallLog,
198
- latencyMs: Date.now() - startTime,
199
- },
200
- output: appendToolSummary(rawOutput, toolCallLog),
201
- tokenUsage: {
202
- completion: outputTokens,
203
- prompt: inputTokens,
204
- total: inputTokens + outputTokens,
205
- },
206
- };
207
- }
208
- // Execute each tool call via MCP
209
- const toolResults = [];
210
- for (const toolUse of toolUseBlocks) {
211
- const toolName = toolUse.name;
212
- const toolInput = (toolUse.input || {});
213
- try {
214
- const result = await mcpClient.callTool(toolName, toolInput);
215
- const content = result.error
216
- ? JSON.stringify({ error: result.error })
217
- : result.content;
218
- toolCallLog.push({
219
- name: toolName,
220
- input: toolInput,
221
- output: content,
222
- });
223
- toolResults.push({
224
- content,
225
- tool_use_id: toolUse.id,
226
- type: "tool_result",
227
- });
228
- }
229
- catch (err) {
230
- const errMsg = err instanceof Error ? err.message : String(err);
231
- toolCallLog.push({
232
- name: toolName,
233
- input: toolInput,
234
- output: `Error: ${errMsg}`,
235
- });
236
- toolResults.push({
237
- content: JSON.stringify({ error: errMsg }),
238
- tool_use_id: toolUse.id,
239
- type: "tool_result",
240
- });
241
- }
242
- }
243
- // Add tool results to conversation
244
- messages.push({ content: toolResults, role: "user" });
245
- }
246
- // Exhausted rounds
247
- const lastAssistant = [...messages]
248
- .reverse()
249
- .find((m) => m.role === "assistant");
250
- let lastText = "";
251
- if (lastAssistant && Array.isArray(lastAssistant.content)) {
252
- lastText = lastAssistant.content
253
- .filter((b) => b.type === "text")
254
- .map((b) => b.text || "")
255
- .join("\n");
256
- }
257
- return {
258
- cost: 0,
259
- metadata: {
260
- toolRounds: maxToolRounds,
261
- exhaustedRounds: true,
262
- toolCallLog,
263
- latencyMs: Date.now() - startTime,
264
- },
265
- output: appendToolSummary(lastText || "[Exhausted tool rounds without final answer]", toolCallLog),
266
- tokenUsage: {
267
- completion: outputTokens,
268
- prompt: inputTokens,
269
- total: inputTokens + outputTokens,
270
- },
271
- };
272
- }
273
- finally {
274
- await mcpClient.cleanup().catch(() => { });
275
- }
276
- }
277
- // -------------------------------------------------------------------------
278
- // MCP client management
279
- // -------------------------------------------------------------------------
280
- async connectMCP(serverConfig) {
281
- // Dynamically import Promptfoo's MCPClient — reuse its MCP SDK integration
282
- // rather than adding a direct dependency on @modelcontextprotocol/sdk
283
- const { Client } = await import("@modelcontextprotocol/sdk/client/index.js");
284
- const client = new Client({
285
- name: "ailf-mcp-eval",
286
- version: "1.0.0",
287
- });
288
- // Resolve auth — render {{env.VAR}} templates
289
- const resolvedConfig = this.resolveEnvTemplates(serverConfig);
290
- // Determine transport type and connect
291
- let closeTransport;
292
- if (resolvedConfig.command) {
293
- // stdio transport
294
- const { StdioClientTransport } = await import("@modelcontextprotocol/sdk/client/stdio.js");
295
- const parts = String(resolvedConfig.command).split(/\s+/);
296
- const transport = new StdioClientTransport({
297
- command: parts[0],
298
- args: parts.slice(1),
299
- env: process.env,
300
- });
301
- await client.connect(transport);
302
- closeTransport = () => transport.close();
303
- }
304
- else if (resolvedConfig.url) {
305
- // streamable-http transport
306
- const { StreamableHTTPClientTransport } = await import("@modelcontextprotocol/sdk/client/streamableHttp.js");
307
- const headers = {};
308
- const auth = resolvedConfig.auth;
309
- if (auth?.type === "bearer" && auth.token) {
310
- headers["Authorization"] = `Bearer ${auth.token}`;
311
- }
312
- const transport = new StreamableHTTPClientTransport(new URL(String(resolvedConfig.url)), { requestInit: { headers } });
313
- await client.connect(transport);
314
- closeTransport = () => transport.close();
315
- }
316
- else {
317
- throw new Error("MCP server config must have either 'command' (stdio) or 'url' (http)");
318
- }
319
- // Discover tools
320
- const { tools: toolsList } = await client.listTools();
321
- const allTools = toolsList.map((t) => ({
322
- name: t.name,
323
- description: t.description,
324
- inputSchema: t.inputSchema,
325
- }));
326
- return {
327
- getAllTools: () => allTools,
328
- callTool: async (name, args) => {
329
- const result = await client.callTool({ name, arguments: args });
330
- let content = "";
331
- if (result?.content) {
332
- if (Array.isArray(result.content)) {
333
- content = result.content
334
- .map((c) => c.text || JSON.stringify(c))
335
- .join("\n");
336
- }
337
- else {
338
- content = String(result.content);
339
- }
340
- }
341
- return { content, error: result.isError ? content : undefined };
342
- },
343
- cleanup: async () => {
344
- await closeTransport().catch(() => { });
345
- },
346
- };
347
- }
348
- /**
349
- * Resolve {{env.VAR}} templates in config values.
350
- */
351
- resolveEnvTemplates(config) {
352
- const resolved = {};
353
- for (const [key, value] of Object.entries(config)) {
354
- if (typeof value === "string") {
355
- resolved[key] = value.replace(/\{\{env\.(\w+)\}\}/g, (_, varName) => {
356
- return process.env[varName] || "";
357
- });
358
- }
359
- else if (value && typeof value === "object" && !Array.isArray(value)) {
360
- resolved[key] = this.resolveEnvTemplates(value);
361
- }
362
- else {
363
- resolved[key] = value;
364
- }
365
- }
366
- return resolved;
367
- }
368
- }
@@ -1,41 +0,0 @@
1
- /**
2
- * task-bridge.ts — Bidirectional bridge between old TaskDefinition and new LiteracyTaskDefinition.
3
- *
4
- * Enables incremental migration: consumers can convert between the two types
5
- * without changing their internal logic. Once all consumers use
6
- * GeneralizedTaskDefinition, this module is deleted (Wave 3 task 6).
7
- *
8
- * Field mapping (TaskDefinition ↔ LiteracyTaskDefinition):
9
- * id ↔ id
10
- * description ↔ title
11
- * featureArea ↔ area
12
- * taskPrompt ↔ prompt.text (fallback: prompt.template)
13
- * canonicalDocs ↔ context.docs
14
- * referenceSolution ↔ referenceSolution
15
- * docCoverage ↔ docCoverage
16
- * assertions ↔ assertions (structurally identical)
17
- * baseline ↔ baseline (structurally identical)
18
- * tags ↔ tags
19
- * status ↔ status
20
- * extraVars ↔ prompt.vars
21
- *
22
- * The assertion and doc-ref sub-types are structurally identical between
23
- * the old and new type systems, so no field-level remapping is needed
24
- * for those — only a TypeScript-level cast.
25
- */
26
- import type { LiteracyTaskDefinition, TaskDefinition } from "../../_vendor/ailf-core/index.d.ts";
27
- /**
28
- * Convert an old-style TaskDefinition to the new LiteracyTaskDefinition.
29
- *
30
- * Every field of TaskDefinition has a corresponding field in LiteracyTaskDefinition,
31
- * so this conversion is lossless.
32
- */
33
- export declare function toGeneralized(task: TaskDefinition): LiteracyTaskDefinition;
34
- /**
35
- * Convert a new LiteracyTaskDefinition to the old TaskDefinition shape.
36
- *
37
- * Fields that only exist on LiteracyTaskDefinition (description, difficulty,
38
- * metadata, rubric, providers, options, context.fixtures, prompt.systemMessage)
39
- * are dropped — the old type has no place for them.
40
- */
41
- export declare function toLiteracyTask(task: LiteracyTaskDefinition): TaskDefinition;
@@ -1,92 +0,0 @@
1
- /**
2
- * task-bridge.ts — Bidirectional bridge between old TaskDefinition and new LiteracyTaskDefinition.
3
- *
4
- * Enables incremental migration: consumers can convert between the two types
5
- * without changing their internal logic. Once all consumers use
6
- * GeneralizedTaskDefinition, this module is deleted (Wave 3 task 6).
7
- *
8
- * Field mapping (TaskDefinition ↔ LiteracyTaskDefinition):
9
- * id ↔ id
10
- * description ↔ title
11
- * featureArea ↔ area
12
- * taskPrompt ↔ prompt.text (fallback: prompt.template)
13
- * canonicalDocs ↔ context.docs
14
- * referenceSolution ↔ referenceSolution
15
- * docCoverage ↔ docCoverage
16
- * assertions ↔ assertions (structurally identical)
17
- * baseline ↔ baseline (structurally identical)
18
- * tags ↔ tags
19
- * status ↔ status
20
- * extraVars ↔ prompt.vars
21
- *
22
- * The assertion and doc-ref sub-types are structurally identical between
23
- * the old and new type systems, so no field-level remapping is needed
24
- * for those — only a TypeScript-level cast.
25
- */
26
- // ---------------------------------------------------------------------------
27
- // toGeneralized — old TaskDefinition → LiteracyTaskDefinition
28
- // ---------------------------------------------------------------------------
29
- /**
30
- * Convert an old-style TaskDefinition to the new LiteracyTaskDefinition.
31
- *
32
- * Every field of TaskDefinition has a corresponding field in LiteracyTaskDefinition,
33
- * so this conversion is lossless.
34
- */
35
- export function toGeneralized(task) {
36
- const result = {
37
- mode: "literacy",
38
- id: task.id,
39
- title: task.description,
40
- area: task.featureArea,
41
- prompt: {
42
- text: task.taskPrompt,
43
- ...(task.extraVars != null ? { vars: task.extraVars } : {}),
44
- },
45
- context: {
46
- docs: task.canonicalDocs,
47
- },
48
- referenceSolution: task.referenceSolution,
49
- docCoverage: task.docCoverage,
50
- assertions: task.assertions,
51
- };
52
- // Only set optional fields when present to preserve round-trip identity
53
- if (task.baseline != null)
54
- result.baseline = task.baseline;
55
- if (task.tags != null)
56
- result.tags = task.tags;
57
- if (task.status != null)
58
- result.status = task.status;
59
- return result;
60
- }
61
- // ---------------------------------------------------------------------------
62
- // toLiteracyTask — LiteracyTaskDefinition → old TaskDefinition
63
- // ---------------------------------------------------------------------------
64
- /**
65
- * Convert a new LiteracyTaskDefinition to the old TaskDefinition shape.
66
- *
67
- * Fields that only exist on LiteracyTaskDefinition (description, difficulty,
68
- * metadata, rubric, providers, options, context.fixtures, prompt.systemMessage)
69
- * are dropped — the old type has no place for them.
70
- */
71
- export function toLiteracyTask(task) {
72
- const result = {
73
- id: task.id,
74
- description: task.title,
75
- featureArea: task.area ?? "",
76
- taskPrompt: task.prompt?.text ?? task.prompt?.template ?? "",
77
- canonicalDocs: (task.context?.docs ?? []),
78
- referenceSolution: task.referenceSolution ?? "",
79
- docCoverage: task.docCoverage ?? false,
80
- assertions: (task.assertions ?? []),
81
- };
82
- // Only set optional fields when present to preserve round-trip identity
83
- if (task.baseline != null)
84
- result.baseline = task.baseline;
85
- if (task.tags != null)
86
- result.tags = task.tags;
87
- if (task.status != null)
88
- result.status = task.status;
89
- if (task.prompt?.vars != null)
90
- result.extraVars = task.prompt.vars;
91
- return result;
92
- }