@superblocksteam/vite-plugin-file-sync 2.0.67 → 2.0.68-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-service/agent/tools/apis/analysis.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/analysis.js +4 -0
- package/dist/ai-service/agent/tools/apis/analysis.js.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-executor.d.ts +9 -1
- package/dist/ai-service/agent/tools/apis/api-executor.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-executor.js +4 -1
- package/dist/ai-service/agent/tools/apis/api-executor.js.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts +1 -0
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js +1 -1
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js.map +1 -1
- package/dist/ai-service/agent/tools/apis/test-api.d.ts +5 -0
- package/dist/ai-service/agent/tools/apis/test-api.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/test-api.js +9 -1
- package/dist/ai-service/agent/tools/apis/test-api.js.map +1 -1
- package/dist/ai-service/agent/tools2/tools/read.d.ts +1 -1
- package/dist/ai-service/index.d.ts +23 -2
- package/dist/ai-service/index.d.ts.map +1 -1
- package/dist/ai-service/index.js +99 -0
- package/dist/ai-service/index.js.map +1 -1
- package/dist/ai-service/judge/debug-browser.d.ts +8 -0
- package/dist/ai-service/judge/debug-browser.d.ts.map +1 -0
- package/dist/ai-service/judge/debug-browser.js +79 -0
- package/dist/ai-service/judge/debug-browser.js.map +1 -0
- package/dist/ai-service/judge/index.d.ts +12 -0
- package/dist/ai-service/judge/index.d.ts.map +1 -0
- package/dist/ai-service/judge/index.js +11 -0
- package/dist/ai-service/judge/index.js.map +1 -0
- package/dist/ai-service/judge/integration/mcp-client.d.ts +82 -0
- package/dist/ai-service/judge/integration/mcp-client.d.ts.map +1 -0
- package/dist/ai-service/judge/integration/mcp-client.js +276 -0
- package/dist/ai-service/judge/integration/mcp-client.js.map +1 -0
- package/dist/ai-service/judge/integration/playwright-bridge.d.ts +142 -0
- package/dist/ai-service/judge/integration/playwright-bridge.d.ts.map +1 -0
- package/dist/ai-service/judge/integration/playwright-bridge.js +217 -0
- package/dist/ai-service/judge/integration/playwright-bridge.js.map +1 -0
- package/dist/ai-service/judge/judge-eval-http.d.ts +3 -0
- package/dist/ai-service/judge/judge-eval-http.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-eval-http.js +541 -0
- package/dist/ai-service/judge/judge-eval-http.js.map +1 -0
- package/dist/ai-service/judge/judge-eval-service-runner.d.ts +35 -0
- package/dist/ai-service/judge/judge-eval-service-runner.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-eval-service-runner.js +124 -0
- package/dist/ai-service/judge/judge-eval-service-runner.js.map +1 -0
- package/dist/ai-service/judge/judge-executor.d.ts +65 -0
- package/dist/ai-service/judge/judge-executor.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-executor.js +334 -0
- package/dist/ai-service/judge/judge-executor.js.map +1 -0
- package/dist/ai-service/judge/judge-service.d.ts +161 -0
- package/dist/ai-service/judge/judge-service.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-service.js +241 -0
- package/dist/ai-service/judge/judge-service.js.map +1 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts +37 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts.map +1 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.js +283 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.js.map +1 -0
- package/dist/ai-service/judge/prompts/system-prompt.d.ts +30 -0
- package/dist/ai-service/judge/prompts/system-prompt.d.ts.map +1 -0
- package/dist/ai-service/judge/prompts/system-prompt.js +212 -0
- package/dist/ai-service/judge/prompts/system-prompt.js.map +1 -0
- package/dist/ai-service/judge/storage/csv-storage.d.ts +99 -0
- package/dist/ai-service/judge/storage/csv-storage.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/csv-storage.js +274 -0
- package/dist/ai-service/judge/storage/csv-storage.js.map +1 -0
- package/dist/ai-service/judge/storage/index.d.ts +9 -0
- package/dist/ai-service/judge/storage/index.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/index.js +7 -0
- package/dist/ai-service/judge/storage/index.js.map +1 -0
- package/dist/ai-service/judge/storage/interface.d.ts +51 -0
- package/dist/ai-service/judge/storage/interface.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/interface.js +8 -0
- package/dist/ai-service/judge/storage/interface.js.map +1 -0
- package/dist/ai-service/judge/storage/types.d.ts +54 -0
- package/dist/ai-service/judge/storage/types.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/types.js +7 -0
- package/dist/ai-service/judge/storage/types.js.map +1 -0
- package/dist/ai-service/judge/tools/index.d.ts +22 -0
- package/dist/ai-service/judge/tools/index.d.ts.map +1 -0
- package/dist/ai-service/judge/tools/index.js +29 -0
- package/dist/ai-service/judge/tools/index.js.map +1 -0
- package/dist/ai-service/judge/tools/playwright-action.d.ts +18 -0
- package/dist/ai-service/judge/tools/playwright-action.d.ts.map +1 -0
- package/dist/ai-service/judge/tools/playwright-action.js +171 -0
- package/dist/ai-service/judge/tools/playwright-action.js.map +1 -0
- package/dist/ai-service/judge/tools/submit-feedback.d.ts +41 -0
- package/dist/ai-service/judge/tools/submit-feedback.d.ts.map +1 -0
- package/dist/ai-service/judge/tools/submit-feedback.js +150 -0
- package/dist/ai-service/judge/tools/submit-feedback.js.map +1 -0
- package/dist/ai-service/judge/types.d.ts +169 -0
- package/dist/ai-service/judge/types.d.ts.map +1 -0
- package/dist/ai-service/judge/types.js +8 -0
- package/dist/ai-service/judge/types.js.map +1 -0
- package/dist/ai-service/llm/interaction/adapters/vercel.d.ts.map +1 -1
- package/dist/ai-service/llm/interaction/adapters/vercel.js.map +1 -1
- package/dist/ai-service/llm/interaction/provider.d.ts +10 -9
- package/dist/ai-service/llm/interaction/provider.d.ts.map +1 -1
- package/dist/ai-service/llmobs/middleware/stream-text.d.ts +8 -8
- package/dist/ai-service/llmobs/middleware/stream-text.d.ts.map +1 -1
- package/dist/ai-service/llmobs/middleware/stream-text.js.map +1 -1
- package/dist/ai-service/llmobs/tracer.d.ts.map +1 -1
- package/dist/ai-service/llmobs/tracer.js +2 -1
- package/dist/ai-service/llmobs/tracer.js.map +1 -1
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts +53 -0
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts.map +1 -0
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.js +541 -0
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.js.map +1 -0
- package/dist/ai-service/mcp/playwright-server.d.ts +114 -0
- package/dist/ai-service/mcp/playwright-server.d.ts.map +1 -0
- package/dist/ai-service/mcp/playwright-server.js +109 -0
- package/dist/ai-service/mcp/playwright-server.js.map +1 -0
- package/dist/server-rpc/client.js +1 -1
- package/dist/server-rpc/client.js.map +1 -1
- package/dist/socket-manager.d.ts.map +1 -1
- package/dist/socket-manager.js +8 -0
- package/dist/socket-manager.js.map +1 -1
- package/package.json +7 -6
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Playwright action tool for browser automation via MCP.
|
|
3
|
+
*
|
|
4
|
+
* Provides the judge with browser automation capabilities
|
|
5
|
+
* to interact with and evaluate the generated application.
|
|
6
|
+
*/
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import { createToolFactory, ToolCategory } from "../../agent/tools2/types.js";
|
|
9
|
+
/**
 * Names of the browser actions accepted by the Playwright action tool.
 */
const PLAYWRIGHT_ACTION_NAMES = [
    "navigate",
    "click",
    "fill",
    "screenshot", // currently disabled, leads to prompt_too_long errors
    "getText",
    "waitForSelector",
    "evaluate",
    "getUrl",
    "reload",
];
/**
 * Zod schema for the Playwright action tool input: the `action` to run and
 * an optional free-form `params` record whose expected keys depend on the
 * chosen action.
 */
const playwrightActionSchema = z.object({
    action: z.enum(PLAYWRIGHT_ACTION_NAMES).describe("The Playwright action to perform"),
    params: z
        .record(z.any())
        .describe("Action-specific parameters")
        .optional(),
});
|
|
28
|
+
/**
 * Tool factory for the judge's `playwright_action` tool.
 *
 * Given a Playwright bridge, builds a tool that forwards a browser action to
 * `playwrightBridge.executeAction(action, params)` and reshapes the bridge
 * result into a small, action-specific payload for the model.
 *
 * Bridge results with `success: false` are returned to the model as
 * `{ success: false, error, suggestion }` instead of being thrown, so the
 * model can retry with a corrected call.
 */
export const playwrightActionToolFactory = createToolFactory("playwright_action", (playwrightBridge) => ({
    // NOTE(review): this prompt still advertises `screenshot`, which `execute`
    // below always reports as unavailable — consider updating the wording.
    description: `Execute browser automation actions via Playwright.

Available actions:
- navigate: Go to a URL (params: { url: string })
- click: Click an element (params: { selector: string })
- fill: Fill an input field (params: { selector: string, value: string })
- screenshot: Capture screenshot (params: { fullPage?: boolean })
- getText: Get text from element (params: { selector: string })
- waitForSelector: Wait for element (params: { selector: string, timeout?: number })
- evaluate: Run JavaScript in browser context (params: { script: string }) - NOTE: Use plain JavaScript only, NO import/export/require statements
- getUrl: Get current URL (no params)
- reload: Reload page (no params)

Examples:
- Navigate: { action: "navigate", params: { url: "http://localhost:3000" } }
- Click button: { action: "click", params: { selector: "button[type=submit]" } }
- Fill input: { action: "fill", params: { selector: "#email", value: "test@example.com" } }
- Get text: { action: "getText", params: { selector: "h1" } }
- Screenshot: { action: "screenshot", params: { fullPage: true } }`,
    category: ToolCategory.GENERAL,
    inputSchema: playwrightActionSchema,
    // This is a read-only tool from the filesystem perspective
    readOnly: true,
    // Outputs carrying a `screenshot` key are sent as PNG media content;
    // every other output is passed through as JSON.
    toModelOutput: (output) => {
        if ("screenshot" in output) {
            return {
                type: "content",
                value: [
                    {
                        type: "media",
                        data: output.screenshot,
                        mediaType: "image/png",
                    },
                ],
            };
        }
        else {
            return {
                type: "json",
                value: output,
            };
        }
    },
    /**
     * Runs one browser action.
     *
     * @param input.action - One of the action names allowed by the input schema.
     * @param input.params - Action-specific parameters; defaults to `{}`.
     * @returns An object with a `success` flag plus action-specific fields
     *   (`message`, `text`, `url`, `result`, `data`, or `error`/`suggestion`).
     */
    execute: async ({ action, params = {} }) => {
        console.log("🎭 Playwright Tool: Executing action", {
            action,
            hasParams: Object.keys(params).length > 0,
            paramKeys: Object.keys(params),
        });
        // Screenshots are currently disabled to prevent prompt_too_long errors.
        // Answer before touching the bridge so no capture is taken just to be
        // discarded.
        if (action === "screenshot") {
            return {
                success: false,
                message: "tool is unavailable",
            };
        }
        // Pre-validate evaluate action to provide better error messages.
        // `params` is an untyped record, so only inspect `script` when it is
        // actually a string; anything else is left for the bridge to reject.
        if (action === "evaluate" && typeof params.script === "string" && params.script) {
            const script = params.script;
            console.log("🎭 Playwright Tool: Evaluate action detected", {
                scriptLength: script.length,
                scriptPreview: script.slice(0, 100),
            });
            if (script.includes("import ") ||
                script.includes("export ") ||
                script.includes("require(")) {
                console.log("🎭 Playwright Tool: Detected import/export/require in script, returning error");
                // Return instructive error instead of failing
                return {
                    success: false,
                    error: "Your JavaScript code contains import/export/require statements which cannot run in a browser console. Please rewrite using plain JavaScript. For example: instead of 'import React from \"react\"', check if React exists with 'typeof window.React !== \"undefined\"'. Use document.querySelector() to find elements.",
                    suggestion: "Try using browser APIs directly: document.querySelector, window.location, etc.",
                };
            }
        }
        console.log("🎭 Playwright Tool: Calling playwrightBridge.executeAction");
        const result = await playwrightBridge.executeAction(action, params);
        if (!result.success) {
            // Return error to model so it can retry with corrected approach
            return {
                success: false,
                error: result.error || "Action failed",
                suggestion: action === "evaluate"
                    ? "Make sure your JavaScript code is browser-compatible (no imports/exports)"
                    : undefined,
            };
        }
        // Format response based on action type
        switch (action) {
            case "navigate":
                return {
                    success: true,
                    message: `Navigated to ${params.url}`,
                };
            case "click":
                return {
                    success: true,
                    message: `Clicked element: ${params.selector}`,
                };
            case "fill":
                // The filled value is deliberately not echoed back.
                return {
                    success: true,
                    message: `Filled ${params.selector} with value`,
                };
            case "getText":
                return {
                    success: true,
                    text: result.data,
                };
            case "getUrl":
                return {
                    success: true,
                    url: result.data,
                };
            case "evaluate":
                return {
                    success: true,
                    result: result.data,
                };
            case "waitForSelector":
                return {
                    success: true,
                    message: `Element found: ${params.selector}`,
                };
            case "reload":
                return {
                    success: true,
                    message: "Page reloaded",
                };
            default:
                return {
                    success: true,
                    data: result.data,
                };
        }
    },
}));
|
|
171
|
+
//# sourceMappingURL=playwright-action.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-action.js","sourceRoot":"","sources":["../../../../src/ai-service/judge/tools/playwright-action.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAG9E;;GAEG;AACH,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IACtC,MAAM,EAAE,CAAC;SACN,IAAI,CAAC;QACJ,UAAU;QACV,OAAO;QACP,MAAM;QACN,YAAY,EAAE,sDAAsD;QACpE,SAAS;QACT,iBAAiB;QACjB,UAAU;QACV,QAAQ;QACR,QAAQ;KACT,CAAC;SACD,QAAQ,CAAC,kCAAkC,CAAC;IAE/C,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,QAAQ,CAAC,4BAA4B,CAAC,CAAC,QAAQ,EAAE;CAC5E,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG,iBAAiB,CAC1D,mBAAmB,EACnB,CAAC,gBAAkC,EAAE,EAAE,CAAC,CAAC;IACvC,WAAW,EAAE;;;;;;;;;;;;;;;;;;mEAkBkD;IAE/D,QAAQ,EAAE,YAAY,CAAC,OAAO;IAE9B,WAAW,EAAE,sBAAsB;IAEnC,2DAA2D;IAC3D,QAAQ,EAAE,IAAI;IACd,aAAa,EAAE,CAAC,MAAM,EAAE,EAAE;QACxB,IAAI,YAAY,IAAI,MAAM,EAAE,CAAC;YAC3B,OAAO;gBACL,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE;oBACL;wBACE,IAAI,EAAE,OAAO;wBACb,IAAI,EAAE,MAAM,CAAC,UAAU;wBACvB,SAAS,EAAE,WAAW;qBACvB;iBACF;aACF,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,MAAM;aACd,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,EAAE,MAAM,EAAE,MAAM,GAAG,EAAE,EAAE,EAAE,EAAE;QACzC,OAAO,CAAC,GAAG,CAAC,sCAAsC,EAAE;YAClD,MAAM;YACN,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC;YACzC,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;SAC/B,CAAC,CAAC;QAEH,gEAAgE;QAChE,IAAI,MAAM,KAAK,UAAU,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC3C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAgB,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,8CAA8C,EAAE;gBAC1D,YAAY,EAAE,MAAM,CAAC,MAAM;gBAC3B,aAAa,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;aACpC,CAAC,CAAC;YAEH,IACE,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAC1B,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAC1B,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,EAC3B,CAAC;gBACD,OAAO,CAAC,GAAG,CACT,+EAA+E,CAChF,CAAC;gBACF,8CAA8C;gBAC9C,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,KAAK,EACH,wTAAwT;oBAC1T,UAAU,EACR,gFAAgF;iBACnF,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,4DAA4D,C
AAC,CAAC;QAC1E,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,aAAa,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAEpE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,gEAAgE;YAChE,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,eAAe;gBACtC,UAAU,EACR,MAAM,KAAK,UAAU;oBACnB,CAAC,CAAC,2EAA2E;oBAC7E,CAAC,CAAC,SAAS;aAChB,CAAC;QACJ,CAAC;QAED,uCAAuC;QACvC,QAAQ,MAAM,EAAE,CAAC;YACf,KAAK,UAAU;gBACb,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,gBAAgB,MAAM,CAAC,GAAG,EAAE;iBACtC,CAAC;YAEJ,KAAK,OAAO;gBACV,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,oBAAoB,MAAM,CAAC,QAAQ,EAAE;iBAC/C,CAAC;YAEJ,KAAK,MAAM;gBACT,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,UAAU,MAAM,CAAC,QAAQ,aAAa;iBAChD,CAAC;YAEJ,KAAK,YAAY;gBACf,uEAAuE;gBACvE,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,OAAO,EAAE,qBAAqB;iBAC/B,CAAC;YAEJ,KAAK,SAAS;gBACZ,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,IAAI,EAAE,MAAM,CAAC,IAAI;iBAClB,CAAC;YAEJ,KAAK,QAAQ;gBACX,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,GAAG,EAAE,MAAM,CAAC,IAAI;iBACjB,CAAC;YAEJ,KAAK,UAAU;gBACb,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,MAAM,EAAE,MAAM,CAAC,IAAI;iBACpB,CAAC;YAEJ,KAAK,iBAAiB;gBACpB,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,kBAAkB,MAAM,CAAC,QAAQ,EAAE;iBAC7C,CAAC;YAEJ,KAAK,QAAQ;gBACX,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,eAAe;iBACzB,CAAC;YAEJ;gBACE,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,IAAI,EAAE,MAAM,CAAC,IAAI;iBAClB,CAAC;QACN,CAAC;IACH,CAAC;CACF,CAAC,CACH,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feedback submission tool for judge evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Allows the judge to submit structured evaluation results
|
|
5
|
+
* after assessing the generated application.
|
|
6
|
+
*/
|
|
7
|
+
import type { JudgeEvaluation } from "../types.js";
|
|
8
|
+
/**
 * Tool factory for the judge's feedback-submission tool.
 *
 * The produced tool takes the structured evaluation fields declared in the
 * first type argument below and yields `{ success, evaluation }`, where
 * `evaluation` is a {@link JudgeEvaluation}.
 */
export declare const submitFeedbackToolFactory: import("../../agent/tools2/types.js").ToolFactory<unknown, import("../../agent/tools2/types.js").Tool<{
    summary: string;
    passed: boolean;
    suggestions: string[];
    overallScore: number;
    criteriaEvaluations: {
        reasoning: string;
        criterion: string;
        passed: boolean;
        score: number;
        evidence?: string | undefined;
        suggestions?: string[] | undefined;
    }[];
    strengths?: string[] | undefined;
    weaknesses?: string[] | undefined;
    confidence?: number | undefined;
}, {
    success: boolean;
    evaluation: JudgeEvaluation;
}>>;
|
|
34
|
+
/**
 * Reports whether a judge evaluation is complete.
 *
 * @param evaluation - The evaluation to inspect
 * @returns `true` when the evaluation is considered complete, `false` otherwise
 */
export declare function isEvaluationComplete(evaluation: JudgeEvaluation): boolean;
|
|
41
|
+
//# sourceMappingURL=submit-feedback.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"submit-feedback.d.ts","sourceRoot":"","sources":["../../../../src/ai-service/judge/tools/submit-feedback.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAwDnD;;;;;GAKG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;aAkDZ,OAAO;gBAAc,eAAe;GA0C7D,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,eAAe,GAAG,OAAO,CAQzE"}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feedback submission tool for judge evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Allows the judge to submit structured evaluation results
|
|
5
|
+
* after assessing the generated application.
|
|
6
|
+
*/
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import { createToolFactory, ToolCategory } from "../../agent/tools2/types.js";
|
|
9
|
+
/**
 * Zod schema for one criterion's evaluation as submitted by the judge:
 * which criterion it is, whether it passed, a 0-100 score, the reasoning,
 * and optionally evidence and improvement suggestions.
 */
const criterionEvaluationSchema = z.object({
    criterion: z.string().describe("The criterion being evaluated"),
    passed: z.boolean().describe("Whether the criterion passed"),
    score: z
        .number()
        .min(0)
        .max(100)
        .describe("Score from 0-100"),
    reasoning: z.string().describe("Detailed reasoning for the evaluation"),
    evidence: z.string().optional().describe("Supporting evidence (e.g., screenshot URL)"),
    suggestions: z.array(z.string()).optional().describe("Improvement suggestions"),
});
|
|
26
|
+
/**
 * Zod schema for the full feedback payload accepted by the feedback tool.
 *
 * Required: overall score (0-100), pass/fail flag, per-criterion
 * evaluations, summary and high-level suggestions.
 * Optional: strengths, weaknesses and a confidence value between 0 and 1.
 */
const submitFeedbackSchema = z.object({
    overallScore: z
        .number()
        .min(0)
        .max(100)
        .describe("Overall score from 0-100"),
    passed: z.boolean().describe("Overall pass/fail determination"),
    criteriaEvaluations: z.array(criterionEvaluationSchema).describe("Individual criterion evaluations"),
    summary: z.string().describe("Executive summary of the evaluation"),
    suggestions: z.array(z.string()).describe("High-level suggestions for improvement"),
    strengths: z.array(z.string()).optional().describe("Identified strengths in the implementation"),
    weaknesses: z.array(z.string()).optional().describe("Identified weaknesses in the implementation"),
    confidence: z.number().min(0).max(1).optional().describe("Confidence level of the evaluation (0-1)"),
});
|
|
54
|
+
/**
 * Tool factory for the judge's `submitFeedback` tool.
 *
 * The tool takes the judge's final assessment, rejects submissions without
 * any criterion evaluation, logs a warning when the overall score is far
 * from the mean criterion score, and returns the assembled evaluation.
 */
export const submitFeedbackToolFactory = createToolFactory("submitFeedback", () => ({
    description: `Submit your final evaluation of the application.

Call this tool once you have:
1. Tested all functional requirements
2. Verified UI/UX requirements
3. Checked data integration
4. Gathered sufficient evidence

Provide a comprehensive assessment with:
- Overall score (0-100)
- Pass/fail determination
- Individual criterion evaluations with scores and reasoning
- Executive summary
- Specific suggestions for improvement
- Identified strengths and weaknesses
- Confidence level in your evaluation

Example:
{
"overallScore": 85,
"passed": true,
"criteriaEvaluations": [
{
"criterion": "User can create new records",
"passed": true,
"score": 90,
"reasoning": "The create functionality works well with proper validation",
"evidence": "screenshot_create_form.png",
"suggestions": ["Add confirmation dialog for create action"]
}
],
"summary": "The application successfully implements core CRUD operations with good UX",
"suggestions": ["Improve error handling", "Add loading states"],
"strengths": ["Clean UI", "Responsive design", "Good data validation"],
"weaknesses": ["Limited error feedback", "No undo functionality"],
"confidence": 0.9
}`,
    category: ToolCategory.GENERAL,
    inputSchema: submitFeedbackSchema,
    // Submitting an evaluation counts as a write operation
    readOnly: false,
    /**
     * Builds the evaluation object from the submitted feedback.
     *
     * @param input - Feedback payload matching the tool's input schema.
     * @returns `{ success: true, evaluation }`.
     * @throws Error when `criteriaEvaluations` is empty.
     */
    execute: async (input) => {
        const { overallScore, passed, criteriaEvaluations, summary, suggestions, strengths, weaknesses, confidence, } = input;
        // A submission must evaluate at least one criterion
        if (criteriaEvaluations.length === 0) {
            throw new Error("At least one criterion evaluation is required");
        }
        // Arithmetic mean of the per-criterion scores
        let scoreTotal = 0;
        for (const criterionResult of criteriaEvaluations) {
            scoreTotal = scoreTotal + criterionResult.score;
        }
        const avgScore = scoreTotal / criteriaEvaluations.length;
        // Only a warning: a gap of more than 20 points is logged, not rejected
        const scoreGap = Math.abs(overallScore - avgScore);
        if (scoreGap > 20) {
            console.warn(`Overall score (${input.overallScore}) differs significantly from average criterion score (${avgScore})`);
        }
        const evaluation = {
            overallScore,
            passed,
            criteriaEvaluations,
            summary,
            suggestions,
            strengths,
            weaknesses,
            confidence,
            // Placeholder counts; token usage will be filled by the executor
            tokensUsed: { input: 0, output: 0, total: 0 },
        };
        return { success: true, evaluation };
    },
}));
|
|
137
|
+
/**
 * Helper function to validate evaluation completeness.
 *
 * An evaluation counts as complete when it has a numeric overall score, a
 * boolean pass/fail flag, at least one criterion evaluation, a non-empty
 * summary and at least one suggestion.
 *
 * Missing or wrongly typed fields (including `null` values, as produced by
 * JSON) make the evaluation incomplete instead of raising a TypeError, so
 * the helper is safe to call on partially populated objects.
 *
 * @param evaluation - Evaluation to validate
 * @returns True if evaluation is complete
 */
export function isEvaluationComplete(evaluation) {
    if (evaluation === null || typeof evaluation !== "object") {
        return false;
    }
    const { overallScore, passed, criteriaEvaluations, summary, suggestions } = evaluation;
    return (typeof overallScore === "number" &&
        !Number.isNaN(overallScore) &&
        typeof passed === "boolean" &&
        Array.isArray(criteriaEvaluations) &&
        criteriaEvaluations.length > 0 &&
        typeof summary === "string" &&
        summary.length > 0 &&
        Array.isArray(suggestions) &&
        suggestions.length > 0);
}
|
|
150
|
+
//# sourceMappingURL=submit-feedback.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"submit-feedback.js","sourceRoot":"","sources":["../../../../src/ai-service/judge/tools/submit-feedback.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAG9E;;GAEG;AACH,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;IAC/D,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,8BAA8B,CAAC;IAC5D,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IAC9D,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uCAAuC,CAAC;IACvE,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,WAAW,EAAE,CAAC;SACX,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SACjB,QAAQ,EAAE;SACV,QAAQ,CAAC,yBAAyB,CAAC;CACvC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACpC,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,0BAA0B,CAAC;IAE7E,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;IAE/D,mBAAmB,EAAE,CAAC;SACnB,KAAK,CAAC,yBAAyB,CAAC;SAChC,QAAQ,CAAC,kCAAkC,CAAC;IAE/C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAEnE,WAAW,EAAE,CAAC;SACX,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SACjB,QAAQ,CAAC,wCAAwC,CAAC;IAErD,SAAS,EAAE,CAAC;SACT,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SACjB,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IAEzD,UAAU,EAAE,CAAC;SACV,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SACjB,QAAQ,EAAE;SACV,QAAQ,CAAC,6CAA6C,CAAC;IAE1D,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,QAAQ,CAAC,0CAA0C,CAAC;CACxD,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG,iBAAiB,CACxD,gBAAgB,EAChB,GAAG,EAAE,CAAC,CAAC;IACL,WAAW,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAoCf;IAEE,QAAQ,EAAE,YAAY,CAAC,OAAO;IAE9B,WAAW,EAAE,oBAAoB;IAEjC,mDAAmD;IACnD,QAAQ,EAAE,KAAK;IAEf,OAAO,EAAE,KAAK,EACZ,KAAK,EACuD,EAAE;QAC9D,0DAA0D;QAC1D,IAAI,KAAK,CAAC,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,MAAM,IAAI,KAAK,CAAC,
+CAA+C,CAAC,CAAC;QACnE,CAAC;QAED,mDAAmD;QACnD,MAAM,QAAQ,GACZ,KAAK,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;YAC9D,KAAK,CAAC,mBAAmB,CAAC,MAAM,CAAC;QAEnC,2DAA2D;QAC3D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,GAAG,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC;YACjD,OAAO,CAAC,IAAI,CACV,kBAAkB,KAAK,CAAC,YAAY,yDAAyD,QAAQ,GAAG,CACzG,CAAC;QACJ,CAAC;QAED,kCAAkC;QAClC,MAAM,UAAU,GAAoB;YAClC,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,mBAAmB,EAAE,KAAK,CAAC,mBAAmB;YAC9C,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,6CAA6C;YAC7C,UAAU,EAAE;gBACV,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;gBACT,KAAK,EAAE,CAAC;aACT;SACF,CAAC;QAEF,OAAO;YACL,OAAO,EAAE,IAAI;YACb,UAAU;SACX,CAAC;IACJ,CAAC;CACF,CAAC,CACH,CAAC;AAEF;;;;;GAKG;AACH,MAAM,UAAU,oBAAoB,CAAC,UAA2B;IAC9D,OAAO,CACL,UAAU,CAAC,YAAY,KAAK,SAAS;QACrC,UAAU,CAAC,MAAM,KAAK,SAAS;QAC/B,UAAU,CAAC,mBAAmB,CAAC,MAAM,GAAG,CAAC;QACzC,UAAU,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;QAC7B,UAAU,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAClC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Judge evaluation types for AI simulation testing.
|
|
3
|
+
*
|
|
4
|
+
* Provides structured types for evaluation criteria, results, and scoring
|
|
5
|
+
* used by the LLM judge to assess AI-generated applications.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Criteria the judge assesses a generated application against.
 *
 * Requirements are grouped by dimension; the functional, UI and data
 * groups are mandatory, the remaining groups are optional.
 */
export interface EvaluationCriteria {
    /** Functional behaviour the application must provide */
    functionalRequirements: string[];
    /** Interface design and usability (UI/UX) requirements */
    uiRequirements: string[];
    /** Requirements on data integration and data flow */
    dataRequirements: string[];
    /** Optional performance and efficiency considerations */
    performanceRequirements?: string[];
    /** Optional extra criteria that fit none of the groups above */
    customCriteria?: string[];
}
|
|
25
|
+
/**
 * The judge's verdict on a single criterion.
 *
 * Pairs one requirement with its pass/fail outcome, a score, the
 * reasoning behind it and, optionally, evidence and suggestions.
 */
export interface CriterionEvaluation {
    /** Criterion this verdict refers to */
    criterion: string;
    /** True when the criterion passed evaluation */
    passed: boolean;
    /** Score for this criterion, from 0 to 100 */
    score: number;
    /** Detailed explanation of how the verdict was reached */
    reasoning: string;
    /** Optional supporting evidence (screenshots, DOM state, etc.) */
    evidence?: string;
    /** Optional suggestions for improving on this criterion */
    suggestions?: string[];
}
|
|
45
|
+
/**
 * The judge's complete evaluation of an AI-generated application.
 *
 * Combines the overall score and verdict with the per-criterion
 * evaluations, narrative feedback and bookkeeping about the run itself.
 */
export interface JudgeEvaluation {
    /** Score for the application as a whole, from 0 to 100 */
    overallScore: number;
    /** Overall pass/fail verdict */
    passed: boolean;
    /** One entry per criterion that was evaluated */
    criteriaEvaluations: CriterionEvaluation[];
    /** Executive summary of the evaluation */
    summary: string;
    /** High-level suggestions for improvement */
    suggestions: string[];
    /** Optional list of strengths found in the implementation */
    strengths?: string[];
    /** Optional list of weaknesses found in the implementation */
    weaknesses?: string[];
    /** Token usage statistics for the evaluation */
    tokensUsed: {
        input: number;
        output: number;
        cached?: number;
        total: number;
    };
    /** Optional duration of the evaluation, in milliseconds */
    evaluationDurationMs?: number;
    /** Optional confidence level of the evaluation, between 0 and 1 */
    confidence?: number;
    /** Optional error message, set if the evaluation failed */
    error?: string;
}
|
|
80
|
+
/**
|
|
81
|
+
* Configuration for the judge execution.
|
|
82
|
+
*/
|
|
83
|
+
export interface JudgeConfig {
|
|
84
|
+
/** Playwright MCP server URL */
|
|
85
|
+
playwrightMcpUrl: string;
|
|
86
|
+
/** JWT token for AI Gateway authentication */
|
|
87
|
+
jwt?: string;
|
|
88
|
+
/** Maximum steps the judge can take */
|
|
89
|
+
maxSteps?: number;
|
|
90
|
+
/** Timeout for evaluation in milliseconds */
|
|
91
|
+
timeoutMs?: number;
|
|
92
|
+
/** Minimum passing score (0-100) */
|
|
93
|
+
passingThreshold?: number;
|
|
94
|
+
/** Whether to capture screenshots as evidence */
|
|
95
|
+
captureScreenshots?: boolean;
|
|
96
|
+
/** Whether to include detailed reasoning */
|
|
97
|
+
detailedReasoning?: boolean;
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* MCP tool call parameters for Playwright actions.
|
|
101
|
+
*/
|
|
102
|
+
export interface PlaywrightActionParams {
|
|
103
|
+
/** The action to perform */
|
|
104
|
+
action: "navigate" | "click" | "fill" | "screenshot" | "getText" | "waitForSelector" | "evaluate" | "getUrl" | "reload";
|
|
105
|
+
/** Action-specific parameters */
|
|
106
|
+
params: Record<string, any>;
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Result from a Playwright action execution.
|
|
110
|
+
*/
|
|
111
|
+
export interface PlaywrightActionResult {
|
|
112
|
+
/** Whether the action succeeded */
|
|
113
|
+
success: boolean;
|
|
114
|
+
/** Result data from the action */
|
|
115
|
+
data?: any;
|
|
116
|
+
/** Error message if action failed */
|
|
117
|
+
error?: string;
|
|
118
|
+
/** Screenshot if captured */
|
|
119
|
+
screenshot?: string;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* Judge context passed to tools.
|
|
123
|
+
*/
|
|
124
|
+
export interface JudgeContext {
|
|
125
|
+
/** Application URL being evaluated */
|
|
126
|
+
appUrl: string;
|
|
127
|
+
/** Original simulation prompts */
|
|
128
|
+
prompts: string[];
|
|
129
|
+
/** Evaluation criteria */
|
|
130
|
+
criteria: EvaluationCriteria;
|
|
131
|
+
/** Current evaluation state */
|
|
132
|
+
evaluationState: {
|
|
133
|
+
criteriaChecked: string[];
|
|
134
|
+
evidence: Map<string, string>;
|
|
135
|
+
scores: Map<string, number>;
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
/**
|
|
139
|
+
* Extended simulation result with judge evaluation.
|
|
140
|
+
*
|
|
141
|
+
* Augments the standard simulation result with judge assessment data.
|
|
142
|
+
*/
|
|
143
|
+
export interface EvaluatedSimulationResult {
|
|
144
|
+
/** Base simulation result */
|
|
145
|
+
appUrl?: string;
|
|
146
|
+
promptId: string;
|
|
147
|
+
success: boolean;
|
|
148
|
+
error?: string;
|
|
149
|
+
duration: number;
|
|
150
|
+
stepResults: Array<{
|
|
151
|
+
prompt: string;
|
|
152
|
+
success: boolean;
|
|
153
|
+
duration: number;
|
|
154
|
+
error?: string;
|
|
155
|
+
}>;
|
|
156
|
+
tokens: {
|
|
157
|
+
input: number;
|
|
158
|
+
output: number;
|
|
159
|
+
cached?: number;
|
|
160
|
+
total: number;
|
|
161
|
+
};
|
|
162
|
+
/** Judge evaluation if performed */
|
|
163
|
+
judgeEvaluation?: JudgeEvaluation;
|
|
164
|
+
/** Error during evaluation if any */
|
|
165
|
+
evaluationError?: string;
|
|
166
|
+
/** Time taken for judge evaluation */
|
|
167
|
+
evaluationDuration?: number;
|
|
168
|
+
}
|
|
169
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/ai-service/judge/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;;;GAKG;AACH,MAAM,WAAW,kBAAkB;IACjC,+CAA+C;IAC/C,sBAAsB,EAAE,MAAM,EAAE,CAAC;IAEjC,4DAA4D;IAC5D,cAAc,EAAE,MAAM,EAAE,CAAC;IAEzB,6CAA6C;IAC7C,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAE3B,gDAAgD;IAChD,uBAAuB,CAAC,EAAE,MAAM,EAAE,CAAC;IAEnC,iCAAiC;IACjC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,mBAAmB;IAClC,oCAAoC;IACpC,SAAS,EAAE,MAAM,CAAC;IAElB,8CAA8C;IAC9C,MAAM,EAAE,OAAO,CAAC;IAEhB,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IAEd,4CAA4C;IAC5C,SAAS,EAAE,MAAM,CAAC;IAElB,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,kCAAkC;IAClC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;IAErB,sCAAsC;IACtC,MAAM,EAAE,OAAO,CAAC;IAEhB,uCAAuC;IACvC,mBAAmB,EAAE,mBAAmB,EAAE,CAAC;IAE3C,0CAA0C;IAC1C,OAAO,EAAE,MAAM,CAAC;IAEhB,6CAA6C;IAC7C,WAAW,EAAE,MAAM,EAAE,CAAC;IAEtB,iDAAiD;IACjD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IAErB,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IAEtB,6BAA6B;IAC7B,UAAU,EAAE;QACV,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IAEF,gDAAgD;IAChD,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B,+CAA+C;IAC/C,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,yCAAyC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,gCAAgC;IAChC,gBAAgB,EAAE,MAAM,CAAC;IAEzB,8CAA8C;IAC9C,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,uCAAuC;IACvC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,6CAA6C;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,oCAAoC;IACpC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B,iDAAiD;IACjD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B,4CAA4C;IAC5C,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,4BAA4B;IAC5B,MAAM,EACF,UAAU,GACV,OAAO,GACP,MAAM,GACN,YAAY,GACZ,SAAS,GACT,iBAAiB,GACjB,UAAU,GACV,QAAQ,GACR,QAAQ,CAAC;IAEb,iCAAiC;IACjC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,mCAAmC;IACnC,OAAO,EAAE,OAAO,CAAC;IAEjB,kCAAkC;IAClC,IAAI,C
AAC,EAAE,GAAG,CAAC;IAEX,qCAAqC;IACrC,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,6BAA6B;IAC7B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,sCAAsC;IACtC,MAAM,EAAE,MAAM,CAAC;IAEf,kCAAkC;IAClC,OAAO,EAAE,MAAM,EAAE,CAAC;IAElB,0BAA0B;IAC1B,QAAQ,EAAE,kBAAkB,CAAC;IAE7B,+BAA+B;IAC/B,eAAe,EAAE;QACf,eAAe,EAAE,MAAM,EAAE,CAAC;QAC1B,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAC9B,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAC7B,CAAC;CACH;AAED;;;;GAIG;AACH,MAAM,WAAW,yBAAyB;IACxC,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,KAAK,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,OAAO,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,EAAE;QACN,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IAEF,oCAAoC;IACpC,eAAe,CAAC,EAAE,eAAe,CAAC;IAElC,qCAAqC;IACrC,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB,sCAAsC;IACtC,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/ai-service/judge/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vercel.d.ts","sourceRoot":"","sources":["../../../../../src/ai-service/llm/interaction/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,gCAAgC,CAAC;AAC3D,OAAO,KAAK,EAEV,kBAAkB,EAClB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,gBAAgB,CAAC;AAGxB;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,kBAAkB;IAChE;;;;;OAKG;IACG,UAAU,CAAC,KAAK,SAAS,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,EACxE,OAAO,EAAE,iBAAiB,CAAC,KAAK,CAAC,GAChC,OAAO,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"vercel.d.ts","sourceRoot":"","sources":["../../../../../src/ai-service/llm/interaction/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,gCAAgC,CAAC;AAC3D,OAAO,KAAK,EAEV,kBAAkB,EAClB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,gBAAgB,CAAC;AAGxB;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,kBAAkB;IAChE;;;;;OAKG;IACG,UAAU,CAAC,KAAK,SAAS,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,EACxE,OAAO,EAAE,iBAAiB,CAAC,KAAK,CAAC,GAChC,OAAO,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;CAmHpC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,oBAAoB,IAAI,kBAAkB,CAEzD"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vercel.js","sourceRoot":"","sources":["../../../../../src/ai-service/llm/interaction/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAU7D;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,OAAO,uBAAuB;IAClC;;;;;OAKG;IACH,KAAK,CAAC,UAAU,CACd,OAAiC;QAEjC,sDAAsD;QACtD,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;QAE7C,oDAAoD;QACpD,IAAI,aAAa,GAAG,KAAK,CAAC;QAC1B,MAAM,cAAc,GAAG,CAAC,KAAc,EAAE,EAAE,CACxC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAC5D,MAAM,aAAa,GAAG,KAAK,EAAE,KAAc,EAAE,EAAE;YAC7C,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,aAAa;gBAAE,OAAO;YAC3C,aAAa,GAAG,IAAI,CAAC;YACrB,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC;QAC5C,CAAC,CAAC;QAEF,oEAAoE;QACpE,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QAED,yDAAyD;QACzD,4DAA4D;QAC5D,wEAAwE;QACxE,MAAM,SAAS,GAAG,UAAU,CAAkB;YAC5C,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/C,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,KAAK,EAAE,IAAI,CAAC,KAA0B;YACtC,GAAG,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,IAAI,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC/D,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;YACnD,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;YACzE,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,SAAS,CAAC,
|
|
1
|
+
{"version":3,"file":"vercel.js","sourceRoot":"","sources":["../../../../../src/ai-service/llm/interaction/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAU7D;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,OAAO,uBAAuB;IAClC;;;;;OAKG;IACH,KAAK,CAAC,UAAU,CACd,OAAiC;QAEjC,sDAAsD;QACtD,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;QAE7C,oDAAoD;QACpD,IAAI,aAAa,GAAG,KAAK,CAAC;QAC1B,MAAM,cAAc,GAAG,CAAC,KAAc,EAAE,EAAE,CACxC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAC5D,MAAM,aAAa,GAAG,KAAK,EAAE,KAAc,EAAE,EAAE;YAC7C,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,aAAa;gBAAE,OAAO;YAC3C,aAAa,GAAG,IAAI,CAAC;YACrB,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC;QAC5C,CAAC,CAAC;QAEF,oEAAoE;QACpE,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QAED,yDAAyD;QACzD,4DAA4D;QAC5D,wEAAwE;QACxE,MAAM,SAAS,GAAG,UAAU,CAAkB;YAC5C,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/C,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,KAAK,EAAE,IAAI,CAAC,KAA0B;YACtC,GAAG,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,IAAI,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC/D,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;YACnD,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;YACzE,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,SAAS,CAAC,QAE7B,CAAC;QACF,MAAM,eAAe,GAAgC,WAAW,CAAC,KAAK,CACpE,KAAK,EAAE,KAAK,EAAE,EAAE;YACd,MAAM,aAAa,CAAC,KAAK,CAAC,CAAC;YAC3B,MAAM,KAAK,CAAC;QACd,CAAC,CACF,CAAC;QAEF,MAAM,aAAa,GAAqB,eAAe
,CAAC,IAAI,CAC1D,GAAG,EAAE,CAAC,SAAS,EACf,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CACjB,CAAC;QAEF,MAAM,2BAA2B,GAAG,CAClC,MAA8B,EACN,EAAE;YAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;YAChD,IAAI,gBAAyB,CAAC;YAC9B,KAAK,aAAa,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;gBAChC,gBAAgB,GAAG,KAAK,CAAC;YAC3B,CAAC,CAAC,CAAC;YAEH,MAAM,QAAQ,GAAG,IAAI,cAAc,CAAI;gBACrC,KAAK,CAAC,IAAI,CAAC,UAAU;oBACnB,IAAI,gBAAgB,EAAE,CAAC;wBACrB,UAAU,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;wBACnC,OAAO;oBACT,CAAC;oBAED,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;oBAE9C,IAAI,gBAAgB,EAAE,CAAC;wBACrB,UAAU,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;wBACnC,OAAO;oBACT,CAAC;oBAED,IAAI,IAAI,EAAE,CAAC;wBACT,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC;wBACvC,IAAI,UAAU,EAAE,CAAC;4BACf,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;wBAC/B,CAAC;6BAAM,CAAC;4BACN,UAAU,CAAC,KAAK,EAAE,CAAC;wBACrB,CAAC;wBACD,OAAO;oBACT,CAAC;oBAED,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC5B,CAAC;gBACD,KAAK,CAAC,MAAM;oBACV,MAAM,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;gBAC5B,CAAC;aACF,CAAC,CAAC;YAEH,OAAO,QAAkC,CAAC;QAC5C,CAAC,CAAC;QAEF,iEAAiE;QACjE,oEAAoE;QACpE,MAAM,MAAM,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC;QAC7C,MAAM,kBAAkB,GAA4B;YAClD,GAAG,MAAM;YACT,QAAQ,EAAE,eAAe;YACzB,UAAU,EAAE,2BAA2B,CAAC,MAAM,CAAC,UAAU,CAAC;YAC1D,UAAU,EAAE,2BAA2B,CAAC,MAAM,CAAC,UAAU,CAAC;SAC3D,CAAC;QAEF,kDAAkD;QAClD,kEAAkE;QAClE,OAAO,SAAS;YACd,CAAC,CAAC,mBAAmB,CAAQ,kBAAkB,EAAE,SAAS,CAAC;YAC3D,CAAC,CAAC,kBAAkB,CAAC;IACzB,CAAC;CACF;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO,IAAI,uBAAuB,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -15,7 +15,7 @@ import type { Tool } from "../../agent/tools2/types.js";
|
|
|
15
15
|
import type { StopCondition } from "../../util/stop-condition.js";
|
|
16
16
|
import type { LanguageModelV2 } from "@ai-sdk/provider";
|
|
17
17
|
import type { ProviderOptions } from "@ai-sdk/provider-utils";
|
|
18
|
-
import type { TextStreamPart, LanguageModelUsage, ModelMessage, UserModelMessage } from "ai";
|
|
18
|
+
import type { TextStreamPart, LanguageModelUsage, ModelMessage, UserModelMessage, ToolChoice } from "ai";
|
|
19
19
|
/**
|
|
20
20
|
* Provider interface for LLM streaming interactions.
|
|
21
21
|
*
|
|
@@ -81,7 +81,7 @@ export interface StreamTextOptions<TOOLS extends Record<string, Tool>> {
|
|
|
81
81
|
* Called before each LLM step.
|
|
82
82
|
* Can transform the step parameters before execution.
|
|
83
83
|
*/
|
|
84
|
-
prepareStep?: (step: StepParameters) => Promise<StepParameters
|
|
84
|
+
prepareStep?: (step: StepParameters<TOOLS>) => Promise<StepParameters<TOOLS>> | StepParameters<TOOLS>;
|
|
85
85
|
/**
|
|
86
86
|
* Called for each chunk in the stream during generation.
|
|
87
87
|
*/
|
|
@@ -91,12 +91,12 @@ export interface StreamTextOptions<TOOLS extends Record<string, Tool>> {
|
|
|
91
91
|
/**
|
|
92
92
|
* Called after each step completes (after tool execution).
|
|
93
93
|
*/
|
|
94
|
-
onStepFinish?: (step: StepResult) => Promise<void> | void;
|
|
94
|
+
onStepFinish?: (step: StepResult<TOOLS>) => Promise<void> | void;
|
|
95
95
|
/**
|
|
96
96
|
* Called when the entire multi-step interaction finishes.
|
|
97
97
|
* This fires after generation completes, before stream consumption.
|
|
98
98
|
*/
|
|
99
|
-
onFinish?: (result: FinalResult) => Promise<void> | void;
|
|
99
|
+
onFinish?: (result: FinalResult<TOOLS>) => Promise<void> | void;
|
|
100
100
|
/**
|
|
101
101
|
* Called after stream consumption completes.
|
|
102
102
|
*
|
|
@@ -126,9 +126,10 @@ export interface StreamTextOptions<TOOLS extends Record<string, Tool>> {
|
|
|
126
126
|
/**
|
|
127
127
|
* Parameters for a single LLM step.
|
|
128
128
|
*/
|
|
129
|
-
export interface StepParameters {
|
|
129
|
+
export interface StepParameters<TOOLS extends Record<string, Tool>> {
|
|
130
130
|
messages: ModelMessage[];
|
|
131
131
|
stepNumber: number;
|
|
132
|
+
toolChoice?: ToolChoice<TOOLS>;
|
|
132
133
|
}
|
|
133
134
|
/**
|
|
134
135
|
* A chunk from the streaming response.
|
|
@@ -140,14 +141,14 @@ import type { StepResult as VercelStepResult } from "ai";
|
|
|
140
141
|
* Result of a single LLM step.
|
|
141
142
|
* Re-exported from Vercel AI SDK to avoid type incompatibilities.
|
|
142
143
|
*/
|
|
143
|
-
export type StepResult = VercelStepResult<
|
|
144
|
+
export type StepResult<TOOLS extends Record<string, Tool>> = VercelStepResult<TOOLS>;
|
|
144
145
|
/**
|
|
145
146
|
* Final result of the entire multi-step interaction.
|
|
146
147
|
* This is the result passed to onFinish callback.
|
|
147
148
|
* Re-exported from Vercel AI SDK's internal type.
|
|
148
149
|
*/
|
|
149
|
-
export type FinalResult = StepResult & {
|
|
150
|
-
readonly steps: StepResult[];
|
|
150
|
+
export type FinalResult<TOOLS extends Record<string, Tool>> = StepResult<TOOLS> & {
|
|
151
|
+
readonly steps: StepResult<TOOLS>[];
|
|
151
152
|
readonly totalUsage: LanguageModelUsage;
|
|
152
153
|
};
|
|
153
154
|
import type { AsyncIterableStream } from "ai";
|
|
@@ -169,6 +170,6 @@ export type StreamTextResult<TOOLS extends Record<string, Tool> = Record<string,
|
|
|
169
170
|
/** Stream of all events (text, tool calls, etc.) */
|
|
170
171
|
fullStream: AsyncIterableStream<TextStreamPart<TOOLS>>;
|
|
171
172
|
/** Promise that resolves with the final result */
|
|
172
|
-
response: Promise<FinalResult
|
|
173
|
+
response: Promise<FinalResult<TOOLS>>;
|
|
173
174
|
};
|
|
174
175
|
//# sourceMappingURL=provider.d.ts.map
|