@superblocksteam/vite-plugin-file-sync 2.0.67 → 2.0.68-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/dist/ai-service/agent/tools/apis/analysis.d.ts.map +1 -1
  2. package/dist/ai-service/agent/tools/apis/analysis.js +4 -0
  3. package/dist/ai-service/agent/tools/apis/analysis.js.map +1 -1
  4. package/dist/ai-service/agent/tools/apis/api-executor.d.ts +9 -1
  5. package/dist/ai-service/agent/tools/apis/api-executor.d.ts.map +1 -1
  6. package/dist/ai-service/agent/tools/apis/api-executor.js +4 -1
  7. package/dist/ai-service/agent/tools/apis/api-executor.js.map +1 -1
  8. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts +1 -0
  9. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts.map +1 -1
  10. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js +1 -1
  11. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js.map +1 -1
  12. package/dist/ai-service/agent/tools/apis/test-api.d.ts +5 -0
  13. package/dist/ai-service/agent/tools/apis/test-api.d.ts.map +1 -1
  14. package/dist/ai-service/agent/tools/apis/test-api.js +37 -18
  15. package/dist/ai-service/agent/tools/apis/test-api.js.map +1 -1
  16. package/dist/ai-service/agent/tools/build-delete-file.d.ts.map +1 -1
  17. package/dist/ai-service/agent/tools/build-delete-file.js +29 -0
  18. package/dist/ai-service/agent/tools/build-delete-file.js.map +1 -1
  19. package/dist/ai-service/agent/tools/integrations/execute-request.d.ts.map +1 -1
  20. package/dist/ai-service/agent/tools/integrations/execute-request.js +5 -4
  21. package/dist/ai-service/agent/tools/integrations/execute-request.js.map +1 -1
  22. package/dist/ai-service/agent/tools.d.ts.map +1 -1
  23. package/dist/ai-service/agent/tools.js +17 -6
  24. package/dist/ai-service/agent/tools.js.map +1 -1
  25. package/dist/ai-service/agent/tools2/entity-permissions.d.ts +23 -20
  26. package/dist/ai-service/agent/tools2/entity-permissions.d.ts.map +1 -1
  27. package/dist/ai-service/agent/tools2/entity-permissions.js +15 -11
  28. package/dist/ai-service/agent/tools2/entity-permissions.js.map +1 -1
  29. package/dist/ai-service/agent/tools2/example.js +2 -2
  30. package/dist/ai-service/agent/tools2/example.js.map +1 -1
  31. package/dist/ai-service/agent/tools2/index.d.ts +1 -1
  32. package/dist/ai-service/agent/tools2/index.d.ts.map +1 -1
  33. package/dist/ai-service/agent/tools2/index.js +1 -1
  34. package/dist/ai-service/agent/tools2/index.js.map +1 -1
  35. package/dist/ai-service/agent/tools2/registry.d.ts +4 -4
  36. package/dist/ai-service/agent/tools2/registry.d.ts.map +1 -1
  37. package/dist/ai-service/agent/tools2/registry.js +42 -29
  38. package/dist/ai-service/agent/tools2/registry.js.map +1 -1
  39. package/dist/ai-service/agent/tools2/tools/read.d.ts +1 -1
  40. package/dist/ai-service/agent/tools2/types.d.ts +36 -15
  41. package/dist/ai-service/agent/tools2/types.d.ts.map +1 -1
  42. package/dist/ai-service/agent/tools2/types.js.map +1 -1
  43. package/dist/ai-service/chat/chat-session-store.d.ts +5 -7
  44. package/dist/ai-service/chat/chat-session-store.d.ts.map +1 -1
  45. package/dist/ai-service/chat/chat-session-store.js +36 -17
  46. package/dist/ai-service/chat/chat-session-store.js.map +1 -1
  47. package/dist/ai-service/chat/extract-history.d.ts +0 -85
  48. package/dist/ai-service/chat/extract-history.d.ts.map +1 -1
  49. package/dist/ai-service/chat/extract-history.js +3 -239
  50. package/dist/ai-service/chat/extract-history.js.map +1 -1
  51. package/dist/ai-service/index.d.ts +26 -4
  52. package/dist/ai-service/index.d.ts.map +1 -1
  53. package/dist/ai-service/index.js +125 -4
  54. package/dist/ai-service/index.js.map +1 -1
  55. package/dist/ai-service/judge/debug-browser.d.ts +8 -0
  56. package/dist/ai-service/judge/debug-browser.d.ts.map +1 -0
  57. package/dist/ai-service/judge/debug-browser.js +79 -0
  58. package/dist/ai-service/judge/debug-browser.js.map +1 -0
  59. package/dist/ai-service/judge/index.d.ts +12 -0
  60. package/dist/ai-service/judge/index.d.ts.map +1 -0
  61. package/dist/ai-service/judge/index.js +11 -0
  62. package/dist/ai-service/judge/index.js.map +1 -0
  63. package/dist/ai-service/judge/integration/mcp-client.d.ts +82 -0
  64. package/dist/ai-service/judge/integration/mcp-client.d.ts.map +1 -0
  65. package/dist/ai-service/judge/integration/mcp-client.js +276 -0
  66. package/dist/ai-service/judge/integration/mcp-client.js.map +1 -0
  67. package/dist/ai-service/judge/integration/playwright-bridge.d.ts +142 -0
  68. package/dist/ai-service/judge/integration/playwright-bridge.d.ts.map +1 -0
  69. package/dist/ai-service/judge/integration/playwright-bridge.js +217 -0
  70. package/dist/ai-service/judge/integration/playwright-bridge.js.map +1 -0
  71. package/dist/ai-service/judge/judge-eval-http.d.ts +3 -0
  72. package/dist/ai-service/judge/judge-eval-http.d.ts.map +1 -0
  73. package/dist/ai-service/judge/judge-eval-http.js +541 -0
  74. package/dist/ai-service/judge/judge-eval-http.js.map +1 -0
  75. package/dist/ai-service/judge/judge-eval-service-runner.d.ts +35 -0
  76. package/dist/ai-service/judge/judge-eval-service-runner.d.ts.map +1 -0
  77. package/dist/ai-service/judge/judge-eval-service-runner.js +124 -0
  78. package/dist/ai-service/judge/judge-eval-service-runner.js.map +1 -0
  79. package/dist/ai-service/judge/judge-executor.d.ts +65 -0
  80. package/dist/ai-service/judge/judge-executor.d.ts.map +1 -0
  81. package/dist/ai-service/judge/judge-executor.js +334 -0
  82. package/dist/ai-service/judge/judge-executor.js.map +1 -0
  83. package/dist/ai-service/judge/judge-service.d.ts +161 -0
  84. package/dist/ai-service/judge/judge-service.d.ts.map +1 -0
  85. package/dist/ai-service/judge/judge-service.js +241 -0
  86. package/dist/ai-service/judge/judge-service.js.map +1 -0
  87. package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts +37 -0
  88. package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts.map +1 -0
  89. package/dist/ai-service/judge/prompts/evaluation-criteria.js +283 -0
  90. package/dist/ai-service/judge/prompts/evaluation-criteria.js.map +1 -0
  91. package/dist/ai-service/judge/prompts/system-prompt.d.ts +30 -0
  92. package/dist/ai-service/judge/prompts/system-prompt.d.ts.map +1 -0
  93. package/dist/ai-service/judge/prompts/system-prompt.js +212 -0
  94. package/dist/ai-service/judge/prompts/system-prompt.js.map +1 -0
  95. package/dist/ai-service/judge/storage/csv-storage.d.ts +99 -0
  96. package/dist/ai-service/judge/storage/csv-storage.d.ts.map +1 -0
  97. package/dist/ai-service/judge/storage/csv-storage.js +274 -0
  98. package/dist/ai-service/judge/storage/csv-storage.js.map +1 -0
  99. package/dist/ai-service/judge/storage/index.d.ts +9 -0
  100. package/dist/ai-service/judge/storage/index.d.ts.map +1 -0
  101. package/dist/ai-service/judge/storage/index.js +7 -0
  102. package/dist/ai-service/judge/storage/index.js.map +1 -0
  103. package/dist/ai-service/judge/storage/interface.d.ts +51 -0
  104. package/dist/ai-service/judge/storage/interface.d.ts.map +1 -0
  105. package/dist/ai-service/judge/storage/interface.js +8 -0
  106. package/dist/ai-service/judge/storage/interface.js.map +1 -0
  107. package/dist/ai-service/judge/storage/types.d.ts +54 -0
  108. package/dist/ai-service/judge/storage/types.d.ts.map +1 -0
  109. package/dist/ai-service/judge/storage/types.js +7 -0
  110. package/dist/ai-service/judge/storage/types.js.map +1 -0
  111. package/dist/ai-service/judge/tools/index.d.ts +22 -0
  112. package/dist/ai-service/judge/tools/index.d.ts.map +1 -0
  113. package/dist/ai-service/judge/tools/index.js +29 -0
  114. package/dist/ai-service/judge/tools/index.js.map +1 -0
  115. package/dist/ai-service/judge/tools/playwright-action.d.ts +18 -0
  116. package/dist/ai-service/judge/tools/playwright-action.d.ts.map +1 -0
  117. package/dist/ai-service/judge/tools/playwright-action.js +171 -0
  118. package/dist/ai-service/judge/tools/playwright-action.js.map +1 -0
  119. package/dist/ai-service/judge/tools/submit-feedback.d.ts +41 -0
  120. package/dist/ai-service/judge/tools/submit-feedback.d.ts.map +1 -0
  121. package/dist/ai-service/judge/tools/submit-feedback.js +150 -0
  122. package/dist/ai-service/judge/tools/submit-feedback.js.map +1 -0
  123. package/dist/ai-service/judge/types.d.ts +169 -0
  124. package/dist/ai-service/judge/types.d.ts.map +1 -0
  125. package/dist/ai-service/judge/types.js +8 -0
  126. package/dist/ai-service/judge/types.js.map +1 -0
  127. package/dist/ai-service/llm/context/constants.d.ts +7 -0
  128. package/dist/ai-service/llm/context/constants.d.ts.map +1 -1
  129. package/dist/ai-service/llm/context/constants.js +7 -0
  130. package/dist/ai-service/llm/context/constants.js.map +1 -1
  131. package/dist/ai-service/llm/context/context.d.ts +8 -1
  132. package/dist/ai-service/llm/context/context.d.ts.map +1 -1
  133. package/dist/ai-service/llm/context/context.js +47 -12
  134. package/dist/ai-service/llm/context/context.js.map +1 -1
  135. package/dist/ai-service/llm/context/internal-types.d.ts +1 -0
  136. package/dist/ai-service/llm/context/internal-types.d.ts.map +1 -1
  137. package/dist/ai-service/llm/context/internal-types.js.map +1 -1
  138. package/dist/ai-service/llm/context/manager.d.ts +2 -1
  139. package/dist/ai-service/llm/context/manager.d.ts.map +1 -1
  140. package/dist/ai-service/llm/context/manager.js +2 -1
  141. package/dist/ai-service/llm/context/manager.js.map +1 -1
  142. package/dist/ai-service/llm/context/utils/message-utils.d.ts +10 -0
  143. package/dist/ai-service/llm/context/utils/message-utils.d.ts.map +1 -1
  144. package/dist/ai-service/llm/context/utils/message-utils.js +74 -0
  145. package/dist/ai-service/llm/context/utils/message-utils.js.map +1 -1
  146. package/dist/ai-service/llm/error.d.ts +1 -1
  147. package/dist/ai-service/llm/interaction/adapters/vercel.d.ts.map +1 -1
  148. package/dist/ai-service/llm/interaction/adapters/vercel.js.map +1 -1
  149. package/dist/ai-service/llm/interaction/provider.d.ts +10 -9
  150. package/dist/ai-service/llm/interaction/provider.d.ts.map +1 -1
  151. package/dist/ai-service/llmobs/middleware/stream-text.d.ts +8 -8
  152. package/dist/ai-service/llmobs/middleware/stream-text.d.ts.map +1 -1
  153. package/dist/ai-service/llmobs/middleware/stream-text.js.map +1 -1
  154. package/dist/ai-service/llmobs/tracer.d.ts.map +1 -1
  155. package/dist/ai-service/llmobs/tracer.js +2 -1
  156. package/dist/ai-service/llmobs/tracer.js.map +1 -1
  157. package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts +53 -0
  158. package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts.map +1 -0
  159. package/dist/ai-service/mcp/embedded-playwright-mcp-server.js +541 -0
  160. package/dist/ai-service/mcp/embedded-playwright-mcp-server.js.map +1 -0
  161. package/dist/ai-service/mcp/playwright-server.d.ts +114 -0
  162. package/dist/ai-service/mcp/playwright-server.d.ts.map +1 -0
  163. package/dist/ai-service/mcp/playwright-server.js +109 -0
  164. package/dist/ai-service/mcp/playwright-server.js.map +1 -0
  165. package/dist/ai-service/state-machine/clark-fsm.d.ts +4 -1
  166. package/dist/ai-service/state-machine/clark-fsm.d.ts.map +1 -1
  167. package/dist/ai-service/state-machine/clark-fsm.js +3 -1
  168. package/dist/ai-service/state-machine/clark-fsm.js.map +1 -1
  169. package/dist/ai-service/state-machine/handlers/idle.d.ts.map +1 -1
  170. package/dist/ai-service/state-machine/handlers/idle.js +3 -1
  171. package/dist/ai-service/state-machine/handlers/idle.js.map +1 -1
  172. package/dist/ai-service/state-machine/handlers/runtime-reviewing.d.ts.map +1 -1
  173. package/dist/ai-service/state-machine/handlers/runtime-reviewing.js +4 -1
  174. package/dist/ai-service/state-machine/handlers/runtime-reviewing.js.map +1 -1
  175. package/dist/ai-service/state-machine/helpers/context-id.d.ts +1 -1
  176. package/dist/ai-service/state-machine/helpers/context-id.d.ts.map +1 -1
  177. package/dist/ai-service/state-machine/helpers/context-id.js +6 -7
  178. package/dist/ai-service/state-machine/helpers/context-id.js.map +1 -1
  179. package/dist/ai-service/state-machine/mocks.d.ts +1 -0
  180. package/dist/ai-service/state-machine/mocks.d.ts.map +1 -1
  181. package/dist/ai-service/state-machine/mocks.js +5 -1
  182. package/dist/ai-service/state-machine/mocks.js.map +1 -1
  183. package/dist/server-rpc/client.js +1 -1
  184. package/dist/server-rpc/client.js.map +1 -1
  185. package/dist/socket-manager.d.ts.map +1 -1
  186. package/dist/socket-manager.js +26 -6
  187. package/dist/socket-manager.js.map +1 -1
  188. package/dist/sync-service/index.d.ts +5 -0
  189. package/dist/sync-service/index.d.ts.map +1 -1
  190. package/dist/sync-service/index.js +13 -1
  191. package/dist/sync-service/index.js.map +1 -1
  192. package/package.json +7 -6
@@ -0,0 +1,124 @@
1
+ import fs from "node:fs";
2
+ import os from "node:os";
3
+ import path from "node:path";
4
+ import { getLogger } from "../../util/logger.js";
5
+ import { OperationPassthrough } from "../../util/operation-queue.js";
6
+ import { AppShell } from "../app-interface/shell.js";
7
+ import { LLMClient } from "../llm/client.js";
8
+ import { ContextManager } from "../llm/context/manager.js";
9
+ import { LocalContextStorage } from "../llm/context/storage/local.js";
10
+ import { createLLMProvider } from "../llm/provider.js";
11
+ import { PlaywrightMcpServerManager } from "../mcp/playwright-server.js";
12
+ import { JudgeService } from "./judge-service.js";
13
+ import { CsvJudgeStorage } from "./storage/csv-storage.js";
14
/**
 * Runs one judge evaluation from start to finish.
 *
 * The workspace root is located by walking up from the current working
 * directory, a JudgeService is created for it, and the application described
 * by `options` is evaluated. The service is disposed in every case, whether
 * the evaluation resolves or throws.
 *
 * @param options - Evaluation inputs. `promptId` and `prompt` are forwarded
 *   as-is; `appId` falls back to `promptId`; `complexity` falls back to
 *   "medium"; `configOverrides` is forwarded under the `config` key.
 * @returns Whatever `judgeService.evaluateApplication` resolves to.
 */
export async function runJudgeServiceEvaluation(options) {
    const log = getLogger();
    const workspaceRoot = findWorkspaceRoot(process.cwd());
    // Evaluated lazily on every call: the explicit option wins, then the
    // environment variables are consulted in order. Yields undefined if none is set.
    const resolveJwt = () => [
        options.jwt,
        process.env.JUDGE_JWT,
        process.env.SUPERBLOCKS_JWT,
        process.env.SUPERBLOCKS_TOKEN,
    ].find((candidate) => candidate != null);
    const created = await createJudgeService(workspaceRoot, log, resolveJwt);
    const service = created.judgeService;
    try {
        const targetAppId = options.appId ?? options.promptId;
        const evaluationSettings = {
            appUrl: options.appUrl,
            complexity: options.complexity ?? "medium",
            storageStatePath: options.storageStatePath,
            jwt: options.jwt,
            llmConfig: options.llmConfig,
            storageStateData: options.storageStateData,
            sessionStorageData: options.sessionStorageData,
            extraOrigins: options.extraOrigins,
            config: options.configOverrides,
        };
        // `return await` so the result settles before the finally block disposes.
        return await service.evaluateApplication(options.promptId, options.prompt, targetAppId, evaluationSettings);
    }
    finally {
        await service.dispose();
    }
}
41
/**
 * Assembles a JudgeService and the collaborators it needs.
 *
 * Creates the app shell, context manager, LLM client and provider, a CSV
 * storage backend (creating its directory if needed) and a Playwright MCP
 * server manager, then hands them to a new JudgeService.
 *
 * @param repoRoot - Directory used as the app root and as the base for the
 *   `.superblocks/context` directory and the default storage location.
 * @param logger - Logger shared by the MCP server manager and the service;
 *   defaults to `getLogger()`.
 * @param getJwt - Callback passed through to `createLLMProvider`.
 * @returns An object with a single `judgeService` property.
 */
async function createJudgeService(repoRoot, logger = getLogger(), getJwt) {
    const appShell = new AppShell({
        appRootDirPath: repoRoot,
        fsOperationQueue: new OperationPassthrough(),
    });
    const contextManager = new ContextManager({
        storage: new LocalContextStorage(path.join(repoRoot, ".superblocks/context")),
    });
    const llmClient = new LLMClient({ contextManager, appShell });
    const llmProvider = createLLMProvider(buildLlmProviderSettings(), getJwt);
    const { dir: storageDir, filename: storageFile } = resolveJudgeStorageLocation(repoRoot);
    // The storage directory must exist before the CSV storage is constructed.
    await fs.promises.mkdir(storageDir, { recursive: true });
    const storage = new CsvJudgeStorage(storageDir, storageFile);
    const mcpServerManager = new PlaywrightMcpServerManager(logger);
    return {
        judgeService: new JudgeService({
            llmClient,
            llmProvider,
            storage,
            mcpServerManager,
            logger,
        }),
    };
}
68
/**
 * Builds the LLM provider settings from environment variables.
 *
 * - `SUPERBLOCKS_BASE_URL` is the base for both endpoints and falls back to
 *   "http://localhost:3000" when unset or empty. Trailing slashes are removed
 *   so a value such as "https://host/" does not produce "https://host//api/...".
 * - `ANTHROPIC_API_KEY` falls back to the literal "placeholder" when unset.
 * - `SUPERBLOCKS_UPSTREAM_PROVIDER` selects "vertex" only when it is exactly
 *   "vertex"; any other value, including unset, selects "bedrock".
 *
 * @returns An object with `anthropic` ({ baseURL, apiKey }) and `clark`
 *   ({ baseURL, upstreamProvider }) settings.
 */
function buildLlmProviderSettings() {
    const configuredBase = process.env.SUPERBLOCKS_BASE_URL || "http://localhost:3000";
    // Normalise so the joined paths below never contain a double slash.
    const baseUrl = configuredBase.replace(/\/+$/, "");
    return {
        anthropic: {
            baseURL: `${baseUrl}/api/v1/ai/proxy/anthropic`,
            apiKey: process.env.ANTHROPIC_API_KEY ?? "placeholder",
        },
        clark: {
            baseURL: `${baseUrl}/api/v1/ai/inference`,
            upstreamProvider: process.env.SUPERBLOCKS_UPSTREAM_PROVIDER === "vertex"
                ? "vertex"
                : "bedrock",
        },
    };
}
83
/**
 * Decides where judge evaluations are stored.
 *
 * When `JUDGE_STORAGE_PATH` is set it takes precedence:
 * - a leading `~` (alone, or followed by `/` or the platform separator) is
 *   replaced by the current user's home directory;
 * - a value ending in ".csv" is split into its directory and file name;
 * - any other value is treated as a directory holding "evaluations.csv".
 * Relative values are returned unchanged; they are not resolved against
 * `repoRoot`.
 *
 * When the variable is unset or empty, the location defaults to
 * `<repoRoot>/.superblocks/judge-evaluations/evaluations.csv`.
 *
 * @param repoRoot - Base directory for the default location.
 * @returns `{ dir, filename }` describing the CSV file location.
 */
function resolveJudgeStorageLocation(repoRoot) {
    const configuredPath = process.env.JUDGE_STORAGE_PATH;
    if (!configuredPath) {
        return {
            dir: path.join(repoRoot, ".superblocks/judge-evaluations"),
            filename: "evaluations.csv",
        };
    }
    let expanded = configuredPath;
    if (configuredPath === "~") {
        // A bare "~" means the home directory itself; previously this was left
        // unexpanded and produced a literal "~" directory.
        expanded = os.homedir();
    }
    else if (configuredPath.startsWith("~/") || configuredPath.startsWith(`~${path.sep}`)) {
        expanded = path.join(os.homedir(), configuredPath.slice(2));
    }
    if (expanded.endsWith(".csv")) {
        return {
            dir: path.dirname(expanded),
            filename: path.basename(expanded),
        };
    }
    return { dir: expanded, filename: "evaluations.csv" };
}
103
/**
 * Finds the nearest ancestor directory whose package.json declares workspaces.
 *
 * Starting at `startDir`, each directory up to the filesystem root is checked
 * for a package.json with a truthy `workspaces` field. A package.json that is
 * missing, unreadable or not valid JSON is skipped.
 *
 * @param startDir - Directory to begin the upward search from.
 * @returns The first matching directory, or `startDir` when none matches.
 */
function findWorkspaceRoot(startDir) {
    // path.dirname() of the root returns the root itself, which ends the walk.
    for (let dir = startDir, previous = ""; dir !== previous; previous = dir, dir = path.dirname(dir)) {
        let manifest;
        try {
            manifest = JSON.parse(fs.readFileSync(path.join(dir, "package.json"), "utf-8"));
        }
        catch {
            // No readable, parseable package.json here; try the parent.
            continue;
        }
        if (manifest?.workspaces) {
            return dir;
        }
    }
    return startDir;
}
124
+ //# sourceMappingURL=judge-eval-service-runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge-eval-service-runner.js","sourceRoot":"","sources":["../../../src/ai-service/judge/judge-eval-service-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,+BAA+B,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AA4B3D,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,OAAkC;IAElC,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,MAAM,QAAQ,GAAG,iBAAiB,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAElD,MAAM,WAAW,GAAG,GAAG,EAAE,CACvB,OAAO,CAAC,GAAG;QACX,OAAO,CAAC,GAAG,CAAC,SAAS;QACrB,OAAO,CAAC,GAAG,CAAC,eAAe;QAC3B,OAAO,CAAC,GAAG,CAAC,iBAAiB;QAC7B,SAAS,CAAC;IAEZ,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,kBAAkB,CAC/C,QAAQ,EACR,MAAM,EACN,WAAW,CACZ,CAAC;IAEF,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,MAAM,YAAY,CAAC,mBAAmB,CACvD,OAAO,CAAC,QAAQ,EAChB,OAAO,CAAC,MAAM,EACd,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,QAAQ,EACjC;YACE,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,QAAQ;YAC1C,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;YAC1C,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;YAC1C,kBAAkB,EAAE,OAAO,CAAC,kBAAkB;YAC9C,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,MAAM,EAAE,OAAO,CAAC,eAAe;SAChC,CACF,CAAC;QAEF,OAAO,UAAU,CAAC;IACpB,CAAC;YAAS,CAAC;QACT,MAAM,YAAY,CAAC,OAAO,EAAE,CAAC;IAC/B,CAAC;AACH,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,QAAgB,EAChB,MAAM,GAAG,SAAS,EAAE,EACpB,MAAiC;IAEjC,MAAM,OAAO,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC3C,MAAM,QAAQ,GAAG,IAAI,QAAQ,CAAC;QAC5B,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,OAAO;KAC1B,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAI,mBAAmB,CAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,sBAAsB,CAAC,CAC5C,CAAC;IACF,MAAM,cAAc,GAAG,IAAI,cAAc,CAAC,EAAE,OAAO,EAAE,cAAc,
EAAE,CAAC,CAAC;IAEvE,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC;QAC9B,cAAc;QACd,QAAQ;KACT,CAAC,CAAC;IAEH,MAAM,mBAAmB,GAAG,wBAAwB,EAAE,CAAC;IACvD,MAAM,WAAW,GAAG,iBAAiB,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC;IAEnE,MAAM,eAAe,GAAG,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC9D,MAAM,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,eAAe,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAClE,MAAM,YAAY,GAAG,IAAI,eAAe,CACtC,eAAe,CAAC,GAAG,EACnB,eAAe,CAAC,QAAQ,CACzB,CAAC;IAEF,MAAM,gBAAgB,GAAG,IAAI,0BAA0B,CAAC,MAAM,CAAC,CAAC;IAEhE,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC;QACpC,SAAS;QACT,WAAW;QACX,OAAO,EAAE,YAAY;QACrB,gBAAgB;QAChB,MAAM;KACP,CAAC,CAAC;IAEH,OAAO,EAAE,YAAY,EAAE,CAAC;AAC1B,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,uBAAuB,CAAC;IAE5E,OAAO;QACL,SAAS,EAAE;YACT,OAAO,EAAE,GAAG,OAAO,4BAA4B;YAC/C,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,aAAa;SACvD;QACD,KAAK,EAAE;YACL,OAAO,EAAE,GAAG,OAAO,sBAAsB;YACzC,gBAAgB,EACd,OAAO,CAAC,GAAG,CAAC,6BAA6B,KAAK,QAAQ;gBACpD,CAAC,CAAC,QAAQ;gBACV,CAAC,CAAC,SAAS;SAChB;KACF,CAAC;AACJ,CAAC;AAED,SAAS,2BAA2B,CAAC,QAAgB;IACnD,MAAM,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC;IACtD,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,CAAC,IAAI,CAAC;YAC9C,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAClD,CAAC,CAAC,cAAc,CAAC;QAEnB,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC9B,OAAO;gBACL,GAAG,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC;gBAC3B,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;aAClC,CAAC;QACJ,CAAC;QAED,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IACxD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,gCAAgC,CAAC,CAAC;IACzE,OAAO;QACL,GAAG,EAAE,UAAU;QACf,QAAQ,EAAE,iBAAiB;KAC5B,CAAC;AACJ,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAgB;IACzC,IAAI,OAAO,GAAG,QAAQ,CAAC;IACvB,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,OAAO,OAAO,KAAK,IAAI,EAAE,CAAC;QACxB,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC;QAC3D,IAAI,EAAE,CAAC,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;YACnC,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,eAAe,EAAE,OAAO,C
AAC,CAAC,CAAC;gBAClE,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;oBACnB,OAAO,OAAO,CAAC;gBACjB,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,mCAAmC;YACrC,CAAC;QACH,CAAC;QACD,IAAI,GAAG,OAAO,CAAC;QACf,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Judge executor for evaluating AI-generated applications.
3
+ *
4
+ * Orchestrates the evaluation process using LLM with browser
5
+ * automation capabilities through Playwright MCP.
6
+ */
7
+ import type { JudgeEvaluation, EvaluationCriteria, JudgeConfig } from "./types.js";
8
+ import type { Logger } from "../../util/logger.js";
9
+ import type { LLMClient } from "../llm/client.js";
10
+ import type { LanguageModelV2 } from "@ai-sdk/provider";
11
+ /**
12
+ * Executes judge evaluations for AI-generated applications.
13
+ *
14
+ * Uses LLM with Playwright browser automation to comprehensively
15
+ * evaluate applications against defined criteria.
16
+ */
17
export declare class JudgeExecutor {
    private llmClient;
    private model;
    private logger;
    private playwrightBridge;
    private config;
    /**
     * @param llmClient - Client used to run the judge conversation
     * @param model - Language model the judge runs on
     * @param logger - Logger for progress and diagnostics
     * @param config - Partial judge configuration
     */
    constructor(llmClient: LLMClient, model: LanguageModelV2, logger: Logger, config: Partial<JudgeConfig>);
    /**
     * Prepares the executor for use.
     *
     * Must complete before an evaluation is started.
     */
    initialize(): Promise<void>;
    /**
     * Judges the outcome of a simulation against the given criteria.
     *
     * @param simulationResult - Outcome of the simulation being judged
     * @param criteria - Criteria the application is judged against
     * @param appUrl - Address of the application under evaluation
     * @returns The resulting judge evaluation
     */
    evaluateSimulation(simulationResult: {
        promptId: string;
        success: boolean;
        stepResults: {
            prompt: string;
        }[];
        tokens: {
            input: number;
            output: number;
            total: number;
        };
        duration: number;
    }, criteria: EvaluationCriteria, appUrl: string): Promise<JudgeEvaluation>;
    /**
     * Produces the user prompt sent to the judge.
     *
     * @param simulationResult - Outcome of the simulation being judged
     * @param criteria - Criteria the application is judged against
     * @param appUrl - Address of the application under evaluation
     * @returns The formatted user prompt
     */
    private buildUserPrompt;
    /**
     * Releases resources held by the executor.
     */
    cleanup(): Promise<void>;
}
65
+ //# sourceMappingURL=judge-executor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge-executor.d.ts","sourceRoot":"","sources":["../../../src/ai-service/judge/judge-executor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,KAAK,EACV,eAAe,EACf,kBAAkB,EAClB,WAAW,EAEZ,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAaxD;;;;;GAKG;AACH,qBAAa,aAAa;IAKtB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,KAAK;IACb,OAAO,CAAC,MAAM;IANhB,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,MAAM,CAAc;gBAGlB,SAAS,EAAE,SAAS,EACpB,KAAK,EAAE,eAAe,EACtB,MAAM,EAAE,MAAM,EACtB,MAAM,EAAE,OAAO,CAAC,WAAW,CAAC;IAa9B;;;;OAIG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAKjC;;;;;;;OAOG;IACG,kBAAkB,CACtB,gBAAgB,EAAE;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,OAAO,EAAE,OAAO,CAAC;QACjB,WAAW,EAAE,KAAK,CAAC;YAAE,MAAM,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QACvC,MAAM,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC;QACzD,QAAQ,EAAE,MAAM,CAAC;KAClB,EACD,QAAQ,EAAE,kBAAkB,EAC5B,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,eAAe,CAAC;IA+Q3B;;;;;;;OAOG;IACH,OAAO,CAAC,eAAe;IAoDvB;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
@@ -0,0 +1,334 @@
1
+ /**
2
+ * Judge executor for evaluating AI-generated applications.
3
+ *
4
+ * Orchestrates the evaluation process using LLM with browser
5
+ * automation capabilities through Playwright MCP.
6
+ */
7
+ import { safeJsonStringify } from "../util/safe-stringify.js";
8
+ import { PlaywrightBridge } from "./integration/playwright-bridge.js";
9
+ import { buildJudgeSystemPrompt } from "./prompts/system-prompt.js";
10
+ import { buildJudgeTools } from "./tools/index.js";
11
+ /**
12
+ * Default judge configuration.
13
+ */
14
const DEFAULT_JUDGE_CONFIG = {
    // Upper bound on LLM steps for one evaluation.
    maxSteps: 30,
    // Five minutes, expressed in milliseconds.
    timeoutMs: 300_000,
    // Overall scores below this value are treated as not passing.
    passingThreshold: 70,
    captureScreenshots: true,
    detailedReasoning: true,
};
21
+ /**
22
+ * Executes judge evaluations for AI-generated applications.
23
+ *
24
+ * Uses LLM with Playwright browser automation to comprehensively
25
+ * evaluate applications against defined criteria.
26
+ */
27
+ export class JudgeExecutor {
28
+ llmClient;
29
+ model;
30
+ logger;
31
+ playwrightBridge;
32
+ config;
33
+ constructor(llmClient, model, logger, config) {
34
+ this.llmClient = llmClient;
35
+ this.model = model;
36
+ this.logger = logger;
37
+ this.config = {
38
+ ...DEFAULT_JUDGE_CONFIG,
39
+ ...config,
40
+ };
41
+ this.playwrightBridge = new PlaywrightBridge(this.config.playwrightMcpUrl, logger);
42
+ }
43
+ /**
44
+ * Initializes the judge executor.
45
+ *
46
+ * Sets up connections to required services.
47
+ */
48
+ async initialize() {
49
+ await this.playwrightBridge.initialize();
50
+ this.logger.info("Judge executor initialized");
51
+ }
52
+ /**
53
+ * Evaluates a simulation result against criteria.
54
+ *
55
+ * @param simulationResult - The simulation to evaluate
56
+ * @param criteria - Evaluation criteria
57
+ * @param appUrl - URL of the application to evaluate
58
+ * @returns Comprehensive evaluation result
59
+ */
60
+ async evaluateSimulation(simulationResult, criteria, appUrl) {
61
+ const startTime = Date.now();
62
+ this.logger.info(`Starting evaluation for ${simulationResult.promptId} at ${appUrl}. Criteria: ${criteria.functionalRequirements.length} functional, ${criteria.uiRequirements.length} UI, ${criteria.dataRequirements.length} data, ${criteria.performanceRequirements?.length || 0} performance`);
63
+ try {
64
+ // Build judge context for tools
65
+ const _judgeContext = {
66
+ appUrl,
67
+ prompts: simulationResult.stepResults.map((sr) => sr.prompt),
68
+ criteria,
69
+ evaluationState: {
70
+ criteriaChecked: [],
71
+ evidence: new Map(),
72
+ scores: new Map(),
73
+ },
74
+ };
75
+ // Build tools with Playwright bridge
76
+ this.logger.info("Building tools");
77
+ const tools = (await buildJudgeTools(this.playwrightBridge));
78
+ // Build system prompt
79
+ this.logger.info("Building system prompt");
80
+ const systemPrompt = buildJudgeSystemPrompt(criteria, this.config);
81
+ // Build user prompt
82
+ this.logger.info("Building user prompt");
83
+ const userPrompt = this.buildUserPrompt(simulationResult, criteria, appUrl);
84
+ // Track evaluation result
85
+ let evaluation = null;
86
+ let tokenUsage = { input: 0, output: 0, cached: 0, total: 0 };
87
+ // Execute judge with llmClient
88
+ this.logger.info("Starting evaluation with config:", safeJsonStringify(this.config));
89
+ const maxSteps = this.config.maxSteps || 5;
90
+ const result = await this.llmClient.streamText({
91
+ model: this.model,
92
+ system: systemPrompt,
93
+ user: {
94
+ role: "user",
95
+ content: userPrompt,
96
+ },
97
+ context: {
98
+ contextId: {
99
+ appId: "judge",
100
+ userId: "system",
101
+ name: `judge-${simulationResult.promptId}-${Date.now()}`,
102
+ },
103
+ contextOptions: {
104
+ maxTokens: 100_000,
105
+ tokenEstimation: {
106
+ minCharsPerToken: 1.5,
107
+ },
108
+ },
109
+ // Note: Judge contexts are ephemeral (not persisted)
110
+ },
111
+ tools,
112
+ maxSteps,
113
+ prepareStep: (step) => {
114
+ const remainingSteps = maxSteps - step.stepNumber - 1;
115
+ const halfwayPoint = Math.floor(maxSteps / 2);
116
+ this.logger.info(`Preparing step ${step.stepNumber} (maxSteps=${maxSteps}, remaining=${remainingSteps})`);
117
+ // On the final step: force submitFeedback by removing other tools
118
+ if (step.stepNumber === maxSteps - 1) {
119
+ this.logger.info(`Final step ${step.stepNumber} - restricting to submitFeedback tool only`);
120
+ // Add a user message to force the issue and restrict tools
121
+ const warningMessage = {
122
+ role: "user",
123
+ content: [
124
+ {
125
+ type: "text",
126
+ text: "**CRITICAL: This is your FINAL step. You MUST call submitFeedback now with your evaluation. No other tools are available.**",
127
+ },
128
+ ],
129
+ };
130
+ return {
131
+ ...step,
132
+ messages: [...step.messages, warningMessage],
133
+ activeTools: ["submitFeedback"],
134
+ };
135
+ }
136
+ // Warn from halfway point onwards
137
+ else if (step.stepNumber >= halfwayPoint && remainingSteps > 0) {
138
+ this.logger.info(`Step ${step.stepNumber} - warning about ${remainingSteps} remaining steps`);
139
+ const warningMessage = {
140
+ role: "user",
141
+ content: [
142
+ {
143
+ type: "text",
144
+ text: `**IMPORTANT: You have ${remainingSteps} step${remainingSteps === 1 ? "" : "s"} remaining before you must call submitFeedback. Plan accordingly and wrap up your evaluation.**`,
145
+ },
146
+ ],
147
+ };
148
+ return {
149
+ ...step,
150
+ messages: [...step.messages, warningMessage],
151
+ };
152
+ }
153
+ return step;
154
+ },
155
+ stopWhen: ({ steps }) => {
156
+ const lastStep = steps[steps.length - 1];
157
+ const hasSubmitFeedback = lastStep?.toolCalls?.some((tc) => tc.toolName === "submitFeedback");
158
+ return hasSubmitFeedback ?? false;
159
+ },
160
+ onStepFinish: (stepResult) => {
161
+ // Extract token usage
162
+ if (stepResult.usage) {
163
+ const usage = stepResult.usage;
164
+ tokenUsage = {
165
+ input: usage.inputTokens || 0,
166
+ output: usage.outputTokens || 0,
167
+ cached: usage.cachedTokens || 0,
168
+ total: usage.totalTokens || 0,
169
+ };
170
+ }
171
+ if (stepResult.reasoningText) {
172
+ this.logger.info(`Step finished - reasoning: ${stepResult.reasoningText}`);
173
+ }
174
+ if (stepResult.text) {
175
+ this.logger.info(`Step finished - text: ${stepResult.text}`);
176
+ }
177
+ },
178
+ onFinish: (finishResult) => {
179
+ // Extract final token usage
180
+ if (finishResult.usage) {
181
+ const usage = finishResult.usage;
182
+ tokenUsage = {
183
+ input: usage.inputTokens || 0,
184
+ output: usage.outputTokens || 0,
185
+ cached: usage.cachedTokens || 0,
186
+ total: usage.totalTokens || 0,
187
+ };
188
+ }
189
+ this.logger.info(`Stream finished - finishReason: ${finishResult.finishReason}, hasEvaluation: ${!!evaluation}`);
190
+ },
191
+ });
192
+ // Process stream to extract evaluation
193
+ this.logger.info("Processing stream chunks");
194
+ for await (const chunk of result.fullStream) {
195
+ // Log chunk types for debugging
196
+ if (chunk.type === "tool-call") {
197
+ const toolName = chunk.toolName;
198
+ const args = JSON.stringify(chunk.args || {}).slice(0, 200);
199
+ this.logger.info(`Tool call - ${toolName} with args: ${args}`);
200
+ }
201
+ else if (chunk.type === "tool-result") {
202
+ const toolName = chunk.toolName;
203
+ const success = !!chunk.output?.success;
204
+ const hasError = !!chunk.output?.error;
205
+ this.logger.info(`Tool result - ${toolName} success=${success} hasError=${hasError}`);
206
+ }
207
+ // Check for tool results to extract evaluation
208
+ if (chunk.type === "tool-result" &&
209
+ "toolName" in chunk &&
210
+ chunk.toolName === "submitFeedback") {
211
+ this.logger.info("Found submitFeedback result");
212
+ const toolResult = chunk.output;
213
+ if (toolResult && toolResult.evaluation) {
214
+ evaluation = toolResult.evaluation;
215
+ this.logger.info(`Evaluation extracted - Score: ${evaluation.overallScore}, Passed: ${evaluation.passed}`);
216
+ }
217
+ }
218
+ }
219
+ // Ensure we got an evaluation
220
+ if (!evaluation) {
221
+ // Create a default evaluation if none was submitted
222
+ evaluation = {
223
+ overallScore: 0,
224
+ passed: false,
225
+ criteriaEvaluations: [],
226
+ summary: "Judge did not submit an evaluation",
227
+ suggestions: [],
228
+ tokensUsed: tokenUsage,
229
+ evaluationDurationMs: Date.now() - startTime,
230
+ };
231
+ }
232
+ else {
233
+ // Add metadata to evaluation
234
+ evaluation.tokensUsed = tokenUsage;
235
+ evaluation.evaluationDurationMs = Date.now() - startTime;
236
+ }
237
+ // Check if evaluation passes threshold
238
+ if (evaluation.overallScore < (this.config.passingThreshold || 70)) {
239
+ this.logger.warn(`Evaluation below passing threshold - score: ${evaluation.overallScore}, threshold: ${this.config.passingThreshold}`);
240
+ }
241
+ return evaluation;
242
+ }
243
+ catch (error) {
244
+ const errorMsg = String(error);
245
+ this.logger.error("Judge evaluation failed", {
246
+ error: {
247
+ kind: "JudgeEvaluationError",
248
+ message: errorMsg,
249
+ stack: error instanceof Error ? error.stack : undefined,
250
+ },
251
+ });
252
+ // Don't crash the process - return a failed evaluation instead
253
+ // This allows the system to continue with other tests
254
+ return {
255
+ overallScore: 0,
256
+ passed: false,
257
+ criteriaEvaluations: [],
258
+ summary: `Judge evaluation failed: ${errorMsg}`,
259
+ suggestions: [
260
+ errorMsg.includes("import") && errorMsg.includes("module")
261
+ ? "The evaluation attempted to use ES6 module syntax in browser context. Only plain JavaScript is supported."
262
+ : "The evaluation encountered an error. Please check the logs for details.",
263
+ ],
264
+ tokensUsed: { input: 0, output: 0, cached: 0, total: 0 },
265
+ evaluationDurationMs: Date.now() - startTime,
266
+ error: errorMsg,
267
+ };
268
+ }
269
+ finally {
270
+ // Cleanup
271
+ try {
272
+ await this.playwrightBridge.cleanup();
273
+ }
274
+ catch (cleanupError) {
275
+ // Log but don't throw cleanup errors
276
+ this.logger.warn(`Cleanup error: ${String(cleanupError)}`);
277
+ }
278
+ }
279
+ }
280
+ /**
281
+ * Builds the user prompt for judge evaluation.
282
+ *
283
+ * @param simulationResult - Simulation to evaluate
284
+ * @param criteria - Evaluation criteria
285
+ * @param appUrl - Application URL
286
+ * @returns Formatted user prompt
287
+ */
288
+ buildUserPrompt(simulationResult, criteria, appUrl) {
289
+ const promptList = simulationResult.stepResults
290
+ .map((sr, idx) => `${idx + 1}. ${sr.prompt}`)
291
+ .join("\n");
292
+ return `Please evaluate the application at: ${appUrl}
293
+
294
+ The AI agent was given the following prompts:
295
+ ${promptList}
296
+
297
+ The agent completed the task in ${simulationResult.duration}ms using:
298
+ - Input tokens: ${simulationResult.tokens.input}
299
+ - Output tokens: ${simulationResult.tokens.output}
300
+ - Total tokens: ${simulationResult.tokens.total}
301
+
302
+ Evaluation Criteria:
303
+
304
+ Functional Requirements (40% weight):
305
+ ${criteria.functionalRequirements.map((r) => `- ${r}`).join("\n")}
306
+
307
+ UI/UX Requirements (30% weight):
308
+ ${criteria.uiRequirements.map((r) => `- ${r}`).join("\n")}
309
+
310
+ Data Requirements (20% weight):
311
+ ${criteria.dataRequirements.map((r) => `- ${r}`).join("\n")}
312
+
313
+ ${criteria.performanceRequirements?.length
314
+ ? `Performance Requirements (10% weight):\n${criteria.performanceRequirements
315
+ .map((r) => `- ${r}`)
316
+ .join("\n")}`
317
+ : ""}
318
+
319
+ Instructions:
320
+ 1. Navigate to the application using the playwright_action tool
321
+ 2. Systematically test each requirement
322
+ 3. Capture screenshots as evidence when appropriate
323
+ 4. Use the submitFeedback tool to submit your final evaluation
324
+
325
+ Be thorough but efficient. Focus on objective assessment of whether requirements are met.`;
326
+ }
327
+ /**
328
+ * Cleans up resources.
329
+ */
330
+ async cleanup() {
331
+ await this.playwrightBridge.cleanup();
332
+ }
333
+ }
334
+ //# sourceMappingURL=judge-executor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge-executor.js","sourceRoot":"","sources":["../../../src/ai-service/judge/judge-executor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAWnD;;GAEG;AACH,MAAM,oBAAoB,GAAyB;IACjD,QAAQ,EAAE,EAAE;IACZ,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,IAAI,EAAE,YAAY;IACtC,gBAAgB,EAAE,EAAE;IACpB,kBAAkB,EAAE,IAAI;IACxB,iBAAiB,EAAE,IAAI;CACxB,CAAC;AAEF;;;;;GAKG;AACH,MAAM,OAAO,aAAa;IAKd;IACA;IACA;IANF,gBAAgB,CAAmB;IACnC,MAAM,CAAc;IAE5B,YACU,SAAoB,EACpB,KAAsB,EACtB,MAAc,EACtB,MAA4B;QAHpB,cAAS,GAAT,SAAS,CAAW;QACpB,UAAK,GAAL,KAAK,CAAiB;QACtB,WAAM,GAAN,MAAM,CAAQ;QAGtB,IAAI,CAAC,MAAM,GAAG;YACZ,GAAG,oBAAoB;YACvB,GAAG,MAAM;SACK,CAAC;QAEjB,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAC1C,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAC5B,MAAM,CACP,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC;QACzC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;IACjD,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,kBAAkB,CACtB,gBAMC,EACD,QAA4B,EAC5B,MAAc;QAEd,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,2BAA2B,gBAAgB,CAAC,QAAQ,OAAO,MAAM,eAAe,QAAQ,CAAC,sBAAsB,CAAC,MAAM,gBAAgB,QAAQ,CAAC,cAAc,CAAC,MAAM,QAAQ,QAAQ,CAAC,gBAAgB,CAAC,MAAM,UAAU,QAAQ,CAAC,uBAAuB,EAAE,MAAM,IAAI,CAAC,cAAc,CAClR,CAAC;QAEF,IAAI,CAAC;YACH,gCAAgC;YAChC,MAAM,aAAa,GAAiB;gBAClC,MAAM;gBACN,OAAO,EAAE,gBAAgB,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC;gBAC5D,QAAQ;gBACR,eAAe,EAAE;oBACf,eAAe,EAAE,EAAE;oBACnB,QAAQ,EAAE,IAAI,GAAG,EAAE;oBACnB,MAAM,EAAE,IAAI,GAAG,EAAE;iBAClB;aACF,CAAC;YAEF,qCAAqC;YACrC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YACnC,MAAM,KAAK,GAAG,CAAC,MAAM,eAAe,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAG1D,CAAC;YAEF,sBAAsB;YACtB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;YAC3C,MAAM,YAAY,GAAG,sBAAsB,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAEnE,oBAAoB;YACpB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBA
AsB,CAAC,CAAC;YACzC,MAAM,UAAU,GAAG,IAAI,CAAC,eAAe,CACrC,gBAAgB,EAChB,QAAQ,EACR,MAAM,CACP,CAAC;YAEF,0BAA0B;YAC1B,IAAI,UAAU,GAA2B,IAAI,CAAC;YAC9C,IAAI,UAAU,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;YAE9D,+BAA+B;YAC/B,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,kCAAkC,EAClC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAC/B,CAAC;YACF,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC;gBAC7C,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,MAAM,EAAE,YAAY;gBACpB,IAAI,EAAE;oBACJ,IAAI,EAAE,MAAe;oBACrB,OAAO,EAAE,UAAU;iBACpB;gBACD,OAAO,EAAE;oBACP,SAAS,EAAE;wBACT,KAAK,EAAE,OAAO;wBACd,MAAM,EAAE,QAAQ;wBAChB,IAAI,EAAE,SAAS,gBAAgB,CAAC,QAAQ,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;qBACzD;oBACD,cAAc,EAAE;wBACd,SAAS,EAAE,OAAO;wBAClB,eAAe,EAAE;4BACf,gBAAgB,EAAE,GAAG;yBACtB;qBACF;oBACD,qDAAqD;iBACtD;gBACD,KAAK;gBACL,QAAQ;gBACR,WAAW,EAAE,CAAC,IAAI,EAAE,EAAE;oBACpB,MAAM,cAAc,GAAG,QAAQ,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;oBACtD,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;oBAE9C,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,kBAAkB,IAAI,CAAC,UAAU,cAAc,QAAQ,eAAe,cAAc,GAAG,CACxF,CAAC;oBAEF,kEAAkE;oBAClE,IAAI,IAAI,CAAC,UAAU,KAAK,QAAQ,GAAG,CAAC,EAAE,CAAC;wBACrC,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,cAAc,IAAI,CAAC,UAAU,4CAA4C,CAC1E,CAAC;wBAEF,2DAA2D;wBAC3D,MAAM,cAAc,GAAG;4BACrB,IAAI,EAAE,MAAe;4BACrB,OAAO,EAAE;gCACP;oCACE,IAAI,EAAE,MAAe;oCACrB,IAAI,EAAE,6HAA6H;iCACpI;6BACF;yBACF,CAAC;wBAEF,OAAO;4BACL,GAAG,IAAI;4BACP,QAAQ,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,EAAE,cAAc,CAAC;4BAC5C,WAAW,EAAE,CAAC,gBAAgB,CAAC;yBAChC,CAAC;oBACJ,CAAC;oBACD,kCAAkC;yBAC7B,IAAI,IAAI,CAAC,UAAU,IAAI,YAAY,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;wBAC/D,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,QAAQ,IAAI,CAAC,UAAU,oBAAoB,cAAc,kBAAkB,CAC5E,CAAC;wBAEF,MAAM,cAAc,GAAG;4BACrB,IAAI,EAAE,MAAe;4BACrB,OAAO,EAAE;gCACP;oCACE,IAAI,EAAE,MAAe;oCACrB,IAAI,EAAE,yBAAyB,cAAc,QAAQ,cAAc,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,iGAAiG;iCACtL;6BACF;yBACF,CAAC;wBAEF,OAAO;4BACL,GAAG,IAAI;4BACP,QAAQ,EAAE,CAAC,GAAG,IAAI,C
AAC,QAAQ,EAAE,cAAc,CAAC;yBAC7C,CAAC;oBACJ,CAAC;oBAED,OAAO,IAAI,CAAC;gBACd,CAAC;gBACD,QAAQ,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;oBACtB,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBACzC,MAAM,iBAAiB,GAAG,QAAQ,EAAE,SAAS,EAAE,IAAI,CACjD,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,KAAK,gBAAgB,CACzC,CAAC;oBACF,OAAO,iBAAiB,IAAI,KAAK,CAAC;gBACpC,CAAC;gBACD,YAAY,EAAE,CAAC,UAAU,EAAE,EAAE;oBAC3B,sBAAsB;oBACtB,IAAI,UAAU,CAAC,KAAK,EAAE,CAAC;wBACrB,MAAM,KAAK,GAAG,UAAU,CAAC,KAAY,CAAC;wBACtC,UAAU,GAAG;4BACX,KAAK,EAAE,KAAK,CAAC,WAAW,IAAI,CAAC;4BAC7B,MAAM,EAAE,KAAK,CAAC,YAAY,IAAI,CAAC;4BAC/B,MAAM,EAAE,KAAK,CAAC,YAAY,IAAI,CAAC;4BAC/B,KAAK,EAAE,KAAK,CAAC,WAAW,IAAI,CAAC;yBAC9B,CAAC;oBACJ,CAAC;oBACD,IAAI,UAAU,CAAC,aAAa,EAAE,CAAC;wBAC7B,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,8BAA8B,UAAU,CAAC,aAAa,EAAE,CACzD,CAAC;oBACJ,CAAC;oBACD,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC;wBACpB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,yBAAyB,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC;oBAC/D,CAAC;gBACH,CAAC;gBACD,QAAQ,EAAE,CAAC,YAAY,EAAE,EAAE;oBACzB,4BAA4B;oBAC5B,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;wBACvB,MAAM,KAAK,GAAG,YAAY,CAAC,KAAY,CAAC;wBACxC,UAAU,GAAG;4BACX,KAAK,EAAE,KAAK,CAAC,WAAW,IAAI,CAAC;4BAC7B,MAAM,EAAE,KAAK,CAAC,YAAY,IAAI,CAAC;4BAC/B,MAAM,EAAE,KAAK,CAAC,YAAY,IAAI,CAAC;4BAC/B,KAAK,EAAE,KAAK,CAAC,WAAW,IAAI,CAAC;yBAC9B,CAAC;oBACJ,CAAC;oBAED,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,mCAAmC,YAAY,CAAC,YAAY,oBAAoB,CAAC,CAAC,UAAU,EAAE,CAC/F,CAAC;gBACJ,CAAC;aACF,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;YAC7C,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;gBAC5C,gCAAgC;gBAChC,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;oBAC/B,MAAM,QAAQ,GAAI,KAAa,CAAC,QAAQ,CAAC;oBACzC,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAE,KAAa,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;oBACrE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,eAAe,QAAQ,eAAe,IAAI,EAAE,CAAC,CAAC;gBACjE,CAAC;qBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;oBACxC,MAAM,QAAQ,GAAI,KAAa,CAAC,QAAQ,CAAC;oBACzC,MAAM,OAAO,GAAG,CAAC,CAAE,KAAa,CAAC,MAAM,EAAE,OAAO,CAAC;oBACjD,MAAM,QAAQ,GAAG
,CAAC,CAAE,KAAa,CAAC,MAAM,EAAE,KAAK,CAAC;oBAChD,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,iBAAiB,QAAQ,YAAY,OAAO,aAAa,QAAQ,EAAE,CACpE,CAAC;gBACJ,CAAC;gBAED,+CAA+C;gBAC/C,IACE,KAAK,CAAC,IAAI,KAAK,aAAa;oBAC5B,UAAU,IAAI,KAAK;oBACnB,KAAK,CAAC,QAAQ,KAAK,gBAAgB,EACnC,CAAC;oBACD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;oBAChD,MAAM,UAAU,GAAI,KAAa,CAAC,MAAM,CAAC;oBACzC,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,EAAE,CAAC;wBACxC,UAAU,GAAG,UAAU,CAAC,UAA6B,CAAC;wBACtD,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,iCAAiC,UAAU,CAAC,YAAY,aAAa,UAAU,CAAC,MAAM,EAAE,CACzF,CAAC;oBACJ,CAAC;gBACH,CAAC;YACH,CAAC;YAED,8BAA8B;YAC9B,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,oDAAoD;gBACpD,UAAU,GAAG;oBACX,YAAY,EAAE,CAAC;oBACf,MAAM,EAAE,KAAK;oBACb,mBAAmB,EAAE,EAAE;oBACvB,OAAO,EAAE,oCAAoC;oBAC7C,WAAW,EAAE,EAAE;oBACf,UAAU,EAAE,UAAU;oBACtB,oBAAoB,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;iBAC7C,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,6BAA6B;gBAC7B,UAAU,CAAC,UAAU,GAAG,UAAU,CAAC;gBACnC,UAAU,CAAC,oBAAoB,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAC3D,CAAC;YAED,uCAAuC;YACvC,IAAI,UAAU,CAAC,YAAY,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;gBACnE,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,+CAA+C,UAAU,CAAC,YAAY,gBAAgB,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAAE,CACrH,CAAC;YACJ,CAAC;YAED,OAAO,UAAU,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YAE/B,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,yBAAyB,EAAE;gBAC3C,KAAK,EAAE;oBACL,IAAI,EAAE,sBAAsB;oBAC5B,OAAO,EAAE,QAAQ;oBACjB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;iBACxD;aACF,CAAC,CAAC;YAEH,+DAA+D;YAC/D,sDAAsD;YACtD,OAAO;gBACL,YAAY,EAAE,CAAC;gBACf,MAAM,EAAE,KAAK;gBACb,mBAAmB,EAAE,EAAE;gBACvB,OAAO,EAAE,4BAA4B,QAAQ,EAAE;gBAC/C,WAAW,EAAE;oBACX,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBACxD,CAAC,CAAC,2GAA2G;wBAC7G,CAAC,CAAC,yEAAyE;iBAC9E;gBACD,UAAU,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;gBACxD,oBAAoB,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAC5C,KAAK,EAAE,QAAQ;aAChB,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,UAAU;
YACV,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,CAAC;YACxC,CAAC;YAAC,OAAO,YAAY,EAAE,CAAC;gBACtB,qCAAqC;gBACrC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,kBAAkB,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;;;;OAOG;IACK,eAAe,CACrB,gBAKC,EACD,QAA4B,EAC5B,MAAc;QAEd,MAAM,UAAU,GAAG,gBAAgB,CAAC,WAAW;aAC5C,GAAG,CAAC,CAAC,EAAE,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,KAAK,EAAE,CAAC,MAAM,EAAE,CAAC;aAC5C,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO,uCAAuC,MAAM;;;EAGtD,UAAU;;kCAEsB,gBAAgB,CAAC,QAAQ;kBACzC,gBAAgB,CAAC,MAAM,CAAC,KAAK;mBAC5B,gBAAgB,CAAC,MAAM,CAAC,MAAM;kBAC/B,gBAAgB,CAAC,MAAM,CAAC,KAAK;;;;;EAK7C,QAAQ,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;;;EAG/D,QAAQ,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;;;EAGvD,QAAQ,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;;EAGzD,QAAQ,CAAC,uBAAuB,EAAE,MAAM;YACtC,CAAC,CAAC,2CAA2C,QAAQ,CAAC,uBAAuB;iBACxE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;iBACpB,IAAI,CAAC,IAAI,CAAC,EAAE;YACjB,CAAC,CAAC,EACN;;;;;;;;0FAQ0F,CAAC;IACzF,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,CAAC;IACxC,CAAC;CACF"}