auto-feedback 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +180 -0
  2. package/build/capture/console-collector.d.ts +16 -0
  3. package/build/capture/console-collector.js +43 -0
  4. package/build/capture/error-collector.d.ts +15 -0
  5. package/build/capture/error-collector.js +47 -0
  6. package/build/capture/network-collector.d.ts +16 -0
  7. package/build/capture/network-collector.js +76 -0
  8. package/build/capture/process-collector.d.ts +16 -0
  9. package/build/capture/process-collector.js +48 -0
  10. package/build/capture/types.d.ts +61 -0
  11. package/build/capture/types.js +5 -0
  12. package/build/index.d.ts +6 -0
  13. package/build/index.js +41 -0
  14. package/build/interaction/selectors.d.ts +26 -0
  15. package/build/interaction/selectors.js +84 -0
  16. package/build/interaction/types.d.ts +56 -0
  17. package/build/interaction/types.js +5 -0
  18. package/build/process/cleanup.d.ts +23 -0
  19. package/build/process/cleanup.js +50 -0
  20. package/build/process/launcher.d.ts +22 -0
  21. package/build/process/launcher.js +54 -0
  22. package/build/process/monitor.d.ts +14 -0
  23. package/build/process/monitor.js +67 -0
  24. package/build/process/types.d.ts +84 -0
  25. package/build/process/types.js +5 -0
  26. package/build/screenshot/auto-capture.d.ts +14 -0
  27. package/build/screenshot/auto-capture.js +38 -0
  28. package/build/screenshot/capture.d.ts +21 -0
  29. package/build/screenshot/capture.js +48 -0
  30. package/build/screenshot/optimize.d.ts +19 -0
  31. package/build/screenshot/optimize.js +28 -0
  32. package/build/screenshot/types.d.ts +43 -0
  33. package/build/screenshot/types.js +4 -0
  34. package/build/server.d.ts +10 -0
  35. package/build/server.js +18 -0
  36. package/build/session-manager.d.ts +119 -0
  37. package/build/session-manager.js +284 -0
  38. package/build/tools/check-port.d.ts +10 -0
  39. package/build/tools/check-port.js +40 -0
  40. package/build/tools/click-element.d.ts +13 -0
  41. package/build/tools/click-element.js +118 -0
  42. package/build/tools/get-console-logs.d.ts +7 -0
  43. package/build/tools/get-console-logs.js +55 -0
  44. package/build/tools/get-element-state.d.ts +14 -0
  45. package/build/tools/get-element-state.js +116 -0
  46. package/build/tools/get-errors.d.ts +7 -0
  47. package/build/tools/get-errors.js +40 -0
  48. package/build/tools/get-network-logs.d.ts +7 -0
  49. package/build/tools/get-network-logs.js +58 -0
  50. package/build/tools/get-process-output.d.ts +7 -0
  51. package/build/tools/get-process-output.js +55 -0
  52. package/build/tools/get-screenshot.d.ts +7 -0
  53. package/build/tools/get-screenshot.js +32 -0
  54. package/build/tools/index.d.ts +9 -0
  55. package/build/tools/index.js +117 -0
  56. package/build/tools/launch-electron.d.ts +13 -0
  57. package/build/tools/launch-electron.js +97 -0
  58. package/build/tools/launch-web-server.d.ts +13 -0
  59. package/build/tools/launch-web-server.js +88 -0
  60. package/build/tools/launch-windows-exe.d.ts +13 -0
  61. package/build/tools/launch-windows-exe.js +81 -0
  62. package/build/tools/navigate.d.ts +13 -0
  63. package/build/tools/navigate.js +137 -0
  64. package/build/tools/run-workflow.d.ts +14 -0
  65. package/build/tools/run-workflow.js +207 -0
  66. package/build/tools/screenshot-desktop.d.ts +13 -0
  67. package/build/tools/screenshot-desktop.js +80 -0
  68. package/build/tools/screenshot-electron.d.ts +13 -0
  69. package/build/tools/screenshot-electron.js +72 -0
  70. package/build/tools/screenshot-web.d.ts +13 -0
  71. package/build/tools/screenshot-web.js +129 -0
  72. package/build/tools/stop-process.d.ts +14 -0
  73. package/build/tools/stop-process.js +41 -0
  74. package/build/tools/type-text.d.ts +13 -0
  75. package/build/tools/type-text.js +137 -0
  76. package/build/tools/wait-for-element.d.ts +14 -0
  77. package/build/tools/wait-for-element.js +93 -0
  78. package/build/types/index.d.ts +31 -0
  79. package/build/types/index.js +4 -0
  80. package/build/utils/errors.d.ts +26 -0
  81. package/build/utils/errors.js +62 -0
  82. package/build/utils/shutdown.d.ts +16 -0
  83. package/build/utils/shutdown.js +34 -0
  84. package/build/workflow/assertions.d.ts +25 -0
  85. package/build/workflow/assertions.js +326 -0
  86. package/build/workflow/executor.d.ts +34 -0
  87. package/build/workflow/executor.js +269 -0
  88. package/build/workflow/types.d.ts +95 -0
  89. package/build/workflow/types.js +6 -0
  90. package/package.json +36 -0
@@ -0,0 +1,13 @@
1
+ /**
2
+ * launch_web_server MCP tool (PROC-01)
3
+ * Spawns a dev server process and waits for it to become ready
4
+ */
5
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
6
+ import { SessionManager } from "../session-manager.js";
7
+ /**
8
+ * Register the "launch_web_server" tool (spawn a dev server process and wait for it to become ready) with the MCP server.
9
+ *
10
+ * @param server - MCP server instance the tool is registered on
11
+ * @param sessionManager - Session manager used to look up the session and to track the spawned process (output collector and cleanup resource)
12
+ */
13
+ export declare function registerLaunchWebServerTool(server: McpServer, sessionManager: SessionManager): void;
@@ -0,0 +1,88 @@
1
+ /**
2
+ * launch_web_server MCP tool (PROC-01)
3
+ * Spawns a dev server process and waits for it to become ready
4
+ */
5
+ import path from "path";
6
+ import { z } from "zod";
7
+ import { createToolError, createToolResult } from "../utils/errors.js";
8
+ import { spawnCrossPlatform, attachProcessListeners, } from "../process/launcher.js";
9
+ import { detectServerReady } from "../process/monitor.js";
10
+ import { createProcessResource } from "../process/cleanup.js";
11
+ import { attachProcessCollector } from "../capture/process-collector.js";
12
/**
 * Register the launch_web_server tool with the MCP server.
 *
 * The tool handler:
 *  1. checks that the session exists,
 *  2. spawns `command args` in the resolved `cwd`,
 *  3. attaches log listeners and an output collector keyed `WebServer:<port>`,
 *  4. registers the child as a "web-server" session resource, and
 *  5. waits for `detectServerReady` (default timeout: 60000 ms).
 *
 * The child is registered with the session before the readiness wait, so it
 * remains a session resource (and its collected output stays retrievable)
 * even when readiness fails.
 *
 * @param server - MCP server instance
 * @param sessionManager - Session manager for resource tracking
 */
export function registerLaunchWebServerTool(server, sessionManager) {
    server.tool("launch_web_server", "Launch a web dev server and wait for it to be ready. Use to start npm/vite/webpack dev servers for testing.", {
        sessionId: z.string().describe("Session ID to track this process"),
        command: z
            .string()
            .describe("Command to run (e.g., 'npm', 'npx', 'node')"),
        args: z
            .array(z.string())
            .describe("Command arguments (e.g., ['run', 'dev'])"),
        cwd: z.string().describe("Working directory for the project"),
        port: z
            .number()
            .int()
            .min(1)
            .max(65535)
            .describe("Expected port the server will listen on"),
        timeoutMs: z
            .number()
            .int()
            .min(1000)
            .max(300000)
            .optional()
            .describe("Readiness timeout in ms (default: 60000)"),
    }, async ({ sessionId, command, args, cwd, port, timeoutMs }) => {
        try {
            // Validate session exists
            const session = sessionManager.get(sessionId);
            if (!session) {
                return createToolError(`Session not found: ${sessionId}`, "The session may have already been ended or never existed", "Create a session first with create_session.");
            }
            console.error(`[launch_web_server] Launching: ${command} ${args.join(" ")} in ${cwd} on port ${port}`);
            // Resolve cwd to absolute path
            const resolvedCwd = path.resolve(cwd);
            // Spawn the process
            const child = spawnCrossPlatform(command, args, { cwd: resolvedCwd });
            // The same label is used for log output and for the output collector
            const processLabel = `WebServer:${port}`;
            // Attach logging listeners
            attachProcessListeners(child, processLabel);
            // Attach process output collector for retrieval
            const processCollector = attachProcessCollector(child);
            sessionManager.setProcessCollector(sessionId, processLabel, processCollector);
            // Register process as a session resource for automatic cleanup
            const resource = createProcessResource(child, "web-server");
            sessionManager.addResource(sessionId, resource);
            // Wait for server readiness
            try {
                await detectServerReady(child, port, timeoutMs ?? 60000);
            }
            catch (error) {
                const message = error instanceof Error ? error.message : String(error);
                // A child killed by a signal has exitCode === null and a
                // non-null signalCode, so both fields must be inspected to
                // tell "crashed" apart from "still running but not ready".
                let details = message;
                if (child.exitCode !== null) {
                    details = `Process exited with code ${child.exitCode}. ${message}`;
                }
                else if (child.signalCode != null) {
                    details = `Process was terminated by signal ${child.signalCode}. ${message}`;
                }
                return createToolError("Web server failed to become ready", details, "Check the command and port. The process may have crashed -- check server logs.");
            }
            return createToolResult({
                sessionId,
                type: "web-server",
                pid: child.pid,
                port,
                status: "ready",
                command,
                args,
                cwd: resolvedCwd,
            });
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return createToolError("Unexpected error launching web server", message, "Check command, arguments, and working directory are correct.");
        }
    });
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * launch_windows_exe MCP tool (PROC-03)
3
+ * Spawns a Windows executable and tracks its process as a session resource
4
+ */
5
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
6
+ import { SessionManager } from "../session-manager.js";
7
+ /**
8
+ * Register the "launch_windows_exe" tool with the MCP server.
9
+ *
10
+ * The tool spawns a Windows .exe, waits briefly for immediate spawn errors,
11
+ * and registers the still-running process with sessionManager as a session resource for cleanup.
12
+ */
13
+ export declare function registerLaunchWindowsExeTool(server: McpServer, sessionManager: SessionManager): void;
@@ -0,0 +1,81 @@
1
+ /**
2
+ * launch_windows_exe MCP tool (PROC-03)
3
+ * Spawns a Windows executable and tracks its process as a session resource
4
+ */
5
+ import path from "path";
6
+ import { z } from "zod";
7
+ import { createToolError, createToolResult } from "../utils/errors.js";
8
+ import { spawnCrossPlatform, attachProcessListeners, } from "../process/launcher.js";
9
+ import { createProcessResource } from "../process/cleanup.js";
10
+ import { attachProcessCollector } from "../capture/process-collector.js";
11
/**
 * Wait until the child either fails to spawn, exits, or survives the grace
 * period, whichever happens first.
 *
 * Resolves when the child emits "exit" or when `graceMs` elapses; rejects
 * with the emitted error when the child emits "error". The timer and both
 * listeners are always removed before settling, so nothing is left pending.
 *
 * @param child - Child process returned by spawnCrossPlatform
 * @param graceMs - Maximum time to wait, in milliseconds
 */
function waitForEarlyExitOrError(child, graceMs) {
    return new Promise((resolve, reject) => {
        // Nothing to wait for if the child is already gone
        if (child.exitCode !== null || child.signalCode != null) {
            resolve(undefined);
            return;
        }
        const detach = () => {
            clearTimeout(timer);
            child.removeListener("error", onError);
            child.removeListener("exit", onExit);
        };
        const onError = (err) => {
            detach();
            reject(err);
        };
        const onExit = () => {
            detach();
            resolve(undefined);
        };
        const timer = setTimeout(() => {
            detach();
            resolve(undefined);
        }, graceMs);
        child.on("error", onError);
        child.on("exit", onExit);
    });
}
/**
 * Register the launch_windows_exe tool with the MCP server
 *
 * Spawns a Windows .exe, waits up to one second for an immediate spawn
 * error or exit, and registers the process as a session resource for
 * cleanup only if it is still running after that check.
 *
 * @param server - MCP server instance
 * @param sessionManager - Session manager for resource tracking
 */
export function registerLaunchWindowsExeTool(server, sessionManager) {
    server.tool("launch_windows_exe", "Launch a Windows executable and track its process. Use to start .exe applications for GUI testing.", {
        sessionId: z.string().describe("Session ID to track this process"),
        exePath: z.string().describe("Absolute path to the .exe file"),
        args: z
            .array(z.string())
            .optional()
            .describe("Command line arguments for the executable"),
        cwd: z
            .string()
            .optional()
            .describe("Working directory (defaults to exe's directory)"),
    }, async ({ sessionId, exePath, args, cwd }) => {
        try {
            // Validate session exists
            const session = sessionManager.get(sessionId);
            if (!session) {
                return createToolError(`Session not found: ${sessionId}`, "The session may have already been ended or never existed", "Create a session first with create_session.");
            }
            // Resolve paths
            const resolvedExePath = path.resolve(exePath);
            const resolvedCwd = cwd
                ? path.resolve(cwd)
                : path.dirname(resolvedExePath);
            console.error(`[launch_windows_exe] Launching: ${resolvedExePath}`);
            // Spawn the process
            const child = spawnCrossPlatform(resolvedExePath, args ?? [], {
                cwd: resolvedCwd,
            });
            // The same label is used for log output and for the output collector
            const processLabel = `WinExe:${path.basename(resolvedExePath)}`;
            attachProcessListeners(child, processLabel);
            // Attach process output collector for retrieval
            const processCollector = attachProcessCollector(child);
            sessionManager.setProcessCollector(sessionId, processLabel, processCollector);
            // Wait briefly for immediate spawn errors; returns as soon as the
            // child exits instead of always sleeping for the full second
            await waitForEarlyExitOrError(child, 1000);
            // Check if process exited immediately (bad path, missing deps, etc.)
            if (child.exitCode !== null) {
                return createToolError(`Process exited immediately with code ${child.exitCode}`, `Attempted to launch: ${resolvedExePath}`, "Check that the .exe path is correct and the application can run from the command line");
            }
            // A signal-terminated child has exitCode === null but a signalCode
            if (child.signalCode != null) {
                return createToolError(`Process was terminated immediately by signal ${child.signalCode}`, `Attempted to launch: ${resolvedExePath}`, "Check that the .exe path is correct and the application can run from the command line");
            }
            // Register as session resource for cleanup
            const resource = createProcessResource(child, "windows-exe");
            sessionManager.addResource(sessionId, resource);
            return createToolResult({
                sessionId,
                type: "windows-exe",
                pid: child.pid,
                status: "running",
                exePath: resolvedExePath,
                args: args ?? [],
                cwd: resolvedCwd,
            });
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return createToolError("Failed to launch Windows executable", message, "Check the .exe path is correct and the file exists.");
        }
    });
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * navigate MCP tool
3
+ * Navigates to URLs and uses browser back/forward on web or Electron pages
4
+ */
5
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
6
+ import { SessionManager } from "../session-manager.js";
7
+ /**
8
+ * Register the "navigate" tool (goto / back / forward, returning a screenshot of the resulting page) with the MCP server.
9
+ *
10
+ * @param server - MCP server instance the tool is registered on
11
+ * @param sessionManager - Session manager used to look up the session and its page references
12
+ */
13
+ export declare function registerNavigateTool(server: McpServer, sessionManager: SessionManager): void;
@@ -0,0 +1,137 @@
1
+ /**
2
+ * navigate MCP tool
3
+ * Navigates to URLs and uses browser back/forward on web or Electron pages
4
+ */
5
+ import { z } from "zod";
6
+ import { createToolError, createScreenshotResult } from "../utils/errors.js";
7
+ import { capturePlaywrightPage } from "../screenshot/capture.js";
8
+ import { optimizeScreenshot } from "../screenshot/optimize.js";
9
+ import { getActivePage } from "../interaction/selectors.js";
10
/**
 * Register the navigate tool with the MCP server.
 *
 * The tool performs one of three actions on the resolved page -- "goto"
 * (load `url`), "back" or "forward" -- and answers with a viewport
 * screenshot of the page it ended up on. After a "goto" on a web page the
 * session's page reference is re-keyed under the requested URL.
 *
 * NOTE(review): "back"/"forward" do not re-key the page reference; whether
 * page lookup depends on that is decided in getActivePage -- confirm.
 *
 * @param server - MCP server instance
 * @param sessionManager - Session manager for resource tracking
 */
export function registerNavigateTool(server, sessionManager) {
    const inputSchema = {
        sessionId: z.string().describe("Session ID from create_session"),
        action: z
            .enum(["goto", "back", "forward"])
            .default("goto")
            .describe("Navigation action: goto (load URL), back (browser back), forward (browser forward)"),
        url: z
            .string()
            .optional()
            .describe("URL to navigate to (required when action is 'goto')"),
        pageIdentifier: z
            .string()
            .optional()
            .describe("URL or 'electron' to target a specific page. Omit if session has only one page."),
        waitUntil: z
            .enum(["load", "domcontentloaded", "commit"])
            .optional()
            .describe("When to consider navigation complete (default: 'load'). Do NOT use 'networkidle' — it is unreliable."),
        timeout: z
            .number()
            .int()
            .min(0)
            .optional()
            .describe("Max wait time in ms (default: 30000)"),
    };
    const handleNavigate = async ({ sessionId, action, url, pageIdentifier, waitUntil, timeout, }) => {
        try {
            // Guard: the session must exist
            if (!sessionManager.get(sessionId)) {
                const knownSessions = sessionManager.list();
                const hint = knownSessions.length > 0
                    ? `Available sessions: ${knownSessions.join(", ")}`
                    : "Create a session first with create_session.";
                return createToolError(`Session not found: ${sessionId}`, "The session may have already been ended", hint);
            }
            // Guard: "goto" is meaningless without a target
            if (action === "goto" && !url) {
                return createToolError("URL is required when action is 'goto'", "The 'goto' action navigates to a specific URL", "Provide a url parameter, e.g. url: 'https://example.com'");
            }
            // Guard: resolve the page to act on
            const lookup = getActivePage(sessionManager, sessionId, pageIdentifier);
            if (!lookup.success) {
                const pagesHint = lookup.availablePages
                    ? `Available pages: ${lookup.availablePages.join(", ")}`
                    : undefined;
                return createToolError(lookup.error, `Session: ${sessionId}`, pagesHint);
            }
            const page = lookup.page;
            const navOptions = {
                waitUntil: waitUntil ?? "load",
                timeout: timeout ?? 30000,
            };
            if (action === "goto") {
                await page.goto(url, navOptions);
                // Web pages are tracked by URL, so move the stored reference
                // to the new URL to keep page discovery working
                const tracksUrl = lookup.type === "web" && lookup.identifier !== "electron";
                const previousRef = tracksUrl
                    ? sessionManager.getPageRef(sessionId, lookup.identifier)
                    : undefined;
                if (previousRef) {
                    sessionManager.removePageRef(sessionId, lookup.identifier);
                    sessionManager.setPageRef(sessionId, url, { ...previousRef, url });
                }
            }
            else {
                // Either "back" or "forward"; a null response means there is
                // no history entry in that direction
                const goingBack = action === "back";
                const response = goingBack
                    ? await page.goBack(navOptions)
                    : await page.goForward(navOptions);
                if (response === null) {
                    return goingBack
                        ? createToolError("Cannot go back", "No previous page in browser history", "Navigate to a URL first before using back.")
                        : createToolError("Cannot go forward", "No forward page in browser history", "Use back first before using forward.");
                }
            }
            // Show the caller where the navigation landed
            const screenshot = await optimizeScreenshot(await capturePlaywrightPage(page, { fullPage: false }), { maxWidth: 1280, quality: 80 });
            const resultMeta = {
                sessionId,
                action,
                url: page.url(),
                success: true,
            };
            return createScreenshotResult(resultMeta, screenshot.data.toString("base64"), screenshot.mimeType);
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            const mentions = (needles) => needles.some((needle) => message.includes(needle));
            // Timeout: page took too long to load
            if (mentions(["Timeout", "timeout"])) {
                return createToolError("Navigation timed out", `Page did not finish loading within ${timeout ?? 30000}ms`, "Increase the timeout, use waitUntil: 'domcontentloaded' for faster resolution, or check if the URL is correct.");
            }
            // Network errors (Chrome: net::ERR_, Firefox: NS_ERROR_)
            if (mentions(["net::ERR_", "NS_ERROR_"])) {
                return createToolError("Network error during navigation", message, "Check if the URL is correct and the server is running. For local servers, ensure the dev server is started.");
            }
            // Anything else
            return createToolError("Navigation failed", `Action: ${action}, URL: ${url ?? "N/A"} — ${message}`, "Take a screenshot to check the current page state.");
        }
    };
    server.tool("navigate", "Navigate to a URL or use browser back/forward on a web or Electron page. Returns a screenshot of the resulting page. Use to load pages, follow links, or retrace steps.", inputSchema, handleNavigate);
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * run_workflow MCP tool
3
+ * Executes multi-step workflows on web or Electron pages with per-step
4
+ * screenshot capture and diagnostic log tracking.
5
+ */
6
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
7
+ import { SessionManager } from "../session-manager.js";
8
+ /**
9
+ * Register the "run_workflow" tool (validate, then execute, a list of 1-20 page action steps) with the MCP server.
10
+ *
11
+ * @param server - MCP server instance the tool is registered on
12
+ * @param sessionManager - Session manager used to look up the session and its page; also handed to the workflow executor
13
+ */
14
+ export declare function registerRunWorkflowTool(server: McpServer, sessionManager: SessionManager): void;
@@ -0,0 +1,207 @@
1
+ /**
2
+ * run_workflow MCP tool
3
+ * Executes multi-step workflows on web or Electron pages with per-step
4
+ * screenshot capture and diagnostic log tracking.
5
+ */
6
+ import { z } from "zod";
7
+ import { createToolError } from "../utils/errors.js";
8
+ import { getActivePage } from "../interaction/selectors.js";
9
+ import { executeWorkflow, validateStep } from "../workflow/executor.js";
10
/**
 * Register the run_workflow tool with the MCP server.
 *
 * The tool resolves the target page, validates every step before running
 * any of them, executes the workflow, and answers with a multi-part
 * response: one JSON summary, then for each executed step a JSON metadata
 * entry optionally followed by that step's screenshot. The response is
 * flagged as an error when the workflow reports a failed step.
 *
 * @param server - MCP server instance
 * @param sessionManager - Session manager for resource tracking
 */
export function registerRunWorkflowTool(server, sessionManager) {
    // Shape of a single workflow step; which fields are required depends on
    // the action and is enforced later by validateStep
    const stepSchema = z.object({
        action: z
            .enum(["click", "type", "navigate", "screenshot", "wait", "assert"])
            .describe("Action to perform"),
        selector: z
            .string()
            .optional()
            .describe("Element selector — CSS, text=, role=, testid=, xpath= (required for click, type, wait)"),
        text: z
            .string()
            .optional()
            .describe("Text to type (required for type action)"),
        url: z
            .string()
            .optional()
            .describe("URL to navigate to (required for navigate action)"),
        button: z
            .enum(["left", "right", "middle"])
            .optional()
            .describe("Mouse button for click (default: left)"),
        clickCount: z
            .number()
            .int()
            .min(1)
            .max(3)
            .optional()
            .describe("Click count: 1=single, 2=double, 3=triple"),
        pressSequentially: z
            .boolean()
            .optional()
            .describe("Type one key at a time instead of fill (default: false)"),
        clear: z
            .boolean()
            .optional()
            .describe("Clear field before typing (default: true)"),
        fullPage: z
            .boolean()
            .optional()
            .describe("Capture full scrollable page in screenshot (default: false)"),
        state: z
            .enum(["visible", "hidden", "attached", "detached"])
            .optional()
            .describe("State to wait for (default: visible)"),
        timeout: z
            .number()
            .int()
            .min(0)
            .optional()
            .describe("Step timeout in ms (default: 30000)"),
        assertType: z
            .enum([
            "exists",
            "not-exists",
            "visible",
            "hidden",
            "text-equals",
            "text-contains",
            "has-attribute",
            "attribute-equals",
            "enabled",
            "disabled",
            "checked",
            "not-checked",
            "value-equals",
        ])
            .optional()
            .describe("Assertion type (required for assert action). Checks element state and reports pass/fail."),
        expected: z
            .string()
            .optional()
            .describe("Expected value for text-equals, text-contains, value-equals, attribute-equals assertions"),
        attribute: z
            .string()
            .optional()
            .describe("Attribute name for has-attribute, attribute-equals assertions"),
    });
    const inputSchema = {
        sessionId: z.string().describe("Session ID from create_session"),
        steps: z
            .array(stepSchema)
            .min(1)
            .max(20)
            .describe("Action steps to execute in order (max 20)"),
        pageIdentifier: z
            .string()
            .optional()
            .describe("URL or 'electron' to target a specific page. Omit if session has only one page."),
    };
    const asJsonText = (value) => ({
        type: "text",
        text: JSON.stringify(value, null, 2),
    });
    const handleRunWorkflow = async ({ sessionId, steps, pageIdentifier }) => {
        try {
            // 1. The session must exist
            if (!sessionManager.get(sessionId)) {
                const knownSessions = sessionManager.list();
                const hint = knownSessions.length > 0
                    ? `Available sessions: ${knownSessions.join(", ")}`
                    : "Create a session first with create_session.";
                return createToolError(`Session not found: ${sessionId}`, "The session may have already been ended", hint);
            }
            // 2. Resolve the page the workflow runs against
            const lookup = getActivePage(sessionManager, sessionId, pageIdentifier);
            if (!lookup.success) {
                const pagesHint = lookup.availablePages
                    ? `Available pages: ${lookup.availablePages.join(", ")}`
                    : undefined;
                return createToolError(lookup.error, `Session: ${sessionId}`, pagesHint);
            }
            // 3. Check every step first so nothing runs if any step is malformed
            const problems = steps
                .map((step, index) => validateStep(step, index))
                .filter((problem) => problem);
            if (problems.length > 0) {
                return createToolError("Workflow validation failed", problems.join("; "), "Fix the step parameters and retry.");
            }
            // 4. Run the steps
            const outcome = await executeWorkflow({
                page: lookup.page,
                steps,
                sessionManager,
                sessionId,
                pageIdentifier: lookup.identifier,
            });
            const stopped = outcome.failedStep !== undefined;
            // 5. Assemble the response: summary first ...
            const hasAssertions = outcome.steps.some((r) => r.assertion);
            const countAssertions = (passed) => outcome.steps.filter((r) => r.assertion?.passed === passed).length;
            const summary = {
                workflow: stopped ? "stopped" : "complete",
                totalSteps: outcome.totalSteps,
                completedSteps: outcome.completedSteps,
                failedAtStep: outcome.failedStep,
                // Assertion tallies only appear when assert steps ran
                ...(hasAssertions
                    ? {
                        assertionsPassed: countAssertions(true),
                        assertionsFailed: countAssertions(false),
                    }
                    : {}),
            };
            // ... then, per step, its metadata and (when captured) its screenshot
            const perStepContent = outcome.steps.flatMap((r) => {
                const consoleErrors = r.consoleDelta.filter((entry) => entry.level === "error");
                const meta = {
                    step: r.stepIndex,
                    action: r.action,
                    success: r.success,
                    error: r.error,
                    consoleLogs: r.consoleDelta.length,
                    errors: r.errorDelta.length,
                    ...(r.assertion ? { assertion: r.assertion } : {}),
                    ...(r.errorDelta.length > 0 ? { errorDetails: r.errorDelta } : {}),
                    ...(consoleErrors.length > 0 ? { consoleErrors } : {}),
                };
                const parts = [asJsonText(meta)];
                if (r.screenshotBase64 && r.screenshotMimeType) {
                    parts.push({
                        type: "image",
                        data: r.screenshotBase64,
                        mimeType: r.screenshotMimeType,
                    });
                }
                return parts;
            });
            return {
                content: [asJsonText(summary), ...perStepContent],
                isError: stopped,
            };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return createToolError("Workflow execution failed", message, "Take a screenshot to check current page state.");
        }
    };
    server.tool("run_workflow", "Execute a multi-step workflow on a web or Electron page. Runs actions in sequence, captures screenshot and logs at each step, stops on first error. Use for form filling, navigation flows, or multi-step UI verification with pass/fail assertions.", inputSchema, handleRunWorkflow);
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * screenshot_desktop MCP tool
3
+ * Captures a screenshot of a Windows desktop application window by PID
4
+ */
5
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
6
+ import { SessionManager } from "../session-manager.js";
7
+ /**
8
+ * Register the "screenshot_desktop" tool (capture a desktop application window by PID) with the MCP server.
9
+ *
10
+ * @param server - MCP server instance the tool is registered on
11
+ * @param sessionManager - Session manager used to check that the given session exists
12
+ */
13
+ export declare function registerScreenshotDesktopTool(server: McpServer, sessionManager: SessionManager): void;
@@ -0,0 +1,80 @@
1
+ /**
2
+ * screenshot_desktop MCP tool
3
+ * Captures a screenshot of a Windows desktop application window by PID
4
+ */
5
+ import { z } from "zod";
6
+ import { createToolError, createScreenshotResult } from "../utils/errors.js";
7
+ import { captureDesktopWindow } from "../screenshot/capture.js";
8
+ import { optimizeScreenshot } from "../screenshot/optimize.js";
9
/**
 * Register the screenshot_desktop tool with the MCP server.
 *
 * The tool checks that the session exists, captures the window belonging
 * to `pid`, optimizes the image (defaults: max width 1280, quality 80) and
 * returns it base64-encoded together with size metadata.
 *
 * @param server - MCP server instance
 * @param sessionManager - Session manager for resource tracking
 */
export function registerScreenshotDesktopTool(server, sessionManager) {
    const inputSchema = {
        sessionId: z.string().describe("Session ID (for resource tracking)"),
        pid: z
            .number()
            .int()
            .positive()
            .describe("Process ID of the target window (from launch_windows_exe result)"),
        maxWidth: z
            .number()
            .int()
            .min(100)
            .max(3840)
            .optional()
            .describe("Max image width in pixels (default: 1280)"),
        quality: z
            .number()
            .int()
            .min(1)
            .max(100)
            .optional()
            .describe("WebP quality 1-100 (default: 80)"),
    };
    // Pick the most specific advice the capture error message allows
    const adviseOn = (message) => {
        if (message.includes("No window found")) {
            return "The process may not have a visible window yet. Wait a moment and retry, or check the PID from launch_windows_exe output.";
        }
        if (message.includes("minimized")) {
            return "The window is minimized. The user needs to restore it before capturing.";
        }
        return "Check the PID is correct and the window is visible.";
    };
    const handleScreenshot = async ({ sessionId, pid, maxWidth, quality }) => {
        try {
            if (!sessionManager.get(sessionId)) {
                return createToolError(`Session not found: ${sessionId}`, "The session may have already been ended", "Create a session first with create_session.");
            }
            console.error(`[screenshot_desktop] Capturing window PID ${pid} for session ${sessionId}`);
            // Raw capture first, then resize/re-encode for transport
            const rawImage = await captureDesktopWindow(pid);
            const shrunk = await optimizeScreenshot(rawImage, {
                maxWidth: maxWidth ?? 1280,
                quality: quality ?? 80,
            });
            const encoded = shrunk.data.toString("base64");
            const metadata = {
                sessionId,
                type: "desktop",
                pid,
                mode: "viewport",
                width: shrunk.width,
                height: shrunk.height,
                originalSize: rawImage.length,
                optimizedSize: shrunk.data.length,
            };
            return createScreenshotResult(metadata, encoded, shrunk.mimeType);
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return createToolError("Failed to capture desktop screenshot", message, adviseOn(message));
        }
    };
    server.tool("screenshot_desktop", "Capture a screenshot of a Windows desktop application window by PID. Use after launch_windows_exe to see the app.", inputSchema, handleScreenshot);
}