@skyramp/mcp 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,11 +40,6 @@ export async function registerPlaywrightTools(server, options) {
40
40
  'browser_select_option',
41
41
  'browser_hover',
42
42
  'browser_drag',
43
- 'browser_mouse_move_xy',
44
- 'browser_mouse_click_xy',
45
- 'browser_mouse_drag_xy',
46
- 'browser_mouse_down',
47
- 'browser_mouse_up',
48
43
  'browser_file_upload',
49
44
  'browser_evaluate',
50
45
  'browser_tabs',
@@ -54,8 +49,11 @@ export async function registerPlaywrightTools(server, options) {
54
49
  'browser_assert',
55
50
  'browser_assert_api_request',
56
51
  'browser_assert_table_cell',
52
+ 'browser_mouse_action',
53
+ 'browser_visual_snapshot',
57
54
  'skyramp_export_zip',
58
55
  'skyramp_load_trace',
56
+ 'browser_mouse_action',
59
57
  // DOM Analyzer tools (Phase C)
60
58
  'browser_blueprint',
61
59
  'browser_blueprint_diff',
@@ -92,26 +90,7 @@ function jsonSchemaToZod(schema) {
92
90
  const required = new Set(schema.required || []);
93
91
  const shape = {};
94
92
  for (const [key, prop] of Object.entries(properties)) {
95
- let field;
96
- switch (prop.type) {
97
- case "string":
98
- field = prop.enum
99
- ? z.enum(prop.enum)
100
- : z.string();
101
- break;
102
- case "number":
103
- case "integer":
104
- field = z.number();
105
- break;
106
- case "boolean":
107
- field = z.boolean();
108
- break;
109
- case "array":
110
- field = z.array(prop.items ? jsonSchemaPropertyToZod(prop.items) : z.unknown());
111
- break;
112
- default:
113
- field = z.unknown();
114
- }
93
+ let field = jsonSchemaPropertyToZod(prop);
115
94
  if (prop.description)
116
95
  field = field.describe(prop.description);
117
96
  if (prop.default !== undefined)
@@ -135,6 +114,24 @@ function jsonSchemaPropertyToZod(prop) {
135
114
  return z.boolean();
136
115
  case "array":
137
116
  return z.array(prop.items ? jsonSchemaPropertyToZod(prop.items) : z.unknown());
117
+ case "object": {
118
+ // Nested object (e.g. a clip rect). Recurse into its properties so the
119
+ // SDK registers a real object schema; without this it falls through to
120
+ // z.unknown(), which serializes the nested object as a string and the
121
+ // inner backend's z.object(...) then rejects it.
122
+ const nestedProps = prop.properties || {};
123
+ const nestedRequired = new Set(prop.required || []);
124
+ const nestedShape = {};
125
+ for (const [k, p] of Object.entries(nestedProps)) {
126
+ let f = jsonSchemaPropertyToZod(p);
127
+ if (p.description)
128
+ f = f.describe(p.description);
129
+ if (!nestedRequired.has(k))
130
+ f = f.optional();
131
+ nestedShape[k] = f;
132
+ }
133
+ return z.object(nestedShape);
134
+ }
138
135
  default:
139
136
  return z.unknown();
140
137
  }
@@ -128,6 +128,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
128
128
  - Example: If enrichment reveals that sending \`discount_value\` without \`discount_type\` silently orphans the value (a concrete bug), complete all planned GENERATE items first, then generate this discovered scenario as an extra test and report it in \`newTestsCreated\`.
129
129
  - Total generated: Follow the "Budget: N generate" line in the Execution Plan. Process every GENERATE-tagged item in order. Backfill from ADDITIONAL candidates (highest-ranked first) until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
130
130
  - **UI test priority**: If the PR scope assessment shows any UI/E2E budget OR \`uiContext.changedFrontendFiles\` is non-empty (the deterministic server signal — populated for all supported frontend file types including \`.tsx\`/\`.jsx\`/\`.vue\`/\`.svelte\`/\`.dart\`), you MUST attempt to generate at least one UI test. Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test.
131
+ **Flutter web apps:** Skyramp's Playwright tools automatically enable Flutter's accessibility semantics tree on every \`browser_navigate\` call — you do NOT need to manually click \`flt-semantics-placeholder\` or add any activation step to the trace. Do NOT log an \`issuesFound\` entry about Flutter canvas rendering or accessibility activation — this is handled transparently.
131
132
  **Skip only if one of these conditions is met:**
132
133
  - **(a) App is unreachable** — \`browser_navigate\` fails or connection is refused.
133
134
  - **(b) Unintegrated non-route component** — the changed file is a leaf component (not a framework route/entrypoint) that has no integration point in the running app. To confirm:
@@ -0,0 +1,123 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var mouseActions_exports = {};
20
+ __export(mouseActions_exports, {
21
+ DEFAULT_DRAG_STEPS: () => DEFAULT_DRAG_STEPS,
22
+ decodeModifierKeys: () => decodeModifierKeys,
23
+ decomposeDrag: () => decomposeDrag,
24
+ mouseActionToJsonl: () => mouseActionToJsonl,
25
+ mouseJsonlToCode: () => mouseJsonlToCode
26
+ });
27
+ module.exports = __toCommonJS(mouseActions_exports);
28
+ const DEFAULT_DRAG_STEPS = 10;
29
/**
 * Expand a modifier bitmask into the list of keyboard key names it encodes.
 *
 * Bit layout: Alt = 1, Control = 2, Meta = 4, Shift = 8. When Control and Meta
 * are both set they collapse into a single key: "Meta" on darwin, "Control"
 * on every other platform.
 *
 * @param {number} [mask] - Modifier bitmask; a falsy mask yields an empty list.
 * @param {string} [platform] - Platform id; defaults to `process.platform`, or
 *   "linux" when `process` is not defined.
 * @returns {string[]} Key names in the order Alt, Control/Meta, Shift.
 */
function decodeModifierKeys(mask, platform = typeof process !== "undefined" ? process.platform : "linux") {
  const pressed = [];
  if (!mask)
    return pressed;
  const isSet = (bit) => (mask & bit) !== 0;
  const control = isSet(2);
  const meta = isSet(4);
  if (isSet(1))
    pressed.push("Alt");
  if (control && meta)
    pressed.push(platform === "darwin" ? "Meta" : "Control");
  else if (control)
    pressed.push("Control");
  else if (meta)
    pressed.push("Meta");
  if (isSet(8))
    pressed.push("Shift");
  return pressed;
}
43
/**
 * Break a drag gesture into the four low-level pointer events that perform it:
 * move to the start point, press, move to the end point, release.
 *
 * @param {{x: number, y: number}} start - Where the button is pressed.
 * @param {{x: number, y: number}} end - Where the button is released.
 * @param {number} [steps] - Intermediate steps for the press-to-release move;
 *   defaults to DEFAULT_DRAG_STEPS.
 * @returns {Array<object>} The ordered mouse.move / mouse.down / mouse.move / mouse.up actions.
 */
function decomposeDrag(start, end, steps = DEFAULT_DRAG_STEPS) {
  const moveToStart = { name: "mouse.move", position: start };
  const press = { name: "mouse.down" };
  const moveToEnd = { name: "mouse.move", position: end, steps };
  const release = { name: "mouse.up" };
  return [moveToStart, press, moveToEnd, release];
}
51
/**
 * Translate a mouse tool call into the JSONL action records that represent it.
 *
 * @param {object} params - Tool arguments: `action` plus the coordinates,
 *   button, click count, wheel deltas, steps and modifier mask it needs.
 * @returns {{actions: Array<object>} | {error: string}} The ordered action
 *   records, or an error describing the missing/unknown input.
 */
function mouseActionToJsonl(params) {
  const button = params.button ?? "left";
  const modifiers = params.modifiers ?? 0;
  const lacksPoint = params.x === void 0 || params.y === void 0;
  const fail = (error) => ({ error });
  const single = (entry) => ({ actions: [entry] });
  // The button is only recorded when it is not the default left button.
  const buttonEvent = (name) => single(button === "left" ? { name } : { name, button });

  const action = params.action;
  if (action === "move") {
    if (lacksPoint)
      return fail('mouse action "move" requires x and y.');
    const move = { name: "mouse.move", position: { x: params.x, y: params.y } };
    if (params.steps !== void 0)
      move.steps = params.steps;
    return single(move);
  }
  if (action === "down")
    return buttonEvent("mouse.down");
  if (action === "up")
    return buttonEvent("mouse.up");
  if (action === "wheel") {
    if (params.deltaX === void 0 && params.deltaY === void 0)
      return fail('mouse action "wheel" requires deltaX and/or deltaY.');
    if (lacksPoint)
      return fail('mouse action "wheel" requires x and y (the scroll target) so the pointer is moved there before scrolling.');
    // Both records share the same position object: move there, then scroll.
    const position = { x: params.x, y: params.y };
    return {
      actions: [
        { name: "mouse.move", position },
        { name: "mouse.wheel", position, deltaX: params.deltaX ?? 0, deltaY: params.deltaY ?? 0, modifiers }
      ]
    };
  }
  if (action === "click") {
    if (lacksPoint)
      return fail('mouse action "click" requires x and y.');
    return single({
      name: "click",
      selector: "body",
      position: { x: params.x, y: params.y },
      button,
      modifiers,
      clickCount: params.clickCount ?? 1
    });
  }
  if (action === "drag") {
    if (lacksPoint || params.endX === void 0 || params.endY === void 0)
      return fail('mouse action "drag" requires x, y, endX and endY.');
    const from = { x: params.x, y: params.y };
    const to = { x: params.endX, y: params.endY };
    return { actions: decomposeDrag(from, to, params.steps) };
  }
  return fail(`Unknown mouse action: ${params.action}`);
}
90
/**
 * Render one mouse JSONL action as the Playwright source line(s) that perform it.
 *
 * Missing optional fields fall back to the same defaults the replay path uses
 * (button "left", clickCount 1, wheel deltas 0), so an action that omits them
 * never renders the literal text `undefined` into generated code.
 *
 * @param {object} a - Action record with `name` of "mouse.move", "mouse.down",
 *   "mouse.up", "mouse.wheel" or "click".
 * @returns {string[] | undefined} Source lines for the action; `undefined`
 *   when `a.name` is not one of the handled names.
 */
function mouseJsonlToCode(a) {
  switch (a.name) {
    case "mouse.move": {
      const { x, y } = a.position;
      return [a.steps !== void 0 ? `await page.mouse.move(${x}, ${y}, { steps: ${a.steps} });` : `await page.mouse.move(${x}, ${y});`];
    }
    case "mouse.down":
      return [a.button && a.button !== "left" ? `await page.mouse.down({ button: '${a.button}' });` : `await page.mouse.down();`];
    case "mouse.up":
      return [a.button && a.button !== "left" ? `await page.mouse.up({ button: '${a.button}' });` : `await page.mouse.up();`];
    case "mouse.wheel": {
      // An axis that was not recorded means "no scroll on that axis".
      const deltaX = a.deltaX ?? 0;
      const deltaY = a.deltaY ?? 0;
      return withModifierLines(a.modifiers, `await page.mouse.wheel(${deltaX}, ${deltaY});`);
    }
    case "click": {
      const button = a.button ?? "left";
      const clickCount = a.clickCount ?? 1;
      // Options are only emitted when they differ from a plain single left click.
      const opts = button !== "left" || clickCount !== 1 ? `, { button: '${button}', clickCount: ${clickCount} }` : "";
      return withModifierLines(a.modifiers, `await page.mouse.click(${a.position.x}, ${a.position.y}${opts});`);
    }
  }
}
106
/**
 * Wrap a generated source line with keyboard.down / keyboard.up lines for the
 * modifier keys encoded in `mask`. Keys are released in the reverse of the
 * order they were pressed.
 *
 * @param {number} [mask] - Modifier bitmask, decoded by decodeModifierKeys.
 * @param {string} line - The source line to wrap.
 * @returns {string[]} `[line]` when no modifiers apply, otherwise the press
 *   lines, the line itself, then the release lines.
 */
function withModifierLines(mask, line) {
  const keys = decodeModifierKeys(mask);
  if (keys.length === 0)
    return [line];
  const lines = [];
  for (const key of keys)
    lines.push(`await page.keyboard.down('${key}');`);
  lines.push(line);
  for (let i = keys.length - 1; i >= 0; i--)
    lines.push(`await page.keyboard.up('${keys[i]}');`);
  return lines;
}
116
+ // Annotate the CommonJS export names for ESM import in node:
117
+ 0 && (module.exports = {
118
+ DEFAULT_DRAG_STEPS,
119
+ decodeModifierKeys,
120
+ decomposeDrag,
121
+ mouseActionToJsonl,
122
+ mouseJsonlToCode
123
+ });
@@ -0,0 +1,95 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var visualSnapshot_exports = {};
20
+ __export(visualSnapshot_exports, {
21
+ buildVisualSnapshotJsonl: () => buildVisualSnapshotJsonl,
22
+ nextSnapshotFilename: () => nextSnapshotFilename,
23
+ parseSnapshotCounter: () => parseSnapshotCounter,
24
+ visualSnapshotToCode: () => visualSnapshotToCode
25
+ });
26
+ module.exports = __toCommonJS(visualSnapshot_exports);
27
// Filename prefix used for auto-named baselines of each snapshot type.
const FILENAME_PREFIX = {
  page: "page",
  element: "el",
  region: "region"
};

/**
 * Build the auto-generated baseline filename for a snapshot type.
 *
 * @param {string} type - Snapshot type key of FILENAME_PREFIX.
 * @param {number} counter - Sequence number, zero-padded to at least 3 digits.
 * @returns {string} A name of the form `<prefix>-NNN.png`.
 */
function nextSnapshotFilename(type, counter) {
  const prefix = FILENAME_PREFIX[type];
  const sequence = String(counter).padStart(3, "0");
  return `${prefix}-${sequence}.png`;
}

/**
 * Extract the sequence number from a `<prefix>-NNN.png` baseline filename.
 * Matching is case-insensitive and must cover the whole filename.
 *
 * @param {string} type - Snapshot type key of FILENAME_PREFIX.
 * @param {string} filename - Filename to inspect.
 * @returns {number | null} The embedded counter, or null when the filename
 *   does not follow the pattern for that type.
 */
function parseSnapshotCounter(type, filename) {
  const pattern = new RegExp(`^${FILENAME_PREFIX[type]}-(\\d+)\\.png$`, "i");
  const found = pattern.exec(filename);
  if (!found)
    return null;
  return parseInt(found[1], 10);
}
39
/**
 * Validate a visual-snapshot request and build its `visualSnapshot` JSONL action.
 *
 * Every validation failure is reported as `{ error }`. That includes a missing
 * or non-string filename, which would otherwise throw while being lower-cased.
 *
 * @param {object} input - `snapshotType` ("page" | "element" | "region"),
 *   `filename`, and per-type fields: `fullPage` / `screenshotStyle` for page,
 *   `selector` for element, `clip` { x, y, width, height } for region.
 * @returns {{action: object} | {error: string}} The action record, or an error message.
 */
function buildVisualSnapshotJsonl(input) {
  const { filename } = input;
  if (typeof filename !== "string" || !filename.toLowerCase().endsWith(".png"))
    return { error: `visual snapshot filename must end in .png (got "${String(filename)}").` };
  switch (input.snapshotType) {
    case "page": {
      const action = { name: "visualSnapshot", snapshotType: "page", filename };
      // Optional page settings are only recorded when the caller supplied them.
      if (input.fullPage !== void 0)
        action.fullPage = input.fullPage;
      if (input.screenshotStyle !== void 0)
        action.screenshotStyle = input.screenshotStyle;
      return { action };
    }
    case "element": {
      if (!input.selector)
        return { error: 'visual snapshot "element" requires a resolved selector (pass a ref to the tool).' };
      return { action: { name: "visualSnapshot", snapshotType: "element", filename, selector: input.selector } };
    }
    case "region": {
      const c = input.clip;
      if (!c || c.width === void 0 || c.height === void 0 || c.x === void 0 || c.y === void 0)
        return { error: 'visual snapshot "region" requires a clip { x, y, width, height }.' };
      // Copy only the four rectangle fields so extra clip properties are not recorded.
      return { action: { name: "visualSnapshot", snapshotType: "region", filename, clip: { x: c.x, y: c.y, width: c.width, height: c.height } } };
    }
    default:
      return { error: `Unknown visual snapshot type: ${input.snapshotType}` };
  }
}
66
/**
 * Render a `visualSnapshot` action as a Playwright `toHaveScreenshot` assertion.
 *
 * Values embedded in single-quoted literals are escaped for backslashes,
 * quotes, and newline / carriage-return / tab characters, so a multi-line
 * `screenshotStyle` cannot break the generated string literal.
 *
 * @param {object} a - Action with `snapshotType`, `filename`, and the per-type
 *   fields (`fullPage` / `screenshotStyle`, `selector`, or `clip`).
 * @param {string} [locatorExpr] - Locator expression (without the `page.`
 *   prefix) to use for element snapshots instead of `a.selector`.
 * @returns {string | undefined} The assertion line; `undefined` for an
 *   unrecognized snapshot type.
 */
function visualSnapshotToCode(a, locatorExpr) {
  // Escape a value for embedding inside a single-quoted JavaScript string literal.
  const q = (s) => s
    .replace(/\\/g, "\\\\")
    .replace(/'/g, "\\'")
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r")
    .replace(/\t/g, "\\t");
  const file = q(a.filename);
  switch (a.snapshotType) {
    case "page": {
      const opts = [];
      if (a.fullPage)
        opts.push("fullPage: true");
      if (a.screenshotStyle)
        opts.push(`style: '${q(a.screenshotStyle)}'`);
      const optsStr = opts.length ? `, { ${opts.join(", ")} }` : "";
      return `await expect(page).toHaveScreenshot('${file}'${optsStr});`;
    }
    case "element": {
      // A caller-provided locator expression wins over the raw selector.
      const target = locatorExpr ? `page.${locatorExpr}` : `page.locator('${q(a.selector)}')`;
      return `await expect(${target}).toHaveScreenshot('${file}');`;
    }
    case "region": {
      const { x, y, width, height } = a.clip;
      return `await expect(page).toHaveScreenshot('${file}', { clip: { x: ${x}, y: ${y}, width: ${width}, height: ${height} } });`;
    }
  }
}
89
+ // Annotate the CommonJS export names for ESM import in node:
90
+ 0 && (module.exports = {
91
+ buildVisualSnapshotJsonl,
92
+ nextSnapshotFilename,
93
+ parseSnapshotCounter,
94
+ visualSnapshotToCode
95
+ });
@@ -31,6 +31,7 @@ module.exports = __toCommonJS(loadTraceTool_exports);
31
31
  var import_mcpBundle = require("playwright-core/lib/mcpBundle");
32
32
  var import_utils = require("playwright-core/lib/utils");
33
33
  var import_tool = require("../sdk/tool");
34
+ var import_mouseActions = require("./common/mouseActions");
34
35
  const loadTraceSchema = {
35
36
  name: "skyramp_load_trace",
36
37
  title: "Load and replay a Skyramp trace",
@@ -103,6 +104,8 @@ function describeStep(action, index) {
103
104
  detail += ` = ${JSON.stringify(a.value)}`;
104
105
  else if (a.name === "press" && a.key !== void 0)
105
106
  detail += ` ${a.key}`;
107
+ else if (a.name === "visualSnapshot")
108
+ detail = `${a.snapshotType ?? ""}${a.filename ? ` ${a.filename}` : ""}`.trim();
106
109
  return `#${index + 1} ${a.name}${onPage}${detail ? ` ${detail}` : ""}`;
107
110
  }
108
111
  function listStepsFrom(allActions, fromIndex) {
@@ -183,6 +186,17 @@ function urlMatchesPattern(currentUrl, pattern) {
183
186
  return (0, import_utils.urlMatches)(void 0, currentUrl, pattern);
184
187
  return currentUrl.toLowerCase().includes(pattern.toLowerCase());
185
188
  }
189
/**
 * Run `fn` while the modifier keys encoded in `mask` are held down.
 *
 * Keys are pressed inside the guarded region and tracked as they go down, so
 * if a later key-down (or `fn`) fails, every key that was successfully pressed
 * is still released, in reverse press order.
 *
 * @param {object} page - Page exposing `keyboard.down(key)` / `keyboard.up(key)`.
 * @param {number} [mask] - Modifier bitmask, decoded by decodeModifierKeys.
 * @param {() => Promise<unknown>} fn - Action to perform with the modifiers held.
 * @returns {Promise<void>}
 */
async function withMouseModifiers(page, mask, fn) {
  const keys = (0, import_mouseActions.decodeModifierKeys)(mask);
  const held = [];
  try {
    for (const k of keys) {
      await page.keyboard.down(k);
      held.push(k);
    }
    await fn();
  } finally {
    for (const k of held.reverse())
      await page.keyboard.up(k);
  }
}
186
200
  const DEFAULT_ACTION_TIMEOUT = 15e3;
187
201
  async function performClientAction(page, actionInContext) {
188
202
  const action = actionInContext.action;
@@ -215,6 +229,40 @@ async function performClientAction(page, actionInContext) {
215
229
  await page.keyboard.press(shortcut);
216
230
  return;
217
231
  }
232
+ if (action.name === "mouse.move") {
233
+ if (!action.position)
234
+ return;
235
+ if (action.steps !== void 0)
236
+ await page.mouse.move(action.position.x, action.position.y, { steps: action.steps });
237
+ else
238
+ await page.mouse.move(action.position.x, action.position.y);
239
+ return;
240
+ }
241
+ if (action.name === "mouse.down") {
242
+ if (action.button && action.button !== "left")
243
+ await page.mouse.down({ button: action.button });
244
+ else
245
+ await page.mouse.down();
246
+ return;
247
+ }
248
+ if (action.name === "mouse.up") {
249
+ if (action.button && action.button !== "left")
250
+ await page.mouse.up({ button: action.button });
251
+ else
252
+ await page.mouse.up();
253
+ return;
254
+ }
255
+ if (action.name === "mouse.wheel") {
256
+ await withMouseModifiers(page, action.modifiers, () => page.mouse.wheel(action.deltaX ?? 0, action.deltaY ?? 0));
257
+ return;
258
+ }
259
+ if (action.name === "click" && action.position && (!action.selector || action.selector === "body")) {
260
+ const button = action.button ?? "left";
261
+ const clickCount = action.clickCount ?? 1;
262
+ const needsOpts = button !== "left" || clickCount !== 1;
263
+ await withMouseModifiers(page, action.modifiers, () => needsOpts ? page.mouse.click(action.position.x, action.position.y, { button, clickCount }) : page.mouse.click(action.position.x, action.position.y));
264
+ return;
265
+ }
218
266
  if (!action.selector)
219
267
  return;
220
268
  const selector = buildFullSelector(actionInContext.frame.framePath, action.selector);
@@ -0,0 +1,131 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var mouseActionTool_exports = {};
20
+ __export(mouseActionTool_exports, {
21
+ _schema: () => mouseActionSchema,
22
+ executeMouseAction: () => executeMouseAction,
23
+ mouseActionMcpTool: () => mouseActionMcpTool,
24
+ mouseActionSchema: () => mouseActionSchema
25
+ });
26
+ module.exports = __toCommonJS(mouseActionTool_exports);
27
+ var import_mcpBundle = require("playwright-core/lib/mcpBundle");
28
+ var import_tool = require("../sdk/tool");
29
+ var import_mouseActions = require("./common/mouseActions");
30
// Tool definition for browser_mouse_action: name, title, description text and
// the zod input schema. The wheel bullet states that x and y are required,
// matching the validation in common/mouseActions (which rejects a wheel call
// without a scroll target and moves the pointer there before scrolling).
const mouseActionSchema = {
  name: "browser_mouse_action",
  title: "Mouse action (coordinate-based)",
  description: [
    "Perform coordinate-based mouse interactions: drag-and-drop, slider adjustment, canvas/diagram manipulation, scrolling, and other gestures that operate on viewport pixel positions rather than element references.",
    "Primary uses: drag an item between positions (drag), move a slider thumb (drag), pan/draw on a canvas (move/down/up), and scroll a region (wheel).",
    "The `action` parameter selects the gesture:",
    "- drag: press at (x, y), move to (endX, endY), release. Decomposed into low-level mouse events (move\u2192down\u2192move\u2192up) so it works for sliders, canvas, and custom drag-handles where high-level element drag fails.",
    "- move / down / up: individual pointer steps for composing custom gestures.",
    "- wheel: move the pointer to (x, y), then scroll by deltaX / deltaY. x and y are required.",
    '- click: a single position click at (x, y). Use for canvas hotspots, or as a fallback for an interactive element that has no usable accessibility ref or is occluded by an overlay (where browser_click reports "intercepts pointer events"). For ordinary elements, prefer browser_click.',
    "Coordinates are viewport pixels, obtained from a snapshot or screenshot."
  ].join(" "),
  inputSchema: import_mcpBundle.z.object({
    action: import_mcpBundle.z.enum(["move", "down", "up", "click", "wheel", "drag"]).describe("The mouse action to perform."),
    x: import_mcpBundle.z.number().optional().describe("X coordinate (viewport px). Required for move/click/wheel; the start X for drag."),
    y: import_mcpBundle.z.number().optional().describe("Y coordinate (viewport px). Required for move/click/wheel; the start Y for drag."),
    endX: import_mcpBundle.z.number().optional().describe("End X coordinate for drag."),
    endY: import_mcpBundle.z.number().optional().describe("End Y coordinate for drag."),
    button: import_mcpBundle.z.enum(["left", "right", "middle"]).optional().describe("Mouse button for down/up/click. Defaults to left."),
    clickCount: import_mcpBundle.z.number().int().positive().optional().describe("Number of clicks for click (e.g. 2 for double-click). Defaults to 1."),
    deltaX: import_mcpBundle.z.number().optional().describe("Horizontal scroll delta for wheel."),
    deltaY: import_mcpBundle.z.number().optional().describe("Vertical scroll delta for wheel."),
    steps: import_mcpBundle.z.number().int().positive().optional().describe("Intermediate move steps for move/drag (smoother movement)."),
    modifiers: import_mcpBundle.z.number().int().min(0).max(15).optional().describe("Modifier bitmask (Alt=1, Control=2, Meta=4, Shift=8) for click/wheel. Range 0-15.")
  }),
  type: "action"
};
58
/**
 * Build the MCP tool descriptor for browser_mouse_action.
 *
 * @returns {object} The result of converting mouseActionSchema with toMcpTool.
 */
function mouseActionMcpTool() {
  const { toMcpTool } = import_tool;
  return toMcpTool(mouseActionSchema);
}
61
/**
 * Validate a mouse tool call, run its decomposed actions against the page, and
 * report the Playwright code that was executed.
 *
 * @param {object | undefined} page - Page to drive; a falsy page yields an error result.
 * @param {object} params - Parsed browser_mouse_action arguments.
 * @returns {Promise<{result: object, actions: Array<object>}>} The tool
 *   response plus the executed action records. `actions` is empty whenever
 *   the result is an error.
 */
async function executeMouseAction(page, params) {
  // Every failure path shares this shape: an error response and no actions.
  const failure = (text) => ({ result: { content: [{ type: "text", text }], isError: true }, actions: [] });

  const built = (0, import_mouseActions.mouseActionToJsonl)(params);
  if ("error" in built)
    return failure(`### Error\n${built.error}`);
  if (!page)
    return failure("### Error\nNo open page. Call browser_navigate first.");

  const snippets = [];
  try {
    for (const step of built.actions) {
      // Record the code for a step before running it.
      snippets.push(...(0, import_mouseActions.mouseJsonlToCode)(step));
      await runOne(page, step);
    }
  } catch (e) {
    return failure(`### Error\nMouse action failed: ${e.message}`);
  }

  const report = `### Ran Playwright code\n${snippets.join("\n")}`;
  return {
    result: { content: [{ type: "text", text: report }] },
    actions: built.actions
  };
}
84
/**
 * Perform a single mouse JSONL action against the page.
 *
 * @param {object} page - Page exposing `mouse` (and `keyboard`, via withModifiers).
 * @param {object} a - Action record: mouse.move / mouse.down / mouse.up /
 *   mouse.wheel / click. Any other name is ignored.
 * @returns {Promise<void>}
 */
async function runOne(page, a) {
  const kind = a.name;
  if (kind === "mouse.move") {
    // The options argument is only passed when steps were recorded.
    const moveArgs = a.steps !== void 0
      ? [a.position.x, a.position.y, { steps: a.steps }]
      : [a.position.x, a.position.y];
    await page.mouse.move(...moveArgs);
    return;
  }
  if (kind === "mouse.down" || kind === "mouse.up") {
    // A left (or absent) button uses the no-argument call form.
    const buttonArgs = a.button && a.button !== "left" ? [{ button: a.button }] : [];
    if (kind === "mouse.down")
      await page.mouse.down(...buttonArgs);
    else
      await page.mouse.up(...buttonArgs);
    return;
  }
  if (kind === "mouse.wheel") {
    await withModifiers(page, a.modifiers, () => page.mouse.wheel(a.deltaX, a.deltaY));
    return;
  }
  if (kind === "click") {
    const plainLeftClick = a.button === "left" && a.clickCount === 1;
    await withModifiers(page, a.modifiers, () => plainLeftClick
      ? page.mouse.click(a.position.x, a.position.y)
      : page.mouse.click(a.position.x, a.position.y, { button: a.button, clickCount: a.clickCount }));
    return;
  }
}
114
/**
 * Run `fn` while the modifier keys encoded in `mask` are held down.
 *
 * Keys are pressed inside the guarded region and tracked as they go down, so
 * if a later key-down (or `fn`) fails, every key that was successfully pressed
 * is still released, in reverse press order.
 *
 * @param {object} page - Page exposing `keyboard.down(key)` / `keyboard.up(key)`.
 * @param {number} [mask] - Modifier bitmask, decoded by decodeModifierKeys.
 * @param {() => Promise<unknown>} fn - Action to perform with the modifiers held.
 * @returns {Promise<void>}
 */
async function withModifiers(page, mask, fn) {
  const keys = (0, import_mouseActions.decodeModifierKeys)(mask);
  const held = [];
  try {
    for (const k of keys) {
      await page.keyboard.down(k);
      held.push(k);
    }
    await fn();
  } finally {
    for (const k of held.reverse())
      await page.keyboard.up(k);
  }
}
125
+ // Annotate the CommonJS export names for ESM import in node:
126
+ 0 && (module.exports = {
127
+ _schema,
128
+ executeMouseAction,
129
+ mouseActionMcpTool,
130
+ mouseActionSchema
131
+ });
@@ -46,11 +46,14 @@ var import_assertTool = require("./assertTool");
46
46
  var import_assertApiRequestTool = require("./assertApiRequestTool");
47
47
  var import_loadTraceTool = require("./loadTraceTool");
48
48
  var import_skyRampImport = require("./skyRampImport");
49
+ var import_mouseActionTool = require("./mouseActionTool");
50
+ var import_visualSnapshotTool = require("./visualSnapshotTool");
51
+ var import_visualSnapshot = require("./common/visualSnapshot");
49
52
  var import_utils = require("playwright-core/lib/utils");
50
53
  var import_types = require("./types");
51
54
  const traceDebug = (0, import_utilsBundle.debug)("pw:mcp:trace");
52
55
  class TraceRecordingBackend {
53
- // true while page.reload() is in progress — suppresses spurious popup tracking
56
+ // per-type baseline filename counter
54
57
  constructor(options) {
55
58
  this._trackedActions = [];
56
59
  this._initialized = false;
@@ -61,6 +64,8 @@ class TraceRecordingBackend {
61
64
  this._pendingPopupAlias = null;
62
65
  // popup alias to stamp on the NEXT tracked click
63
66
  this._reloading = false;
67
+ // true while page.reload() is in progress — suppresses spurious popup tracking
68
+ this._visualSnapshotCounters = { page: 0, element: 0, region: 0 };
64
69
  this._options = options || {};
65
70
  this._outputDir = options?.outputDir || process.cwd();
66
71
  this._tempDir = import_fs.default.mkdtempSync(import_path.default.join(import_os.default.tmpdir(), "skyramp-trace-"));
@@ -119,7 +124,7 @@ class TraceRecordingBackend {
119
124
  }
120
125
  async listTools() {
121
126
  const browserTools = await this._browserBackend.listTools();
122
- return [...browserTools, (0, import_exportTool.exportZipMcpTool)(), (0, import_assertTool.assertMcpTool)(), (0, import_assertApiRequestTool.assertApiRequestMcpTool)(), (0, import_loadTraceTool.loadTraceMcpTool)()];
127
+ return [...browserTools, (0, import_exportTool.exportZipMcpTool)(), (0, import_assertTool.assertMcpTool)(), (0, import_assertApiRequestTool.assertApiRequestMcpTool)(), (0, import_loadTraceTool.loadTraceMcpTool)(), (0, import_mouseActionTool.mouseActionMcpTool)(), (0, import_visualSnapshotTool.visualSnapshotMcpTool)()];
123
128
  }
124
129
  async callTool(name, args, progress) {
125
130
  if (!this._initialized)
@@ -146,6 +151,14 @@ class TraceRecordingBackend {
146
151
  const parsed = import_loadTraceTool.loadTraceSchema.inputSchema.parse(args || {});
147
152
  return this._handleLoadTrace(parsed);
148
153
  }
154
+ if (name === import_mouseActionTool.mouseActionSchema.name) {
155
+ const parsed = import_mouseActionTool.mouseActionSchema.inputSchema.parse(args || {});
156
+ return this._handleMouseAction(parsed);
157
+ }
158
+ if (name === import_visualSnapshotTool.visualSnapshotSchema.name) {
159
+ const parsed = import_visualSnapshotTool.visualSnapshotSchema.inputSchema.parse(args || {});
160
+ return this._handleVisualSnapshot(parsed);
161
+ }
149
162
  if (name === import_assertTool.assertToolSchema.name) {
150
163
  const parsed = import_assertTool.assertToolSchema.inputSchema.parse(args || {});
151
164
  return this._handleAssert(parsed);
@@ -320,6 +333,120 @@ Reloaded current page: ${currentUrl}
320
333
  serverClosed() {
321
334
  void this._autoExportAndClose().catch(import_log.logUnhandledError);
322
335
  }
336
+ /**
337
+ * Execute a coordinate-based mouse action and track each decomposed sub-action
338
+ * (mouse.move/down/up/wheel or a position click) into _trackedActions so it is
339
+ * exported into the combined JSONL trace. The JSONL field shapes come straight
340
+ * from common/mouseActions, which is the single source of truth shared with
341
+ * the export and seeding paths.
342
+ */
343
+ async _handleMouseAction(params) {
344
+ const page = this._browserBackend.context?.currentTab()?.page;
345
+ const timestamp = Date.now();
346
+ const pageAlias = this._currentPageAlias;
347
+ const { result, actions } = await (0, import_mouseActionTool.executeMouseAction)(page, params);
348
+ if (result.isError)
349
+ return result;
350
+ actions.forEach((a, i) => {
351
+ this._trackedActions.push({
352
+ toolName: "browser_mouse_action",
353
+ args: a,
354
+ code: "",
355
+ // Stagger timestamps so a decomposed drag keeps its move<down<move<up order.
356
+ timestamp: timestamp + i,
357
+ pageAlias
358
+ });
359
+ });
360
+ traceDebug(`Tracked ${actions.length} mouse sub-action(s) for "${params.action}" on ${pageAlias}`);
361
+ return result;
362
+ }
363
+ /**
364
+ * Handle browser_visual_snapshot: record a `visualSnapshot` marker that
365
+ * exports to expect(...).toHaveScreenshot(filename), so the generated test
366
+ * pixel-compares against a baseline.
367
+ *
368
+ * This is marker-only by design: the baseline image is created/updated by
369
+ * Playwright on the first test run (into its snapshot dir), NOT captured here.
370
+ * Taking a live screenshot at record time would be throwaway, and browser_
371
+ * take_screenshot has no clip parameter, so a region screenshot could not even
372
+ * be honored — it would mislead by returning a full-viewport image. So we only
373
+ * emit the marker, mirroring browser_assert_api_request.
374
+ *
375
+ * For an element snapshot, the ref is still resolved to a durable selector via
376
+ * the same hover->selector path browser_assert uses (testid > role > text, with
377
+ * the snapshot-accessible-name fallback for brittle/Flutter ids); that also
378
+ * validates the ref exists. Iframe and GoJS-diagram snapshots from the recorder
379
+ * are out of scope here (see common/visualSnapshot.ts).
380
+ */
381
+ async _handleVisualSnapshot(params) {
382
+ const timestamp = Date.now();
383
+ const pageAlias = this._currentPageAlias;
384
+ const input = {
385
+ snapshotType: params.snapshotType,
386
+ filename: params.filename ?? this._nextSnapshotFilename(params.snapshotType),
387
+ fullPage: params.fullPage,
388
+ clip: params.clip
389
+ };
390
+ if (params.snapshotType === "element") {
391
+ if (!params.ref)
392
+ return { content: [{ type: "text", text: '### Error\nsnapshotType "element" requires a ref (from the latest browser_snapshot).' }], isError: true };
393
+ const resolved = await this._resolveRefToLocator(params.ref, params.element ?? "");
394
+ if (!resolved)
395
+ return { content: [{ type: "text", text: `### Error
396
+ Could not resolve a durable selector for ref=${params.ref}. Take a fresh browser_snapshot and retry, or use snapshotType "region".` }], isError: true };
397
+ input.selector = resolved.selector;
398
+ }
399
+ const built = (0, import_visualSnapshot.buildVisualSnapshotJsonl)(input);
400
+ if ("error" in built)
401
+ return { content: [{ type: "text", text: `### Error
402
+ ${built.error}` }], isError: true };
403
+ this._visualSnapshotCounters[params.snapshotType]++;
404
+ this._advanceSnapshotCounterFor(params.snapshotType, input.filename);
405
+ this._trackedActions.push({
406
+ toolName: "browser_visual_snapshot",
407
+ args: built.action,
408
+ code: "",
409
+ timestamp,
410
+ pageAlias
411
+ });
412
+ traceDebug(`Tracked visualSnapshot (${params.snapshotType}) "${input.filename}" on ${pageAlias}`);
413
+ const targetDesc = params.snapshotType === "element" ? ` (${input.selector})` : params.snapshotType === "region" && params.clip ? ` (clip ${params.clip.width}x${params.clip.height} at ${params.clip.x},${params.clip.y})` : params.fullPage ? " (full page)" : "";
414
+ return { content: [{ type: "text", text: `### Visual snapshot recorded
415
+ Baseline "${input.filename}" (${params.snapshotType})${targetDesc} recorded; the generated test will assert toHaveScreenshot against it. The baseline image is created on the first test run.` }] };
416
+ }
417
+ /** Next auto-generated baseline filename for a snapshot type (page-NNN.png, etc.). */
418
+ _nextSnapshotFilename(type) {
419
+ return (0, import_visualSnapshot.nextSnapshotFilename)(type, this._visualSnapshotCounters[type] + 1);
420
+ }
421
+ /**
422
+ * Resolve a snapshot ref to a durable Playwright selector, mirroring the
423
+ * element-resolution path of _handleAssert: hover to get the resolved code,
424
+ * parse it to a selector, and prefer a snapshot-accessible-name selector over
425
+ * a brittle raw-CSS id (the Flutter-durable fallback). Returns null if the ref
426
+ * can't be resolved.
427
+ */
428
+ async _resolveRefToLocator(ref, element) {
429
+ const hoverResult = await this._browserBackend.callTool("browser_hover", { element, ref });
430
+ if (hoverResult.isError)
431
+ return null;
432
+ const hoverCode = (0, import_response.parseResponse)(hoverResult)?.code ?? "";
433
+ const locatorMatch = hoverCode.match(/await\s+page\.(.*?)\.hover\(\)/s);
434
+ if (!locatorMatch)
435
+ return null;
436
+ const locatorExpr = locatorMatch[1].trim();
437
+ let parsed = this._codeToLocator(locatorExpr);
438
+ if (!parsed || parsed.locator.kind === "css") {
439
+ const snapResult = await this._browserBackend.callTool("browser_snapshot", {});
440
+ if (!snapResult.isError) {
441
+ const snapText = snapResult.content?.map((c) => c.type === "text" ? c.text : "").join("") || "";
442
+ const refLine = snapText.split("\n").find((l) => l.includes(`[ref=${ref}]`)) || "";
443
+ const fromSnapshot = this._extractLocatorForRef(refLine);
444
+ if (fromSnapshot)
445
+ parsed = fromSnapshot;
446
+ }
447
+ }
448
+ return parsed;
449
+ }
323
450
  /**
324
451
  * Load a prior Skyramp trace and replay it against the live browser, honoring
325
452
  * an optional stop point, then seed _trackedActions with the replayed actions
@@ -433,6 +560,8 @@ Continue recording with browser_* tools, then call skyramp_export_zip to write t
433
560
  const seeded = this._seedTrackedActionFields(a, locatorExpr);
434
561
  if (!seeded)
435
562
  return;
563
+ if (seeded.toolName === "browser_visual_snapshot")
564
+ this._advanceSnapshotCounterFor(seeded.args.snapshotType, seeded.args.filename);
436
565
  this._trackedActions.push({
437
566
  ...seeded,
438
567
  timestamp: action.startTime,
@@ -440,6 +569,19 @@ Continue recording with browser_* tools, then call skyramp_export_zip to write t
440
569
  framePath: action.frame.framePath?.length ? action.frame.framePath : void 0
441
570
  });
442
571
  }
572
+ /**
573
+ * Bump the per-type snapshot counter to at least the number embedded in a
574
+ * seeded baseline filename (`<prefix>-NNN.png`), so subsequently-recorded
575
+ * snapshots of that type don't reuse a loaded trace's filename. No-op if the
576
+ * filename doesn't carry a parseable counter.
577
+ */
578
+ _advanceSnapshotCounterFor(snapshotType, filename) {
579
+ if (!snapshotType || !filename || this._visualSnapshotCounters[snapshotType] === void 0)
580
+ return;
581
+ const n = (0, import_visualSnapshot.parseSnapshotCounter)(snapshotType, filename);
582
+ if (n !== null && n > this._visualSnapshotCounters[snapshotType])
583
+ this._visualSnapshotCounters[snapshotType] = n;
584
+ }
443
585
  /**
444
586
  * Build the { toolName, code, args } triple a seeded (replayed) action must
445
587
  * carry so it round-trips through skyRampExport.buildJsonlContent exactly as
@@ -455,6 +597,14 @@ Continue recording with browser_* tools, then call skyramp_export_zip to write t
455
597
  */
456
598
  _seedTrackedActionFields(a, locatorExpr) {
457
599
  const sq = (s) => `'${String(s).replace(/\\/g, "\\\\").replace(/'/g, "\\'").replace(/\n/g, "\\n").replace(/\r/g, "\\r").replace(/\t/g, "\\t")}'`;
600
+ if (a.name === "mouse.move" || a.name === "mouse.down" || a.name === "mouse.up" || a.name === "mouse.wheel") {
601
+ const args = this._seedMouseActionArgs(a);
602
+ if (!args)
603
+ return null;
604
+ return { toolName: "browser_mouse_action", code: "", args };
605
+ }
606
+ if (a.name === "click" && a.position && (!a.selector || a.selector === "body"))
607
+ return { toolName: "browser_mouse_action", code: "", args: { name: "click", selector: "body", position: a.position, button: a.button ?? "left", modifiers: a.modifiers ?? 0, clickCount: a.clickCount ?? 1 } };
458
608
  const SELECTOR_REQUIRED = /* @__PURE__ */ new Set(["click", "hover", "fill", "pressSequentially", "check", "uncheck", "select"]);
459
609
  if (SELECTOR_REQUIRED.has(a.name) && !locatorExpr)
460
610
  return null;
@@ -526,6 +676,73 @@ Continue recording with browser_* tools, then call skyramp_export_zip to write t
526
676
  return { toolName: "browser_assert", code: `assertChecked:${a.selector}:${!!a.checked}`, args: { type: "checked", selector: a.selector, checked: !!a.checked } };
527
677
  case "assertVisible":
528
678
  return { toolName: "browser_assert", code: `assertVisible:${a.selector}`, args: { type: "visible", selector: a.selector } };
679
+ case "visualSnapshot": {
680
+ const args = this._seedVisualSnapshotArgs(a);
681
+ return args ? { toolName: "browser_visual_snapshot", code: "", args } : null;
682
+ }
683
+ default:
684
+ return null;
685
+ }
686
+ }
687
+ /**
688
+ * Build the normalized JSONL args for a re-seeded visualSnapshot action,
689
+ * following the VisualSnapshotJsonl contract in common/visualSnapshot.ts. Only
690
+ * known fields per snapshotType are emitted; a snapshot missing its required
691
+ * field (element->selector, region->clip) or with an unsupported type
692
+ * (gojsDiagram is recorder-only) is rejected (returns null) so the caller
693
+ * skips it rather than exporting an invalid shape.
694
+ */
695
+ _seedVisualSnapshotArgs(a) {
696
+ if (!a.filename || !String(a.filename).toLowerCase().endsWith(".png"))
697
+ return null;
698
+ switch (a.snapshotType) {
699
+ case "page": {
700
+ const args = { name: "visualSnapshot", snapshotType: "page", filename: a.filename };
701
+ if (a.fullPage !== void 0)
702
+ args.fullPage = a.fullPage;
703
+ if (a.screenshotStyle !== void 0)
704
+ args.screenshotStyle = a.screenshotStyle;
705
+ return args;
706
+ }
707
+ case "element":
708
+ if (!a.selector)
709
+ return null;
710
+ return { name: "visualSnapshot", snapshotType: "element", filename: a.filename, selector: a.selector };
711
+ case "region": {
712
+ const c = a.clip;
713
+ if (!c || c.x === void 0 || c.y === void 0 || c.width === void 0 || c.height === void 0)
714
+ return null;
715
+ return { name: "visualSnapshot", snapshotType: "region", filename: a.filename, clip: { x: c.x, y: c.y, width: c.width, height: c.height } };
716
+ }
717
+ default:
718
+ return null;
719
+ }
720
+ }
721
+ /**
722
+ * Build the normalized JSONL args for a re-seeded coordinate mouse action
723
+ * (mouse.move/down/up/wheel), following the MouseJsonlAction contract in
724
+ * common/mouseActions.ts. Only known fields are emitted (so stray fields from
725
+ * the loaded trace line are not forwarded), wheel deltas/modifiers default to
726
+ * 0, and a move/wheel missing its position is rejected (returns null) so the
727
+ * caller skips it rather than exporting an invalid shape for the Go consumer.
728
+ */
729
+ _seedMouseActionArgs(a) {
730
+ switch (a.name) {
731
+ case "mouse.move": {
732
+ if (!a.position)
733
+ return null;
734
+ const args = { name: "mouse.move", position: a.position };
735
+ if (a.steps !== void 0)
736
+ args.steps = a.steps;
737
+ return args;
738
+ }
739
+ case "mouse.down":
740
+ case "mouse.up":
741
+ return a.button && a.button !== "left" ? { name: a.name, button: a.button } : { name: a.name };
742
+ case "mouse.wheel":
743
+ if (!a.position)
744
+ return null;
745
+ return { name: "mouse.wheel", position: a.position, deltaX: a.deltaX ?? 0, deltaY: a.deltaY ?? 0, modifiers: a.modifiers ?? 0 };
529
746
  default:
530
747
  return null;
531
748
  }
@@ -654,14 +871,26 @@ Could not extract selector from hover result.` }], isError: true };
654
871
  }
655
872
  const locatorExpr = locatorMatch[1].trim();
656
873
  let parsed = this._codeToLocator(locatorExpr);
657
- if (!parsed) {
874
+ const snapResult = await this._browserBackend.callTool("browser_snapshot", {});
875
+ if (snapResult.isError) {
876
+ const errText = snapResult.content?.[0]?.type === "text" ? snapResult.content[0].text : "";
658
877
  return { content: [{ type: "text", text: `### Assertion Failed
659
- Could not parse locator: ${locatorExpr}` }], isError: true };
878
+ Could not capture page snapshot to verify the assertion. ${errText}` }], isError: true };
660
879
  }
661
- const snapResult = await this._browserBackend.callTool("browser_snapshot", {});
662
880
  const snapText = snapResult.content?.map((c) => c.type === "text" ? c.text : "").join("") || "";
663
881
  const snapLines = snapText.split("\n");
664
882
  const refLine = snapLines.find((l) => l.includes(`[ref=${params.ref}]`)) || "";
883
+ if (!parsed || parsed.locator.kind === "css") {
884
+ const fromSnapshot = this._extractLocatorForRef(refLine);
885
+ if (fromSnapshot) {
886
+ traceDebug(`Resolved assert selector from snapshot accessible name (hover code was ${parsed ? "brittle CSS" : "unparseable"}: ${locatorExpr})`);
887
+ parsed = fromSnapshot;
888
+ }
889
+ }
890
+ if (!parsed) {
891
+ return { content: [{ type: "text", text: `### Assertion Failed
892
+ Could not parse locator: ${locatorExpr}` }], isError: true };
893
+ }
665
894
  const textMatch = refLine.match(/^\s*-\s*\w+\s+"([^"]*)"/);
666
895
  const elementText = textMatch?.[1] || "";
667
896
  const valueMatch = refLine.match(/\]:\s*(.+)$/);
@@ -739,6 +968,13 @@ ${details}` }]
739
968
  if (ariaRefMatch) {
740
969
  return null;
741
970
  }
971
+ const cssMatch = expr.match(/locator\(\s*['"]([#.][^'"]+)['"]\s*\)/);
972
+ if (cssMatch) {
973
+ return {
974
+ selector: cssMatch[1],
975
+ locator: { kind: "css", body: cssMatch[1], options: {} }
976
+ };
977
+ }
742
978
  return null;
743
979
  }
744
980
  /**
@@ -0,0 +1,63 @@
1
+ "use strict";
2
// CommonJS interop helpers (bundler-generated shape): they expose a set of
// lazily-evaluated exports on a plain object tagged with `__esModule`.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Define every entry of `all` on `target` as an enumerable getter, so each
 * export is resolved lazily when it is read.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the key equal to `except`, are left alone.
 * Enumerability follows the source descriptor. `desc` is scratch storage only.
 * Returns `to` in every case, including when `from` is not an object/function.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = from && typeof from === "object" || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (let key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
    });
  }
  return to;
};

/** Wrap a module namespace object into a CommonJS exports object flagged `__esModule`. */
var __toCommonJS = (mod) => {
  const tagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(tagged, mod);
};
19
+ var visualSnapshotTool_exports = {};
20
+ __export(visualSnapshotTool_exports, {
21
+ visualSnapshotMcpTool: () => visualSnapshotMcpTool,
22
+ visualSnapshotSchema: () => visualSnapshotSchema
23
+ });
24
+ module.exports = __toCommonJS(visualSnapshotTool_exports);
25
+ var import_mcpBundle = require("playwright-core/lib/mcpBundle");
26
+ var import_tool = require("../sdk/tool");
27
/**
 * Tool definition for `browser_visual_snapshot`: name, title, description,
 * zod input schema and tool type. Built once at module load.
 */
const visualSnapshotSchema = (() => {
  const { z } = import_mcpBundle;

  // Sentences of the tool description; joined with single spaces below.
  const descriptionParts = [
    "Record a visual-regression baseline: stores a toHaveScreenshot() assertion in the trace so the generated test pixel-compares the page/element/region against a baseline image on every run (the baseline is created on the test's first run).",
    "Use this to lock the visual appearance of a page, an element, or a screen region after a key action (e.g. a rendered chart, a styled component, a confirmation screen).",
    "The `snapshotType` parameter selects the target:",
    "- page: the whole page (set fullPage to capture the full scrollable page rather than just the viewport).",
    "- element: a single element, identified by its snapshot ref. The ref is resolved to a durable selector for the generated test.",
    "- region: a rectangular area given by clip (x, y, width, height) in viewport pixels, read from a normal (non-fullPage) screenshot.",
    "This complements browser_assert (which checks text/value/state): use browser_visual_snapshot when the thing to verify is how it LOOKS, not its text."
  ];

  // Rectangle used by snapshotType "region"; width and height must be positive.
  const clipShape = z.object({
    x: z.number().describe("Left edge, in viewport pixels (distance from the visible top-left, not the document top)."),
    y: z.number().describe("Top edge, in viewport pixels (distance from the visible top-left, not the document top)."),
    width: z.number().positive().describe("Region width in pixels."),
    height: z.number().positive().describe("Region height in pixels.")
  });

  const inputSchema = z.object({
    snapshotType: z.enum(["page", "element", "region"]).describe("What to capture: whole page, a single element (by ref), or a pixel region (by clip)."),
    ref: z.string().optional().describe('Element snapshot ref to capture. Required for snapshotType "element".'),
    element: z.string().optional().describe("Human-readable description of the element (paired with ref) for permission and logging."),
    fullPage: z.boolean().optional().describe('For snapshotType "page": capture the full scrollable page instead of just the viewport.'),
    clip: clipShape.optional().describe('For snapshotType "region": the rectangle to capture, in VIEWPORT pixels. Read these coordinates from a normal (viewport) browser_take_screenshot, NOT a fullPage one \u2014 the region is clipped to the visible viewport, so document/scrolled coordinates will be off.'),
    filename: z.string().optional().describe("Baseline filename. Auto-generated (page-NNN.png / el-NNN.png / region-NNN.png) when omitted.")
  });

  return {
    name: "browser_visual_snapshot",
    title: "Visual snapshot (screenshot baseline)",
    description: descriptionParts.join(" "),
    inputSchema,
    // Marker-only tool: it records a trace marker and does not mutate the page,
    // in line with the other marker tools (browser_assert, browser_assert_api_request).
    type: "readOnly"
  };
})();
56
/**
 * Convert `visualSnapshotSchema` into its MCP tool form.
 *
 * @returns {*} The result of `toMcpTool(visualSnapshotSchema)`.
 */
function visualSnapshotMcpTool() {
  const { toMcpTool } = import_tool;
  return toMcpTool(visualSnapshotSchema);
}
59
+ // Annotate the CommonJS export names for ESM import in node:
60
+ 0 && (module.exports = {
61
+ visualSnapshotMcpTool,
62
+ visualSnapshotSchema
63
+ });
@@ -227,6 +227,11 @@ function trackedActionToJsonl(action, pageGuid, timestamp) {
227
227
  }
228
228
  if (toolName === "browser_press_key")
229
229
  return JSON.stringify({ name: "press", key: args.key, modifiers: modifiersArrayToMask(args.modifiers), selector: "", ...base });
230
+ if (toolName === "browser_mouse_action") {
231
+ if (!args.name)
232
+ return null;
233
+ return JSON.stringify({ ...args, ...base });
234
+ }
230
235
  if (!code)
231
236
  return null;
232
237
  const extracted = extractLocatorFromCode(code);
@@ -347,6 +352,32 @@ function assertActionToJsonl(action, pageGuid, timestamp) {
347
352
  return null;
348
353
  }
349
354
  }
355
/**
 * Serialize a tracked `browser_visual_snapshot` action into one JSONL trace line.
 *
 * Emitted fields per snapshotType (followed by the shared signals / timestamp /
 * pageGuid / pageAlias / framePath fields):
 *   - page:    name, snapshotType, filename, `fullPage: true` when args.fullPage
 *              is truthy, and screenshotStyle when it is truthy.
 *   - element: name, snapshotType, filename, selector.
 *   - region:  name, snapshotType, filename, clip {x, y, width, height}.
 *
 * @param {object} action - Tracked action; reads `args`, `pageAlias`, `framePath`.
 * @param {string} pageGuid - Page GUID written onto the line.
 * @param {*} timestamp - Action timestamp; written as a string.
 * @returns {string|null} The JSON line, or null when the action cannot be
 *   exported: no args, filename missing or not ending in ".png", element
 *   without a selector, region without a fully numeric clip, or any other
 *   snapshotType.
 */
function visualSnapshotActionToJsonl(action, pageGuid, timestamp) {
  const args = action.args;
  if (!args || !args.filename || !String(args.filename).toLowerCase().endsWith(".png"))
    return null;
  const base = {
    signals: [],
    timestamp: String(timestamp),
    pageGuid,
    pageAlias: action.pageAlias ?? DEFAULT_PAGE_ALIAS,
    framePath: action.framePath ?? DEFAULT_FRAME_PATH
  };
  switch (args.snapshotType) {
    case "page":
      return JSON.stringify({
        name: "visualSnapshot",
        snapshotType: "page",
        filename: args.filename,
        ...(args.fullPage ? { fullPage: true } : {}),
        ...(args.screenshotStyle ? { screenshotStyle: args.screenshotStyle } : {}),
        ...base
      });
    case "element":
      if (!args.selector)
        return null;
      return JSON.stringify({ name: "visualSnapshot", snapshotType: "element", filename: args.filename, selector: args.selector, ...base });
    case "region": {
      const clip = args.clip;
      if (!clip)
        return null;
      const { x, y, width, height } = clip;
      // JSON.stringify turns NaN/Infinity into null and passes null and strings
      // through unchanged, so an `undefined`-only check would still emit an
      // unusable clip. Require four finite numbers before exporting.
      if (![x, y, width, height].every((value) => Number.isFinite(value)))
        return null;
      return JSON.stringify({ name: "visualSnapshot", snapshotType: "region", filename: args.filename, clip: { x, y, width, height }, ...base });
    }
    default:
      return null;
  }
}
350
381
  function selectorToLocator(selector) {
351
382
  const testidMatch = selector.match(/internal:testid=\[data-testid="([^"]+)"/);
352
383
  if (testidMatch)
@@ -513,6 +544,16 @@ function buildJsonlContent(actions, browserName, harPath) {
513
544
  actionCount++;
514
545
  continue;
515
546
  }
547
+ if (action.toolName === "browser_visual_snapshot") {
548
+ const vsLine = visualSnapshotActionToJsonl(action, pageGuid, action.timestamp);
549
+ if (vsLine) {
550
+ lines.push(vsLine);
551
+ actionCount++;
552
+ } else {
553
+ skipped.push(action.toolName);
554
+ }
555
+ continue;
556
+ }
516
557
  if ((action.toolName === "browser_type" || action.toolName === "browser_press_sequentially") && action.args.submit) {
517
558
  const fillLine = trackedActionToJsonl(action, pageGuid, action.timestamp);
518
559
  if (fillLine) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@skyramp/mcp",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "main": "build/index.js",
5
5
  "exports": {
6
6
  ".": "./build/index.js",