chrometools-mcp 3.5.0 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [3.5.1] - 2026-02-16
6
+
7
+ ### Fixed
8
+ - **APOM selector uniqueness** — `generateSelector()` in the APOM tree converter now verifies CSS selector uniqueness against the entire document instead of just the parent element. Fixes a critical bug where `click(id: "button_X")` could click the wrong element (e.g., a navigation button instead of an action button) when multiple elements shared the same class, such as `.ant-btn`
9
+ - **findElementsByText click timeout** — `executeElementAction` click now uses an adaptive strategy with a 5s timeout and JS fallback instead of raw Puppeteer `element.click()`, which could hang indefinitely on elements inside complex layouts (antd Tabs, scrollable containers)
10
+ - **findElementsByText non-unique selectors** — `getUniqueSelectorInPage` fallback now checks selector uniqueness at each level of path building (max depth 5→8), preventing clicks on wrong elements when multiple matches exist
11
+
12
+ ### Changed
13
+ - **Screenshot defaults optimized** — Default format changed from PNG/auto to JPEG quality 40 for screenshots returned in context (the save-to-file screenshot tool keeps its higher-quality defaults), reducing token usage from ~15-25k to ~5-10k tokens per screenshot
14
+ - **Action screenshots compressed** — Screenshots from click/findElementsByText/hover with `screenshot: true` now use lightweight JPEG (quality 40, maxWidth 800) instead of raw PNG, dramatically reducing context consumption
15
+ - **Jimp warmup** — Pre-warms Jimp image processor at server startup (non-blocking, after transport connect) to avoid cold-start delays on first screenshot
16
+
5
17
  ## [3.5.0] - 2026-02-16
6
18
 
7
19
  ### Added
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  **For AI Agents & Developers:**
10
10
  - 🎯 **56+ specialized tools** for browser automation - from simple clicks to Figma comparisons
11
- - 🧠 **APOM (Agent Page Object Model)** - AI-friendly page representation (~8-10k tokens vs 15-25k for screenshots)
11
+ - 🧠 **APOM (Agent Page Object Model)** - AI-friendly page representation (~8-10k tokens vs 5-10k for screenshots)
12
12
  - 🔄 **Persistent browser sessions** - pages stay open between commands for iterative workflows
13
13
  - ⚡ **Framework-aware** - handles React, Vue, Angular events and state updates automatically
14
14
  - 📸 **Visual testing** - compare designs pixel-by-pixel with Figma integration
@@ -322,7 +322,7 @@ executeScenario({ name: "login_flow", parameters: { email: "user@test.com" } })
322
322
  1. ✅ **`analyzePage()`** - PRIMARY tool for reading page content
323
323
  - Gets forms, inputs, buttons, links with current values
324
324
  - Use `refresh: true` after interactions to see updated state
325
- - Efficient: 2-5k tokens vs screenshot 15-25k
325
+ - Efficient: 2-5k tokens vs screenshot 5-10k
326
326
  2. ✅ **`findElementsByText()`** - Find specific elements by visible text
327
327
  3. ✅ **`getElement()`** - Get HTML of specific element
328
328
  4. ⚠️ **`executeScript()`** - LAST RESORT, only if above failed
@@ -397,7 +397,7 @@ executeScenario({ name: "login_flow", parameters: { email: "user@test.com" } })
397
397
  - `useLegacyFormat` (optional): Return legacy format instead of APOM (default: false - APOM is the default)
398
398
  - `registerElements` (optional): Auto-register elements for ID-based usage (default: true) - `groupBy` (optional): 'type' or 'flat' - how to group elements (default: 'type') - **Why better than screenshot**:
399
399
  - Shows actual data (form values, validation errors) not just visual
400
- - Uses 2-5k tokens vs screenshot 15-25k tokens
400
+ - Uses 2-5k tokens vs screenshot 5-10k tokens
401
401
  - Returns structured data with **unique element IDs** for easy interaction
402
402
  - **Detects UI frameworks** (MUI, Ant Design, Chakra, Bootstrap, Vuetify, Semantic UI) - **Extracts dropdown options** from both native `<select>` and custom UI components - **Returns**:
403
403
  - **APOM format** (default): Tree-structured Page Object Model with unique IDs - `tree` - Hierarchical tree of page elements (optimized: ~82% smaller than flat format)
@@ -674,11 +674,11 @@ Capture optimized screenshot of specific element with smart compression and auto
674
674
  - `padding` (optional): Padding in pixels (default: 0)
675
675
  - `maxWidth` (optional): Max width for auto-scaling (default: 1024, null for original size)
676
676
  - `maxHeight` (optional): Max height for auto-scaling (default: 8000, null for original size)
677
- - `quality` (optional): JPEG quality 1-100 (default: 80)
678
- - `format` (optional): 'png', 'jpeg', or 'auto' (default: 'auto')
677
+ - `quality` (optional): JPEG quality 1-100 (default: 40)
678
+ - `format` (optional): 'png', 'jpeg', or 'auto' (default: 'jpeg')
679
679
  - **Use case**: Visual documentation, bug reports
680
- - **Returns**: Optimized image with metadata
681
- - **Default behavior**: Auto-scales to 1024px width and 8000px height (API limit) and uses smart compression to reduce AI token usage
680
+ - **Returns**: Optimized image with metadata (~5-10k tokens)
681
+ - **Default behavior**: JPEG at quality 40, auto-scales to 1024px width and 8000px height (API limit). For higher quality, explicitly set `quality` and `format` parameters
682
682
  - **Automatic compression**: If image exceeds 3 MB, automatically reduces quality or scales down to fit within limit
683
683
  - **For original quality**: Set `maxWidth: null`, `maxHeight: null` and `format: 'png'` (still enforces 3 MB limit)
684
684
 
@@ -692,7 +692,7 @@ Save optimized screenshot to filesystem without returning in context, with autom
692
692
  - `maxHeight` (optional): Max height for auto-scaling (default: 8000, null for original)
693
693
  - `quality` (optional): JPEG quality 1-100 (default: 80)
694
694
  - `format` (optional): 'png', 'jpeg', or 'auto' (default: 'auto')
695
- - **Use case**: Baseline screenshots, file storage
695
+ - **Use case**: Baseline screenshots, file storage (higher quality defaults than `screenshot` tool)
696
696
  - **Returns**: File path and metadata (not image data)
697
697
  - **Default behavior**: Auto-scales and compresses to save disk space
698
698
  - **Automatic compression**: If image exceeds 3 MB, automatically reduces quality or scales down to fit within limit
@@ -462,8 +462,10 @@ function getUniqueSelectorInPage(element) {
462
462
  }
463
463
 
464
464
  // 8. Fallback: nth-of-type with path
465
+ // Build path up to 8 levels, verifying uniqueness
465
466
  let current = element;
466
467
  const path = [];
468
+ const MAX_PATH_DEPTH = 8;
467
469
 
468
470
  while (current && current.tagName) {
469
471
  let selector = current.tagName.toLowerCase();
@@ -497,10 +499,21 @@ function getUniqueSelectorInPage(element) {
497
499
  }
498
500
 
499
501
  path.unshift(selector);
502
+
503
+ // Check if current path is already unique
504
+ try {
505
+ const candidateSelector = path.join(' > ');
506
+ if (document.querySelectorAll(candidateSelector).length === 1) {
507
+ return candidateSelector;
508
+ }
509
+ } catch (e) {
510
+ // Invalid selector, continue building path
511
+ }
512
+
500
513
  current = current.parentElement;
501
514
 
502
- // Stop at body or after 5 levels
503
- if (!current || current.tagName === 'BODY' || path.length >= 5) {
515
+ // Stop at body or after max depth
516
+ if (!current || current.tagName === 'BODY' || path.length >= MAX_PATH_DEPTH) {
504
517
  break;
505
518
  }
506
519
  }
package/index.js CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env node
2
2
 
3
3
  import {Server} from "@modelcontextprotocol/sdk/server/index.js";
4
4
  import {StdioServerTransport} from "@modelcontextprotocol/sdk/server/stdio.js";
@@ -2334,7 +2334,7 @@ Start coding now.`;
2334
2334
  return {
2335
2335
  content: [
2336
2336
  { type: 'text', text: JSON.stringify(response, null, 2) },
2337
- { type: 'image', data: actionResult.screenshot, mimeType: 'image/png' }
2337
+ { type: 'image', data: actionResult.screenshot, mimeType: actionResult.screenshotMimeType || 'image/png' }
2338
2338
  ]
2339
2339
  };
2340
2340
  }
@@ -2760,7 +2760,7 @@ Start coding now.`;
2760
2760
  return {
2761
2761
  content: [
2762
2762
  { type: 'text', text: JSON.stringify(response, null, 2) },
2763
- { type: 'image', data: actionResult.screenshot, mimeType: 'image/png' }
2763
+ { type: 'image', data: actionResult.screenshot, mimeType: actionResult.screenshotMimeType || 'image/png' }
2764
2764
  ]
2765
2765
  };
2766
2766
  }
@@ -3938,6 +3938,19 @@ async function main() {
3938
3938
 
3939
3939
  console.error("chrometools-mcp server running on stdio");
3940
3940
  console.error("Browser will be initialized on first openBrowser call");
3941
+
3942
+ // Pre-warm Jimp AFTER server is connected (non-blocking)
3943
+ // Jimp v0.22 constructor is thenable - awaiting it before server.connect()
3944
+ // would block transport and cause MCP client timeout
3945
+ (async () => {
3946
+ try {
3947
+ const img = await new Jimp(1, 1, 0x000000ff);
3948
+ await img.getBufferAsync(Jimp.MIME_JPEG);
3949
+ console.error("[chrometools-mcp] Jimp pre-warmed");
3950
+ } catch (e) {
3951
+ console.error("[chrometools-mcp] Jimp pre-warm failed:", e.message);
3952
+ }
3953
+ })();
3941
3954
  }
3942
3955
 
3943
3956
  main().catch((error) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "chrometools-mcp",
3
- "version": "3.5.0",
3
+ "version": "3.5.1",
4
4
  "description": "MCP (Model Context Protocol) server for Chrome automation using Puppeteer. Persistent browser sessions, UI framework detection (MUI, Ant Design, etc.), Page Object support, visual testing, Figma comparison. Works seamlessly in WSL, Linux, macOS, and Windows.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -1003,30 +1003,29 @@ function buildAPOMTree(interactiveOnly = true, viewportOnly = false) {
1003
1003
  if (stableClass) {
1004
1004
  const escapedClass = CSS.escape(stableClass);
1005
1005
  const classSelector = `.${escapedClass}`;
1006
- // Verify it's unique within parent context
1007
- if (element.parentElement) {
1008
- try {
1009
- const matches = element.parentElement.querySelectorAll(classSelector);
1010
- if (matches.length === 1 && matches[0] === element) {
1011
- return classSelector;
1012
- }
1013
- } catch (e) {
1014
- // Invalid selector, continue to path-based approach
1006
+ // Verify it's unique in the ENTIRE document (not just parent)
1007
+ try {
1008
+ const matches = document.querySelectorAll(classSelector);
1009
+ if (matches.length === 1 && matches[0] === element) {
1010
+ return classSelector;
1015
1011
  }
1012
+ } catch (e) {
1013
+ // Invalid selector, continue to path-based approach
1016
1014
  }
1017
1015
  }
1018
1016
 
1019
- // Build path from parent
1017
+ // Build path from element to body, checking uniqueness at each level
1020
1018
  const path = [];
1021
1019
  let current = element;
1020
+ const MAX_PATH_DEPTH = 8;
1022
1021
 
1023
- while (current && current !== document.body) {
1022
+ while (current && current !== document.body && path.length < MAX_PATH_DEPTH) {
1024
1023
  let selector = current.tagName.toLowerCase();
1025
1024
 
1026
1025
  // Add stable class if available (escaped for CSS selector safety)
1027
- const stableClass = getStableClassName(current);
1028
- if (stableClass) {
1029
- selector += `.${CSS.escape(stableClass)}`;
1026
+ const cls = getStableClassName(current);
1027
+ if (cls) {
1028
+ selector += `.${CSS.escape(cls)}`;
1030
1029
  }
1031
1030
 
1032
1031
  // Add nth-of-type if needed
@@ -1041,6 +1040,15 @@ function buildAPOMTree(interactiveOnly = true, viewportOnly = false) {
1041
1040
  }
1042
1041
 
1043
1042
  path.unshift(selector);
1043
+
1044
+ // Check if current path is already unique in the document
1045
+ try {
1046
+ const candidateSelector = path.join(' > ');
1047
+ if (document.querySelectorAll(candidateSelector).length === 1) {
1048
+ return candidateSelector;
1049
+ }
1050
+ } catch (e) { /* continue building path */ }
1051
+
1044
1052
  current = current.parentElement;
1045
1053
  }
1046
1054
 
@@ -92,7 +92,7 @@ export const toolDefinitions = [
92
92
  },
93
93
  {
94
94
  name: "screenshot",
95
- description: "Capture element image (15-25k tokens). Use analyzePage for form data/validation (8-10k tokens).",
95
+ description: "Capture element image (5-10k tokens). Use analyzePage for form data/validation (8-10k tokens).",
96
96
  inputSchema: {
97
97
  type: "object",
98
98
  properties: {
@@ -100,8 +100,8 @@ export const toolDefinitions = [
100
100
  padding: { type: "number", description: "Padding px (default: 0)" },
101
101
  maxWidth: { type: "number", description: "Max width px (default: 1024, null=original)" },
102
102
  maxHeight: { type: "number", description: "Max height px (default: 8000, null=original)" },
103
- quality: { type: "number", minimum: 1, maximum: 100, description: "JPEG quality (default: 80)" },
104
- format: { type: "string", enum: ["png", "jpeg", "auto"], description: "Format (default: auto)" },
103
+ quality: { type: "number", minimum: 1, maximum: 100, description: "JPEG quality (default: 40)" },
104
+ format: { type: "string", enum: ["png", "jpeg", "auto"], description: "Format (default: jpeg)" },
105
105
  },
106
106
  required: ["selector"],
107
107
  },
@@ -115,8 +115,8 @@ export const ScreenshotSchema = z.object({
115
115
  padding: z.number().optional().describe("Padding around element in pixels (default: 0)"),
116
116
  maxWidth: z.number().nullable().optional().describe("Maximum width in pixels, auto-scales if larger (default: 1024, set to null for original size)"),
117
117
  maxHeight: z.number().nullable().optional().describe("Maximum height in pixels, auto-scales if larger (default: 8000 for API limit, set to null for original size)"),
118
- quality: z.number().min(1).max(100).optional().describe("JPEG quality 1-100 (default: 80, only applies to JPEG format)"),
119
- format: z.enum(['png', 'jpeg', 'auto']).optional().describe("Image format: 'png', 'jpeg', or 'auto' (default: 'auto' - chooses based on size)"),
118
+ quality: z.number().min(1).max(100).optional().describe("JPEG quality 1-100 (default: 40)"),
119
+ format: z.enum(['png', 'jpeg', 'auto']).optional().describe("Image format (default: 'jpeg')"),
120
120
  }).refine(data => (data.id && !data.selector) || (!data.id && data.selector), {
121
121
  message: "Either 'id' or 'selector' must be provided, but not both"
122
122
  });
@@ -5,6 +5,7 @@
5
5
 
6
6
  import { runPostClickDiagnostics, formatDiagnosticsForAI } from '../post-click-diagnostics.js';
7
7
  import { generateClickHints } from '../hints-generator.js';
8
+ import { processScreenshot } from '../screenshot-processor.js';
8
9
 
9
10
  /**
10
11
  * Execute click action on element with adaptive strategy
@@ -186,10 +187,16 @@ export async function executeClickAction(page, element, options = {}) {
186
187
  { type: "text", text: `Clicked: ${identifier}${hintsText}${diagnosticsText}` }
187
188
  ];
188
189
 
189
- // Only add screenshot if requested
190
+ // Only add screenshot if requested — lightweight JPEG for action confirmation
190
191
  if (screenshot === true) {
191
- const screenshotData = await page.screenshot({ encoding: 'base64', fullPage: false });
192
- content.push({ type: "image", data: screenshotData, mimeType: "image/png" });
192
+ const screenshotBuffer = await page.screenshot({ encoding: 'binary', fullPage: false });
193
+ const processed = await processScreenshot(screenshotBuffer, {
194
+ maxWidth: 800,
195
+ maxHeight: 4000,
196
+ quality: 40,
197
+ format: 'jpeg',
198
+ });
199
+ content.push({ type: "image", data: processed.buffer.toString('base64'), mimeType: processed.mimeType });
193
200
  }
194
201
 
195
202
  return { content };
@@ -14,8 +14,8 @@ import { processScreenshot } from '../image-processing.js';
14
14
  * @param {number} options.padding - Padding around element in pixels (default: 0)
15
15
  * @param {number|null} options.maxWidth - Max width for scaling (default: 1024, null for original)
16
16
  * @param {number|null} options.maxHeight - Max height for scaling (default: 8000, null for original)
17
- * @param {number} options.quality - JPEG quality 1-100 (default: 80)
18
- * @param {string} options.format - Image format: 'png', 'jpeg', 'auto' (default: 'auto')
17
+ * @param {number} options.quality - JPEG quality 1-100 (default: 40)
18
+ * @param {string} options.format - Image format: 'png', 'jpeg', 'auto' (default: 'jpeg')
19
19
  * @returns {Promise<Object>} Result with content array (text + image)
20
20
  */
21
21
  export async function executeScreenshotAction(page, element, options = {}) {
@@ -24,8 +24,8 @@ export async function executeScreenshotAction(page, element, options = {}) {
24
24
  padding = 0,
25
25
  maxWidth = 1024,
26
26
  maxHeight = 8000,
27
- quality = 80,
28
- format = 'auto'
27
+ quality = 40,
28
+ format = 'jpeg'
29
29
  } = options;
30
30
 
31
31
  // Scroll to element to ensure it's in viewport
@@ -1,3 +1,25 @@
1
+ import { processScreenshot } from './screenshot-processor.js';
2
+
3
+ // Lightweight action screenshot: small JPEG for confirming actions worked
4
+ // These are "report" screenshots, not for detailed analysis
5
+ async function takeActionScreenshot(page, clip) {
6
+ const screenshotBuffer = await page.screenshot({
7
+ encoding: 'binary',
8
+ fullPage: false,
9
+ ...(clip ? { clip } : {})
10
+ });
11
+ const processed = await processScreenshot(screenshotBuffer, {
12
+ maxWidth: 800,
13
+ maxHeight: 4000,
14
+ quality: 40,
15
+ format: 'jpeg',
16
+ });
17
+ return {
18
+ data: processed.buffer.toString('base64'),
19
+ mimeType: processed.mimeType,
20
+ };
21
+ }
22
+
1
23
  // Helper function to execute actions on elements
2
24
  export async function executeElementAction(page, selector, action) {
3
25
  if (!action || !action.type) {
@@ -17,13 +39,27 @@ export async function executeElementAction(page, selector, action) {
17
39
 
18
40
  switch (action.type) {
19
41
  case 'click':
20
- await element.click();
42
+ // Scroll element into view first (direct JS, avoids Puppeteer's scrollIntoViewIfNeeded hang)
43
+ await element.evaluate(el => el.scrollIntoView({ behavior: 'instant', block: 'center' }));
44
+
45
+ // Click with timeout + JS fallback (Puppeteer's click can hang in complex layouts)
46
+ try {
47
+ await Promise.race([
48
+ element.click(),
49
+ new Promise((_, reject) => setTimeout(() => reject(new Error('click timeout')), 5000))
50
+ ]);
51
+ } catch (e) {
52
+ // Fallback to JS click (bypasses Puppeteer's coordinate-based click)
53
+ await element.evaluate(el => el.click());
54
+ }
55
+
21
56
  await new Promise(resolve => setTimeout(resolve, action.waitAfter || 1500));
22
57
  result.message = `Clicked on ${selector}`;
23
58
 
24
59
  if (action.screenshot) {
25
- const screenshot = await page.screenshot({ encoding: 'base64', fullPage: false });
26
- result.screenshot = screenshot;
60
+ const { data, mimeType } = await takeActionScreenshot(page);
61
+ result.screenshot = data;
62
+ result.screenshotMimeType = mimeType;
27
63
  }
28
64
  break;
29
65
 
@@ -38,8 +74,9 @@ export async function executeElementAction(page, selector, action) {
38
74
  result.message = `Typed "${action.text}" into ${selector}`;
39
75
 
40
76
  if (action.screenshot) {
41
- const screenshot = await page.screenshot({ encoding: 'base64', fullPage: false });
42
- result.screenshot = screenshot;
77
+ const { data, mimeType } = await takeActionScreenshot(page);
78
+ result.screenshot = data;
79
+ result.screenshotMimeType = mimeType;
43
80
  }
44
81
  break;
45
82
 
@@ -65,9 +102,10 @@ export async function executeElementAction(page, selector, action) {
65
102
  width: Math.max(box.width, 1),
66
103
  height: Math.max(box.height, 1)
67
104
  };
68
- const screenshot = await page.screenshot({ clip, encoding: 'base64' });
105
+ const { data: screenshotData, mimeType: screenshotMime } = await takeActionScreenshot(page, clip);
69
106
  result.message = `Captured screenshot of ${selector}`;
70
- result.screenshot = screenshot;
107
+ result.screenshot = screenshotData;
108
+ result.screenshotMimeType = screenshotMime;
71
109
  break;
72
110
 
73
111
  case 'hover':
@@ -76,8 +114,9 @@ export async function executeElementAction(page, selector, action) {
76
114
  result.message = `Hovered over ${selector}`;
77
115
 
78
116
  if (action.screenshot) {
79
- const screenshot = await page.screenshot({ encoding: 'base64', fullPage: false });
80
- result.screenshot = screenshot;
117
+ const { data, mimeType } = await takeActionScreenshot(page);
118
+ result.screenshot = data;
119
+ result.screenshotMimeType = mimeType;
81
120
  }
82
121
  break;
83
122
 
@@ -101,8 +140,9 @@ export async function executeElementAction(page, selector, action) {
101
140
  result.message = `Applied styles to ${selector}`;
102
141
 
103
142
  if (action.screenshot) {
104
- const screenshot = await page.screenshot({ encoding: 'base64', fullPage: false });
105
- result.screenshot = screenshot;
143
+ const { data, mimeType } = await takeActionScreenshot(page);
144
+ result.screenshot = data;
145
+ result.screenshotMimeType = mimeType;
106
146
  }
107
147
  break;
108
148