@yusufffararatt/dombridge-mcp 2.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +559 -0
- package/bin/cli.js +88 -0
- package/package.json +54 -0
- package/src/bridge/http-server.js +290 -0
- package/src/bridge/middleware.js +56 -0
- package/src/bridge/routes.js +1003 -0
- package/src/bridge-daemon.js +172 -0
- package/src/cli/auto-config.js +120 -0
- package/src/constants.js +13 -0
- package/src/index.js +279 -0
- package/src/mcp-bridge.js +136 -0
- package/src/metrics/error-codes.js +44 -0
- package/src/metrics/index.js +3 -0
- package/src/metrics/metrics-db.js +269 -0
- package/src/metrics/metrics-recorder.js +240 -0
- package/src/metrics/metrics-report.js +146 -0
- package/src/profiles/profile-db.js +159 -0
- package/src/profiles/profile-enricher.js +333 -0
- package/src/profiles/profile-manager.js +563 -0
- package/src/profiles/profile-repo.js +183 -0
- package/src/state/bridge-client.js +272 -0
- package/src/state/bridge-persistence.js +205 -0
- package/src/state/cache.js +38 -0
- package/src/state/extension-state.js +321 -0
- package/src/tools/action_tools.js +218 -0
- package/src/tools/analyze-page.js +247 -0
- package/src/tools/debug-mcp-state.js +172 -0
- package/src/tools/discover-apis.js +186 -0
- package/src/tools/execute-js.js +284 -0
- package/src/tools/export-session.js +171 -0
- package/src/tools/extract-data.js +395 -0
- package/src/tools/get-element.js +281 -0
- package/src/tools/get-network-trace.js +471 -0
- package/src/tools/index.js +110 -0
- package/src/tools/manage-site-profile.js +153 -0
- package/src/tools/paginate.js +444 -0
- package/src/tools/quick-scan.js +418 -0
- package/src/tools/screenshot_tools.js +117 -0
- package/src/utils/circuit-breaker.js +112 -0
- package/src/utils/extract-density.js +21 -0
- package/src/utils/logger.js +31 -0
- package/src/utils/paginate-detector.js +24 -0
- package/src/utils/rate-limiter.js +244 -0
- package/src/utils/run-script.js +37 -0
- package/src/utils/selector-validator.js +95 -0
- package/src/utils/state-validator.js +354 -0
- package/src/utils/tab-resolver.js +70 -0
- package/src/utils/workflow-helper.js +292 -0
- package/src/utils/workflow-state.js +177 -0
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract Data Tool
|
|
3
|
+
* Scans embedded/SSR data and stores scraper-relevant paths in the profile.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { buildProfileEnrichment } from '../profiles/profile-enricher.js';
|
|
7
|
+
import { loadProfile, saveProfile } from '../profiles/profile-manager.js';
|
|
8
|
+
import { resolveActiveDomain } from '../utils/tab-resolver.js';
|
|
9
|
+
import { filterNoiseSources } from '../utils/extract-density.js';
|
|
10
|
+
|
|
11
|
+
export const EXTRACT_DATA_SCRIPT = `(function() {
|
|
12
|
+
var sources = [];
|
|
13
|
+
|
|
14
|
+
var propsKeys = Object.keys(window).filter(function(k) {
|
|
15
|
+
return /__PROPS$|__PROPS__$|__DATA$|__DATA__$|__STATE$|__STATE__$|__STORE$/.test(k);
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
propsKeys.forEach(function(key) {
|
|
19
|
+
try {
|
|
20
|
+
var val = window[key];
|
|
21
|
+
if (val && typeof val === 'object') {
|
|
22
|
+
sources.push({ key: key, type: 'window_props', data: val });
|
|
23
|
+
}
|
|
24
|
+
} catch(e) {}
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
if (window.__NEXT_DATA__ && window.__NEXT_DATA__.props) {
|
|
28
|
+
sources.push({ key: '__NEXT_DATA__.props', type: 'next_ssr', data: window.__NEXT_DATA__.props });
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Next.js App Router: __next_f is an array of [hash, chunk] tuples
|
|
32
|
+
// Chunks come in two formats:
|
|
33
|
+
// 1. Standard JSON: parseable directly via JSON.parse
|
|
34
|
+
// 2. RSC stream: multi-line "id:value" pairs where value may be JSON
|
|
35
|
+
// We try JSON.parse first; if it fails, we extract "id:{...}" lines as JSON objects.
|
|
36
|
+
if (Array.isArray(window.__next_f)) {
|
|
37
|
+
try {
|
|
38
|
+
var merged = {};
|
|
39
|
+
var collisionKeys = [];
|
|
40
|
+
var chunks = [];
|
|
41
|
+
var rscLines = [];
|
|
42
|
+
window.__next_f.forEach(function(item) {
|
|
43
|
+
if (Array.isArray(item) && item.length >= 2) {
|
|
44
|
+
try {
|
|
45
|
+
chunks.push(item[1]);
|
|
46
|
+
// Try 1: direct JSON.parse (standard format)
|
|
47
|
+
var parsed = JSON.parse(item[1]);
|
|
48
|
+
if (parsed && typeof parsed === 'object') {
|
|
49
|
+
Object.keys(parsed).forEach(function(k) {
|
|
50
|
+
if (merged.hasOwnProperty(k) && collisionKeys.indexOf(k) === -1) {
|
|
51
|
+
collisionKeys.push(k);
|
|
52
|
+
}
|
|
53
|
+
merged[k] = parsed[k];
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
} catch(e) {
|
|
57
|
+
// Try 2: RSC stream format — each line is "id:value"
|
|
58
|
+
// Extract lines where value starts with '{' (JSON object)
|
|
59
|
+
var lines = item[1].split('\\n');
|
|
60
|
+
lines.forEach(function(line) {
|
|
61
|
+
var colonIdx = line.indexOf(':');
|
|
62
|
+
if (colonIdx > 0) {
|
|
63
|
+
var value = line.substring(colonIdx + 1);
|
|
64
|
+
if (value.charAt(0) === '{' || value.charAt(0) === '[') {
|
|
65
|
+
try {
|
|
66
|
+
var obj = JSON.parse(value);
|
|
67
|
+
if (obj && typeof obj === 'object') {
|
|
68
|
+
var objectsToMerge = [];
|
|
69
|
+
var findTopLevelObjects = function(val) {
|
|
70
|
+
if (!val || typeof val !== 'object') return;
|
|
71
|
+
if (Array.isArray(val)) {
|
|
72
|
+
val.forEach(findTopLevelObjects);
|
|
73
|
+
} else {
|
|
74
|
+
objectsToMerge.push(val);
|
|
75
|
+
}
|
|
76
|
+
};
|
|
77
|
+
findTopLevelObjects(obj);
|
|
78
|
+
|
|
79
|
+
objectsToMerge.forEach(function(targetObj) {
|
|
80
|
+
rscLines.push({ id: line.substring(0, colonIdx), keys: Object.keys(targetObj).slice(0, 10) });
|
|
81
|
+
Object.keys(targetObj).forEach(function(k) {
|
|
82
|
+
if (merged.hasOwnProperty(k) && collisionKeys.indexOf(k) === -1) {
|
|
83
|
+
collisionKeys.push(k);
|
|
84
|
+
}
|
|
85
|
+
merged[k] = targetObj[k];
|
|
86
|
+
});
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
} catch(e2) { /* not a JSON object/array — skip */ }
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
});
|
|
96
|
+
if (Object.keys(merged).length > 0 || chunks.length > 0) {
|
|
97
|
+
var data = Object.keys(merged).length > 0 ? merged : { __rawChunks: chunks.length, __note: 'RSC payload — use execute_js to explore individual chunks' };
|
|
98
|
+
if (collisionKeys.length > 0) {
|
|
99
|
+
data.__collisionKeys = collisionKeys;
|
|
100
|
+
data.__note = (data.__note ? data.__note + ' ' : '') + 'Overlapping keys across chunks: ' + collisionKeys.join(', ') + '. Later values win. Use execute_js to inspect individual chunks.';
|
|
101
|
+
}
|
|
102
|
+
if (rscLines.length > 0) {
|
|
103
|
+
data.__rscParseMode = 'stream';
|
|
104
|
+
data.__rscObjectCount = rscLines.length;
|
|
105
|
+
}
|
|
106
|
+
sources.push({ key: '__next_f', type: 'next_rsc', data: data });
|
|
107
|
+
}
|
|
108
|
+
} catch(e) {}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// Next.js _N_E objects (standard JSON, used by Next.js runtime)
|
|
112
|
+
if (window._N_E && typeof window._N_E === 'object') {
|
|
113
|
+
sources.push({ key: '_N_E', type: 'next_runtime', data: window._N_E });
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
if (window.__NUXT__ && window.__NUXT__.data) {
|
|
117
|
+
sources.push({ key: '__NUXT__.data', type: 'nuxt_ssr', data: window.__NUXT__.data });
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
var storeKeys = ['__REDUX_STATE__', '__INITIAL_STATE__', '__INITIAL_DATA__', '__APP_STATE__',
|
|
121
|
+
'__STORE__', '__store__', '__STATE__', '__PRELOADED_STATE__'];
|
|
122
|
+
storeKeys.forEach(function(key) {
|
|
123
|
+
try {
|
|
124
|
+
if (window[key] && typeof window[key] === 'object' && !propsKeys.includes(key)) {
|
|
125
|
+
sources.push({ key: key, type: 'store', data: window[key] });
|
|
126
|
+
}
|
|
127
|
+
} catch(e) {}
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
var jsonScripts = document.querySelectorAll('script[type="application/json"]');
|
|
131
|
+
jsonScripts.forEach(function(el, i) {
|
|
132
|
+
try {
|
|
133
|
+
var parsed = JSON.parse(el.textContent);
|
|
134
|
+
if (parsed && typeof parsed === 'object') {
|
|
135
|
+
sources.push({ key: 'script[type=application/json][' + i + ']', type: 'inline_json', data: parsed });
|
|
136
|
+
}
|
|
137
|
+
} catch(e) {}
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
if (sources.length === 0) return { found: false, sources: [] };
|
|
141
|
+
|
|
142
|
+
var inferType = function(val) {
|
|
143
|
+
if (val === null) return 'null';
|
|
144
|
+
if (Array.isArray(val)) return 'array';
|
|
145
|
+
return typeof val;
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
var buildSchema = function(obj, path, depth, maxDepth) {
|
|
149
|
+
var schema = {};
|
|
150
|
+
if (depth > maxDepth) return { __truncated: true };
|
|
151
|
+
if (typeof obj !== 'object' || obj === null) return {};
|
|
152
|
+
|
|
153
|
+
var keys = Array.isArray(obj) ? [] : Object.keys(obj).slice(0, 30);
|
|
154
|
+
if (Array.isArray(obj)) {
|
|
155
|
+
if (obj.length === 0) return { __empty_array: true };
|
|
156
|
+
var firstItem = obj[0];
|
|
157
|
+
var itemSchema = (firstItem && typeof firstItem === 'object')
|
|
158
|
+
? buildSchema(firstItem, path + '[]', depth + 1, maxDepth)
|
|
159
|
+
: { __type: inferType(firstItem), __example: String(firstItem).substring(0, 50) };
|
|
160
|
+
return { __type: 'array', __length: obj.length, __items: itemSchema };
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
keys.forEach(function(k) {
|
|
164
|
+
var val = obj[k];
|
|
165
|
+
var t = inferType(val);
|
|
166
|
+
if (t === 'object' || t === 'array') {
|
|
167
|
+
schema[k] = buildSchema(val, path + '.' + k, depth + 1, maxDepth);
|
|
168
|
+
} else if (t !== 'function') {
|
|
169
|
+
var example = val !== null && val !== undefined ? String(val).substring(0, 80) : null;
|
|
170
|
+
schema[k] = { __type: t, __example: example };
|
|
171
|
+
}
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
if (Object.keys(obj).length > 30) {
|
|
175
|
+
schema.__note = '... ' + (Object.keys(obj).length - 30) + ' more keys truncated';
|
|
176
|
+
}
|
|
177
|
+
return schema;
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
var collectLeafPaths = function(obj, prefix, result, depth, maxDepth) {
|
|
181
|
+
if (depth > maxDepth) return;
|
|
182
|
+
if (typeof obj !== 'object' || obj === null) return;
|
|
183
|
+
|
|
184
|
+
var keys = Array.isArray(obj)
|
|
185
|
+
? (obj.length > 0 ? ['0'] : [])
|
|
186
|
+
: Object.keys(obj).slice(0, 20);
|
|
187
|
+
|
|
188
|
+
keys.forEach(function(k) {
|
|
189
|
+
var val = Array.isArray(obj) ? obj[0] : obj[k];
|
|
190
|
+
var path = Array.isArray(obj) ? prefix + '[]' : prefix + '.' + k;
|
|
191
|
+
var t = inferType(val);
|
|
192
|
+
if (t === 'object' || t === 'array') {
|
|
193
|
+
collectLeafPaths(val, path, result, depth + 1, maxDepth);
|
|
194
|
+
} else if (t !== 'function') {
|
|
195
|
+
var example = val !== null && val !== undefined ? String(val).substring(0, 60) : null;
|
|
196
|
+
result.push({ path: path, type: t, example: example });
|
|
197
|
+
}
|
|
198
|
+
});
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
var MAX_DEPTH = 5;
|
|
202
|
+
return {
|
|
203
|
+
found: true,
|
|
204
|
+
sources: sources.map(function(src) {
|
|
205
|
+
var schema = buildSchema(src.data, src.key, 0, MAX_DEPTH);
|
|
206
|
+
var leafPaths = [];
|
|
207
|
+
collectLeafPaths(src.data, src.key, leafPaths, 0, MAX_DEPTH);
|
|
208
|
+
return {
|
|
209
|
+
key: src.key,
|
|
210
|
+
type: src.type,
|
|
211
|
+
schema: schema,
|
|
212
|
+
leafPaths: leafPaths.slice(0, 50),
|
|
213
|
+
isArray: Array.isArray(src.data),
|
|
214
|
+
arrayLength: Array.isArray(src.data) ? src.data.length : null,
|
|
215
|
+
topLevelKeyCount: (src.data && typeof src.data === 'object' && !Array.isArray(src.data))
|
|
216
|
+
? Object.keys(src.data).length
|
|
217
|
+
: null
|
|
218
|
+
};
|
|
219
|
+
})
|
|
220
|
+
};
|
|
221
|
+
})()`;
|
|
222
|
+
|
|
223
|
+
/**
 * Human-readable labels for the `type` field of each scanned source.
 * Types missing from this table are rendered as their raw type string by
 * the caller. Frozen so the shared table cannot be mutated at runtime.
 */
const TYPE_LABELS = Object.freeze({
  next_ssr: 'Next.js SSR (Pages Router)',
  next_rsc: 'Next.js RSC payload',
  next_runtime: 'Next.js Runtime State',
  nuxt_ssr: 'Nuxt SSR',
  window_props: 'Window Props',
  store: 'State Store',
  inline_json: 'Inline JSON Script',
});
|
|
232
|
+
|
|
233
|
+
/**
 * Summarise how a fresh scan compares with the sources already stored in a
 * site profile.
 *
 * @param {object|null|undefined} existingProfile - Profile whose
 *   `dataSchema.sources` entries (objects with a `key`) are the known set.
 * @param {Array<{key: string}>|null|undefined} sources - Sources from the
 *   current scan. A missing or non-array value is treated as empty.
 * @returns {string} Empty string when the profile has no stored sources,
 *   otherwise a one-line summary such as
 *   "2 known sources in profile, **1 new** found".
 */
function buildSourceDiff(existingProfile, sources) {
  const stored = existingProfile?.dataSchema?.sources;
  const existingKeys = new Set(
    (Array.isArray(stored) ? stored : []).map((source) => source?.key),
  );
  const existingCount = existingKeys.size;
  if (existingCount === 0) return '';

  // Count distinct unseen keys so a key reported twice is one new source.
  const newKeys = new Set();
  for (const source of Array.isArray(sources) ? sources : []) {
    const key = source?.key;
    if (!existingKeys.has(key)) newKeys.add(key);
  }
  const newCount = newKeys.size;

  const plural = existingCount !== 1 ? 's' : '';
  const newPart = newCount > 0 ? `, **${newCount} new** found` : ', no new sources';
  return `${existingCount} known source${plural} in profile${newPart}`;
}
|
|
241
|
+
|
|
242
|
+
/**
 * MCP tool definition for `extract_data`.
 *
 * The handler queues EXTRACT_DATA_SCRIPT as an `execute-js` request on the
 * bridge, waits for the matching `js-execution` result, filters noise
 * sources, stores the enrichment in the site profile (when the active domain
 * can be resolved) and returns a markdown summary.
 *
 * Handler arguments:
 *   - args.verbose {boolean} show up to 15 paths per source instead of 7
 *   - args.tabId   {number}  optional target tab; omitted means active tab
 * Handler result: `{ content: [{ type: 'text', text }], isError? }`.
 * Failures are reported through `isError: true`; the handler does not throw.
 */
export const extractDataTool = {
  name: 'extract_data',
  description: `This is a tool from the dombridge MCP server.
Automatically scans embedded/SSR data sources on the current page and writes extraction-ready summaries into the site profile.

WORKFLOW POSITION: Use after analyze_page when the site appears SSR-heavy or embeds state in window objects.

MULTI-TAB: Call debug_mcp_state() first to get tab IDs, then pass tabId to extract data from a specific tab.`,
  inputSchema: {
    type: 'object',
    properties: {
      verbose: {
        type: 'boolean',
        description: 'Return more schema/path detail (default: false)'
      },
      tabId: {
        type: 'number',
        description: 'Target tab ID (optional). Omit to use active tab.'
      }
    }
  },
  handler: async (args, bridgeClient) => {
    if (!bridgeClient.isConnected) {
      return {
        content: [{
          type: 'text',
          text: 'Error: Extension not connected.\nREQUIRED STEPS:\n1. Reload webpage\n2. Ensure the Chrome extension is active'
        }],
        isError: true
      };
    }

    const { verbose = false, tabId } = args || {};
    const requestId = `extract-data-${Date.now()}-${Math.floor(Math.random() * 1000)}`;

    // In-page script budget, and how long we wait for the bridge to deliver
    // the result. The wait is reported verbatim in the timeout message.
    const scriptTimeoutMs = 10000;
    const resultWaitMs = 18000;

    try {
      // Queue execute-js request via bridge daemon
      await bridgeClient.queueRequest('execute-js', {
        code: EXTRACT_DATA_SCRIPT,
        timeout: scriptTimeoutMs,
        id: requestId,
        ...(tabId !== undefined ? { tabId } : {})
      });

      const resultItem = await bridgeClient.waitForResult('js-execution', requestId, resultWaitMs);

      if (!resultItem) {
        return {
          content: [{ type: 'text', text: `Timeout: extract_data did not complete within ${resultWaitMs}ms.` }],
          isError: true
        };
      }

      // CSP auto-bypass wraps result in { _cspBypassed, result }. Unwrap it.
      let result = resultItem.result;
      if (result && result._cspBypassed !== undefined && result.result !== undefined) {
        result = result.result;
      }

      if (result?.error) {
        return {
          content: [{ type: 'text', text: `extract_data failed: ${result.error}` }],
          isError: true
        };
      }

      // A payload claiming found=true without a sources array is treated as
      // "nothing found" instead of crashing further down.
      if (!result || !result.found || !Array.isArray(result.sources)) {
        return {
          content: [{
            type: 'text',
            text: `No embedded data found on this page.\n\nThis page may be API-driven, use a non-standard state pattern, or require interaction before data loads.\n\nTry \`discover_apis()\` or \`get_network_trace()\` after interacting with the page.`
          }]
        };
      }

      const domain = await resolveActiveDomain(bridgeClient, tabId);
      const existingProfile = domain ? loadProfile(domain) : null;
      const originalSourceCount = result.sources.length;

      // Filter RequireJS-style noise sources (low leaf density) when source count is high
      const sources = filterNoiseSources(result.sources);
      const filteredOut = originalSourceCount - sources.length;
      const densityNote = filteredOut > 0
        ? `Filtered ${filteredOut} low-density source(s) (RequireJS-style loaders). Pass verbose=true to inspect raw count.`
        : '';

      // Diff against the profile AFTER filtering: only the filtered sources
      // are listed below and saved, so dropped noise must not count as new.
      const diffNote = domain ? buildSourceDiff(existingProfile, sources) : '';

      const enrichment = buildProfileEnrichment('extract_data', { ...result, sources });

      if (domain && Object.keys(enrichment).length > 0) {
        saveProfile(domain, enrichment);
      }

      const lines = [
        `Embedded data sources found: ${sources.length}`,
        filteredOut > 0 ? `(raw count: ${originalSourceCount}, filtered: ${filteredOut})` : '',
        diffNote ? `Profile diff: ${diffNote}` : '',
        densityNote
      ].filter(Boolean);
      // Blank separator added after filtering; filter(Boolean) would drop it.
      lines.push('');

      sources.forEach((source, index) => {
        const typeLabel = TYPE_LABELS[source.type] || source.type;
        lines.push(`### ${index + 1}. \`${source.key}\` _(${typeLabel})_`);

        if (source.isArray) {
          lines.push(`- Type: **Array** with ${source.arrayLength} items`);
        } else if (source.topLevelKeyCount !== null) {
          lines.push(`- Type: **Object** with ${source.topLevelKeyCount} top-level keys`);
        }

        // Keys starting with "__" are schema metadata, not page fields.
        const schemaKeys = Object.keys(source.schema || {}).filter((key) => !key.startsWith('__'));
        if (schemaKeys.length > 0) {
          lines.push(`- Top-level fields: \`${schemaKeys.slice(0, 10).join('`, `')}\`${schemaKeys.length > 10 ? ` ... +${schemaKeys.length - 10} more` : ''}`);
        }

        if (Array.isArray(source.leafPaths) && source.leafPaths.length > 0) {
          const total = source.leafPaths.length;
          const shown = source.leafPaths.slice(0, verbose ? 15 : 7);
          const hidden = total - shown.length;

          // Flag truncation in both modes so the count matches what is listed.
          lines.push(`- Extraction-ready paths (${total} found${hidden > 0 ? `, showing top ${shown.length}` : ''}):`);
          shown.forEach((path) => {
            const example = path.example ? ` -> \`${String(path.example).substring(0, verbose ? 60 : 40)}\`` : '';
            lines.push(`  - \`${path.path}\` _(${path.type})_${example}`);
          });
          if (hidden > 0) {
            lines.push(`  - _... ${hidden} more paths_`);
          }
        }

        lines.push('');
      });

      if (domain) {
        lines.push(`Saved to profile: \`manage_site_profile({ action: 'load', domain: "${domain}" })\``);
      }
      lines.push('---');
      lines.push('Next steps:');
      lines.push('- Use `execute_js({ code: "JSON.stringify(window.YOUR_PATH_HERE, null, 2)" })` to inspect the real data');
      lines.push('- Use `get_element()` + `get_network_trace()` if you need to connect DOM values to API responses');

      return {
        content: [{ type: 'text', text: lines.join('\n') }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error: ${e.message}` }],
        isError: true
      };
    }
  }
};
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool: get_element
|
|
3
|
+
* Unified element tool — programmatic selection (CSS/XPath) OR retrieval of manually selected element.
|
|
4
|
+
* Merges the former get_selected_element and select_element tools.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { StateValidator } from '../utils/state-validator.js';
|
|
8
|
+
import { RateLimiter } from '../utils/rate-limiter.js';
|
|
9
|
+
import { circuitBreakers } from '../utils/circuit-breaker.js';
|
|
10
|
+
import { suggestNextTools, formatWorkflowSuggestions, formatHardGuard } from '../utils/workflow-helper.js';
|
|
11
|
+
import { VALIDATION_SCRIPT, formatAlternatives } from '../utils/selector-validator.js';
|
|
12
|
+
import { runScript } from '../utils/run-script.js';
|
|
13
|
+
|
|
14
|
+
/**
 * MCP tool definition for `get_element`.
 *
 * The handler picks one of two modes:
 *   - programmatic: a `css`, `xpath` or `text` argument is present (or a
 *     legacy `selectorInfo` object carrying one of them), delegated to
 *     handleProgrammaticSelection
 *   - manual retrieval: no selector at all, delegated to
 *     handleManualRetrieval
 *
 * Note: `tabId` is only forwarded in programmatic mode.
 */
export const getElementTool = {
  name: 'get_element',
  description: `Select or retrieve a DOM element — programmatically or from user's manual selection.

WORKFLOW POSITION: 🔵 First Step - Use before get_network_trace

TWO MODES:

1. PROGRAMMATIC (provide css/xpath/text) — NO manual browser interaction needed:
   get_element({ css: '.product-price' })
   get_element({ xpath: '//h1[@class="title"]' })
   get_element({ css: 'img[src*="lock"]' }) ← attribute-based selection
   get_element({ css: '[data-testid="product"]' }) ← data-attribute selection

2. MANUAL RETRIEVAL (no selector) — retrieves element user clicked in browser:
   get_element()

PARAMETERS:
- css (optional): CSS selector — preferred method
- xpath (optional): XPath expression
- text (optional): Exact visible text — last resort, unreliable on nested DOM
- triggerNetworkTrace (optional): Auto-run network trace after selection (default: true)
- verbose (optional): Return full detail — xpath, all attributes, sessionInfo, pageAnalysis detail (default: false)
- tabId (optional): Target a specific tab by ID. Get IDs from debug_mcp_state(). Omit to use active tab.

AUTONOMOUS WORKFLOW:
1. analyze_page() → understand page structure
2. get_element({ css: '.price' }) → select target element
3. get_network_trace() → find matching API

MANUAL WORKFLOW:
1. User clicks element in browser extension
2. get_element() → retrieve what was selected
3. get_network_trace() → find matching API`,

  inputSchema: {
    type: 'object',
    properties: {
      css: {
        type: 'string',
        description: "CSS selector to programmatically find the element. E.g. \".price\", \"#title\", \"[data-testid='product']\", \"img[src*='lock']\", \"a[href^='https']\". Attribute selectors are fully supported."
      },
      xpath: {
        type: 'string',
        description: 'XPath expression to find the element. Use when CSS is insufficient.'
      },
      text: {
        type: 'string',
        description: '⚠️ Exact visible text content to find (last resort — unreliable on nested DOM).'
      },
      triggerNetworkTrace: {
        type: 'boolean',
        description: 'Auto-run network trace after programmatic selection (default: true)'
      },
      verbose: {
        type: 'boolean',
        description: 'Return full detail: xpath, all attributes, sessionInfo, pageAnalysis detail (default: false)'
      },
      tabId: {
        type: 'number',
        description: 'Target tab ID (optional). Omit to use active tab. Get IDs from debug_mcp_state().'
      }
    }
  },

  handler: async (args, bridgeClient) => {
    const { css, xpath, text, triggerNetworkTrace = true, verbose = false, tabId } = args || {};

    // Backward compat: also accept a legacy `selectorInfo` object when no
    // top-level selector argument was given.
    const selectorInfo = (css || xpath || text)
      ? { css, xpath, text }
      : (args?.selectorInfo || null);
    const isProgrammatic = selectorInfo && (selectorInfo.css || selectorInfo.xpath || selectorInfo.text);

    if (isProgrammatic) {
      return handleProgrammaticSelection(selectorInfo, triggerNetworkTrace, verbose, bridgeClient, tabId);
    }
    return handleManualRetrieval(verbose, bridgeClient);
  }
};
|
|
93
|
+
|
|
94
|
+
// --- Programmatic selection (formerly select_element) ---
|
|
95
|
+
|
|
96
|
+
/**
 * Select an element by CSS / XPath / text through the extension bridge.
 *
 * Flow: connection guard → optional CSS pre-validation (best effort) →
 * queue a `select-element` request through the circuit breaker → wait up to
 * 10s for the matching result → format a markdown summary.
 *
 * @param {{css?: string, xpath?: string, text?: string}} selectorInfo
 * @param {boolean} triggerNetworkTrace - forwarded to the extension; also
 *   decides which follow-up hint is appended to the output.
 * @param {boolean} verbose - include XPath, selector confidence, all
 *   attributes and a longer text excerpt.
 * @param {object} bridgeClient - must expose `isConnected`, `queueRequest`
 *   and `waitForResult`.
 * @param {number} [tabId] - target tab; omitted means the active tab.
 * @returns {Promise<object>} MCP tool result; failures set `isError: true`.
 */
async function handleProgrammaticSelection(selectorInfo, triggerNetworkTrace, verbose, bridgeClient, tabId) {
  // Hard guard: extension must be connected
  if (!bridgeClient.isConnected) {
    return formatHardGuard('get_element', {
      missing: ['extension_connection'],
      suggestions: ['Reload the webpage and ensure the Chrome extension is active']
    });
  }

  // Pre-validate a CSS selector when one is given (same approach as execute_action).
  if (selectorInfo.css) {
    try {
      const v = await runScript(VALIDATION_SCRIPT(selectorInfo.css), bridgeClient, 5000, { tabId });
      if (v && !v.found) {
        const altLines = formatAlternatives(v.alternatives);

        // Semantic alternatives for heading tags — many modern SPAs (X.com, React apps)
        // use [role=heading], [data-testid*="title"], or styled <div> elements instead of
        // semantic <h1>/<h2>/<h3>. Suggest these so the AI can recover without guessing.
        const semanticAlts = [];
        if (/^h[1-6]$/i.test(selectorInfo.css)) {
          const headingLevel = selectorInfo.css.toLowerCase();
          semanticAlts.push(`[role="${headingLevel}"]`);
          semanticAlts.push(`[role="heading"][aria-level="${headingLevel[1]}"]`);
          semanticAlts.push(`[data-testid*="title"]`);
          semanticAlts.push(`[aria-label*="title"]`);
          semanticAlts.push(`[class*="title"]`);
        }
        const semanticHint = semanticAlts.length > 0
          ? `\n\nSemantic alternatives to try:\n${semanticAlts.map((a) => ` - \`${a}\``).join('\n')}`
          : '';

        // JSON.stringify gives a valid JS string literal even when the
        // selector itself contains quotes (e.g. [data-id='x']).
        return {
          content: [{
            type: 'text',
            text: `❌ Element not found: \`${selectorInfo.css}\`\n\nTROUBLESHOOTING:\n- Verify selector with execute_js: document.querySelector(${JSON.stringify(selectorInfo.css)})\n- Check that the element is visible\n- Try a different selector type (css → xpath or vice versa)${semanticHint}${altLines ? '\n\nSimilar elements on page:\n' + altLines : ''}`
          }],
          isError: true
        };
      }
    } catch (_) { /* validation failure is non-blocking — proceed with select_element */ }
  }

  const requestId = `sel-elem-${Date.now()}-${Math.floor(Math.random() * 1000)}`;

  // Forward any explicit tab id, including 0; only "not provided" is omitted.
  const tabTarget = (tabId !== undefined && tabId !== null) ? { tabId } : {};

  try {
    // Queue element selection request via bridge daemon
    await circuitBreakers.selectElement.execute(() =>
      bridgeClient.queueRequest('select-element', {
        selectorInfo, triggerNetworkTrace, id: requestId, ...tabTarget
      })
    );

    // Wait for result via bridge daemon polling
    const resultItem = await bridgeClient.waitForResult('select-element', requestId, 10000);

    if (!resultItem) {
      return {
        content: [{ type: 'text', text: `❌ Timeout: Extension did not respond within 10s. Check if extension is active.` }],
        isError: true
      };
    }

    if (!resultItem.success || resultItem.error) {
      // Build hints that match the selector kind actually used, so a
      // text-only lookup never prints querySelector('undefined').
      let verifyHint;
      let retryHint;
      if (selectorInfo.xpath) {
        verifyHint = `Verify selector with execute_js: const r = document.evaluate(${JSON.stringify(selectorInfo.xpath)}, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null); r.singleNodeValue`;
        retryHint = 'Try a different selector type (xpath → css)';
      } else if (selectorInfo.css) {
        verifyHint = `Verify selector with execute_js: document.querySelector(${JSON.stringify(selectorInfo.css)})`;
        retryHint = 'Try a different selector type (css → xpath)';
      } else {
        verifyHint = `Verify the text with execute_js: document.body.innerText.includes(${JSON.stringify(selectorInfo.text)})`;
        retryHint = 'Try a css or xpath selector instead of text matching';
      }

      return {
        content: [{
          type: 'text',
          text: `❌ Element Selection Failed: ${resultItem.error || 'Unknown error'}\n\nTROUBLESHOOTING:\n- ${verifyHint}\n- Check that the element is visible\n- ${retryHint}`
        }],
        isError: true
      };
    }

    const el = resultItem.element;
    const selectorUsed = selectorInfo.css || selectorInfo.xpath || selectorInfo.text;

    let message = `✅ Element Selected: **${el?.tagName?.toLowerCase() || 'element'}**\n\n`;
    message += `**Selector used:** \`${selectorUsed}\`\n`;
    message += `**CSS Selector:** \`${el?.cssSelector || 'N/A'}\`\n`;

    // XPath: verbose only
    if (verbose) {
      message += `**XPath:** \`${el?.xpath || 'N/A'}\`\n`;
    }

    if (el?.stableSelector) {
      const meta = el?.stableSelectorMeta;
      const conf = (verbose && meta?.confidence) ? ` _(${meta.confidence} — ${meta.reason})_` : '';
      message += `**Stable Selector:** \`${el.stableSelector}\`${conf}\n`;
    }

    if (el?.textContent) {
      const limit = verbose ? 200 : 100;
      message += `**Text:** ${el.textContent.substring(0, limit)}${el.textContent.length > limit ? '...' : ''}`;
    }

    if (el?.attributes && Object.keys(el.attributes).length > 0) {
      const attrNames = Object.keys(el.attributes);
      const attrLimit = verbose ? attrNames.length : 3;
      const attrs = Object.entries(el.attributes).slice(0, attrLimit).map(([k, v]) => `${k}="${v}"`).join(', ');
      const extra = attrNames.length > attrLimit ? ` +${attrNames.length - attrLimit} more` : '';
      message += `\n**Attributes:** ${attrs}${extra}`;
    }

    message += triggerNetworkTrace
      ? `\n\n🔄 Network trace triggered. Call \`get_network_trace()\` to see API matches.`
      : `\n\n💡 Next: Call \`get_network_trace()\` to find which API provides this element's data.`;

    return { content: [{ type: 'text', text: message }] };
  } catch (e) {
    return {
      isError: true,
      content: [{
        type: 'text',
        text: `Error: ${e.message}\n\nREQUIRED STEPS:\n1. Verify selector syntax\n2. Make sure the element is present and the page is loaded`
      }]
    };
  }
}
|
|
219
|
+
|
|
220
|
+
// --- Manual retrieval (formerly get_selected_element) ---
|
|
221
|
+
|
|
222
|
+
/**
 * Return a summary of the element the user selected manually in the browser.
 *
 * Runs inside RateLimiter.executeWithRetry (tool key 'get_element', up to 3
 * retries). Connection and selected-element state are validated through
 * StateValidator; a failed validation is returned as the formatted
 * validation error.
 *
 * @param {boolean} verbose - include XPath, selector confidence, every
 *   attribute, the analysis recommendation and session info.
 * @param {object} bridgeClient - bridge state; `pageAnalysis` is read
 *   directly from it.
 * @returns {Promise<object>} MCP tool result with a single text item.
 */
async function handleManualRetrieval(verbose, bridgeClient) {
  return await RateLimiter.executeWithRetry(
    'get_element',
    async () => {
      const connValidation = StateValidator.validateConnection(bridgeClient);
      if (!connValidation.valid) return StateValidator.formatValidationError(connValidation);

      const validation = StateValidator.validateSelectedElement(bridgeClient);
      if (!validation.valid) return StateValidator.formatValidationError(validation);

      const el = validation.data;

      let output = `✅ Selected Element Retrieved:\n\n`;
      output += `📍 CSS: ${el.cssSelector}\n`;
      if (verbose) output += `📍 XPath: ${el.xpath}\n`;
      if (el.stableSelector) {
        const meta = el.stableSelectorMeta;
        const conf = (verbose && meta?.confidence) ? ` (${meta.confidence} — ${meta.reason})` : '';
        output += `📍 Stable: ${el.stableSelector}${conf}\n`;
      }
      output += `🏷️ Tag: ${el.tagName}\n`;

      const attributes = el.attributes || {};
      const attrNames = Object.keys(attributes);
      if (attrNames.length > 0) {
        const attrLimit = verbose ? attrNames.length : 3;
        output += `\n📋 Attributes:\n`;
        Object.entries(attributes).slice(0, attrLimit).forEach(([key, value]) => {
          // Coerce first: a null or non-string value must not crash the listing.
          const shown = String(value);
          output += `  ${key}: ${shown.length > 50 ? shown.substring(0, 50) + '...' : shown}\n`;
        });
        const remaining = attrNames.length - attrLimit;
        if (remaining > 0) output += `  ... and ${remaining} more\n`;
      }

      const analysis = bridgeClient.pageAnalysis;
      if (analysis?.timestamp) {
        const detections = [];
        if (analysis.ssrDetected) detections.push('SSR');
        if (analysis.initialStateDetected) detections.push('Initial State');
        if (analysis.embeddedDataDetected) detections.push('Embedded JSON');

        // Tolerate a rate delivered as a numeric string; anything that is
        // not a finite number is shown as 0.
        const rawRate = Number(analysis.networkCaptureRate);
        const captureRate = Number.isFinite(rawRate) ? rawRate : 0;

        output += `\n🔍 **Data Source:** ${detections.length > 0 ? detections.join(', ') : 'Dynamic API load'} | Network: ${captureRate.toFixed(0)}%\n`;

        if (verbose && analysis.recommendation) {
          output += `💡 ${analysis.recommendation}\n`;
        }
      }

      // sessionInfo: verbose only
      if (verbose && el.sessionInfo) {
        const si = el.sessionInfo;
        output += `\n🔐 Session: ${si.cookiesDetected} cookies (${si.hasAuthCookie ? 'auth cookie found' : 'no auth cookie'}) | ${si.botProtection}\n`;
      }

      const suggestions = suggestNextTools('get_element', bridgeClient);
      output += formatWorkflowSuggestions(suggestions);

      return { content: [{ type: 'text', text: output }] };
    },
    { maxRetries: 3 }
  );
}
|