@yusufffararatt/dombridge-mcp 2.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +559 -0
- package/bin/cli.js +88 -0
- package/package.json +54 -0
- package/src/bridge/http-server.js +290 -0
- package/src/bridge/middleware.js +56 -0
- package/src/bridge/routes.js +1003 -0
- package/src/bridge-daemon.js +172 -0
- package/src/cli/auto-config.js +120 -0
- package/src/constants.js +13 -0
- package/src/index.js +279 -0
- package/src/mcp-bridge.js +136 -0
- package/src/metrics/error-codes.js +44 -0
- package/src/metrics/index.js +3 -0
- package/src/metrics/metrics-db.js +269 -0
- package/src/metrics/metrics-recorder.js +240 -0
- package/src/metrics/metrics-report.js +146 -0
- package/src/profiles/profile-db.js +159 -0
- package/src/profiles/profile-enricher.js +333 -0
- package/src/profiles/profile-manager.js +563 -0
- package/src/profiles/profile-repo.js +183 -0
- package/src/state/bridge-client.js +272 -0
- package/src/state/bridge-persistence.js +205 -0
- package/src/state/cache.js +38 -0
- package/src/state/extension-state.js +321 -0
- package/src/tools/action_tools.js +218 -0
- package/src/tools/analyze-page.js +247 -0
- package/src/tools/debug-mcp-state.js +172 -0
- package/src/tools/discover-apis.js +186 -0
- package/src/tools/execute-js.js +284 -0
- package/src/tools/export-session.js +171 -0
- package/src/tools/extract-data.js +395 -0
- package/src/tools/get-element.js +281 -0
- package/src/tools/get-network-trace.js +471 -0
- package/src/tools/index.js +110 -0
- package/src/tools/manage-site-profile.js +153 -0
- package/src/tools/paginate.js +444 -0
- package/src/tools/quick-scan.js +418 -0
- package/src/tools/screenshot_tools.js +117 -0
- package/src/utils/circuit-breaker.js +112 -0
- package/src/utils/extract-density.js +21 -0
- package/src/utils/logger.js +31 -0
- package/src/utils/paginate-detector.js +24 -0
- package/src/utils/rate-limiter.js +244 -0
- package/src/utils/run-script.js +37 -0
- package/src/utils/selector-validator.js +95 -0
- package/src/utils/state-validator.js +354 -0
- package/src/utils/tab-resolver.js +70 -0
- package/src/utils/workflow-helper.js +292 -0
- package/src/utils/workflow-state.js +177 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Analyze Page Tool
|
|
3
|
+
* Analyzes the page's structure, framework, and data sources.
|
|
4
|
+
*
|
|
5
|
+
* Phase 2.4: Refactored from (args, extensionData, httpPort) to (args, bridgeClient).
|
|
6
|
+
* All HTTP communication now goes through BridgeClient.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { softGuardAnalyzePage, formatSoftGuard } from '../utils/workflow-helper.js';
|
|
10
|
+
import { enrichProfile } from '../profiles/profile-enricher.js';
|
|
11
|
+
import { extractDomain } from '../profiles/profile-manager.js';
|
|
12
|
+
|
|
13
|
+
/**
 * MCP tool definition for `analyze_page`.
 *
 * The handler queues an `analyze-page` request through the bridge client,
 * waits for the extension's result and renders it as a Markdown report.
 *
 * handler(args, bridgeClient):
 * - args.includeApis {boolean} append the inline API snapshot (default false)
 * - args.verbose     {boolean} add SSR detail, data containers, headings and
 *                              localStorage keys (default false)
 * - args.tabId       {number}  optional target tab; omitted from the request
 *                              when undefined
 * Returns `{ content: [{ type: 'text', text }] }`, plus `isError: true` on
 * every failure path (not connected, timeout, script error, thrown error).
 */
export const analyzePageTool = {
  name: 'analyze_page',
  description: `This is a tool from the dombridge MCP server.
Analyze the current page's structure, frameworks, data containers, and SSR data — NO element selection needed.

WORKFLOW POSITION: 🟢 First Step - Run before anything else to understand the page.

RETURNS (default: summary):
- Detected frameworks, SSR markers, page title/URL
- Interactive element counts (forms, buttons, inputs)
- Data source signal (SSR / API-driven / unknown)
- DOM stats
- Top 3 captured API calls (when includeApis: true)

Use verbose: true for full detail: headings, all data containers, storageKeys, ssrData detail.

PARAMETERS:
- includeApis (optional, default: false): Also return top 3 captured API calls inline.
- verbose (optional, default: false): Return full detail instead of summary.
- tabId (optional): Target a specific tab by ID. Get IDs from debug_mcp_state().

MULTI-TAB: Call debug_mcp_state() first to get tab IDs, then pass tabId to analyze a specific tab.
Example: analyze_page({ tabId: 142, includeApis: true })

AUTONOMOUS WORKFLOW:
1. analyze_page({ includeApis: true })         → page structure + API snapshot in one call
2. get_element({ selectorInfo: { css: 'SELECTOR' } }) → pick a data element
3. get_network_trace()                         → find matching API
`,
  inputSchema: {
    type: 'object',
    properties: {
      includeApis: {
        type: 'boolean',
        description: 'Also return top 3 captured API calls inline (default: false). Saves a separate discover_apis call.'
      },
      verbose: {
        type: 'boolean',
        description: 'Return full detail: headings, all data containers, storageKeys, ssrData (default: false)'
      },
      tabId: {
        type: 'number',
        description: 'Target tab ID (optional). Omit to use active tab. Get IDs from debug_mcp_state().'
      }
    }
  },
  handler: async (args, bridgeClient) => {
    if (!bridgeClient.isConnected) {
      return {
        content: [{
          type: 'text',
          text: `❌ Error: Extension not connected.
REQUIRED STEPS:
1. Reload webpage
2. Ensure the Chrome extension is active`
        }],
        isError: true
      };
    }

    const { includeApis = false, verbose = false, tabId } = args || {};
    const requestId = `analyze-${Date.now()}-${Math.floor(Math.random() * 1000)}`;

    // Soft guard: very recent scan on this tab — append hint to output, don't block
    const softHints = softGuardAnalyzePage({ tabId });

    try {
      // Queue analyze-page request via bridge daemon
      await bridgeClient.queueRequest('analyze-page', {
        id: requestId,
        ...(tabId !== undefined ? { tabId } : {})
      });

      // Wait for result (max 12 seconds + buffer)
      const timeout = 12000;
      const resultItem = await bridgeClient.waitForResult('analyze-page', requestId, timeout + 3000);

      if (!resultItem) {
        return {
          content: [{
            type: 'text',
            text: `❌ Timeout: Page analysis did not complete within ${timeout}ms.`
          }],
          isError: true
        };
      }

      const r = resultItem.result;

      // Auto-save: record framework + page characteristics in the site profile.
      // Best-effort: a failure while saving must not turn a successful
      // analysis into an error result.
      if (r && !r.error && r.meta?.url) {
        try {
          const domain = extractDomain(r.meta.url);
          if (domain) enrichProfile(domain, 'analyze_page', r);
        } catch {
          // ignore — profile enrichment is optional
        }
      }

      if (r && r.error) {
        return {
          content: [{
            type: 'text',
            text: `❌ Analysis failed: ${r.error}`
          }],
          isError: true
        };
      }
      if (!r) {
        return {
          content: [{
            type: 'text',
            text: `❌ Analysis failed: Script returned no data. The page may still be navigating or Chrome could not access the tab contents yet.`
          }],
          isError: true
        };
      }

      // Format output
      const lines = [
        `🔍 **Page Analysis: ${r.meta?.title || r.meta?.url || 'Unknown Page'}**`,
        `URL: ${r.meta?.url || 'N/A'}`,
        ''
      ];

      // ── Summary mode (default) ───────────────────────────────
      const hasSSR = r.ssrData && Object.keys(r.ssrData).length > 0;
      const ssrKeys = hasSSR ? Object.keys(r.ssrData) : [];
      const dataSignal = hasSSR ? `SSR (${ssrKeys.join(', ')})` : (r.dataContainers?.length > 0 ? 'API-driven' : 'unknown');

      lines.push(`**🧩 Frameworks:** ${r.frameworks?.length ? r.frameworks.join(', ') : 'none / vanilla'}`);
      lines.push(`**📦 Data source:** ${dataSignal}`);

      if (r.elements) {
        lines.push(`**🎯 Elements:** forms ${r.elements.forms} | inputs ${r.elements.inputs} | buttons ${r.elements.buttons} | links ${r.elements.links}`);
      }

      if (r.domStats) {
        lines.push(`**🌳 DOM:** ${r.domStats.totalElements} elements, depth ${r.domStats.maxDepth}`);
      }

      // ── Verbose extras ───────────────────────────────────────
      if (verbose) {
        if (hasSSR) {
          lines.push(`\n**📦 SSR Detail:** ${Object.entries(r.ssrData).map(([k, v]) => `${k}${typeof v === 'object' ? ': ' + JSON.stringify(v) : ''}`).join(', ')}`);
        }

        if (r.dataContainers?.length > 0) {
          const ECOMMERCE_DOMAINS = ['trendyol', 'amazon', 'hepsiburada', 'shopify', 'etsy', 'ebay', 'n11', 'gittigidiyor', 'ciceksepeti'];
          // Match against the hostname only, so a shop name that merely
          // appears in the path or query string does not count as e-commerce.
          const pageUrl = r.meta?.url || '';
          let pageHost = pageUrl;
          try {
            pageHost = new URL(pageUrl).hostname;
          } catch {
            // Not an absolute URL — fall back to matching the raw string.
          }
          const isEcommerce = ECOMMERCE_DOMAINS.some(d => pageHost.includes(d));
          lines.push(`\n**📋 Data Containers:**`);
          r.dataContainers.forEach(c => {
            const isProductSelector = c.selector === '[class*="product"]';
            const note = (isProductSelector && !isEcommerce) ? ' _(generic match — verify for non-ecommerce pages)_' : '';
            lines.push(`- \`${c.selector}\`: ${c.count} elements${note}`);
          });
        }

        if (r.headings?.length > 0) {
          lines.push(`\n**📝 Headings:**`);
          r.headings.forEach(h => lines.push(`- ${h.tag}: "${h.text}"`));
        }

        if (r.storageKeys?.local?.length > 0) {
          lines.push(`\n**🗄️ LocalStorage:** ${r.storageKeys.local.join(', ')}`);
        }
      }

      lines.push(`\n---\n💡 **Next steps:**`);
      if (hasSSR) {
        lines.push(`- SSR data detected! Use \`execute_js\` to read \`window.__NEXT_DATA__\` or \`window.__INITIAL_STATE__\` directly.`);
      }
      lines.push(`- \`get_element({ selectorInfo: { css: 'SELECTOR' } })\` → select a data element`);
      lines.push(`- \`get_network_trace()\` → find matching API`);

      // Inline API discovery when includeApis: true
      if (includeApis) {
        const apiLines = await fetchTopApis(bridgeClient, tabId);
        lines.push(...apiLines);
      }

      // Soft guard hint (appended after content, non-blocking)
      const softNote = formatSoftGuard(softHints);
      if (softNote) lines.push(softNote);

      return {
        content: [{ type: 'text', text: lines.join('\n') }]
      };

    } catch (e) {
      return {
        content: [{ type: 'text', text: `❌ Error: ${e.message}` }],
        isError: true
      };
    }
  }
};
|
|
207
|
+
|
|
208
|
+
/**
 * Fetch the top captured API calls to show inline (for includeApis: true).
 * Uses BridgeClient for HTTP communication.
 *
 * Queues a `raw-network-requests` request, waits up to 8 seconds for the
 * matching `raw-network` result and formats at most three entries.
 *
 * @param {object} bridgeClient - must provide async `queueRequest(type, payload)`
 *   and async `waitForResult(type, id, timeoutMs)`.
 * @param {number} [tabId] - optional target tab; omitted from the request when undefined.
 * @returns {Promise<string[]>} Markdown lines to append to the report. Never
 *   rejects: a missing result, an empty capture and a thrown error are each
 *   reported as a single explanatory line.
 */
async function fetchTopApis(bridgeClient, tabId) {
  const MAX_INLINE_APIS = 3;
  const MAX_URL_LENGTH = 80;
  const requestId = `raw-net-${Date.now()}-${Math.floor(Math.random() * 1000)}`;

  try {
    await bridgeClient.queueRequest('raw-network-requests', {
      method: 'all',
      limit: MAX_INLINE_APIS,
      includeBody: false,
      id: requestId,
      ...(tabId !== undefined ? { tabId } : {})
    });

    const resultItem = await bridgeClient.waitForResult('raw-network', requestId, 6000 + 2000);

    if (!resultItem) {
      // Bug #1 fix: be explicit about empty/no response
      return ['\n**📡 API Discovery:** No network data available yet. Try `discover_apis()` after page interaction.'];
    }

    const captured = Array.isArray(resultItem.requests) ? resultItem.requests : [];
    if (captured.length === 0) {
      return [`\n**📡 API Discovery:** No network requests captured for this tab (0 calls). Try \`discover_apis()\` after page interaction.`];
    }

    // Enforce the cap locally as well; the requested limit is only a hint to
    // the extension and must not decide how long the inline section gets.
    const requests = captured.slice(0, MAX_INLINE_APIS);
    const total = resultItem.total || captured.length;

    const lines = [`\n**📡 Top API Calls (${requests.length} of ${total} captured):**`];
    requests.forEach((req, i) => {
      // A capture without a URL should not abort the whole section.
      const url = typeof req?.url === 'string' ? req.url : '(unknown URL)';
      const shownUrl = url.length > MAX_URL_LENGTH ? url.substring(0, MAX_URL_LENGTH) + '…' : url;
      lines.push(`- ${i + 1}. \`${req?.method ?? 'UNKNOWN'}\` ${shownUrl} ${req?.status ? `[${req.status}]` : ''}`);
    });
    lines.push(`_Use \`discover_apis()\` for full list with filtering._`);
    return lines;
  } catch (e) {
    // Non-critical — but report the error
    return [`\n**📡 API Discovery:** Failed to fetch — ${e.message}`];
  }
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool: debug_mcp_state
|
|
3
|
+
* MCP server's internal state for debugging — enriched with connection health
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readFileSync } from 'fs';
|
|
7
|
+
import { RateLimiter } from '../utils/rate-limiter.js';
|
|
8
|
+
import { circuitBreakers } from '../utils/circuit-breaker.js';
|
|
9
|
+
|
|
10
|
+
/**
 * MCP tool definition for `debug_mcp_state`.
 *
 * The handler runs inside `RateLimiter.executeWithRetry` (maxRetries: 1) and
 * builds a Markdown report from the bridge client's state: server version,
 * open tabs, connection health, selected element, pending requests, circuit
 * breakers and recent connection events.
 *
 * handler(args, bridgeClient):
 * - args is unused (the tool takes no parameters).
 * - Returns `{ content: [{ type: 'text', text }] }`.
 */
export const debugMcpStateTool = {
  name: 'debug_mcp_state',
  description: `Show MCP server's internal state for debugging. Also lists open browser tabs for multi-tab targeting.

WORKFLOW POSITION: 🔧 Debug/Setup Tool

WHEN TO CALL:
- Extension appears disconnected or tools are timing out
- Circuit breakers showing OPEN state
- BEFORE using tabId parameter in execute_js / execute_action / capture_screenshot / discover_apis
→ This tool shows all open tab IDs so you can target the right one

RETURNS:
- Connection status + heartbeat health
- Open browser tabs: ID, URL, title (use these IDs for tabId parameter)
- Selected element state
- Circuit breaker states (CLOSED/OPEN/HALF_OPEN)
- Pending request queue

⚠️ Do NOT call during normal single-tab workflows — use only when needed.`,
  inputSchema: {
    type: 'object',
    properties: {},
    required: []
  },
  handler: async (args, bridgeClient) => {
    return await RateLimiter.executeWithRetry(
      'debug_mcp_state',
      async () => {
        // Prefer the health tracker's snapshot; fall back to the bare
        // connection flag when no tracker is attached to the client.
        const health = bridgeClient._connectionHealth
          ? (typeof bridgeClient._connectionHealth.getStatus === 'function'
            ? bridgeClient._connectionHealth.getStatus()
            : bridgeClient._connectionHealth)
          : { connected: bridgeClient.isConnected, stale: false };

        const uptimeSeconds = Math.floor(process.uptime());

        // Get version from package.json. If it cannot be read the version
        // stays 'unknown' rather than reporting a hard-coded (and eventually
        // stale) release number.
        let version = 'unknown';
        try {
          const pkgPath = new URL('../../package.json', import.meta.url);
          const pkg = JSON.parse(readFileSync(pkgPath, 'utf8'));
          version = pkg.version ?? version;
        } catch {
          // keep 'unknown'
        }

        // Fetch tab list from extension via bridge daemon (best-effort with 3s timeout)
        let tabList = null;
        try {
          const tabRequestId = `tabs-debug-${Date.now()}-${Math.floor(Math.random() * 1000)}`;
          await bridgeClient.queueRequest('tabs', { id: tabRequestId });
          const tabResult = await bridgeClient.waitForResult('tabs', tabRequestId, 3000);
          if (tabResult && tabResult.tabs) {
            tabList = tabResult.tabs;
          }
        } catch { /* ignore — tab list is best-effort */ }

        let output = `## MCP Server Internal State\n\n`;
        output += `Server Version: ${version}\n`;
        output += `HTTP Port: ${bridgeClient.port}\n`;
        output += `Process Uptime: ${uptimeSeconds}s\n\n`;

        // Open Tabs (for multi-tab targeting)
        output += `## OPEN TABS\n\n`;
        if (tabList && tabList.length > 0) {
          output += `_Pass tabId to execute_js / execute_action / capture_screenshot / discover_apis to target a specific tab._\n\n`;
          tabList.forEach(t => {
            const activeFlag = t.active ? ' ← active' : '';
            const title = t.title ? ` "${t.title.substring(0, 50)}"` : '';
            output += `- [${t.id}] ${t.url}${title}${activeFlag}\n`;
          });
        } else if (!bridgeClient.isConnected) {
          output += `_(Extension not connected — connect first to see tabs)_\n`;
        } else {
          output += `_(No tabs found or extension did not respond in time)_\n`;
        }
        output += `\n`;

        // Connection Health
        output += `## CONNECTION HEALTH\n\n`;
        output += `Status: ${health.connected ? '🟢 Connected' : '🔴 Disconnected'}\n`;
        output += `Stale: ${health.stale ? '⚠️ Yes (no heartbeat)' : '✅ No'}\n`;
        output += `Last Heartbeat: ${health.lastHeartbeat || 'Never'}\n`;
        output += `Heartbeat Count: ${health.heartbeatCount || 0}\n`;
        output += `Connection Uptime: ${health.uptimeHuman || 'N/A'}\n`;
        const reconnectCount = health.reconnectCount || 0;
        const knownSessions = health.knownSessionCount || 0;
        output += `Reconnect Count: ${reconnectCount}`;
        if (knownSessions > 1) {
          output += ` (${knownSessions} tabs/sessions detected — normal for multi-tab browsing)`;
        }
        output += `\n\n`;

        // State Details
        output += `## STATE DETAILS\n\n`;
        const sel = bridgeClient.selectedElement;
        if (sel) {
          output += `Selected Element: Yes\n`;
          output += ` CSS: ${sel.cssSelector}\n`;
          if (sel.stableSelector) {
            const meta = sel.stableSelectorMeta;
            const conf = meta?.confidence ? ` (${meta.confidence} — ${meta.reason})` : '';
            output += ` Stable: ${sel.stableSelector}${conf}\n`;
          } else {
            output += ` Stable: ⚠️ none — element has no stable attribute (data-testid/data-cy/data-qa-id/aria-label/id)\n`;
          }
        } else {
          output += `Selected Element: No\n`;
        }
        // networkTrace is guarded like websocketTrace so a client without a
        // trace yet still produces a report instead of throwing.
        output += `Network Trace Matches: ${bridgeClient.networkTrace?.totalMatches || 0}\n`;
        output += `WebSocket Matches: ${bridgeClient.websocketTrace?.totalMatches || 0}\n`;
        output += `Element Value: ${bridgeClient.networkTrace?.elementValue || 'N/A'}\n\n`;

        // Pending Requests
        output += `## PENDING REQUESTS\n\n`;
        output += `JS Execution: ${bridgeClient.jsExecutionRequest ? '⏳ Pending' : '✅ None'}\n`;
        output += `Action Queue: ${bridgeClient.actionExecutionRequest ? '⏳ Pending' : '✅ None'}\n`;
        output += `Screenshot: ${bridgeClient.captureScreenshotRequest ? '⏳ Pending' : '✅ None'}\n`;
        output += `Raw Network: ${(bridgeClient.rawNetworkRequests?.length || 0) > 0 ? `⏳ ${bridgeClient.rawNetworkRequests.length} pending` : '✅ None'}\n`;
        output += `Analyze Page: ${(bridgeClient.analyzePageRequests?.length || 0) > 0 ? `⏳ ${bridgeClient.analyzePageRequests.length} pending` : '✅ None'}\n`;
        output += `Select Element: ${bridgeClient.selectElementRequest ? '⏳ Pending' : '✅ None'}\n\n`;

        // Circuit Breaker Status
        output += `## CIRCUIT BREAKERS\n\n`;
        Object.values(circuitBreakers).forEach(cb => {
          const s = cb.getStatus();
          const icon = s.state === 'CLOSED' ? '🟢' : s.state === 'OPEN' ? '🔴' : '🟡';
          output += `${icon} ${s.name}: ${s.state} (failures: ${s.failures})\n`;
        });
        output += '\n';

        // Recent Events
        if (health.recentEvents && health.recentEvents.length > 0) {
          output += `## RECENT EVENTS\n\n`;
          health.recentEvents.forEach(event => {
            output += `- ${event.time}: ${event.type}\n`;
          });
          output += '\n';
        }

        // Connection Info
        output += `## CONNECTION INFO\n\n`;
        output += `Extension → MCP Bridge: http://localhost:${bridgeClient.port}\n`;
        output += `Heartbeat Endpoint: POST /api/heartbeat\n`;
        output += `Health Check: GET /health`;

        return {
          content: [
            {
              type: 'text',
              text: output
            }
          ]
        };
      },
      {
        maxRetries: 1
      }
    );
  }
};
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Discover APIs Tool
|
|
3
|
+
* Lists captured network calls and saves scraper-relevant metadata to the site profile.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { circuitBreakers } from '../utils/circuit-breaker.js';
|
|
7
|
+
import { softGuardDiscoverApis, formatSoftGuard } from '../utils/workflow-helper.js';
|
|
8
|
+
import { resolveActiveDomain } from '../utils/tab-resolver.js';
|
|
9
|
+
|
|
10
|
+
/**
 * MCP tool definition for `discover_apis`.
 *
 * The handler asks the extension (through the bridge client and the
 * `discoverApis` circuit breaker) for captured network requests, stores every
 * returned request as a captured endpoint for the resolved domain, and
 * renders the first `limit` of them as a Markdown list.
 *
 * handler(args, bridgeClient):
 * - args.urlPattern  {string}  optional filter forwarded to the extension
 * - args.method      {string}  HTTP method filter (default 'all')
 * - args.limit       {number}  results to show; clamped to an integer in 1..20
 *                              (default 3, also used for non-numeric input)
 * - args.includeBody {boolean} show bodies in verbose mode (default false)
 * - args.verbose     {boolean} show content type, time and bodies (default false)
 * - args.tabId       {number}  optional target tab
 * - args.force       {boolean} forwarded to the soft guard (default false)
 * Returns `{ content: [{ type: 'text', text }] }`, plus `isError: true` when
 * the extension is not connected, does not answer in time, or a call throws.
 */
export const discoverApisTool = {
  name: 'discover_apis',
  description: `This is a tool from the dombridge MCP server.
List all captured API calls on the current page WITHOUT requiring element selection.

WORKFLOW POSITION: First Step - use when you need live API discovery or want to refresh saved endpoint intelligence.

NOTE:
- Successful responses are auto-saved into the site profile.
- Use manage_site_profile({ action: 'load', domain: '...' }) to inspect the saved endpoint dossier afterward.

MULTI-TAB: Call debug_mcp_state() first to get tab IDs, then pass tabId to discover APIs on a specific tab.`,
  inputSchema: {
    type: 'object',
    properties: {
      urlPattern: {
        type: 'string',
        description: 'URL substring or regex pattern to filter results (e.g. "/api/", "graphql", "product")'
      },
      method: {
        type: 'string',
        enum: ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'all'],
        description: 'HTTP method filter. Default: "all"'
      },
      limit: {
        type: 'number',
        description: 'Max number of results to return. Default: 3, max: 20.',
        default: 3,
        maximum: 20
      },
      includeBody: {
        type: 'boolean',
        description: 'Include request/response bodies in verbose mode. Default: false.'
      },
      verbose: {
        type: 'boolean',
        description: 'Return full detail: contentType, timestamp, request/response bodies (default: false)',
        default: false
      },
      tabId: {
        type: 'number',
        description: 'Target tab ID (optional). Omit to use active tab.'
      },
      force: {
        type: 'boolean',
        description: 'Skip soft guard hint about a fresh profile and run discovery anyway.',
        default: false
      }
    }
  },
  handler: async (args, bridgeClient) => {
    if (!bridgeClient.isConnected) {
      return {
        content: [{
          type: 'text',
          text: 'Error: Extension not connected.\nREQUIRED STEPS:\n1. Reload webpage\n2. Ensure the Chrome extension is active'
        }],
        isError: true
      };
    }

    const DEFAULT_LIMIT = 3;
    const MAX_LIMIT = 20;
    const requestId = `raw-net-${Date.now()}-${Math.floor(Math.random() * 1000)}`;
    const { urlPattern, method = 'all', includeBody = false, verbose = false, tabId, force = false } = args || {};

    // Clamp to a whole number in [1, MAX_LIMIT]. Without the lower bound a
    // limit of 0 reported "no API calls" despite captures, and a negative
    // limit made slice() drop entries from the end instead of capping.
    const requestedLimit = Number(args?.limit ?? DEFAULT_LIMIT);
    const limit = Number.isFinite(requestedLimit)
      ? Math.min(Math.max(Math.trunc(requestedLimit), 1), MAX_LIMIT)
      : DEFAULT_LIMIT;

    const domain = await resolveActiveDomain(bridgeClient, tabId);
    const softHints = softGuardDiscoverApis(bridgeClient, { domain, force });

    try {
      await circuitBreakers.discoverApis.execute(() =>
        bridgeClient.queueRequest('raw-network-requests', { urlPattern, method, limit, includeBody, id: requestId, ...(tabId !== undefined ? { tabId } : {}) })
      );

      const timeout = 20000;
      const resultItem = await bridgeClient.waitForResult('raw-network', requestId, timeout);

      if (!resultItem) {
        return {
          content: [{
            type: 'text',
            text: `Timeout: Extension did not respond within ${timeout}ms. Check if the extension is active and the page has finished loading.`
          }],
          isError: true
        };
      }

      // Successful (2xx) responses first, newest first within each group.
      // Sort a copy so the bridge client's result object is left untouched.
      const isOk = (req) => (req.status >= 200 && req.status < 300 ? 0 : 1);
      const allRequests = [...(resultItem.requests || [])].sort((a, b) => {
        const rank = isOk(a) - isOk(b);
        if (rank !== 0) return rank;
        return (b.timestamp || 0) - (a.timestamp || 0);
      });
      const requests = allRequests.slice(0, limit);

      if (requests.length === 0) {
        return {
          content: [{
            type: 'text',
            text: `No API calls captured yet${urlPattern ? ` matching "${urlPattern}"` : ''}.\n\nTIPS:\n- Interact with the page first (scroll, click)\n- Use execute_action to trigger lazy-loaded requests\n- Try without urlPattern to see the full capture`
          }]
        };
      }

      // Push captures to bridge state via HTTP POST (process boundary)
      // so manage_site_profile (running in this same MCP process) can read
      // them back via bridgeClient.getCapturedEndpoints(domain).
      if (domain) {
        for (const req of allRequests) {
          try {
            await bridgeClient.addCapturedEndpoint(domain, {
              method: req.method,
              url: req.url,
              status: req.status,
              contentType: req.contentType
            });
          } catch (_) {
            // best-effort — don't fail the whole discovery
          }
        }
      }

      // Fall back to what was actually received when the extension does not
      // report a total, instead of printing "undefined".
      const totalCaptured = resultItem.total ?? allRequests.length;
      const lines = [
        `API discovery: ${requests.length} call${requests.length !== 1 ? 's' : ''} shown (total captured: ${totalCaptured})`
      ];
      if (urlPattern) lines.push(`Filter: "${urlPattern}"`);
      // Blank separator between the header and the list.
      lines.push('');

      requests.forEach((request, index) => {
        // A capture without a URL should not abort the whole report.
        const url = typeof request.url === 'string' ? request.url : '(unknown URL)';
        const urlDisplay = url.length > 80 ? `${url.substring(0, 80)}...` : url;
        lines.push(`**${index + 1}. ${request.method} ${urlDisplay}** ${request.status ? `[${request.status}]` : ''}`);

        if (verbose) {
          if (request.contentType) lines.push(` Content-Type: ${request.contentType}`);
          if (request.timestamp) lines.push(` Time: ${new Date(request.timestamp).toLocaleTimeString()}`);
          if (includeBody && request.requestBody) {
            const body = typeof request.requestBody === 'string'
              ? request.requestBody
              : JSON.stringify(request.requestBody, null, 2);
            lines.push(` Request Body: ${body.substring(0, 500)}${body.length > 500 ? '...' : ''}`);
          }
          if (includeBody && request.responseBody) {
            const body = typeof request.responseBody === 'string'
              ? request.responseBody
              : JSON.stringify(request.responseBody, null, 2);
            lines.push(` Response: ${body.substring(0, 500)}${body.length > 500 ? '...' : ''}`);
          }
        }

        lines.push('');
      });

      if (domain) {
        lines.push(`Captured ${allRequests.length} endpoint${allRequests.length !== 1 ? 's' : ''}. Inspect: \`manage_site_profile({ action: "load", domain: "${domain}" })\``);
      }
      if (verbose && !includeBody) {
        lines.push('💡 Add `includeBody: true` to see request/response bodies.');
      }
      lines.push('---');
      lines.push('Next steps:');
      // NOTE(review): analyze_page advertises get_element({ selectorInfo: { css } });
      // confirm which call shape get_element actually accepts.
      lines.push('- `get_element({ css: "SELECTOR" })` -> pick the field you want to scrape');
      lines.push('- `get_network_trace()` -> connect that field to the correct API response');
      lines.push('- `discover_apis({ limit: 20 })` -> inspect more captured calls');

      const softNote = formatSoftGuard(softHints);
      if (softNote) lines.push(softNote);

      return {
        content: [{ type: 'text', text: lines.join('\n') }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error: ${e.message}` }],
        isError: true
      };
    }
  }
};
|