@yusufffararatt/dombridge-mcp 2.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +559 -0
  2. package/bin/cli.js +88 -0
  3. package/package.json +54 -0
  4. package/src/bridge/http-server.js +290 -0
  5. package/src/bridge/middleware.js +56 -0
  6. package/src/bridge/routes.js +1003 -0
  7. package/src/bridge-daemon.js +172 -0
  8. package/src/cli/auto-config.js +120 -0
  9. package/src/constants.js +13 -0
  10. package/src/index.js +279 -0
  11. package/src/mcp-bridge.js +136 -0
  12. package/src/metrics/error-codes.js +44 -0
  13. package/src/metrics/index.js +3 -0
  14. package/src/metrics/metrics-db.js +269 -0
  15. package/src/metrics/metrics-recorder.js +240 -0
  16. package/src/metrics/metrics-report.js +146 -0
  17. package/src/profiles/profile-db.js +159 -0
  18. package/src/profiles/profile-enricher.js +333 -0
  19. package/src/profiles/profile-manager.js +563 -0
  20. package/src/profiles/profile-repo.js +183 -0
  21. package/src/state/bridge-client.js +272 -0
  22. package/src/state/bridge-persistence.js +205 -0
  23. package/src/state/cache.js +38 -0
  24. package/src/state/extension-state.js +321 -0
  25. package/src/tools/action_tools.js +218 -0
  26. package/src/tools/analyze-page.js +247 -0
  27. package/src/tools/debug-mcp-state.js +172 -0
  28. package/src/tools/discover-apis.js +186 -0
  29. package/src/tools/execute-js.js +284 -0
  30. package/src/tools/export-session.js +171 -0
  31. package/src/tools/extract-data.js +395 -0
  32. package/src/tools/get-element.js +281 -0
  33. package/src/tools/get-network-trace.js +471 -0
  34. package/src/tools/index.js +110 -0
  35. package/src/tools/manage-site-profile.js +153 -0
  36. package/src/tools/paginate.js +444 -0
  37. package/src/tools/quick-scan.js +418 -0
  38. package/src/tools/screenshot_tools.js +117 -0
  39. package/src/utils/circuit-breaker.js +112 -0
  40. package/src/utils/extract-density.js +21 -0
  41. package/src/utils/logger.js +31 -0
  42. package/src/utils/paginate-detector.js +24 -0
  43. package/src/utils/rate-limiter.js +244 -0
  44. package/src/utils/run-script.js +37 -0
  45. package/src/utils/selector-validator.js +95 -0
  46. package/src/utils/state-validator.js +354 -0
  47. package/src/utils/tab-resolver.js +70 -0
  48. package/src/utils/workflow-helper.js +292 -0
  49. package/src/utils/workflow-state.js +177 -0
@@ -0,0 +1,247 @@
1
+ /**
2
+ * Analyze Page Tool
3
+ * Analyzes the page's structure, framework, and data sources.
4
+ *
5
+ * Phase 2.4: Refactored from (args, extensionData, httpPort) to (args, bridgeClient).
6
+ * All HTTP communication now goes through BridgeClient.
7
+ */
8
+
9
+ import { softGuardAnalyzePage, formatSoftGuard } from '../utils/workflow-helper.js';
10
+ import { enrichProfile } from '../profiles/profile-enricher.js';
11
+ import { extractDomain } from '../profiles/profile-manager.js';
12
+
13
/**
 * MCP tool definition for `analyze_page`.
 *
 * The handler queues an `analyze-page` request through the bridge client,
 * waits for the extension's result and renders it as a Markdown-style text
 * report (summary by default, extra sections when `verbose` is set).
 *
 * Handler parameters:
 *   args         - optional `{ includeApis, verbose, tabId }` (see inputSchema).
 *   bridgeClient - object exposing `isConnected`, `queueRequest()` and
 *                  `waitForResult()`.
 *
 * Handler result: `{ content: [{ type: 'text', text }], isError? }`.
 * Errors raised inside the request/format phase are converted into an
 * `isError: true` result rather than being thrown.
 */
export const analyzePageTool = {
  name: 'analyze_page',
  description: `This is a tool from the dombridge MCP server.
Analyze the current page's structure, frameworks, data containers, and SSR data — NO element selection needed.

WORKFLOW POSITION: 🟢 First Step - Run before anything else to understand the page.

RETURNS (default: summary):
- Detected frameworks, SSR markers, page title/URL
- Interactive element counts (forms, buttons, inputs)
- Data source signal (SSR / API-driven / unknown)
- DOM stats
- Top 3 captured API calls (when includeApis: true)

Use verbose: true for full detail: headings, all data containers, storageKeys, ssrData detail.

PARAMETERS:
- includeApis (optional, default: false): Also return top 3 captured API calls inline.
- verbose (optional, default: false): Return full detail instead of summary.
- tabId (optional): Target a specific tab by ID. Get IDs from debug_mcp_state().

MULTI-TAB: Call debug_mcp_state() first to get tab IDs, then pass tabId to analyze a specific tab.
Example: analyze_page({ tabId: 142, includeApis: true })

AUTONOMOUS WORKFLOW:
1. analyze_page({ includeApis: true }) → page structure + API snapshot in one call
2. get_element({ selectorInfo: { css: 'SELECTOR' } }) → pick a data element
3. get_network_trace() → find matching API
`,
  inputSchema: {
    type: 'object',
    properties: {
      includeApis: {
        type: 'boolean',
        description: 'Also return top 3 captured API calls inline (default: false). Saves a separate discover_apis call.'
      },
      verbose: {
        type: 'boolean',
        description: 'Return full detail: headings, all data containers, storageKeys, ssrData (default: false)'
      },
      tabId: {
        type: 'number',
        description: 'Target tab ID (optional). Omit to use active tab. Get IDs from debug_mcp_state().'
      }
    }
  },
  handler: async (args, bridgeClient) => {
    if (!bridgeClient.isConnected) {
      return {
        content: [{
          type: 'text',
          text: '❌ Error: Extension not connected.\nREQUIRED STEPS:\n1. Reload webpage\n2. Ensure the Chrome extension is active'
        }],
        isError: true
      };
    }

    const { includeApis = false, verbose = false, tabId } = args || {};
    const requestId = `analyze-${Date.now()}-${Math.floor(Math.random() * 1000)}`;

    // Soft guard: very recent scan on this tab — append hint to output, don't block
    const softHints = softGuardAnalyzePage({ tabId });

    try {
      // Queue analyze-page request via bridge daemon
      await bridgeClient.queueRequest('analyze-page', {
        id: requestId,
        ...(tabId !== undefined ? { tabId } : {})
      });

      // Wait for result (max 12 seconds + buffer)
      const timeout = 12000;
      const resultItem = await bridgeClient.waitForResult('analyze-page', requestId, timeout + 3000);

      if (!resultItem) {
        return {
          content: [{
            type: 'text',
            text: `❌ Timeout: Page analysis did not complete within ${timeout}ms.`
          }],
          isError: true
        };
      }

      const r = resultItem.result;

      if (!r) {
        return {
          content: [{
            type: 'text',
            text: `❌ Analysis failed: Script returned no data. The page may still be navigating or Chrome could not access the tab contents yet.`
          }],
          isError: true
        };
      }
      if (r.error) {
        return {
          content: [{
            type: 'text',
            text: `❌ Analysis failed: ${r.error}`
          }],
          isError: true
        };
      }

      // Auto-save: record framework + page characteristics in the site profile.
      // This is a side effect of a successful analysis, so a failure here must
      // not discard the analysis itself; it is reported as a warning line instead.
      // NOTE(review): if enrichProfile is async, a rejected promise is not
      // observed here (it was not awaited originally either) — confirm.
      let profileWarning = '';
      if (r.meta?.url) {
        try {
          const domain = extractDomain(r.meta.url);
          if (domain) enrichProfile(domain, 'analyze_page', r);
        } catch (saveError) {
          profileWarning = `⚠️ Profile auto-save failed: ${saveError?.message ?? saveError}`;
        }
      }

      // Format output
      const lines = [
        `🔍 **Page Analysis: ${r.meta?.title || r.meta?.url || 'Unknown Page'}**`,
        `URL: ${r.meta?.url || 'N/A'}`,
        ''
      ];

      // ── Summary mode (default) ───────────────────────────────
      const hasSSR = r.ssrData && Object.keys(r.ssrData).length > 0;
      const ssrKeys = hasSSR ? Object.keys(r.ssrData) : [];
      const dataSignal = hasSSR ? `SSR (${ssrKeys.join(', ')})` : (r.dataContainers?.length > 0 ? 'API-driven' : 'unknown');

      lines.push(`**🧩 Frameworks:** ${r.frameworks?.length ? r.frameworks.join(', ') : 'none / vanilla'}`);
      lines.push(`**📦 Data source:** ${dataSignal}`);

      if (r.elements) {
        lines.push(`**🎯 Elements:** forms ${r.elements.forms} | inputs ${r.elements.inputs} | buttons ${r.elements.buttons} | links ${r.elements.links}`);
      }

      if (r.domStats) {
        lines.push(`**🌳 DOM:** ${r.domStats.totalElements} elements, depth ${r.domStats.maxDepth}`);
      }

      // ── Verbose extras ───────────────────────────────────────
      if (verbose) {
        if (hasSSR) {
          lines.push(`\n**📦 SSR Detail:** ${Object.entries(r.ssrData).map(([k, v]) => `${k}${typeof v === 'object' ? ': ' + JSON.stringify(v) : ''}`).join(', ')}`);
        }

        if (r.dataContainers?.length > 0) {
          const ECOMMERCE_DOMAINS = ['trendyol', 'amazon', 'hepsiburada', 'shopify', 'etsy', 'ebay', 'n11', 'gittigidiyor', 'ciceksepeti'];
          // Match the keywords against the hostname only, so a keyword that
          // merely appears in the path or query (e.g. /best-amazon-deals) does
          // not classify the page as e-commerce.
          const pageUrl = r.meta?.url || '';
          let pageHost = pageUrl;
          try {
            pageHost = new URL(pageUrl).hostname;
          } catch {
            // Not an absolute URL — fall back to matching the raw string.
          }
          const isEcommerce = ECOMMERCE_DOMAINS.some(d => pageHost.includes(d));
          lines.push(`\n**📋 Data Containers:**`);
          r.dataContainers.forEach(c => {
            const isProductSelector = c.selector === '[class*="product"]';
            const note = (isProductSelector && !isEcommerce) ? ' _(generic match — verify for non-ecommerce pages)_' : '';
            lines.push(`- \`${c.selector}\`: ${c.count} elements${note}`);
          });
        }

        if (r.headings?.length > 0) {
          lines.push(`\n**📝 Headings:**`);
          r.headings.forEach(h => lines.push(`- ${h.tag}: "${h.text}"`));
        }

        if (r.storageKeys?.local?.length > 0) {
          lines.push(`\n**🗄️ LocalStorage:** ${r.storageKeys.local.join(', ')}`);
        }
      }

      lines.push(`\n---\n💡 **Next steps:**`);
      if (hasSSR) {
        lines.push(`- SSR data detected! Use \`execute_js\` to read \`window.__NEXT_DATA__\` or \`window.__INITIAL_STATE__\` directly.`);
      }
      lines.push(`- \`get_element({ selectorInfo: { css: 'SELECTOR' } })\` → select a data element`);
      lines.push(`- \`get_network_trace()\` → find matching API`);

      // Inline API discovery when includeApis: true
      if (includeApis) {
        const apiLines = await fetchTopApis(bridgeClient, tabId);
        lines.push(...apiLines);
      }

      if (profileWarning) lines.push(profileWarning);

      // Soft guard hint (appended after content, non-blocking)
      const softNote = formatSoftGuard(softHints);
      if (softNote) lines.push(softNote);

      return {
        content: [{ type: 'text', text: lines.join('\n') }]
      };

    } catch (e) {
      return {
        content: [{ type: 'text', text: `❌ Error: ${e.message}` }],
        isError: true
      };
    }
  }
};
207
+
208
/**
 * Fetch the top captured API calls inline (used for `includeApis: true`).
 *
 * Queues a `raw-network-requests` request through the bridge client, waits
 * for the matching `raw-network` result and renders at most three entries.
 *
 * @param {object} bridgeClient - exposes `queueRequest()` and `waitForResult()`.
 * @param {number} [tabId] - optional tab to target; omitted from the request when undefined.
 * @returns {Promise<string[]>} text lines to append to the caller's report.
 *   Never rejects: any failure is reported as a single explanatory line.
 */
async function fetchTopApis(bridgeClient, tabId) {
  const TOP_API_LIMIT = 3;
  const MAX_URL_LENGTH = 80;
  const requestId = `raw-net-${Date.now()}-${Math.floor(Math.random() * 1000)}`;

  try {
    await bridgeClient.queueRequest('raw-network-requests', {
      method: 'all',
      limit: TOP_API_LIMIT,
      includeBody: false,
      id: requestId,
      ...(tabId !== undefined ? { tabId } : {})
    });

    const resultItem = await bridgeClient.waitForResult('raw-network', requestId, 6000 + 2000);

    if (!resultItem) {
      // Be explicit about an empty / missing response.
      return ['\n**📡 API Discovery:** No network data available yet. Try `discover_apis()` after page interaction.'];
    }

    const captured = Array.isArray(resultItem.requests) ? resultItem.requests : [];
    if (captured.length === 0) {
      return [`\n**📡 API Discovery:** No network requests captured for this tab (0 calls). Try \`discover_apis()\` after page interaction.`];
    }

    // Enforce the cap locally as well, in case more entries come back than were asked for.
    const shown = captured.slice(0, TOP_API_LIMIT);
    const total = resultItem.total || captured.length;

    const lines = [`\n**📡 Top API Calls (${shown.length} of ${total} captured):**`];
    shown.forEach((req, i) => {
      // A malformed entry must not turn the whole section into an error.
      const url = typeof req?.url === 'string' ? req.url : '(unknown URL)';
      const shortUrl = url.length > MAX_URL_LENGTH ? url.substring(0, MAX_URL_LENGTH) + '…' : url;
      const method = req?.method || '?';
      const status = req?.status ? `[${req.status}]` : '';
      lines.push(`- ${i + 1}. \`${method}\` ${shortUrl} ${status}`);
    });
    lines.push(`_Use \`discover_apis()\` for full list with filtering._`);
    return lines;
  } catch (e) {
    // Non-critical — but report the error
    return [`\n**📡 API Discovery:** Failed to fetch — ${e.message}`];
  }
}
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Tool: debug_mcp_state
3
+ * MCP server's internal state for debugging — enriched with connection health
4
+ */
5
+
6
+ import { readFileSync } from 'fs';
7
+ import { RateLimiter } from '../utils/rate-limiter.js';
8
+ import { circuitBreakers } from '../utils/circuit-breaker.js';
9
+
10
/**
 * MCP tool definition for `debug_mcp_state`.
 *
 * The handler renders a plain-text report of what the bridge client exposes:
 * server version/port/uptime, open browser tabs (fetched best-effort from the
 * extension), connection health, selected element, pending requests and the
 * status of every registered circuit breaker.
 *
 * Handler parameters:
 *   args         - unused (the tool takes no input).
 *   bridgeClient - bridge client whose state is reported.
 *
 * Handler result: `{ content: [{ type: 'text', text }] }`, produced through
 * `RateLimiter.executeWithRetry` with `maxRetries: 1`.
 */
export const debugMcpStateTool = {
  name: 'debug_mcp_state',
  description: `Show MCP server's internal state for debugging. Also lists open browser tabs for multi-tab targeting.

WORKFLOW POSITION: 🔧 Debug/Setup Tool

WHEN TO CALL:
- Extension appears disconnected or tools are timing out
- Circuit breakers showing OPEN state
- BEFORE using tabId parameter in execute_js / execute_action / capture_screenshot / discover_apis
→ This tool shows all open tab IDs so you can target the right one

RETURNS:
- Connection status + heartbeat health
- Open browser tabs: ID, URL, title (use these IDs for tabId parameter)
- Selected element state
- Circuit breaker states (CLOSED/OPEN/HALF_OPEN)
- Pending request queue

⚠️ Do NOT call during normal single-tab workflows — use only when needed.`,
  inputSchema: {
    type: 'object',
    properties: {},
    required: []
  },
  handler: async (args, bridgeClient) => {
    return await RateLimiter.executeWithRetry(
      'debug_mcp_state',
      async () => {
        // Prefer a live status snapshot when the health object offers one,
        // otherwise use the object as-is, otherwise synthesize a minimal status.
        const health = bridgeClient._connectionHealth
          ? (typeof bridgeClient._connectionHealth.getStatus === 'function'
            ? bridgeClient._connectionHealth.getStatus()
            : bridgeClient._connectionHealth)
          : { connected: bridgeClient.isConnected, stale: false };

        const uptimeSeconds = Math.floor(process.uptime());

        // Get version from package.json. When it cannot be read, report
        // 'unknown' rather than a hard-coded number that goes stale with
        // every release and would misreport the running version.
        let version = 'unknown';
        try {
          const pkgPath = new URL('../../package.json', import.meta.url);
          const pkg = JSON.parse(readFileSync(pkgPath, 'utf8'));
          version = pkg.version ?? 'unknown';
        } catch {
          version = 'unknown';
        }

        // Fetch tab list from extension via bridge daemon (best-effort with 3s timeout)
        let tabList = null;
        try {
          // Random suffix keeps ids distinct for calls within the same millisecond.
          const tabRequestId = `tabs-debug-${Date.now()}-${Math.floor(Math.random() * 1000)}`;
          await bridgeClient.queueRequest('tabs', { id: tabRequestId });
          const tabResult = await bridgeClient.waitForResult('tabs', tabRequestId, 3000);
          if (tabResult && tabResult.tabs) {
            tabList = tabResult.tabs;
          }
        } catch { /* ignore — tab list is best-effort */ }

        let output = `## MCP Server Internal State\n\n`;
        output += `Server Version: ${version}\n`;
        output += `HTTP Port: ${bridgeClient.port}\n`;
        output += `Process Uptime: ${uptimeSeconds}s\n\n`;

        // Open Tabs (for multi-tab targeting)
        output += `## OPEN TABS\n\n`;
        if (tabList && tabList.length > 0) {
          output += `_Pass tabId to execute_js / execute_action / capture_screenshot / discover_apis to target a specific tab._\n\n`;
          tabList.forEach(t => {
            const activeFlag = t.active ? ' ← active' : '';
            const title = t.title ? ` "${t.title.substring(0, 50)}"` : '';
            output += `- [${t.id}] ${t.url}${title}${activeFlag}\n`;
          });
        } else if (!bridgeClient.isConnected) {
          output += `_(Extension not connected — connect first to see tabs)_\n`;
        } else {
          output += `_(No tabs found or extension did not respond in time)_\n`;
        }
        output += `\n`;

        // Connection Health
        output += `## CONNECTION HEALTH\n\n`;
        output += `Status: ${health.connected ? '🟢 Connected' : '🔴 Disconnected'}\n`;
        output += `Stale: ${health.stale ? '⚠️ Yes (no heartbeat)' : '✅ No'}\n`;
        output += `Last Heartbeat: ${health.lastHeartbeat || 'Never'}\n`;
        output += `Heartbeat Count: ${health.heartbeatCount || 0}\n`;
        output += `Connection Uptime: ${health.uptimeHuman || 'N/A'}\n`;
        const reconnectCount = health.reconnectCount || 0;
        const knownSessions = health.knownSessionCount || 0;
        output += `Reconnect Count: ${reconnectCount}`;
        if (knownSessions > 1) {
          output += ` (${knownSessions} tabs/sessions detected — normal for multi-tab browsing)`;
        }
        output += `\n\n`;

        // State Details
        output += `## STATE DETAILS\n\n`;
        const sel = bridgeClient.selectedElement;
        if (sel) {
          output += `Selected Element: Yes\n`;
          output += ` CSS: ${sel.cssSelector}\n`;
          if (sel.stableSelector) {
            const meta = sel.stableSelectorMeta;
            const conf = meta?.confidence ? ` (${meta.confidence} — ${meta.reason})` : '';
            output += ` Stable: ${sel.stableSelector}${conf}\n`;
          } else {
            output += ` Stable: ⚠️ none — element has no stable attribute (data-testid/data-cy/data-qa-id/aria-label/id)\n`;
          }
        } else {
          output += `Selected Element: No\n`;
        }
        // networkTrace is read defensively (like websocketTrace) so a client
        // without trace state still yields a usable debug report.
        output += `Network Trace Matches: ${bridgeClient.networkTrace?.totalMatches || 0}\n`;
        output += `WebSocket Matches: ${bridgeClient.websocketTrace?.totalMatches || 0}\n`;
        output += `Element Value: ${bridgeClient.networkTrace?.elementValue || 'N/A'}\n\n`;

        // Pending Requests
        output += `## PENDING REQUESTS\n\n`;
        output += `JS Execution: ${bridgeClient.jsExecutionRequest ? '⏳ Pending' : '✅ None'}\n`;
        output += `Action Queue: ${bridgeClient.actionExecutionRequest ? '⏳ Pending' : '✅ None'}\n`;
        output += `Screenshot: ${bridgeClient.captureScreenshotRequest ? '⏳ Pending' : '✅ None'}\n`;
        output += `Raw Network: ${(bridgeClient.rawNetworkRequests?.length || 0) > 0 ? `⏳ ${bridgeClient.rawNetworkRequests.length} pending` : '✅ None'}\n`;
        output += `Analyze Page: ${(bridgeClient.analyzePageRequests?.length || 0) > 0 ? `⏳ ${bridgeClient.analyzePageRequests.length} pending` : '✅ None'}\n`;
        output += `Select Element: ${bridgeClient.selectElementRequest ? '⏳ Pending' : '✅ None'}\n\n`;

        // Circuit Breaker Status
        output += `## CIRCUIT BREAKERS\n\n`;
        Object.values(circuitBreakers).forEach(cb => {
          const s = cb.getStatus();
          const icon = s.state === 'CLOSED' ? '🟢' : s.state === 'OPEN' ? '🔴' : '🟡';
          output += `${icon} ${s.name}: ${s.state} (failures: ${s.failures})\n`;
        });
        output += '\n';

        // Recent Events
        if (health.recentEvents && health.recentEvents.length > 0) {
          output += `## RECENT EVENTS\n\n`;
          health.recentEvents.forEach(event => {
            output += `- ${event.time}: ${event.type}\n`;
          });
          output += '\n';
        }

        // Connection Info
        output += `## CONNECTION INFO\n\n`;
        output += `Extension → MCP Bridge: http://localhost:${bridgeClient.port}\n`;
        output += `Heartbeat Endpoint: POST /api/heartbeat\n`;
        output += `Health Check: GET /health`;

        return {
          content: [
            {
              type: 'text',
              text: output
            }
          ]
        };
      },
      {
        maxRetries: 1
      }
    );
  }
};
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Discover APIs Tool
3
+ * Lists captured network calls and saves scraper-relevant metadata to the site profile.
4
+ */
5
+
6
+ import { circuitBreakers } from '../utils/circuit-breaker.js';
7
+ import { softGuardDiscoverApis, formatSoftGuard } from '../utils/workflow-helper.js';
8
+ import { resolveActiveDomain } from '../utils/tab-resolver.js';
9
+
10
/**
 * MCP tool definition for `discover_apis`.
 *
 * The handler queues a `raw-network-requests` request (through the
 * `discoverApis` circuit breaker), waits for the `raw-network` result, orders
 * the captured calls (2xx first, then newest first), pushes every captured
 * endpoint to the bridge for the resolved domain and renders a text listing
 * of the first `limit` calls.
 *
 * Handler parameters:
 *   args         - optional `{ urlPattern, method, limit, includeBody, verbose, tabId, force }`
 *                  (see inputSchema). `limit` is coerced to an integer in [1, 20].
 *   bridgeClient - exposes `isConnected`, `queueRequest()`, `waitForResult()`
 *                  and `addCapturedEndpoint()`.
 *
 * Handler result: `{ content: [{ type: 'text', text }], isError? }`.
 * Failures (including domain resolution) are returned as `isError: true`
 * results rather than thrown.
 */
export const discoverApisTool = {
  name: 'discover_apis',
  description: `This is a tool from the dombridge MCP server.
List all captured API calls on the current page WITHOUT requiring element selection.

WORKFLOW POSITION: First Step - use when you need live API discovery or want to refresh saved endpoint intelligence.

NOTE:
- Successful responses are auto-saved into the site profile.
- Use manage_site_profile({ action: 'load', domain: '...' }) to inspect the saved endpoint dossier afterward.

MULTI-TAB: Call debug_mcp_state() first to get tab IDs, then pass tabId to discover APIs on a specific tab.`,
  inputSchema: {
    type: 'object',
    properties: {
      urlPattern: {
        type: 'string',
        description: 'URL substring or regex pattern to filter results (e.g. "/api/", "graphql", "product")'
      },
      method: {
        type: 'string',
        enum: ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'all'],
        description: 'HTTP method filter. Default: "all"'
      },
      limit: {
        type: 'number',
        description: 'Max number of results to return. Default: 3, max: 20.',
        default: 3,
        maximum: 20
      },
      includeBody: {
        type: 'boolean',
        description: 'Include request/response bodies in verbose mode. Default: false.'
      },
      verbose: {
        type: 'boolean',
        description: 'Return full detail: contentType, timestamp, request/response bodies (default: false)',
        default: false
      },
      tabId: {
        type: 'number',
        description: 'Target tab ID (optional). Omit to use active tab.'
      },
      force: {
        type: 'boolean',
        description: 'Skip soft guard hint about a fresh profile and run discovery anyway.',
        default: false
      }
    }
  },
  handler: async (args, bridgeClient) => {
    if (!bridgeClient.isConnected) {
      return {
        content: [{
          type: 'text',
          text: 'Error: Extension not connected.\nREQUIRED STEPS:\n1. Reload webpage\n2. Ensure the Chrome extension is active'
        }],
        isError: true
      };
    }

    const DEFAULT_LIMIT = 3;
    const MAX_LIMIT = 20;
    const requestId = `raw-net-${Date.now()}-${Math.floor(Math.random() * 1000)}`;
    const { urlPattern, method = 'all', includeBody = false, verbose = false, tabId, force = false } = args || {};

    // Clamp to an integer in [1, MAX_LIMIT]. Without a lower bound, 0 produced
    // a misleading "no calls captured" answer and negative values made
    // slice(0, limit) drop entries from the end of the list.
    const rawLimit = args?.limit ?? DEFAULT_LIMIT;
    const limit = Number.isFinite(rawLimit)
      ? Math.min(Math.max(Math.trunc(rawLimit), 1), MAX_LIMIT)
      : DEFAULT_LIMIT;

    try {
      // Resolved inside the try so a failure here is returned as a structured
      // error result like every other failure of this handler.
      const domain = await resolveActiveDomain(bridgeClient, tabId);
      const softHints = softGuardDiscoverApis(bridgeClient, { domain, force });

      await circuitBreakers.discoverApis.execute(() =>
        bridgeClient.queueRequest('raw-network-requests', { urlPattern, method, limit, includeBody, id: requestId, ...(tabId !== undefined ? { tabId } : {}) })
      );

      const timeout = 20000;
      const resultItem = await bridgeClient.waitForResult('raw-network', requestId, timeout);

      if (!resultItem) {
        return {
          content: [{
            type: 'text',
            text: `Timeout: Extension did not respond within ${timeout}ms. Check if the extension is active and the page has finished loading.`
          }],
          isError: true
        };
      }

      // Sort a copy: the result object belongs to the bridge client and must
      // not be reordered in place. Successful (2xx) calls first, newest first.
      const isOk = (status) => status >= 200 && status < 300;
      const allRequests = [...(resultItem.requests || [])].sort((a, b) => {
        const aRank = isOk(a.status) ? 0 : 1;
        const bRank = isOk(b.status) ? 0 : 1;
        if (aRank !== bRank) return aRank - bRank;
        return (b.timestamp || 0) - (a.timestamp || 0);
      });
      const requests = allRequests.slice(0, limit);

      if (requests.length === 0) {
        return {
          content: [{
            type: 'text',
            text: `No API calls captured yet${urlPattern ? ` matching "${urlPattern}"` : ''}.\n\nTIPS:\n- Interact with the page first (scroll, click)\n- Use execute_action to trigger lazy-loaded requests\n- Try without urlPattern to see the full capture`
          }]
        };
      }

      // Push captures to bridge state via HTTP POST (process boundary)
      // so manage_site_profile (running in this same MCP process) can read
      // them back via bridgeClient.getCapturedEndpoints(domain).
      if (domain) {
        for (const req of allRequests) {
          try {
            await bridgeClient.addCapturedEndpoint(domain, {
              method: req.method,
              url: req.url,
              status: req.status,
              contentType: req.contentType
            });
          } catch (_) {
            // best-effort — don't fail the whole discovery
          }
        }
      }

      // Fall back to the number of entries received when the extension does
      // not report a total, instead of printing "undefined".
      const totalCaptured = resultItem.total ?? allRequests.length;
      const lines = [
        `API discovery: ${requests.length} call${requests.length !== 1 ? 's' : ''} shown (total captured: ${totalCaptured})`,
        urlPattern ? `Filter: "${urlPattern}"` : '',
        ''
      ].filter(Boolean);

      requests.forEach((request, index) => {
        // A capture without a URL must not abort the whole listing.
        const url = typeof request.url === 'string' ? request.url : '(unknown URL)';
        const urlDisplay = url.length > 80 ? `${url.substring(0, 80)}...` : url;
        lines.push(`**${index + 1}. ${request.method} ${urlDisplay}** ${request.status ? `[${request.status}]` : ''}`);

        if (verbose) {
          if (request.contentType) lines.push(` Content-Type: ${request.contentType}`);
          if (request.timestamp) lines.push(` Time: ${new Date(request.timestamp).toLocaleTimeString()}`);
          if (includeBody && request.requestBody) {
            const body = typeof request.requestBody === 'string'
              ? request.requestBody
              : JSON.stringify(request.requestBody, null, 2);
            lines.push(` Request Body: ${body.substring(0, 500)}${body.length > 500 ? '...' : ''}`);
          }
          if (includeBody && request.responseBody) {
            const body = typeof request.responseBody === 'string'
              ? request.responseBody
              : JSON.stringify(request.responseBody, null, 2);
            lines.push(` Response: ${body.substring(0, 500)}${body.length > 500 ? '...' : ''}`);
          }
        }

        lines.push('');
      });

      if (domain) {
        lines.push(`Captured ${allRequests.length} endpoint${allRequests.length !== 1 ? 's' : ''}. Inspect: \`manage_site_profile({ action: "load", domain: "${domain}" })\``);
      }
      if (verbose && !includeBody) {
        lines.push('💡 Add `includeBody: true` to see request/response bodies.');
      }
      lines.push('---');
      lines.push('Next steps:');
      // NOTE(review): analyze_page advertises get_element({ selectorInfo: { css } });
      // confirm which argument shape get_element actually accepts.
      lines.push('- `get_element({ css: "SELECTOR" })` -> pick the field you want to scrape');
      lines.push('- `get_network_trace()` -> connect that field to the correct API response');
      lines.push('- `discover_apis({ limit: 20 })` -> inspect more captured calls');

      const softNote = formatSoftGuard(softHints);
      if (softNote) lines.push(softNote);

      return {
        content: [{ type: 'text', text: lines.join('\n') }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error: ${e.message}` }],
        isError: true
      };
    }
  }
};