@yusufffararatt/dombridge-mcp 2.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +559 -0
  2. package/bin/cli.js +88 -0
  3. package/package.json +54 -0
  4. package/src/bridge/http-server.js +290 -0
  5. package/src/bridge/middleware.js +56 -0
  6. package/src/bridge/routes.js +1003 -0
  7. package/src/bridge-daemon.js +172 -0
  8. package/src/cli/auto-config.js +120 -0
  9. package/src/constants.js +13 -0
  10. package/src/index.js +279 -0
  11. package/src/mcp-bridge.js +136 -0
  12. package/src/metrics/error-codes.js +44 -0
  13. package/src/metrics/index.js +3 -0
  14. package/src/metrics/metrics-db.js +269 -0
  15. package/src/metrics/metrics-recorder.js +240 -0
  16. package/src/metrics/metrics-report.js +146 -0
  17. package/src/profiles/profile-db.js +159 -0
  18. package/src/profiles/profile-enricher.js +333 -0
  19. package/src/profiles/profile-manager.js +563 -0
  20. package/src/profiles/profile-repo.js +183 -0
  21. package/src/state/bridge-client.js +272 -0
  22. package/src/state/bridge-persistence.js +205 -0
  23. package/src/state/cache.js +38 -0
  24. package/src/state/extension-state.js +321 -0
  25. package/src/tools/action_tools.js +218 -0
  26. package/src/tools/analyze-page.js +247 -0
  27. package/src/tools/debug-mcp-state.js +172 -0
  28. package/src/tools/discover-apis.js +186 -0
  29. package/src/tools/execute-js.js +284 -0
  30. package/src/tools/export-session.js +171 -0
  31. package/src/tools/extract-data.js +395 -0
  32. package/src/tools/get-element.js +281 -0
  33. package/src/tools/get-network-trace.js +471 -0
  34. package/src/tools/index.js +110 -0
  35. package/src/tools/manage-site-profile.js +153 -0
  36. package/src/tools/paginate.js +444 -0
  37. package/src/tools/quick-scan.js +418 -0
  38. package/src/tools/screenshot_tools.js +117 -0
  39. package/src/utils/circuit-breaker.js +112 -0
  40. package/src/utils/extract-density.js +21 -0
  41. package/src/utils/logger.js +31 -0
  42. package/src/utils/paginate-detector.js +24 -0
  43. package/src/utils/rate-limiter.js +244 -0
  44. package/src/utils/run-script.js +37 -0
  45. package/src/utils/selector-validator.js +95 -0
  46. package/src/utils/state-validator.js +354 -0
  47. package/src/utils/tab-resolver.js +70 -0
  48. package/src/utils/workflow-helper.js +292 -0
  49. package/src/utils/workflow-state.js +177 -0
@@ -0,0 +1,146 @@
1
+ /**
2
+ * CLI Report Generator
3
+ * Reads from MetricsDB and produces formatted tables/JSON for npm run metrics.
4
+ */
5
/**
 * Converts a look-back window in days into an ISO-8601 cutoff timestamp.
 * @param {number} days - Size of the window, counted back from now.
 * @returns {string} ISO timestamp bound to the `timestamp >= ?` placeholders.
 */
function sinceIso(days) {
  return new Date(Date.now() - days * 86400000).toISOString();
}

/**
 * Formats a failure percentage with one decimal place.
 * @param {number} failCount - Number of failed calls.
 * @param {number} total - Total number of calls.
 * @returns {string} e.g. `'25.0%'`, or `'0%'` when there were no calls.
 */
function formatFailRate(failCount, total) {
  return total > 0 ? ((failCount / total) * 100).toFixed(1) + '%' : '0%';
}

/**
 * Rounds an aggregate to an integer, keeping a genuine 0 as 0.
 * Only a missing (NULL) or non-finite aggregate becomes `null`; a truthiness
 * check here would wrongly report a 0 ms average as "no data".
 * @param {number|null|undefined} value - Aggregate value from a query row.
 * @returns {number|null} Rounded value, or `null` when no value exists.
 */
function roundedOrNull(value) {
  return Number.isFinite(value) ? Math.round(value) : null;
}

/**
 * Builds report data structures from the metrics database.
 *
 * Expects a wrapper object exposing `available` (boolean) and `db`, an object
 * with a `prepare(sql)` method whose statements offer `all()` / `get()`.
 * Every method returns an empty result when `available` is falsy.
 */
export class MetricsReport {
  /**
   * @param {{available: boolean, db: object}} metricsDB - Metrics database wrapper.
   */
  constructor(metricsDB) {
    this.db = metricsDB;
  }

  /**
   * Per-tool call totals, failure rate and durations.
   * @param {number} [days=30] - Look-back window in days.
   * @returns {Array<object>} One row per tool, most failures first.
   */
  getOverview(days = 30) {
    if (!this.db.available) return [];

    const rows = this.db.db.prepare(`
      SELECT tool,
        COUNT(*) as total_calls,
        SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as success_count,
        SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as fail_count,
        AVG(duration_ms) as avg_duration_ms,
        MAX(duration_ms) as max_duration_ms
      FROM tool_calls
      WHERE timestamp >= ?
      GROUP BY tool
      ORDER BY fail_count DESC, total_calls DESC
    `).all(sinceIso(days));

    return rows.map((r) => ({
      tool: r.tool,
      total: r.total_calls,
      success: r.success_count,
      fail: r.fail_count,
      fail_rate: formatFailRate(r.fail_count, r.total_calls),
      avg_duration_ms: roundedOrNull(r.avg_duration_ms),
      max_duration_ms: r.max_duration_ms,
    }));
  }

  /**
   * Breakdown of one tool's calls by action, domain, result type and error code.
   * @param {string} toolName - Value matched against the `tool` column.
   * @param {number} [days=30] - Look-back window in days.
   * @returns {object|null} Detail object, or `null` when the database is unavailable.
   */
  getToolDetail(toolName, days = 30) {
    if (!this.db.available) return null;
    const since = sinceIso(days);

    const byAction = this.db.db.prepare(`
      SELECT action, COUNT(*) as total,
        SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as success_count,
        SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as fail_count,
        AVG(duration_ms) as avg_duration_ms
      FROM tool_calls
      WHERE tool = ? AND timestamp >= ? AND action IS NOT NULL
      GROUP BY action
      ORDER BY fail_count DESC
    `).all(toolName, since);

    const byDomain = this.db.db.prepare(`
      SELECT domain, COUNT(*) as total,
        SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as success_count,
        SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as fail_count
      FROM tool_calls
      WHERE tool = ? AND timestamp >= ?
      GROUP BY domain
      ORDER BY fail_count DESC
    `).all(toolName, since);

    const byResultType = this.db.db.prepare(`
      SELECT result_type, COUNT(*) as count
      FROM tool_calls
      WHERE tool = ? AND timestamp >= ?
      GROUP BY result_type
      ORDER BY count DESC
    `).all(toolName, since);

    const topErrors = this.db.db.prepare(`
      SELECT error_code, COUNT(*) as count
      FROM tool_calls
      WHERE tool = ? AND timestamp >= ? AND error_code IS NOT NULL
      GROUP BY error_code
      ORDER BY count DESC
      LIMIT 10
    `).all(toolName, since);

    return {
      tool: toolName,
      actions: byAction.map((r) => ({
        action: r.action,
        total: r.total,
        success: r.success_count,
        fail: r.fail_count,
        fail_rate: formatFailRate(r.fail_count, r.total),
        avg_duration_ms: roundedOrNull(r.avg_duration_ms),
      })),
      domains: byDomain.map((r) => ({
        // Rows without a domain are grouped together and labelled explicitly.
        domain: r.domain || '(none)',
        total: r.total,
        success: r.success_count,
        fail: r.fail_count,
        fail_rate: formatFailRate(r.fail_count, r.total),
      })),
      result_types: byResultType,
      top_errors: topErrors,
    };
  }

  /**
   * Disconnect/reconnect statistics plus the 20 most recent connection events.
   * @param {number} [days=7] - Look-back window in days.
   * @returns {{disconnections: number, avg_downtime_ms: number, max_downtime_ms: number, total_retries: number, events: Array<object>}}
   */
  getConnectionEvents(days = 7) {
    if (!this.db.available) {
      return { disconnections: 0, avg_downtime_ms: 0, max_downtime_ms: 0, total_retries: 0, events: [] };
    }

    const since = sinceIso(days);

    const stats = this.db.db.prepare(`
      SELECT
        COUNT(CASE WHEN event_type = 'disconnected' THEN 1 END) as disconnections,
        AVG(CASE WHEN event_type = 'reconnected' AND duration_ms IS NOT NULL THEN duration_ms END) as avg_downtime,
        MAX(CASE WHEN event_type = 'reconnected' AND duration_ms IS NOT NULL THEN duration_ms END) as max_downtime,
        SUM(CASE WHEN retry_count IS NOT NULL THEN retry_count ELSE 0 END) as total_retries
      FROM connection_events
      WHERE timestamp >= ?
    `).get(since);

    const recentEvents = this.db.db.prepare(`
      SELECT * FROM connection_events
      WHERE timestamp >= ?
      ORDER BY timestamp DESC
      LIMIT 20
    `).all(since);

    return {
      disconnections: stats.disconnections || 0,
      // Aggregates are NULL when no row matched; report those as 0.
      avg_downtime_ms: roundedOrNull(stats.avg_downtime) ?? 0,
      max_downtime_ms: stats.max_downtime || 0,
      total_retries: stats.total_retries || 0,
      events: recentEvents,
    };
  }

  /**
   * The 50 most frequent (error_code, tool, domain) combinations.
   * @param {number} [days=30] - Look-back window in days.
   * @returns {Array<object>} Raw query rows, most frequent first.
   */
  getErrors(days = 30) {
    if (!this.db.available) return [];

    return this.db.db.prepare(`
      SELECT error_code, tool, domain, COUNT(*) as count,
        AVG(duration_ms) as avg_duration_ms
      FROM tool_calls
      WHERE timestamp >= ? AND error_code IS NOT NULL
      GROUP BY error_code, tool, domain
      ORDER BY count DESC
      LIMIT 50
    `).all(sinceIso(days));
  }
}
@@ -0,0 +1,159 @@
1
+ /**
2
+ * ProfileDB — SQLite-backed storage for site profiles.
3
+ *
4
+ * Pattern mirror: mcp-server/src/metrics/metrics-db.js
5
+ * - WAL mode for concurrent reads/writes
6
+ * - Graceful degradation if init fails
7
+ * - better-sqlite3 prepared statements
8
+ */
9
+ import Database from 'better-sqlite3';
10
+ import { mkdirSync, existsSync } from 'fs';
11
+ import { dirname } from 'path';
12
+
13
// One row per domain. The *_json columns are TEXT, and `version` starts at 1.
const CREATE_SITE_PROFILES = `
CREATE TABLE IF NOT EXISTS site_profiles (
domain TEXT PRIMARY KEY,
framework TEXT,
page_type TEXT,
notes TEXT,
stable_selectors_json TEXT,
auth_info_json TEXT,
version INTEGER DEFAULT 1,
first_seen_at INTEGER,
last_seen_at INTEGER
)`;

// Endpoints observed for a domain. (domain, method, url) is unique, and rows are
// removed together with their parent profile (ON DELETE CASCADE).
const CREATE_ENDPOINTS = `
CREATE TABLE IF NOT EXISTS endpoints (
id INTEGER PRIMARY KEY,
domain TEXT NOT NULL,
method TEXT NOT NULL,
url TEXT NOT NULL,
status INTEGER,
content_type TEXT,
first_seen_at INTEGER,
last_seen_at INTEGER,
hit_count INTEGER DEFAULT 1,
FOREIGN KEY (domain) REFERENCES site_profiles(domain) ON DELETE CASCADE,
UNIQUE(domain, method, url)
)`;

// Data paths recorded for a domain. Unlike `endpoints`, this table declares no
// uniqueness constraint; rows also cascade-delete with the parent profile.
const CREATE_PATHS = `
CREATE TABLE IF NOT EXISTS paths (
id INTEGER PRIMARY KEY,
domain TEXT NOT NULL,
path TEXT NOT NULL,
source_key TEXT,
example_value TEXT,
FOREIGN KEY (domain) REFERENCES site_profiles(domain) ON DELETE CASCADE
)`;

// Secondary indexes on the `domain` column of each child table.
const CREATE_IDX_ENDPOINTS = `CREATE INDEX IF NOT EXISTS idx_endpoints_domain ON endpoints(domain)`;
const CREATE_IDX_PATHS = `CREATE INDEX IF NOT EXISTS idx_paths_domain ON paths(domain)`;
53
+
54
/**
 * SQLite-backed store for site profiles, their endpoints and their data paths.
 *
 * The constructor opens (and if necessary creates) the database file, switches
 * it to WAL journaling, enables foreign-key enforcement and creates the schema.
 * Errors raised while opening or migrating propagate to the caller.
 */
export class ProfileDB {
  /**
   * @param {string} dbPath - Path of the SQLite file; its directory is created if missing.
   */
  constructor(dbPath) {
    const dir = dirname(dbPath);
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

    this.db = new Database(dbPath);
    this.db.pragma('journal_mode = WAL');
    // Foreign keys must be on for the ON DELETE CASCADE clauses to take effect.
    this.db.pragma('foreign_keys = ON');
    this._migrate();
  }

  /** Creates all tables and indexes; every statement is idempotent (IF NOT EXISTS). */
  _migrate() {
    this.db.exec(CREATE_SITE_PROFILES);
    this.db.exec(CREATE_ENDPOINTS);
    this.db.exec(CREATE_PATHS);
    this.db.exec(CREATE_IDX_ENDPOINTS);
    this.db.exec(CREATE_IDX_PATHS);
  }

  /** Closes the underlying database handle. */
  close() {
    this.db.close();
  }

  // ── Inspection helpers (used by tests) ─────────────────────────────

  /**
   * @returns {string[]} Names of all tables, sorted alphabetically.
   */
  listTables() {
    return this.db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
      .all()
      .map((row) => row.name);
  }

  /**
   * Column metadata for a table.
   *
   * The table name is bound as a parameter to the table-valued form of the
   * pragma instead of being spliced into the SQL text, so an arbitrary string
   * can never be executed as SQL.
   * @param {string} table - Table name.
   * @returns {Array<object>} One row per column (empty when the table does not exist).
   */
  getTableInfo(table) {
    return this.db.prepare('SELECT * FROM pragma_table_info(?)').all(table);
  }

  /**
   * Index metadata for a table; the name is bound as a parameter (see getTableInfo).
   * @param {string} table - Table name.
   * @returns {Array<object>} One row per index.
   */
  getIndexList(table) {
    return this.db.prepare('SELECT * FROM pragma_index_list(?)').all(table);
  }

  // ── Profile CRUD ───────────────────────────────────────────────────

  /**
   * Inserts a profile or updates the existing row for the domain.
   *
   * On update, `framework` and `page_type` keep their stored value when the new
   * value is null, `notes` is always overwritten (an omitted `notes` is stored
   * as ''), `version` is incremented and `last_seen_at` is refreshed.
   * @param {{domain: string, framework?: string, pageType?: string, notes?: string}} profile
   */
  upsertProfile({ domain, framework, pageType, notes }) {
    const now = Date.now();
    this.db.prepare(`
      INSERT INTO site_profiles (domain, framework, page_type, notes, version, first_seen_at, last_seen_at)
      VALUES (?, ?, ?, ?, 1, ?, ?)
      ON CONFLICT(domain) DO UPDATE SET
        framework = COALESCE(excluded.framework, framework),
        page_type = COALESCE(excluded.page_type, page_type),
        notes = excluded.notes,
        version = version + 1,
        last_seen_at = excluded.last_seen_at
    `).run(domain, framework ?? null, pageType ?? null, notes ?? '', now, now);
  }

  /**
   * @param {string} domain
   * @returns {object|null} The stored row, or `null` when the domain is unknown.
   */
  getProfile(domain) {
    const row = this.db
      .prepare('SELECT * FROM site_profiles WHERE domain = ?')
      .get(domain);
    return row || null;
  }

  /**
   * Deletes a profile row; the schema cascades the delete to endpoints and paths.
   * @param {string} domain
   */
  deleteProfile(domain) {
    this.db.prepare('DELETE FROM site_profiles WHERE domain = ?').run(domain);
  }

  // ── Endpoint CRUD ──────────────────────────────────────────────────

  /**
   * Inserts an endpoint, or on a (domain, method, url) conflict refreshes
   * status/content type, updates `last_seen_at` and increments `hit_count`.
   * The method defaults to GET and is upper-cased; missing timestamps default to now.
   * @param {string} domain
   * @param {{method?: string, url: string, status?: number, contentType?: string, firstSeenAt?: number, lastSeenAt?: number}} endpoint
   */
  upsertEndpoint(domain, { method, url, status, contentType, firstSeenAt, lastSeenAt }) {
    const now = Date.now();
    this.db.prepare(`
      INSERT INTO endpoints (domain, method, url, status, content_type, first_seen_at, last_seen_at, hit_count)
      VALUES (?, ?, ?, ?, ?, ?, ?, 1)
      ON CONFLICT(domain, method, url) DO UPDATE SET
        status = COALESCE(excluded.status, status),
        content_type = COALESCE(excluded.content_type, content_type),
        last_seen_at = excluded.last_seen_at,
        hit_count = hit_count + 1
    `).run(
      domain,
      (method || 'GET').toUpperCase(),
      url,
      status ?? null,
      contentType ?? null,
      firstSeenAt ?? now,
      lastSeenAt ?? now
    );
  }

  /**
   * @param {string} domain
   * @returns {number} Number of endpoint rows stored for the domain.
   */
  countEndpoints(domain) {
    return this.db
      .prepare('SELECT COUNT(*) AS c FROM endpoints WHERE domain = ?')
      .get(domain).c;
  }

  // ── Path CRUD ──────────────────────────────────────────────────────

  /**
   * Records a data path for a domain.
   *
   * NOTE(review): despite the name this is a plain INSERT, and the `paths`
   * table declares no uniqueness constraint, so repeated calls with the same
   * path add new rows. Confirm whether de-duplication is wanted.
   * @param {string} domain
   * @param {{path: string, sourceKey?: string, exampleValue?: string}} entry
   */
  upsertPath(domain, { path, sourceKey, exampleValue }) {
    this.db.prepare(`
      INSERT INTO paths (domain, path, source_key, example_value)
      VALUES (?, ?, ?, ?)
    `).run(domain, path, sourceKey ?? null, exampleValue ?? null);
  }

  /**
   * @param {string} domain
   * @returns {number} Number of path rows stored for the domain.
   */
  countPaths(domain) {
    return this.db
      .prepare('SELECT COUNT(*) AS c FROM paths WHERE domain = ?')
      .get(domain).c;
  }
}
@@ -0,0 +1,333 @@
1
+ /**
2
+ * Profile Enricher
3
+ * Converts raw tool results into profile-friendly structured data.
4
+ */
5
+
6
+ import { saveProfile } from './profile-manager.js';
7
+ import { markFieldFresh } from '../utils/workflow-state.js';
8
+
9
// Request header names (matched whole, case-insensitively) that detectAuthInfoFromHeaders treats as auth-related.
const AUTH_HEADER_PATTERN = /^(authorization|x-api-key|x-auth-token|x-access-token|bearer|cookie|set-cookie|x-csrf-token|x-session)$/i;
// Key-name fragments; sanitizeForProfile replaces the value of any matching key with '***'.
const SENSITIVE_VALUE_PATTERN = /token|secret|password|credential|api[_-]?key|session[_-]?id|auth[_-]?token|bearer|private/i;
// Substrings that raise the score of a path (scorePath) or URL (scoreEndpoint).
const PRIORITY_SEGMENTS = ['price', 'amount', 'currency', 'title', 'name', 'slug', 'sku', 'stock', 'image', 'id', 'rating', 'pagination', 'page', 'next', 'results', 'items'];
// Maximum number of sample paths pickImportantPaths keeps per source.
const MAX_SAMPLE_PATHS_PER_SOURCE = 8;
// Maximum number of known paths extractFromExtractData returns.
const MAX_SAVED_KNOWN_PATHS = 10;
// Maximum number of endpoints extractFromDiscoverApis returns.
const MAX_SAVED_ENDPOINTS = 10;
15
+
16
/**
 * Current time as an ISO-8601 string.
 * @returns {string} e.g. `2024-01-31T12:00:00.000Z`.
 */
function isoNow() {
  const current = new Date();
  return current.toISOString();
}
19
+
20
/**
 * Returns a deep copy of `data` in which the value of every key matching
 * SENSITIVE_VALUE_PATTERN is replaced by '***'.
 *
 * Primitives and null are returned unchanged; arrays are mapped element by
 * element; objects are rebuilt key by key.
 * @param {*} data - Value to sanitize.
 * @returns {*} Sanitized copy (or the input itself for non-objects).
 */
function sanitizeForProfile(data) {
  const isContainer = Boolean(data) && typeof data === 'object';
  if (!isContainer) return data;

  if (Array.isArray(data)) {
    return data.map((item) => sanitizeForProfile(item));
  }

  const cleaned = {};
  for (const [key, value] of Object.entries(data)) {
    // The recursive call covers arrays, nested objects and primitives alike.
    cleaned[key] = SENSITIVE_VALUE_PATTERN.test(key) ? '***' : sanitizeForProfile(value);
  }
  return cleaned;
}
38
+
39
/**
 * Builds a timestamped auto-note record.
 * @param {string} source - Name of the tool that produced the note.
 * @param {string} text - Note body.
 * @param {string} [kind='summary'] - Note category.
 * @returns {{source: string, kind: string, timestamp: string, text: string}}
 */
function note(source, text, kind = 'summary') {
  const timestamp = isoNow();
  return { source, kind, timestamp, text };
}
47
+
48
/**
 * Heuristic relevance score for a leaf-path entry.
 *
 * +10 per PRIORITY_SEGMENTS substring found in the lower-cased path, +3 when
 * the entry has a truthy example, +2 for a string/number/boolean type, minus a
 * small penalty proportional to the path length.
 * @param {{path?: string, example?: *, type?: string}} entry
 * @returns {number} Higher means more relevant.
 */
function scorePath(entry) {
  const normalized = String(entry?.path || '').toLowerCase();

  let hits = 0;
  for (const segment of PRIORITY_SEGMENTS) {
    if (normalized.includes(segment)) hits += 1;
  }

  const hasExample = Boolean(entry?.example);
  const isPrimitiveType = ['string', 'number', 'boolean'].includes(entry?.type);
  const bonus = (hasExample ? 3 : 0) + (isPrimitiveType ? 2 : 0);

  return hits * 10 + bonus - normalized.length / 200;
}
55
+
56
/**
 * Heuristic relevance score for a captured request.
 *
 * +10 per PRIORITY_SEGMENTS substring in the lower-cased URL, +8 for a JSON
 * content type, +6 when a GraphQL operation name is present, +5 for a 2xx
 * status, -1 per `?` or `&` in the URL, minus a small URL-length penalty.
 * @param {object} request - Captured request record.
 * @returns {number} Higher means more relevant.
 */
function scoreEndpoint(request) {
  const address = String(request?.url || '').toLowerCase();
  const mime = String(request?.contentType || '').toLowerCase();

  let points = 0;
  for (const segment of PRIORITY_SEGMENTS) {
    if (address.includes(segment)) points += 10;
  }
  if (mime.includes('json')) points += 8;
  if (request?.graphql?.operationName) points += 6;
  if (request?.status >= 200 && request?.status < 300) points += 5;

  const separators = address.match(/[?&]/g) || [];
  points -= separators.length;

  return points - address.length / 300;
}
66
+
67
/**
 * Converts raw leaf-path entries into scored profile entries and keeps the
 * MAX_SAMPLE_PATHS_PER_SOURCE highest-scoring ones.
 *
 * Example values are truncated to 80 characters. When the path itself matches
 * SENSITIVE_VALUE_PATTERN (e.g. `auth.accessToken`) the example is replaced by
 * '***': key-based sanitizing cannot catch it later, because the secret would
 * sit under the neutral key `example`.
 * @param {Array<{path: string, type?: string, example?: *}>} paths - Leaf-path entries; null/undefined is treated as empty.
 * @param {string} sourceKey - Identifier of the source the paths came from.
 * @param {string} originTool - Name of the tool that produced the paths.
 * @returns {Array<object>} Entries sorted by descending confidence.
 */
function pickImportantPaths(paths, sourceKey, originTool) {
  return (paths || [])
    .map((entry) => {
      let example = null;
      if (entry.example) {
        example = SENSITIVE_VALUE_PATTERN.test(String(entry.path ?? ''))
          ? '***'
          : String(entry.example).substring(0, 80);
      }

      return {
        path: entry.path,
        type: entry.type || 'unknown',
        example,
        sourceKey,
        originTool,
        // Scored on the raw entry, so redaction does not change the ranking.
        confidence: scorePath(entry)
      };
    })
    .sort((a, b) => (b.confidence || 0) - (a.confidence || 0))
    .slice(0, MAX_SAMPLE_PATHS_PER_SOURCE);
}
80
+
81
/**
 * Finds the first request header whose name matches AUTH_HEADER_PATTERN and
 * classifies it.
 * @param {Array<{requestHeaders?: object}>} requests - Captured requests.
 * @returns {{type: 'cookie'|'bearer'|'apiKey', headerName: string}|null}
 *   `null` when no auth-related header name is present.
 */
function detectAuthInfoFromHeaders(requests) {
  const headerName = requests
    .flatMap((request) => Object.keys(request.requestHeaders || {}))
    .find((candidate) => AUTH_HEADER_PATTERN.test(candidate));

  if (headerName === undefined) return null;

  let type = 'apiKey';
  if (/cookie/i.test(headerName)) {
    type = 'cookie';
  } else if (/authorization/i.test(headerName)) {
    type = 'bearer';
  }

  return { type, headerName };
}
95
+
96
/**
 * Summarizes an analyze_page result into coarse page characteristics.
 *
 * The page is classified 'SSR' when `ssrData` has at least one key, otherwise
 * 'SPA' when `dataContainers` is a non-empty array, otherwise 'unknown'.
 * @param {object} result - Raw analyze_page result.
 * @returns {{type: string, signals: string[], forms: number, inputs: number, buttons: number, links: number, domElements: number|null, maxDepth: number|null}}
 */
function inferPageCharacteristics(result) {
  const { ssrData, dataContainers, elements, domStats } = result;

  const signals = ssrData ? Object.keys(ssrData) : [];

  let type = 'unknown';
  if (signals.length > 0) {
    type = 'SSR';
  } else if (Array.isArray(dataContainers) && dataContainers.length > 0) {
    type = 'SPA';
  }

  return {
    type,
    signals,
    forms: elements?.forms ?? 0,
    inputs: elements?.inputs ?? 0,
    buttons: elements?.buttons ?? 0,
    links: elements?.links ?? 0,
    domElements: domStats?.totalElements ?? null,
    maxDepth: domStats?.maxDepth ?? null
  };
}
112
+
113
/**
 * Turns an analyze_page result into a partial profile: frameworks, page
 * characteristics, an optional pagination hint, a timestamp and one summary note.
 * @param {object|null|undefined} result - Raw analyze_page result.
 * @returns {object} Partial profile, or `{}` when there is no result.
 */
function extractFromAnalyzePage(result) {
  if (!result) return {};

  const frameworks = Array.isArray(result.frameworks) ? result.frameworks : [];
  const pageCharacteristics = inferPageCharacteristics(result);
  const ssrKeys = pageCharacteristics.signals || [];

  const containers = Array.isArray(result.dataContainers) ? result.dataContainers : [];
  const containerSelectors = containers.slice(0, 8).map(({ selector }) => selector);

  // Pagination is only hinted at by heading text, so it is recorded as a DOM hint.
  const headingText = JSON.stringify(result.headings || []);
  const hasPaginationHint = /paginat|load more|next/i.test(headingText);

  const partial = {
    framework: frameworks,
    pageCharacteristics,
    paginationPatterns: hasPaginationHint ? { type: 'dom-hint', source: 'analyze_page' } : null,
    profileMeta: { lastAnalyzedAt: isoNow() }
  };

  // Optional segments are `false` when absent and dropped before joining.
  const summary = [
    frameworks.length > 0 && `frameworks=${frameworks.join(', ')}`,
    `pageType=${pageCharacteristics.type}`,
    ssrKeys.length > 0 && `ssr=${ssrKeys.join(', ')}`,
    containerSelectors.length > 0 && `containers=${containerSelectors.slice(0, 3).join(', ')}`
  ]
    .filter(Boolean)
    .join(' | ');

  partial.autoNotes = [note('analyze_page', summary)];
  return partial;
}
143
+
144
/**
 * Turns a discover_apis result into a partial profile: the best-scoring
 * successful endpoints, a content-type histogram, detected auth info, a
 * timestamp and (when endpoints were kept) a summary note.
 *
 * Malformed individual records do not abort the enrichment: an unparsable
 * request timestamp falls back to the current time, and non-string content
 * types are left out of the histogram.
 * @param {{requests?: Array<object>}|null|undefined} raw - Raw discover_apis result.
 * @returns {object} Partial profile, or `{}` when no requests were captured.
 */
function extractFromDiscoverApis(raw) {
  const requests = raw?.requests || [];
  if (requests.length === 0) return {};

  // `new Date(x).toISOString()` throws a RangeError for an invalid date.
  const toIsoOrNow = (timestamp) => {
    if (!timestamp) return isoNow();
    const parsed = new Date(timestamp);
    return Number.isNaN(parsed.getTime()) ? isoNow() : parsed.toISOString();
  };

  const successful = requests
    .filter((request) => request.status >= 200 && request.status < 300)
    .sort((a, b) => scoreEndpoint(b) - scoreEndpoint(a))
    .slice(0, MAX_SAVED_ENDPOINTS)
    .map((request) => ({
      url: request.url,
      method: request.method || 'GET',
      status: request.status,
      contentType: request.contentType || null,
      originTool: 'discover_apis',
      originDescription: 'captured during API discovery',
      lastSeenAt: toIsoOrNow(request.timestamp),
      operationName: request.graphql?.operationName || null,
      operationType: request.graphql?.operationType || null
    }));

  // Count requests per base content type (parameters such as charset stripped).
  const responseHeaderPatterns = requests
    .map((request) => request.contentType)
    .filter((contentType) => typeof contentType === 'string' && contentType.length > 0)
    .reduce((acc, contentType) => {
      const base = contentType.split(';')[0].trim();
      acc[base] = (acc[base] || 0) + 1;
      return acc;
    }, {});

  const partial = {
    apiEndpoints: successful,
    responseHeaderPatterns,
    authInfo: detectAuthInfoFromHeaders(requests),
    profileMeta: {
      lastDiscoveryAt: isoNow()
    }
  };

  if (successful.length > 0) {
    partial.autoNotes = [
      note(
        'discover_apis',
        `${successful.length} endpoint(s) captured; top content types=${Object.keys(responseHeaderPatterns).slice(0, 3).join(', ') || 'unknown'}`
      )
    ];
  }

  return partial;
}
193
+
194
/**
 * Turns an extract_data result into a partial profile.
 *
 * Without any sources, the result is a stub schema with guidance text plus any
 * captured endpoints. Otherwise each source is summarized, its most important
 * leaf paths are sampled, and the MAX_SAVED_KNOWN_PATHS highest-confidence
 * samples across all sources are returned as `knownPaths`.
 * @param {object|null|undefined} result - Raw extract_data result.
 * @returns {object} Partial profile, or `{}` when there is no result.
 */
function extractFromExtractData(result) {
  if (!result) return {};

  const hasSources = Array.isArray(result.sources) && result.sources.length > 0;
  if (!hasSources) {
    const capturedEndpoints = Array.isArray(result.capturedEndpoints) ? result.capturedEndpoints : [];
    return {
      dataSchema: {
        sources: [],
        notes: 'No embedded SSR data. Site is API-driven — use discover_apis() + get_network_trace() to find data sources.'
      },
      apiEndpoints: capturedEndpoints,
      lastUpdated: isoNow()
    };
  }

  const sources = result.sources.map((source) => {
    const schemaKeys = Object.keys(source.schema || {});
    const leafPaths = source.leafPaths;

    return {
      key: source.key,
      type: source.type,
      isArray: source.isArray || false,
      arrayLength: source.arrayLength || null,
      topLevelKeyCount: source.topLevelKeyCount || null,
      // Keys starting with a double underscore are left out; at most 20 are kept.
      topLevelKeys: schemaKeys.filter((key) => !key.startsWith('__')).slice(0, 20),
      leafPathCount: Array.isArray(leafPaths) ? leafPaths.length : 0,
      samplePaths: pickImportantPaths(leafPaths || [], source.key, 'extract_data'),
      capturedAt: isoNow()
    };
  });

  const topKnownPaths = sources
    .flatMap((source) => source.samplePaths)
    .sort((a, b) => (b.confidence || 0) - (a.confidence || 0))
    .slice(0, MAX_SAVED_KNOWN_PATHS);

  const primaryKeys = sources.slice(0, 3).map((source) => source.key).join(', ');

  return {
    dataSchema: { sources },
    knownPaths: topKnownPaths,
    autoNotes: [
      note(
        'extract_data',
        `${sources.length} source(s) captured; saved top ${topKnownPaths.length} path(s); primary=${primaryKeys}`
      )
    ]
  };
}
242
+
243
/**
 * Turns a get_network_trace result into a partial profile built from matches
 * with a confidence of at least 80.
 *
 * For each match the data path is the first entry of `matchInfo.paths`; when
 * that list is missing or empty the match's own `dataPath` is used instead, so
 * an empty list no longer yields an `undefined` path.
 * @param {{matches?: Array<object>, elementLabel?: string}|null|undefined} raw - Raw get_network_trace result.
 * @returns {object} Partial profile, or `{}` when no match is confident enough.
 */
function extractFromNetworkTrace(raw) {
  // Confidence may sit on matchInfo or directly on the match.
  const confidenceOf = (match) => match.matchInfo?.confidence ?? match.confidence;

  const matches = (raw?.matches || []).filter((match) => (confidenceOf(match) ?? 0) >= 80);

  if (matches.length === 0) return {};

  const apiEndpoints = matches
    .map((match) => {
      const paths = match.matchInfo?.paths;
      const firstPath = Array.isArray(paths) ? paths[0] : undefined;

      return {
        url: match.url || match.requestUrl,
        method: match.method || 'GET',
        status: match.status || match.statusCode || null,
        confidence: confidenceOf(match) ?? null,
        dataPath: firstPath || match.dataPath || null,
        originTool: 'get_network_trace',
        originDescription: raw?.elementLabel
          ? `matched selected element "${raw.elementLabel}"`
          : 'high-confidence DOM/network match'
      };
    })
    .filter((endpoint) => endpoint.url);

  const knownPaths = apiEndpoints
    .filter((endpoint) => endpoint.dataPath)
    .map((endpoint) => ({
      path: endpoint.dataPath,
      type: 'matched-response-path',
      example: raw?.elementLabel || null,
      sourceKey: endpoint.url,
      originTool: 'get_network_trace',
      confidence: endpoint.confidence ?? null
    }));

  return {
    apiEndpoints,
    knownPaths,
    authInfo: detectAuthInfoFromHeaders(matches),
    autoNotes: [
      note(
        'get_network_trace',
        `${apiEndpoints.length} high-confidence match(es); matched paths=${knownPaths.slice(0, 3).map((path) => path.path).join(', ') || 'none'}`
      )
    ]
  };
}
290
+
291
/**
 * Builds the sanitized partial profile for one tool result.
 *
 * Unknown tool names produce an empty object. For `csp_detection` the status
 * is the result's `level`, falling back to 'strict'.
 * @param {string} toolName - Name of the tool that produced `rawResult`.
 * @param {object} rawResult - Raw tool output.
 * @returns {object} Partial profile after sanitizeForProfile.
 */
export function buildProfileEnrichment(toolName, rawResult) {
  // A Map (rather than a plain object) so only the listed names ever match.
  const extractors = new Map([
    ['analyze_page', extractFromAnalyzePage],
    ['discover_apis', extractFromDiscoverApis],
    ['extract_data', extractFromExtractData],
    ['get_network_trace', extractFromNetworkTrace],
    ['csp_detection', (raw) => ({ cspStatus: raw?.level || 'strict' })]
  ]);

  const extract = extractors.get(toolName);
  const partial = extract ? extract(rawResult) : {};

  return sanitizeForProfile(partial);
}
316
+
317
/**
 * Fire-and-forget profile enrichment for a tool result.
 *
 * The work is deferred to a microtask so the calling tool returns first. Any
 * failure is swallowed on purpose: enrichment is best-effort and must never
 * break tool execution.
 *
 * `saveProfile` is awaited so that, if it returns a promise, a rejection is
 * caught here instead of surfacing as an unhandled rejection, and fields are
 * only marked fresh once the save has succeeded. Awaiting is harmless when
 * `saveProfile` is synchronous.
 * @param {string} domain - Domain the profile belongs to.
 * @param {string} toolName - Name of the tool that produced `rawResult`.
 * @param {object} rawResult - Raw tool output.
 * @returns {void}
 */
export function enrichProfile(domain, toolName, rawResult) {
  if (!domain || !toolName || !rawResult) return;

  Promise.resolve().then(async () => {
    try {
      const partial = buildProfileEnrichment(toolName, rawResult);
      const fields = Object.keys(partial);
      if (fields.length === 0) return;

      await saveProfile(domain, partial);
      for (const field of fields) {
        markFieldFresh(domain, field);
      }
    } catch {
      // Enrichment must not break tool execution.
    }
  });
}