@yusufffararatt/dombridge-mcp 2.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +559 -0
- package/bin/cli.js +88 -0
- package/package.json +54 -0
- package/src/bridge/http-server.js +290 -0
- package/src/bridge/middleware.js +56 -0
- package/src/bridge/routes.js +1003 -0
- package/src/bridge-daemon.js +172 -0
- package/src/cli/auto-config.js +120 -0
- package/src/constants.js +13 -0
- package/src/index.js +279 -0
- package/src/mcp-bridge.js +136 -0
- package/src/metrics/error-codes.js +44 -0
- package/src/metrics/index.js +3 -0
- package/src/metrics/metrics-db.js +269 -0
- package/src/metrics/metrics-recorder.js +240 -0
- package/src/metrics/metrics-report.js +146 -0
- package/src/profiles/profile-db.js +159 -0
- package/src/profiles/profile-enricher.js +333 -0
- package/src/profiles/profile-manager.js +563 -0
- package/src/profiles/profile-repo.js +183 -0
- package/src/state/bridge-client.js +272 -0
- package/src/state/bridge-persistence.js +205 -0
- package/src/state/cache.js +38 -0
- package/src/state/extension-state.js +321 -0
- package/src/tools/action_tools.js +218 -0
- package/src/tools/analyze-page.js +247 -0
- package/src/tools/debug-mcp-state.js +172 -0
- package/src/tools/discover-apis.js +186 -0
- package/src/tools/execute-js.js +284 -0
- package/src/tools/export-session.js +171 -0
- package/src/tools/extract-data.js +395 -0
- package/src/tools/get-element.js +281 -0
- package/src/tools/get-network-trace.js +471 -0
- package/src/tools/index.js +110 -0
- package/src/tools/manage-site-profile.js +153 -0
- package/src/tools/paginate.js +444 -0
- package/src/tools/quick-scan.js +418 -0
- package/src/tools/screenshot_tools.js +117 -0
- package/src/utils/circuit-breaker.js +112 -0
- package/src/utils/extract-density.js +21 -0
- package/src/utils/logger.js +31 -0
- package/src/utils/paginate-detector.js +24 -0
- package/src/utils/rate-limiter.js +244 -0
- package/src/utils/run-script.js +37 -0
- package/src/utils/selector-validator.js +95 -0
- package/src/utils/state-validator.js +354 -0
- package/src/utils/tab-resolver.js +70 -0
- package/src/utils/workflow-helper.js +292 -0
- package/src/utils/workflow-state.js +177 -0
|
@@ -0,0 +1,563 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Site Profile Manager
|
|
3
|
+
* Stores domain-scoped scraper profiles under mcp-server/profiles/.
|
|
4
|
+
*
|
|
5
|
+
* Profiles are both:
|
|
6
|
+
* - a machine-usable cache for future tool runs
|
|
7
|
+
* - a human-readable dossier for scraper maintenance
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { readFileSync, writeFileSync, mkdirSync, readdirSync, existsSync } from 'fs';
|
|
11
|
+
import { join, dirname } from 'path';
|
|
12
|
+
import { fileURLToPath } from 'url';
|
|
13
|
+
|
|
14
|
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Directory holding the per-domain profile JSON files: `profiles`, two levels
// above the directory that contains this module.
const PROFILES_DIR = join(__dirname, '../../profiles');
|
|
16
|
+
|
|
17
|
+
// Upper bound on the number of API endpoints kept per profile (mergeEndpoints).
const MAX_ENDPOINTS = 100;

// mergeAutoNotes keeps only this many of the most recently appended notes.
const MAX_AUTO_NOTES = 50;

// Upper bound on the number of known paths kept per profile (mergeKnownPaths).
const MAX_KNOWN_PATHS = 250;

// Upper bound on sample paths stored per data source (normalizeDataSource).
const MAX_SAMPLE_PATHS = 20;

// saveProfile never writes a `version` lower than this value.
const PROFILE_VERSION = 2;

// Cap on endpoints whose originTool is 'discover_apis' (mergeEndpoints).
const MAX_DISCOVER_APIS_ENDPOINTS = 10;

// Cap on known paths whose originTool is 'extract_data' (mergeKnownPaths).
const MAX_EXTRACT_DATA_KNOWN_PATHS = 10;
|
|
24
|
+
|
|
25
|
+
/**
 * Maps every profile field whose freshness is tracked in `fieldTimestamps`
 * to the list of incoming property names that count as an update to it
 * (consumed by mergeFieldTimestamps). At present each field is its own and
 * only alias, so the table is generated from the list of field names.
 */
export const PROFILE_FIELD_ALIASES = Object.fromEntries(
  [
    'framework',
    'pageCharacteristics',
    'authInfo',
    'apiEndpoints',
    'knownPaths',
    'dataSchema',
    'stableSelectors',
    'paginationPatterns',
    'cspStatus',
    'rateLimit',
    'botProtection',
    'responseHeaderPatterns',
    'autoNotes'
  ].map((field) => [field, [field]])
);
|
|
40
|
+
|
|
41
|
+
/**
 * Creates the profiles directory (including missing parents) when it does
 * not exist yet. Does nothing if the path is already present.
 */
function ensureProfilesDir() {
  if (existsSync(PROFILES_DIR)) return;
  mkdirSync(PROFILES_DIR, { recursive: true });
}
|
|
46
|
+
|
|
47
|
+
/**
 * Builds the JSON file name for a domain.
 * Every character other than ASCII letters, digits, `.` and `-` is replaced
 * with `_`, so the result contains no path separators.
 *
 * @param {string} domain
 * @returns {string} sanitized name ending in `.json`
 */
function domainToFilename(domain) {
  const safeName = domain.replace(/[^a-zA-Z0-9.-]/g, '_');
  return `${safeName}.json`;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Resolves the on-disk location of a domain's profile file.
 *
 * @param {string} domain
 * @returns {string} path inside the profiles directory
 */
function profilePath(domain) {
  const fileName = domainToFilename(domain);
  return join(PROFILES_DIR, fileName);
}
|
|
54
|
+
|
|
55
|
+
/**
 * @returns {string} the current time as an ISO 8601 string (UTC, as produced
 *   by Date.prototype.toISOString).
 */
function isoNow() {
  const current = new Date();
  return current.toISOString();
}
|
|
58
|
+
|
|
59
|
+
/**
 * Returns `value` unchanged when it is an array, otherwise a new empty array.
 *
 * @param {*} value
 * @returns {Array}
 */
function asArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Returns `value` unchanged when it is a non-null, non-array object,
 * otherwise a new empty object.
 *
 * @param {*} value
 * @returns {object}
 */
function asObject(value) {
  const isPlainContainer =
    value !== null && typeof value === 'object' && !Array.isArray(value);
  if (isPlainContainer) {
    return value;
  }
  return {};
}
|
|
66
|
+
|
|
67
|
+
/**
 * Returns a shallow copy of `obj` without the own enumerable properties
 * whose value is exactly `undefined`. `null` and other falsy values are kept.
 *
 * @param {object} obj
 * @returns {object}
 */
function stripUndefined(obj) {
  const kept = [];
  for (const [name, value] of Object.entries(obj)) {
    if (value !== undefined) {
      kept.push([name, value]);
    }
  }
  return Object.fromEntries(kept);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Extracts a normalized domain from either a full URL or a bare domain.
 *
 * - Input is trimmed before inspection, so surrounding whitespace never
 *   prevents URL detection.
 * - The `http:` / `https:` scheme is matched case-insensitively; when the
 *   input parses as a URL its hostname is returned.
 * - Anything else (bare domains, unparseable URLs) is returned trimmed and
 *   lower-cased.
 * - `null`, `undefined` and other non-string values are coerced to a string
 *   instead of throwing (`null`/`undefined` yield an empty string).
 *
 * @param {string} urlOrDomain - URL such as `https://example.com/x` or a domain.
 * @returns {string} the hostname or normalized domain
 */
export function extractDomain(urlOrDomain) {
  const input = String(urlOrDomain ?? '').trim();

  // Require the colon so domains that merely start with "http"
  // (e.g. "httpbin.org") are not treated as URLs.
  if (/^https?:/i.test(input)) {
    try {
      return new URL(input).hostname;
    } catch {
      // Not a parseable URL; fall back to treating it as a plain domain.
    }
  }

  return input.toLowerCase();
}
|
|
81
|
+
|
|
82
|
+
/**
 * Builds the de-duplication key for an endpoint: `METHOD:host/path`.
 *
 * Query string and fragment are not part of the key. When `url` cannot be
 * parsed as an absolute URL, the raw string up to the first `?` / `#` is
 * used in place of host and path.
 *
 * @param {string} url
 * @param {string} [method] - defaults to `GET`; upper-cased in the key
 * @returns {string}
 */
export function normalizeEndpointKey(url, method) {
  const verb = (method || 'GET').toUpperCase();

  let location;
  try {
    const parsed = new URL(url);
    location = `${parsed.hostname}${parsed.pathname}`;
  } catch {
    const [withoutQuery] = String(url || '').split('?');
    [location] = withoutQuery.split('#');
  }

  return `${verb}:${location}`;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Converts an endpoint (a URL string or an object with a `url` property)
 * into the canonical stored record.
 *
 * Each optional field is taken from `endpoint`, then from `fallback`, and
 * finally set to `null`. Timestamps default to `now`.
 *
 * @param {string|object} endpoint
 * @param {string} now - ISO timestamp used when no timestamp is available
 * @param {object} [fallback] - previously stored record for the same endpoint
 * @returns {object|null} the normalized record, or `null` when no URL is present
 */
function normalizeEndpoint(endpoint, now, fallback = {}) {
  if (!endpoint) return null;

  const url = typeof endpoint === 'string' ? endpoint : endpoint.url;
  if (!url) return null;

  // First defined (non-nullish) value wins: incoming entry, stored entry, null.
  const inherit = (field) => endpoint[field] ?? fallback[field] ?? null;

  return stripUndefined({
    url,
    method: (endpoint.method || fallback.method || 'GET').toUpperCase(),
    status: inherit('status'),
    contentType: inherit('contentType'),
    confidence: inherit('confidence'),
    dataPath: inherit('dataPath'),
    sourceKey: inherit('sourceKey'),
    operationName: inherit('operationName'),
    operationType: inherit('operationType'),
    originTool: inherit('originTool'),
    originDescription: inherit('originDescription'),
    firstSeenAt: endpoint.firstSeenAt || fallback.firstSeenAt || now,
    lastSeenAt: endpoint.lastSeenAt || fallback.lastSeenAt || now
  });
}
|
|
117
|
+
|
|
118
|
+
/**
 * Merges newly observed API endpoints into the stored endpoint list.
 *
 * Endpoints are matched by `normalizeEndpointKey` (method + host + path).
 * For a match, incoming values win, missing values are inherited from the
 * stored record, `firstSeenAt` is preserved and `lastSeenAt` is advanced to
 * the incoming entry's own `lastSeenAt` or, if it has none, to `now`.
 *
 * The result is ordered most-recently-seen first and limited to
 * MAX_DISCOVER_APIS_ENDPOINTS entries whose originTool is 'discover_apis'
 * and MAX_ENDPOINTS entries overall. The per-tool limit is applied before
 * the overall limit so that surplus 'discover_apis' entries cannot evict
 * endpoints recorded by other tools.
 *
 * @param {Array} existing - stored endpoints (strings or objects); non-arrays are treated as empty
 * @param {Array} incoming - newly observed endpoints (strings or objects)
 * @param {string} [now] - ISO timestamp for this merge; defaults to the current time
 * @returns {object[]} merged, sorted and capped endpoint records
 */
export function mergeEndpoints(existing, incoming, now = isoNow()) {
  const merged = [];
  const indexByKey = new Map();

  for (const raw of asArray(existing)) {
    const normalized = normalizeEndpoint(raw, now);
    if (!normalized) continue;
    indexByKey.set(normalizeEndpointKey(normalized.url, normalized.method), merged.length);
    merged.push(normalized);
  }

  for (const raw of asArray(incoming)) {
    const key = normalizeEndpointKey(raw?.url || raw, raw?.method);
    const idx = indexByKey.get(key);
    const base = idx !== undefined ? merged[idx] : {};
    const normalized = normalizeEndpoint(raw, now, base);
    if (!normalized) continue;

    if (idx === undefined) {
      indexByKey.set(key, merged.length);
      merged.push(normalized);
      continue;
    }

    merged[idx] = {
      ...base,
      ...normalized,
      firstSeenAt: base.firstSeenAt || normalized.firstSeenAt || now,
      // Use the incoming entry's own timestamp, not `normalized.lastSeenAt`:
      // the latter falls back to the stored value, which would keep a
      // re-observed endpoint's lastSeenAt frozen at its first observation.
      lastSeenAt: raw?.lastSeenAt || now,
      confidence: normalized.confidence ?? base.confidence ?? null
    };
  }

  const seenTime = (entry) => new Date(entry.lastSeenAt || entry.firstSeenAt || 0).getTime();
  const byMostRecent = (a, b) => seenTime(b) - seenTime(a);
  const isDiscoverApis = (entry) => entry.originTool === 'discover_apis';

  const sorted = merged.sort(byMostRecent);
  const discoverApis = sorted.filter(isDiscoverApis).slice(0, MAX_DISCOVER_APIS_ENDPOINTS);
  const others = sorted.filter((entry) => !isDiscoverApis(entry));

  return [...others, ...discoverApis].sort(byMostRecent).slice(0, MAX_ENDPOINTS);
}
|
|
172
|
+
|
|
173
|
+
/**
 * Converts a known path (a path string or an object with a `path` property)
 * into the canonical stored record.
 *
 * Optional fields are taken from `pathEntry`, then from `fallback`; `type`
 * finally defaults to `'unknown'` and the other fields to `null`.
 * Timestamps default to `now`.
 *
 * @param {string|object} pathEntry
 * @param {string} now - ISO timestamp used when no timestamp is available
 * @param {object} [fallback] - record to inherit missing fields from
 * @returns {object|null} the normalized record, or `null` when no path is present
 */
function normalizeKnownPath(pathEntry, now, fallback = {}) {
  if (!pathEntry) return null;

  const path = typeof pathEntry === 'string' ? pathEntry : pathEntry.path;
  if (!path) return null;

  // First defined (non-nullish) value wins: entry, fallback, then the default.
  const inherit = (field, defaultValue) =>
    pathEntry[field] ?? fallback[field] ?? defaultValue;

  return stripUndefined({
    path,
    type: inherit('type', 'unknown'),
    example: inherit('example', null),
    sourceKey: inherit('sourceKey', null),
    originTool: inherit('originTool', null),
    confidence: inherit('confidence', null),
    firstSeenAt: pathEntry.firstSeenAt || fallback.firstSeenAt || now,
    lastSeenAt: pathEntry.lastSeenAt || fallback.lastSeenAt || now
  });
}
|
|
194
|
+
|
|
195
|
+
/**
 * Builds the de-duplication key for a known path: `path::sourceKey`.
 * A missing or falsy `sourceKey` contributes an empty string.
 *
 * @param {{path: string, sourceKey?: string|null}} pathEntry
 * @returns {string}
 */
function knownPathKey(pathEntry) {
  const source = pathEntry.sourceKey || '';
  return `${pathEntry.path}::${source}`;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Merges incoming stable selectors into the stored list, matching entries by
 * their `selector` string. Entries without a `selector` are ignored.
 *
 * @param {Array} existing - stored selector entries, kept in their original order
 * @param {Array} incoming - new selector entries
 * @returns {object[]} stored entries (updated in place in the list) followed by new ones
 */
function mergeStableSelectors(existing, incoming) {
  const result = [];
  const positionBySelector = new Map();
  const hasSelector = (entry) => Boolean(entry && entry.selector);

  const append = (entry) => {
    positionBySelector.set(entry.selector, result.length);
    result.push(entry);
  };

  asArray(existing).filter(hasSelector).forEach(append);

  asArray(incoming).filter(hasSelector).forEach((entry) => {
    if (!positionBySelector.has(entry.selector)) {
      append(entry);
      return;
    }
    // The incoming entry is the more recent one, so its fields overwrite
    // the stored entry's fields.
    const position = positionBySelector.get(entry.selector);
    result[position] = { ...result[position], ...entry };
  });

  return result;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Merges newly observed data paths into the stored known-path list.
 *
 * Paths are matched by `knownPathKey` (path + sourceKey). For a match,
 * incoming values win and values the incoming entry does not carry are
 * inherited from the stored record, so a sparse re-observation (for example
 * a bare path string) no longer resets `type`, `example`, `confidence` or
 * `originTool`. `firstSeenAt` is preserved; `lastSeenAt` becomes the
 * incoming entry's own `lastSeenAt` or `now`.
 *
 * The result is ordered by score (entries with a truthy confidence first,
 * then longer paths first) and limited to MAX_EXTRACT_DATA_KNOWN_PATHS
 * entries whose originTool is 'extract_data' and MAX_KNOWN_PATHS entries
 * overall. The per-tool limit is applied before the overall limit so that
 * surplus 'extract_data' entries cannot evict paths recorded by other tools.
 *
 * @param {Array} existing - stored paths (strings or objects); non-arrays are treated as empty
 * @param {Array} incoming - newly observed paths (strings or objects)
 * @param {string} [now] - ISO timestamp for this merge; defaults to the current time
 * @returns {object[]} merged, sorted and capped known-path records
 */
function mergeKnownPaths(existing, incoming, now = isoNow()) {
  const merged = [];
  const indexByKey = new Map();

  for (const raw of asArray(existing)) {
    const normalized = normalizeKnownPath(raw, now);
    if (!normalized) continue;
    indexByKey.set(knownPathKey(normalized), merged.length);
    merged.push(normalized);
  }

  for (const raw of asArray(incoming)) {
    // Normalize once without a fallback to learn the entry's identity.
    const standalone = normalizeKnownPath(raw, now);
    if (!standalone) continue;

    const key = knownPathKey(standalone);
    const idx = indexByKey.get(key);
    if (idx === undefined) {
      indexByKey.set(key, merged.length);
      merged.push(standalone);
      continue;
    }

    // Normalize again with the stored record as fallback so that fields the
    // incoming entry omits keep their stored values instead of being
    // overwritten by defaults ('unknown' / null).
    const base = merged[idx];
    const normalized = normalizeKnownPath(raw, now, base);
    merged[idx] = {
      ...base,
      ...normalized,
      firstSeenAt: base.firstSeenAt || normalized.firstSeenAt || now,
      lastSeenAt: raw?.lastSeenAt || now
    };
  }

  const score = (entry) => Number(Boolean(entry.confidence)) * 100 + (entry.path?.length || 0);
  const byScoreDesc = (a, b) => score(b) - score(a);
  const isExtractData = (entry) => entry.originTool === 'extract_data';

  const sorted = merged.sort(byScoreDesc);
  const extractDataPaths = sorted.filter(isExtractData).slice(0, MAX_EXTRACT_DATA_KNOWN_PATHS);
  const others = sorted.filter((entry) => !isExtractData(entry));

  return [...others, ...extractDataPaths].sort(byScoreDesc).slice(0, MAX_KNOWN_PATHS);
}
|
|
276
|
+
|
|
277
|
+
/**
 * Converts a data-schema source into the canonical stored record.
 *
 * Sample paths come from `source.samplePaths`, else `source.leafPaths`, else
 * `fallback.samplePaths`; string entries become `{ path }`, entries without
 * a path are dropped, and at most MAX_SAMPLE_PATHS are kept. At most 30
 * top-level keys are kept. Other fields are taken from `source`, then from
 * `fallback`, then a default.
 *
 * @param {object} source - incoming source; must have a truthy `key`
 * @param {string} now - ISO timestamp used when no `capturedAt` is available
 * @param {object} [fallback] - previously stored record for the same key
 * @returns {object|null} the normalized record, or `null` when `source.key` is missing
 */
function normalizeDataSource(source, now, fallback = {}) {
  if (!source?.key) return null;

  const toSamplePath = (entry) => {
    if (!entry) return null;
    if (typeof entry === 'string') return { path: entry };
    if (!entry.path) return null;
    return stripUndefined({
      path: entry.path,
      type: entry.type ?? null,
      example: entry.example ?? null,
      confidence: entry.confidence ?? null
    });
  };

  const rawSamples = source.samplePaths || source.leafPaths || fallback.samplePaths;
  const samplePaths = asArray(rawSamples)
    .map((entry) => toSamplePath(entry))
    .filter(Boolean)
    .slice(0, MAX_SAMPLE_PATHS);

  const topLevelKeys = asArray(source.topLevelKeys || fallback.topLevelKeys).slice(0, 30);

  // First defined (non-nullish) value wins: source, fallback, then the default.
  const inherit = (field, defaultValue) => source[field] ?? fallback[field] ?? defaultValue;

  return stripUndefined({
    key: source.key,
    type: inherit('type', 'unknown'),
    isArray: inherit('isArray', false),
    arrayLength: inherit('arrayLength', null),
    topLevelKeyCount: inherit('topLevelKeyCount', null),
    topLevelKeys,
    // An explicit count wins; otherwise derive it from the incoming leaf paths.
    leafPathCount: source.leafPathCount ?? source.leafPaths?.length ?? fallback.leafPathCount ?? null,
    samplePaths,
    capturedAt: inherit('capturedAt', now)
  });
}
|
|
309
|
+
|
|
310
|
+
/**
 * Merges the sources of an incoming data schema into the stored schema.
 *
 * Sources are matched by `key`. For a match, incoming values win and missing
 * values are inherited from the stored source. The merged sources are
 * ordered by `leafPathCount`, largest first.
 *
 * @param {{sources?: Array}|null} existing - stored schema
 * @param {{sources?: Array}|null} incoming - new schema
 * @param {string} [now] - ISO timestamp for this merge; defaults to the current time
 * @returns {{sources: object[]}|null} merged schema, or `null` when neither side has sources
 */
function mergeDataSchema(existing, incoming, now = isoNow()) {
  const storedSources = asArray(existing?.sources);
  const freshSources = asArray(incoming?.sources);
  if (storedSources.length + freshSources.length === 0) return null;

  const sources = [];
  const positionByKey = new Map();

  const append = (record) => {
    positionByKey.set(record.key, sources.length);
    sources.push(record);
  };

  storedSources.forEach((raw) => {
    const normalized = normalizeDataSource(raw, now);
    if (normalized) append(normalized);
  });

  freshSources.forEach((raw) => {
    const position = positionByKey.get(raw?.key);
    const isKnown = position !== undefined;
    const base = isKnown ? sources[position] : {};
    const normalized = normalizeDataSource(raw, now, base);
    if (!normalized) return;

    if (!isKnown) {
      append(normalized);
      return;
    }

    sources[position] = {
      ...base,
      ...normalized,
      capturedAt: normalized.capturedAt || base.capturedAt || now
    };
  });

  sources.sort((a, b) => (b.leafPathCount || 0) - (a.leafPathCount || 0));
  return { sources };
}
|
|
347
|
+
|
|
348
|
+
/**
 * Merges automatically generated notes into the stored note list.
 *
 * Notes are keyed by `source` (or `tool`) plus `kind`; an incoming note
 * replaces the stored note with the same key in place, otherwise it is
 * appended. Incoming notes are reduced to `source`, `kind`, `timestamp` and
 * `text`. Only the last MAX_AUTO_NOTES notes are returned.
 *
 * `null` / `undefined` entries on either side are skipped. Without this, a
 * single such entry in a stored profile throws here, which loadProfile
 * turns into "no profile", and the next save would overwrite the file.
 *
 * @param {Array} existing - stored notes; non-arrays are treated as empty
 * @param {Array} incoming - new notes; non-arrays are treated as empty
 * @returns {object[]} merged notes, at most MAX_AUTO_NOTES long
 */
function mergeAutoNotes(existing, incoming) {
  const isPresent = (note) => note !== null && note !== undefined;
  const keyFor = (note) => `${note.source || note.tool || 'unknown'}::${note.kind || 'summary'}`;

  const merged = asArray(existing).filter(isPresent);
  const seen = new Map(merged.map((note, index) => [keyFor(note), index]));

  for (const note of asArray(incoming)) {
    if (!isPresent(note)) continue;

    const normalized = {
      source: note.source || note.tool || 'unknown',
      kind: note.kind || 'summary',
      timestamp: note.timestamp || isoNow(),
      text: note.text || ''
    };
    const key = keyFor(normalized);
    if (seen.has(key)) {
      merged[seen.get(key)] = normalized;
    } else {
      seen.set(key, merged.length);
      merged.push(normalized);
    }
  }

  return merged.slice(-MAX_AUTO_NOTES);
}
|
|
371
|
+
|
|
372
|
+
/**
 * Computes the per-field "last updated" timestamps after a save.
 *
 * Starts from the stored timestamps, applies any explicit
 * `profileData.fieldTimestamps`, then stamps `now` on every tracked field
 * (see PROFILE_FIELD_ALIASES) for which `profileData` supplies a value other
 * than `undefined` under one of its aliases.
 *
 * @param {object} existing - stored timestamps; non-objects are treated as empty
 * @param {object} profileData - the data being saved
 * @param {string} now - ISO timestamp for this save
 * @returns {object} a new timestamp map
 */
function mergeFieldTimestamps(existing, profileData, now) {
  const stamps = { ...asObject(existing) };

  if (profileData.fieldTimestamps) {
    Object.assign(stamps, profileData.fieldTimestamps);
  }

  const isSupplied = (alias) => profileData[alias] !== undefined;

  Object.entries(PROFILE_FIELD_ALIASES)
    .filter(([, aliases]) => aliases.some(isSupplied))
    .forEach(([field]) => {
      stamps[field] = now;
    });

  return stamps;
}
|
|
389
|
+
|
|
390
|
+
/**
 * Builds the `profileMeta` record for a save.
 *
 * `createdAt` prefers the stored value, then the supplied one, then `now`.
 * `lastUpdated` is always `now`. The remaining timestamps prefer the value
 * supplied in `profileData.profileMeta`, then the stored one, then `null`.
 *
 * @param {object} existing - stored profileMeta; non-objects are treated as empty
 * @param {object} profileData - the data being saved (its `profileMeta` is read)
 * @param {string} now - ISO timestamp for this save
 * @returns {object}
 */
function mergeProfileMeta(existing, profileData, now) {
  const stored = asObject(existing);
  const supplied = asObject(profileData.profileMeta);
  const carryOver = (field) => supplied[field] ?? stored[field] ?? null;

  return {
    createdAt: stored.createdAt || supplied.createdAt || now,
    lastUpdated: now,
    lastAnalyzedAt: carryOver('lastAnalyzedAt'),
    lastDiscoveryAt: carryOver('lastDiscoveryAt'),
    lastDriftCheckAt: carryOver('lastDriftCheckAt')
  };
}
|
|
402
|
+
|
|
403
|
+
/**
 * Looks up when a profile field was last updated.
 *
 * @param {object|null} profile
 * @param {string} field - key into `profile.fieldTimestamps`
 * @returns {string|null} the stored timestamp, or `null` when the profile,
 *   its timestamp map, or the entry is missing or falsy
 */
export function getProfileFieldTimestamp(profile, field) {
  const stamp = profile ? profile.fieldTimestamps?.[field] : null;
  return stamp || null;
}
|
|
407
|
+
|
|
408
|
+
/**
 * Tells whether a profile field was updated less than `maxAgeMs` ago.
 *
 * @param {object|null} profile
 * @param {string} field - key into `profile.fieldTimestamps`
 * @param {number} maxAgeMs - maximum acceptable age in milliseconds
 * @returns {boolean} `false` when the field has no timestamp; an unparseable
 *   timestamp yields a NaN age and therefore also `false`
 */
export function isProfileFieldFresh(profile, field, maxAgeMs) {
  const stamp = getProfileFieldTimestamp(profile, field);
  if (stamp) {
    const ageMs = Date.now() - new Date(stamp).getTime();
    return ageMs < maxAgeMs;
  }
  return false;
}
|
|
413
|
+
|
|
414
|
+
/**
 * Brings a profile parsed from disk into the current in-memory shape.
 *
 * Missing fields get defaults, list-like fields are re-run through their
 * merge functions (which normalizes, orders and caps them), and field
 * timestamps are backfilled with `lastUpdated` for populated fields that
 * have none.
 *
 * @param {*} raw - parsed JSON content of a profile file
 * @returns {object|null} the normalized profile, or `null` when `raw` is not an object
 */
function normalizeProfile(raw) {
  if (!raw || typeof raw !== 'object') return null;

  // Reference time for anything without its own timestamp: prefer what the
  // file records and only fall back to the current time.
  const now = raw.lastUpdated || raw.createdAt || isoNow();
  const createdAt = raw.createdAt || now;
  const lastUpdated = raw.lastUpdated || now;

  const orNull = (value) => value ?? null;

  const normalized = {
    domain: raw.domain,
    createdAt,
    lastUpdated,
    version: raw.version || 1,
    notes: typeof raw.notes === 'string' ? raw.notes : '',
    framework: asArray(raw.framework),
    pageCharacteristics: orNull(raw.pageCharacteristics),
    authInfo: orNull(raw.authInfo),
    paginationPatterns: orNull(raw.paginationPatterns),
    cspStatus: orNull(raw.cspStatus),
    rateLimit: orNull(raw.rateLimit),
    botProtection: asArray(raw.botProtection),
    responseHeaderPatterns: asObject(raw.responseHeaderPatterns),
    stableSelectors: asArray(raw.stableSelectors),
    apiEndpoints: mergeEndpoints([], raw.apiEndpoints, lastUpdated),
    knownPaths: mergeKnownPaths([], raw.knownPaths, lastUpdated),
    dataSchema: mergeDataSchema(null, raw.dataSchema, lastUpdated),
    autoNotes: mergeAutoNotes([], raw.autoNotes),
    fieldTimestamps: mergeFieldTimestamps(raw.fieldTimestamps, {}, lastUpdated),
    profileMeta: {
      ...mergeProfileMeta(raw.profileMeta, { profileMeta: raw.profileMeta }, lastUpdated),
      // The top-level timestamps are authoritative for these two entries.
      createdAt,
      lastUpdated
    }
  };

  // Populated fields that carry no timestamp are stamped with lastUpdated.
  const backfillCandidates = [
    ['framework', normalized.framework.length > 0],
    ['apiEndpoints', normalized.apiEndpoints.length > 0],
    ['knownPaths', normalized.knownPaths.length > 0],
    ['dataSchema', Boolean(normalized.dataSchema?.sources?.length)]
  ];
  for (const [field, hasData] of backfillCandidates) {
    if (hasData && !normalized.fieldTimestamps[field]) {
      normalized.fieldTimestamps[field] = lastUpdated;
    }
  }

  return normalized;
}
|
|
463
|
+
|
|
464
|
+
/**
 * Merges `profileData` into the stored profile for `domain` and writes the
 * result to the profile file as pretty-printed JSON.
 *
 * Scalar-like fields are replaced when `profileData` supplies a non-nullish
 * value and kept otherwise. `stableSelectors`, `apiEndpoints`, `knownPaths`
 * and `dataSchema` are merged with the stored values when supplied (anything
 * but `undefined`). `autoNotes`, `fieldTimestamps` and `profileMeta` are
 * always merged. `version` is one more than the stored version and never
 * lower than PROFILE_VERSION.
 *
 * @param {string} domain - domain the profile belongs to; also determines the file name
 * @param {object} profileData - partial profile to merge in
 * @returns {object} the profile as written to disk
 */
export function saveProfile(domain, profileData) {
  ensureProfilesDir();

  // loadProfile yields null both for a missing and for an unreadable file;
  // either way the save starts from an empty profile.
  const previous = loadProfile(domain) || {};
  const now = isoNow();
  const notesProvided = profileData.notes !== undefined && profileData.notes !== null;

  // Merge a list-like field when the caller supplied it, else keep the stored value.
  const mergeIfProvided = (field, merge, emptyValue) =>
    profileData[field] !== undefined
      ? merge(previous[field], profileData[field])
      : (previous[field] ?? emptyValue);

  // Replace a field when the caller supplied a non-nullish value.
  const replaceIfProvided = (field, emptyValue) =>
    profileData[field] ?? previous[field] ?? emptyValue;

  const stamps = mergeFieldTimestamps(previous.fieldTimestamps, profileData, now);

  const profile = {
    domain,
    createdAt: previous.createdAt || now,
    lastUpdated: now,
    version: Math.max(previous.version || 0, PROFILE_VERSION - 1) + 1,
    notes: notesProvided ? profileData.notes : (previous.notes ?? ''),
    framework: replaceIfProvided('framework', []),
    pageCharacteristics: replaceIfProvided('pageCharacteristics', null),
    authInfo: replaceIfProvided('authInfo', null),
    paginationPatterns: replaceIfProvided('paginationPatterns', null),
    cspStatus: replaceIfProvided('cspStatus', null),
    rateLimit: replaceIfProvided('rateLimit', null),
    botProtection: replaceIfProvided('botProtection', []),
    responseHeaderPatterns: replaceIfProvided('responseHeaderPatterns', {}),
    stableSelectors: mergeIfProvided('stableSelectors', (stored, fresh) => mergeStableSelectors(stored, fresh), []),
    apiEndpoints: mergeIfProvided('apiEndpoints', (stored, fresh) => mergeEndpoints(stored, fresh, now), []),
    knownPaths: mergeIfProvided('knownPaths', (stored, fresh) => mergeKnownPaths(stored, fresh, now), []),
    dataSchema: mergeIfProvided('dataSchema', (stored, fresh) => mergeDataSchema(stored, fresh, now), null),
    autoNotes: mergeAutoNotes(previous.autoNotes, profileData.autoNotes),
    fieldTimestamps: {
      ...stamps,
      // `notes` is not in PROFILE_FIELD_ALIASES, so its timestamp is handled here.
      notes: notesProvided ? now : stamps.notes
    },
    profileMeta: {
      ...mergeProfileMeta(previous.profileMeta, profileData, now),
      createdAt: previous.profileMeta?.createdAt || previous.createdAt || now,
      lastUpdated: now
    }
  };

  writeFileSync(profilePath(domain), JSON.stringify(profile, null, 2), 'utf-8');
  return profile;
}
|
|
515
|
+
|
|
516
|
+
/**
 * Reads and normalizes the stored profile for a domain.
 *
 * @param {string} domain
 * @returns {object|null} the normalized profile, or `null` when the file does
 *   not exist or cannot be read, parsed or normalized
 */
export function loadProfile(domain) {
  const file = profilePath(domain);
  if (!existsSync(file)) return null;

  try {
    const text = readFileSync(file, 'utf-8');
    return normalizeProfile(JSON.parse(text));
  } catch {
    // Best effort: an unreadable or malformed profile is reported as absent.
    return null;
  }
}
|
|
526
|
+
|
|
527
|
+
/**
 * Returns the data-schema sources of a saved profile whose key starts with
 * `__` (SSR-style globals such as window.__*__ props).
 * Used by get_network_trace as fallback when no API match found.
 *
 * @param {string} domain
 * @returns {object[]} matching source records; empty when there is no
 *   profile or no data schema
 */
export function getSavedSsrPaths(domain) {
  const sources = loadProfile(domain)?.dataSchema?.sources;
  if (!sources) return [];

  return sources.filter((source) => source.key && source.key.startsWith('__'));
}
|
|
536
|
+
|
|
537
|
+
/**
 * Summarizes every profile stored in the profiles directory.
 *
 * Only `.json` files are considered. Files that cannot be read, parsed or
 * normalized are skipped; if the directory itself cannot be listed an empty
 * array is returned.
 *
 * @returns {Array<{domain: *, lastUpdated: string, version: number,
 *   knownPathCount: number, apiEndpointCount: number,
 *   dataSourceCount: number, hasNotes: boolean}>}
 */
export function listProfiles() {
  ensureProfilesDir();

  let fileNames;
  try {
    fileNames = readdirSync(PROFILES_DIR);
  } catch {
    return [];
  }

  const summaries = [];
  for (const fileName of fileNames) {
    if (!fileName.endsWith('.json')) continue;

    try {
      const text = readFileSync(join(PROFILES_DIR, fileName), 'utf-8');
      const profile = normalizeProfile(JSON.parse(text));
      summaries.push({
        domain: profile.domain,
        lastUpdated: profile.lastUpdated,
        version: profile.version,
        knownPathCount: asArray(profile.knownPaths).length,
        apiEndpointCount: asArray(profile.apiEndpoints).length,
        dataSourceCount: asArray(profile.dataSchema?.sources).length,
        hasNotes: Boolean(profile.notes && profile.notes.trim())
      });
    } catch {
      // Skip this file; one bad profile must not hide the others.
    }
  }

  return summaries;
}
|