@yusufffararatt/dombridge-mcp 2.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +559 -0
  2. package/bin/cli.js +88 -0
  3. package/package.json +54 -0
  4. package/src/bridge/http-server.js +290 -0
  5. package/src/bridge/middleware.js +56 -0
  6. package/src/bridge/routes.js +1003 -0
  7. package/src/bridge-daemon.js +172 -0
  8. package/src/cli/auto-config.js +120 -0
  9. package/src/constants.js +13 -0
  10. package/src/index.js +279 -0
  11. package/src/mcp-bridge.js +136 -0
  12. package/src/metrics/error-codes.js +44 -0
  13. package/src/metrics/index.js +3 -0
  14. package/src/metrics/metrics-db.js +269 -0
  15. package/src/metrics/metrics-recorder.js +240 -0
  16. package/src/metrics/metrics-report.js +146 -0
  17. package/src/profiles/profile-db.js +159 -0
  18. package/src/profiles/profile-enricher.js +333 -0
  19. package/src/profiles/profile-manager.js +563 -0
  20. package/src/profiles/profile-repo.js +183 -0
  21. package/src/state/bridge-client.js +272 -0
  22. package/src/state/bridge-persistence.js +205 -0
  23. package/src/state/cache.js +38 -0
  24. package/src/state/extension-state.js +321 -0
  25. package/src/tools/action_tools.js +218 -0
  26. package/src/tools/analyze-page.js +247 -0
  27. package/src/tools/debug-mcp-state.js +172 -0
  28. package/src/tools/discover-apis.js +186 -0
  29. package/src/tools/execute-js.js +284 -0
  30. package/src/tools/export-session.js +171 -0
  31. package/src/tools/extract-data.js +395 -0
  32. package/src/tools/get-element.js +281 -0
  33. package/src/tools/get-network-trace.js +471 -0
  34. package/src/tools/index.js +110 -0
  35. package/src/tools/manage-site-profile.js +153 -0
  36. package/src/tools/paginate.js +444 -0
  37. package/src/tools/quick-scan.js +418 -0
  38. package/src/tools/screenshot_tools.js +117 -0
  39. package/src/utils/circuit-breaker.js +112 -0
  40. package/src/utils/extract-density.js +21 -0
  41. package/src/utils/logger.js +31 -0
  42. package/src/utils/paginate-detector.js +24 -0
  43. package/src/utils/rate-limiter.js +244 -0
  44. package/src/utils/run-script.js +37 -0
  45. package/src/utils/selector-validator.js +95 -0
  46. package/src/utils/state-validator.js +354 -0
  47. package/src/utils/tab-resolver.js +70 -0
  48. package/src/utils/workflow-helper.js +292 -0
  49. package/src/utils/workflow-state.js +177 -0
@@ -0,0 +1,563 @@
1
+ /**
2
+ * Site Profile Manager
3
+ * Stores domain-scoped scraper profiles under mcp-server/profiles/.
4
+ *
5
+ * Profiles are both:
6
+ * - a machine-usable cache for future tool runs
7
+ * - a human-readable dossier for scraper maintenance
8
+ */
9
+
10
+ import { readFileSync, writeFileSync, mkdirSync, readdirSync, existsSync } from 'fs';
11
+ import { join, dirname } from 'path';
12
+ import { fileURLToPath } from 'url';
13
+
14
// ES modules have no built-in __dirname; derive it from this module's file URL.
const __dirname = dirname(
  fileURLToPath(import.meta.url)
);

// Directory holding the per-domain profile JSON files (two levels above this module).
const PROFILES_DIR = join(
  __dirname,
  '../../profiles'
);
16
+
17
// Retention limits used by the merge helpers in this module.

// Upper bound on the endpoint list returned by mergeEndpoints.
const MAX_ENDPOINTS = 100;
// Upper bound on the note list returned by mergeAutoNotes.
const MAX_AUTO_NOTES = 50;
// Upper bound on the path list returned by mergeKnownPaths.
const MAX_KNOWN_PATHS = 250;
// Upper bound on samplePaths kept per data source by normalizeDataSource.
const MAX_SAMPLE_PATHS = 20;
// Baseline schema version; saveProfile derives the stored version from it.
const PROFILE_VERSION = 2;
// Upper bound on endpoints whose originTool is 'discover_apis'.
const MAX_DISCOVER_APIS_ENDPOINTS = 10;
// Upper bound on known paths whose originTool is 'extract_data'.
const MAX_EXTRACT_DATA_KNOWN_PATHS = 10;
24
+
25
/**
 * Maps each timestamp-tracked profile field to the input property names that
 * count as an update of that field. Every field currently has exactly one
 * alias: its own name.
 */
export const PROFILE_FIELD_ALIASES = Object.fromEntries(
  [
    'framework',
    'pageCharacteristics',
    'authInfo',
    'apiEndpoints',
    'knownPaths',
    'dataSchema',
    'stableSelectors',
    'paginationPatterns',
    'cspStatus',
    'rateLimit',
    'botProtection',
    'responseHeaderPatterns',
    'autoNotes'
  ].map((field) => [field, [field]])
);
40
+
41
/**
 * Create the profiles directory (including missing parents) if it does not exist yet.
 */
function ensureProfilesDir() {
  if (existsSync(PROFILES_DIR)) return;
  mkdirSync(PROFILES_DIR, { recursive: true });
}
46
+
47
/**
 * Turn a domain into a JSON file name.
 * Every character other than ASCII letters, digits, '.' and '-' becomes '_'.
 *
 * @param {string} domain
 * @returns {string} Sanitized name ending in ".json".
 */
function domainToFilename(domain) {
  const safeName = domain.replace(/[^a-zA-Z0-9.-]/g, '_');
  return `${safeName}.json`;
}
50
+
51
/**
 * Resolve the on-disk location of a domain's profile file.
 *
 * @param {string} domain
 * @returns {string} Path inside PROFILES_DIR.
 */
function profilePath(domain) {
  const fileName = domainToFilename(domain);
  return join(PROFILES_DIR, fileName);
}
54
+
55
/**
 * @returns {string} The current time as an ISO-8601 string.
 */
function isoNow() {
  const current = new Date();
  return current.toISOString();
}
58
+
59
/**
 * Return the value unchanged when it is an array, otherwise a new empty array.
 *
 * @param {*} value
 * @returns {Array}
 */
function asArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
62
+
63
/**
 * Return the value unchanged when it is a non-null, non-array object,
 * otherwise a new empty object.
 *
 * @param {*} value
 * @returns {object}
 */
function asObject(value) {
  const isObjectLike = value !== null && typeof value === 'object';
  if (isObjectLike && !Array.isArray(value)) {
    return value;
  }
  return {};
}
66
+
67
/**
 * Shallow-copy an object, leaving out properties whose value is `undefined`.
 * `null` and other falsy values are kept.
 *
 * @param {object} obj
 * @returns {object}
 */
function stripUndefined(obj) {
  const kept = [];
  for (const pair of Object.entries(obj)) {
    if (pair[1] !== undefined) {
      kept.push(pair);
    }
  }
  return Object.fromEntries(kept);
}
70
+
71
/**
 * Derive a domain from either a full URL or a bare domain string.
 *
 * Input is trimmed first and the scheme check is case-insensitive, so
 * " https://example.com/x " and "HTTPS://Example.com" both resolve to
 * "example.com" instead of being returned as a whole URL string.
 *
 * @param {string} urlOrDomain - URL (starting with "http") or bare domain.
 * @returns {string} Hostname of the URL, or the trimmed, lower-cased input when
 *   it is not a parseable URL. Nullish input yields an empty string.
 */
export function extractDomain(urlOrDomain) {
  const input = String(urlOrDomain ?? '').trim();

  if (/^http/i.test(input)) {
    try {
      return new URL(input).hostname;
    } catch {
      // Starts with "http" but is not a URL (e.g. "httpbin.org"): treat as a bare domain.
    }
  }

  return input.toLowerCase();
}
81
+
82
/**
 * Build the de-duplication key for an endpoint: "METHOD:host+path".
 * Query string and fragment are ignored. When the URL cannot be parsed, the
 * raw string up to the first '?' or '#' is used instead of host+path.
 *
 * @param {string} url
 * @param {string} [method] - Defaults to GET; upper-cased in the key.
 * @returns {string}
 */
export function normalizeEndpointKey(url, method) {
  let target;
  try {
    const parsed = new URL(url);
    target = `${parsed.hostname}${parsed.pathname}`;
  } catch {
    const [withoutQuery] = String(url || '').split('?');
    [target] = withoutQuery.split('#');
  }

  const verb = (method || 'GET').toUpperCase();
  return `${verb}:${target}`;
}
91
+
92
/**
 * Convert a raw endpoint (URL string or endpoint object) into the stored record shape.
 *
 * Each optional field is taken from the entry, then from `fallback`, then null.
 *
 * @param {string|object} endpoint - URL string, or an object carrying `url`.
 * @param {string} now - Timestamp used when neither entry nor fallback has one.
 * @param {object} [fallback={}] - Record whose values fill fields the entry lacks.
 * @returns {object|null} Normalized record, or null when no URL is available.
 */
function normalizeEndpoint(endpoint, now, fallback = {}) {
  if (!endpoint) return null;

  const isBareUrl = typeof endpoint === 'string';
  const rawUrl = isBareUrl ? endpoint : endpoint.url;
  if (!rawUrl) return null;

  const record = {
    url: rawUrl,
    method: (endpoint.method || fallback.method || 'GET').toUpperCase()
  };

  const optionalFields = [
    'status',
    'contentType',
    'confidence',
    'dataPath',
    'sourceKey',
    'operationName',
    'operationType',
    'originTool',
    'originDescription'
  ];
  for (const field of optionalFields) {
    record[field] = endpoint[field] ?? fallback[field] ?? null;
  }

  record.firstSeenAt = endpoint.firstSeenAt || fallback.firstSeenAt || now;
  record.lastSeenAt = endpoint.lastSeenAt || fallback.lastSeenAt || now;

  return stripUndefined(record);
}
117
+
118
/**
 * Merge newly observed endpoints into the stored endpoint list.
 *
 * Entries are matched by normalizeEndpointKey (method + host + path). A match
 * keeps the stored `firstSeenAt`, fills fields the new observation lacks from
 * the stored record, and refreshes `lastSeenAt`.
 *
 * Fixes relative to the previous implementation:
 * - `lastSeenAt` of a re-observed endpoint is taken from the incoming entry or
 *   `now`; previously the stored value leaked in through the fallback and the
 *   timestamp never advanced.
 * - The 'discover_apis' cap is applied before truncating to MAX_ENDPOINTS, so
 *   entries are no longer dropped to make room for ones the cap then removes.
 * - Unparsable timestamps sort as oldest instead of producing NaN comparisons.
 *
 * @param {Array} existing - Stored endpoints (non-arrays are treated as empty).
 * @param {Array} incoming - New endpoints: URL strings or endpoint objects.
 * @param {string} [now] - Timestamp for this merge; defaults to the current time.
 * @returns {Array<object>} Endpoints sorted most-recently-seen first, at most
 *   MAX_ENDPOINTS long, with at most MAX_DISCOVER_APIS_ENDPOINTS whose
 *   originTool is 'discover_apis'.
 */
export function mergeEndpoints(existing, incoming, now = isoNow()) {
  const merged = [];
  const indexByKey = new Map();

  for (const raw of asArray(existing)) {
    const normalized = normalizeEndpoint(raw, now);
    if (!normalized) continue;
    indexByKey.set(normalizeEndpointKey(normalized.url, normalized.method), merged.length);
    merged.push(normalized);
  }

  for (const raw of asArray(incoming)) {
    const key = normalizeEndpointKey(raw?.url || raw, raw?.method);
    const idx = indexByKey.get(key);
    const base = idx !== undefined ? merged[idx] : {};
    const normalized = normalizeEndpoint(raw, now, base);
    if (!normalized) continue;

    if (idx === undefined) {
      indexByKey.set(key, merged.length);
      merged.push(normalized);
      continue;
    }

    merged[idx] = {
      ...base,
      ...normalized,
      firstSeenAt: base.firstSeenAt || normalized.firstSeenAt || now,
      // Read from the raw entry: normalized.lastSeenAt may be the stored value
      // inherited via the fallback, which would freeze the timestamp.
      lastSeenAt: raw?.lastSeenAt || now,
      confidence: normalized.confidence ?? base.confidence ?? null
    };
  }

  const seenTime = (entry) => {
    const time = new Date(entry.lastSeenAt || entry.firstSeenAt || 0).getTime();
    return Number.isNaN(time) ? 0 : time;
  };

  // Array.prototype.sort is stable, so the filter below preserves recency order.
  merged.sort((a, b) => seenTime(b) - seenTime(a));

  let discoverKept = 0;
  const capped = merged.filter((entry) => {
    if (entry.originTool !== 'discover_apis') return true;
    discoverKept += 1;
    return discoverKept <= MAX_DISCOVER_APIS_ENDPOINTS;
  });

  return capped.slice(0, MAX_ENDPOINTS);
}
172
+
173
/**
 * Convert a raw known-path entry (path string or object) into the stored record shape.
 *
 * @param {string|object} pathEntry - Path string, or an object carrying `path`.
 * @param {string} now - Timestamp used when neither entry nor fallback has one.
 * @param {object} [fallback={}] - Record whose values fill fields the entry lacks.
 * @returns {object|null} Normalized record, or null when no path is available.
 */
function normalizeKnownPath(pathEntry, now, fallback = {}) {
  if (!pathEntry) return null;

  const isBarePath = typeof pathEntry === 'string';
  const rawPath = isBarePath ? pathEntry : pathEntry.path;
  if (!rawPath) return null;

  const pick = (field, emptyValue) => pathEntry[field] ?? fallback[field] ?? emptyValue;

  const record = {
    path: rawPath,
    type: pick('type', 'unknown'),
    example: pick('example', null),
    sourceKey: pick('sourceKey', null),
    originTool: pick('originTool', null),
    confidence: pick('confidence', null),
    firstSeenAt: pathEntry.firstSeenAt || fallback.firstSeenAt || now,
    lastSeenAt: pathEntry.lastSeenAt || fallback.lastSeenAt || now
  };

  return stripUndefined(record);
}
194
+
195
/**
 * Build the de-duplication key for a known path: "<path>::<sourceKey>",
 * with an empty string standing in for a missing sourceKey.
 *
 * @param {object} pathEntry
 * @returns {string}
 */
function knownPathKey(pathEntry) {
  const source = pathEntry.sourceKey || '';
  return `${pathEntry.path}::${source}`;
}
198
+
199
/**
 * Merge incoming selector entries into the stored list, matching on `selector`.
 * Entries without a `selector` are skipped. A matching incoming entry is
 * shallow-merged over the stored one; new selectors are appended.
 *
 * @param {Array} existing - Stored selector entries.
 * @param {Array} incoming - New selector entries.
 * @returns {Array<object>}
 */
function mergeStableSelectors(existing, incoming) {
  const result = [];
  const positionBySelector = new Map();
  const hasSelector = (entry) => Boolean(entry && entry.selector);

  asArray(existing).filter(hasSelector).forEach((entry) => {
    positionBySelector.set(entry.selector, result.length);
    result.push(entry);
  });

  asArray(incoming).filter(hasSelector).forEach((entry) => {
    const position = positionBySelector.get(entry.selector);
    if (position === undefined) {
      positionBySelector.set(entry.selector, result.length);
      result.push(entry);
      return;
    }
    // The incoming entry is more recent, so its fields take precedence.
    result[position] = { ...result[position], ...entry };
  });

  return result;
}
223
+
224
/**
 * Merge newly observed data paths into the stored known-path list.
 *
 * Entries are matched by knownPathKey (path + sourceKey). A match keeps the
 * stored `firstSeenAt` and refreshes `lastSeenAt`.
 *
 * Fixes relative to the previous implementation:
 * - A matching incoming entry is normalized with the stored record as
 *   fallback, so a sparse re-observation (e.g. a bare path string) no longer
 *   resets the stored type/example/confidence/originTool to 'unknown'/null.
 * - The 'extract_data' cap is applied before truncating to MAX_KNOWN_PATHS, so
 *   entries are no longer dropped to make room for ones the cap then removes.
 *
 * @param {Array} existing - Stored known paths (non-arrays are treated as empty).
 * @param {Array} incoming - New paths: path strings or path objects.
 * @param {string} [now] - Timestamp for this merge; defaults to the current time.
 * @returns {Array<object>} Paths ordered by score (entries with a truthy
 *   confidence first, then longer paths), at most MAX_KNOWN_PATHS long, with at
 *   most MAX_EXTRACT_DATA_KNOWN_PATHS whose originTool is 'extract_data'.
 */
function mergeKnownPaths(existing, incoming, now = isoNow()) {
  const merged = [];
  const indexByKey = new Map();

  for (const raw of asArray(existing)) {
    const normalized = normalizeKnownPath(raw, now);
    if (!normalized) continue;
    indexByKey.set(knownPathKey(normalized), merged.length);
    merged.push(normalized);
  }

  for (const raw of asArray(incoming)) {
    const probe = normalizeKnownPath(raw, now);
    if (!probe) continue;

    const key = knownPathKey(probe);
    const idx = indexByKey.get(key);
    if (idx === undefined) {
      indexByKey.set(key, merged.length);
      merged.push(probe);
      continue;
    }

    const base = merged[idx];
    // Timestamps are excluded from the fallback so lastSeenAt still advances to `now`.
    const normalized = normalizeKnownPath(raw, now, {
      ...base,
      firstSeenAt: undefined,
      lastSeenAt: undefined
    });
    merged[idx] = {
      ...base,
      ...normalized,
      firstSeenAt: base.firstSeenAt || normalized.firstSeenAt || now,
      lastSeenAt: normalized.lastSeenAt || now
    };
  }

  const score = (entry) => Number(Boolean(entry.confidence)) * 100 + (entry.path?.length || 0);

  // Array.prototype.sort is stable, so the filter below preserves score order.
  merged.sort((a, b) => score(b) - score(a));

  let extractKept = 0;
  const capped = merged.filter((entry) => {
    if (entry.originTool !== 'extract_data') return true;
    extractKept += 1;
    return extractKept <= MAX_EXTRACT_DATA_KNOWN_PATHS;
  });

  return capped.slice(0, MAX_KNOWN_PATHS);
}
276
+
277
/**
 * Convert a raw data-source description into the stored record shape.
 *
 * Sample paths come from `samplePaths`, else `leafPaths`, else the fallback's
 * `samplePaths`, capped at MAX_SAMPLE_PATHS. `topLevelKeys` is capped at 30.
 *
 * @param {object} source - Raw source; must carry a truthy `key`.
 * @param {string} now - Used as `capturedAt` when neither source nor fallback has one.
 * @param {object} [fallback={}] - Record whose values fill fields the source lacks.
 * @returns {object|null} Normalized source, or null when `key` is missing.
 */
function normalizeDataSource(source, now, fallback = {}) {
  if (!source?.key) return null;

  const toSamplePath = (entry) => {
    if (!entry) return null;
    if (typeof entry === 'string') return { path: entry };
    if (!entry.path) return null;
    return stripUndefined({
      path: entry.path,
      type: entry.type ?? null,
      example: entry.example ?? null,
      confidence: entry.confidence ?? null
    });
  };

  const rawSamplePaths = source.samplePaths || source.leafPaths || fallback.samplePaths;
  const samplePaths = asArray(rawSamplePaths)
    .map((entry) => toSamplePath(entry))
    .filter(Boolean)
    .slice(0, MAX_SAMPLE_PATHS);

  const rawTopLevelKeys = source.topLevelKeys || fallback.topLevelKeys;
  const topLevelKeys = asArray(rawTopLevelKeys).slice(0, 30);

  const pick = (field, emptyValue) => source[field] ?? fallback[field] ?? emptyValue;

  return stripUndefined({
    key: source.key,
    type: pick('type', 'unknown'),
    isArray: pick('isArray', false),
    arrayLength: pick('arrayLength', null),
    topLevelKeyCount: pick('topLevelKeyCount', null),
    topLevelKeys,
    leafPathCount: source.leafPathCount ?? source.leafPaths?.length ?? fallback.leafPathCount ?? null,
    samplePaths,
    capturedAt: pick('capturedAt', now)
  });
}
309
+
310
/**
 * Merge incoming data-schema sources into the stored schema, matching on `key`.
 *
 * @param {object|null} existing - Stored schema ({ sources: [...] }) or null.
 * @param {object|null} incoming - New schema ({ sources: [...] }) or null.
 * @param {string} [now] - Timestamp for this merge; defaults to the current time.
 * @returns {{sources: Array<object>}|null} Sources sorted by descending
 *   leafPathCount, or null when neither side has any sources.
 */
function mergeDataSchema(existing, incoming, now = isoNow()) {
  const storedSources = asArray(existing?.sources);
  const freshSources = asArray(incoming?.sources);
  if (storedSources.length === 0 && freshSources.length === 0) return null;

  const sources = [];
  const positionByKey = new Map();
  const append = (entry) => {
    positionByKey.set(entry.key, sources.length);
    sources.push(entry);
  };

  for (const raw of storedSources) {
    const entry = normalizeDataSource(raw, now);
    if (entry) append(entry);
  }

  for (const raw of freshSources) {
    const position = positionByKey.get(raw?.key);
    const stored = position === undefined ? {} : sources[position];
    const entry = normalizeDataSource(raw, now, stored);
    if (!entry) continue;

    if (position === undefined) {
      append(entry);
      continue;
    }

    sources[position] = {
      ...stored,
      ...entry,
      capturedAt: entry.capturedAt || stored.capturedAt || now
    };
  }

  sources.sort((a, b) => (b.leafPathCount || 0) - (a.leafPathCount || 0));
  return { sources };
}
347
+
348
/**
 * Merge automatically generated notes, keeping one note per (source, kind) pair.
 *
 * An incoming note replaces the stored note with the same key in place;
 * otherwise it is appended. Entries that are not objects (null, strings,
 * numbers) are ignored on both sides. Previously such an entry threw a
 * TypeError, which made loadProfile treat the whole profile as unreadable.
 *
 * @param {Array} existing - Stored notes (non-arrays are treated as empty).
 * @param {Array} incoming - New notes with optional `source`/`tool`, `kind`,
 *   `timestamp` and `text`.
 * @returns {Array<object>} The last MAX_AUTO_NOTES notes of the merged list.
 */
function mergeAutoNotes(existing, incoming) {
  const isNote = (note) => note !== null && typeof note === 'object';
  const keyFor = (note) => `${note.source || note.tool || 'unknown'}::${note.kind || 'summary'}`;

  const merged = asArray(existing).filter(isNote);
  const indexByKey = new Map(merged.map((note, index) => [keyFor(note), index]));

  for (const note of asArray(incoming)) {
    if (!isNote(note)) continue;

    const normalized = {
      source: note.source || note.tool || 'unknown',
      kind: note.kind || 'summary',
      timestamp: note.timestamp || isoNow(),
      text: note.text || ''
    };

    const key = keyFor(normalized);
    const idx = indexByKey.get(key);
    if (idx !== undefined) {
      merged[idx] = normalized;
    } else {
      indexByKey.set(key, merged.length);
      merged.push(normalized);
    }
  }

  return merged.slice(-MAX_AUTO_NOTES);
}
371
+
372
/**
 * Compute the per-field "last updated" timestamps after a save.
 *
 * Starts from the stored timestamps, overlays any explicit
 * `profileData.fieldTimestamps`, then stamps `now` on every field in
 * PROFILE_FIELD_ALIASES for which `profileData` carries a value other than
 * `undefined` (null counts as provided).
 *
 * @param {object} existing - Stored timestamps (non-objects are treated as empty).
 * @param {object} profileData - Data being saved.
 * @param {string} now - Timestamp for this save.
 * @returns {object} New timestamp map.
 */
function mergeFieldTimestamps(existing, profileData, now) {
  const stamps = { ...asObject(existing) };

  const explicit = profileData.fieldTimestamps;
  if (explicit) {
    Object.assign(stamps, explicit);
  }

  for (const field of Object.keys(PROFILE_FIELD_ALIASES)) {
    const wasProvided = PROFILE_FIELD_ALIASES[field].some(
      (alias) => profileData[alias] !== undefined
    );
    if (wasProvided) {
      stamps[field] = now;
    }
  }

  return stamps;
}
389
+
390
/**
 * Build the profileMeta block for a save.
 *
 * `createdAt` prefers the stored value; `lastUpdated` is always `now`; the
 * three last*At fields prefer the incoming value, then the stored one, then null.
 *
 * @param {object} existing - Stored profileMeta (non-objects are treated as empty).
 * @param {object} profileData - Data being saved; its `profileMeta` is read.
 * @param {string} now - Timestamp for this save.
 * @returns {object}
 */
function mergeProfileMeta(existing, profileData, now) {
  const stored = asObject(existing);
  const provided = asObject(profileData.profileMeta);
  const latest = (field) => provided[field] ?? stored[field] ?? null;

  return {
    createdAt: stored.createdAt || provided.createdAt || now,
    lastUpdated: now,
    lastAnalyzedAt: latest('lastAnalyzedAt'),
    lastDiscoveryAt: latest('lastDiscoveryAt'),
    lastDriftCheckAt: latest('lastDriftCheckAt')
  };
}
402
+
403
/**
 * Look up when a profile field was last updated.
 *
 * @param {object|null} profile
 * @param {string} field
 * @returns {string|null} The recorded timestamp, or null when the profile or
 *   the timestamp is missing.
 */
export function getProfileFieldTimestamp(profile, field) {
  const recorded = profile ? profile.fieldTimestamps?.[field] : null;
  return recorded || null;
}
407
+
408
/**
 * Tell whether a profile field was updated less than `maxAgeMs` milliseconds ago.
 *
 * @param {object|null} profile
 * @param {string} field
 * @param {number} maxAgeMs - Maximum accepted age in milliseconds.
 * @returns {boolean} False when no timestamp is recorded for the field.
 */
export function isProfileFieldFresh(profile, field, maxAgeMs) {
  const recorded = getProfileFieldTimestamp(profile, field);
  if (recorded) {
    const ageMs = Date.now() - new Date(recorded).getTime();
    return ageMs < maxAgeMs;
  }
  return false;
}
413
+
414
/**
 * Bring a profile parsed from disk into the current in-memory shape.
 *
 * Missing fields get defaults, list fields are passed through their merge
 * helpers with an empty "existing" side, and field timestamps are back-filled
 * with `lastUpdated` for framework, apiEndpoints, knownPaths and dataSchema
 * when those hold data but have no timestamp.
 *
 * @param {*} raw - Parsed JSON content of a profile file.
 * @returns {object|null} Normalized profile, or null when `raw` is not an object.
 */
function normalizeProfile(raw) {
  if (!raw || typeof raw !== 'object') return null;

  // A single reference time so createdAt/lastUpdated agree when one is missing.
  const referenceTime = raw.lastUpdated || raw.createdAt || isoNow();
  const createdAt = raw.createdAt || referenceTime;
  const lastUpdated = raw.lastUpdated || referenceTime;

  const framework = asArray(raw.framework);
  const apiEndpoints = mergeEndpoints([], raw.apiEndpoints, lastUpdated);
  const knownPaths = mergeKnownPaths([], raw.knownPaths, lastUpdated);
  const dataSchema = mergeDataSchema(null, raw.dataSchema, lastUpdated);
  const autoNotes = mergeAutoNotes([], raw.autoNotes);
  const fieldTimestamps = mergeFieldTimestamps(raw.fieldTimestamps, {}, lastUpdated);

  // The profile-level timestamps take precedence over whatever profileMeta held.
  const profileMeta = {
    ...mergeProfileMeta(raw.profileMeta, { profileMeta: raw.profileMeta }, lastUpdated),
    createdAt,
    lastUpdated
  };

  const backfillCandidates = [
    ['framework', framework.length > 0],
    ['apiEndpoints', apiEndpoints.length > 0],
    ['knownPaths', knownPaths.length > 0],
    ['dataSchema', Boolean(dataSchema?.sources?.length)]
  ];
  for (const [field, hasData] of backfillCandidates) {
    if (hasData && !fieldTimestamps[field]) {
      fieldTimestamps[field] = lastUpdated;
    }
  }

  return {
    domain: raw.domain,
    createdAt,
    lastUpdated,
    version: raw.version || 1,
    notes: typeof raw.notes === 'string' ? raw.notes : '',
    framework,
    pageCharacteristics: raw.pageCharacteristics ?? null,
    authInfo: raw.authInfo ?? null,
    paginationPatterns: raw.paginationPatterns ?? null,
    cspStatus: raw.cspStatus ?? null,
    rateLimit: raw.rateLimit ?? null,
    botProtection: asArray(raw.botProtection),
    responseHeaderPatterns: asObject(raw.responseHeaderPatterns),
    stableSelectors: asArray(raw.stableSelectors),
    apiEndpoints,
    knownPaths,
    dataSchema,
    autoNotes,
    fieldTimestamps,
    profileMeta
  };
}
463
+
464
/**
 * Merge `profileData` into the stored profile for `domain` and write it to disk.
 *
 * Scalar fields take the incoming value when it is neither null nor undefined,
 * otherwise the stored one. stableSelectors, apiEndpoints, knownPaths and
 * dataSchema are merged through their helpers when provided; autoNotes are
 * always merged. The file is written as pretty-printed JSON.
 *
 * @param {string} domain - Domain the profile belongs to.
 * @param {object} profileData - Partial profile to merge in.
 * @returns {object} The profile object that was written.
 */
export function saveProfile(domain, profileData) {
  ensureProfilesDir();

  const existing = loadProfile(domain) || {};
  const now = isoNow();

  const notesProvided = profileData.notes !== undefined && profileData.notes !== null;
  const keep = (field, emptyValue) => profileData[field] ?? existing[field] ?? emptyValue;
  const mergeWhenProvided = (field, merge, emptyValue) => {
    if (profileData[field] !== undefined) {
      return merge(existing[field], profileData[field]);
    }
    return existing[field] ?? emptyValue;
  };

  const fieldTimestamps = mergeFieldTimestamps(existing.fieldTimestamps, profileData, now);
  const profileMeta = mergeProfileMeta(existing.profileMeta, profileData, now);
  profileMeta.createdAt = existing.profileMeta?.createdAt || existing.createdAt || now;
  profileMeta.lastUpdated = now;

  const profile = {
    domain,
    createdAt: existing.createdAt || now,
    lastUpdated: now,
    version: Math.max(existing.version || 0, PROFILE_VERSION - 1) + 1,
    notes: notesProvided ? profileData.notes : (existing.notes ?? ''),
    framework: keep('framework', []),
    pageCharacteristics: keep('pageCharacteristics', null),
    authInfo: keep('authInfo', null),
    paginationPatterns: keep('paginationPatterns', null),
    cspStatus: keep('cspStatus', null),
    rateLimit: keep('rateLimit', null),
    botProtection: keep('botProtection', []),
    responseHeaderPatterns: keep('responseHeaderPatterns', {}),
    stableSelectors: mergeWhenProvided(
      'stableSelectors',
      (stored, fresh) => mergeStableSelectors(stored, fresh),
      []
    ),
    apiEndpoints: mergeWhenProvided(
      'apiEndpoints',
      (stored, fresh) => mergeEndpoints(stored, fresh, now),
      []
    ),
    knownPaths: mergeWhenProvided(
      'knownPaths',
      (stored, fresh) => mergeKnownPaths(stored, fresh, now),
      []
    ),
    dataSchema: mergeWhenProvided(
      'dataSchema',
      (stored, fresh) => mergeDataSchema(stored, fresh, now),
      null
    ),
    autoNotes: mergeAutoNotes(existing.autoNotes, profileData.autoNotes),
    // The notes timestamp only advances when notes were actually supplied.
    fieldTimestamps: {
      ...fieldTimestamps,
      notes: notesProvided ? now : fieldTimestamps.notes
    },
    profileMeta
  };

  writeFileSync(profilePath(domain), JSON.stringify(profile, null, 2), 'utf-8');
  return profile;
}
515
+
516
/**
 * Read and normalize the stored profile for a domain.
 *
 * @param {string} domain
 * @returns {object|null} The normalized profile, or null when the file does
 *   not exist or cannot be read, parsed or normalized.
 */
export function loadProfile(domain) {
  const file = profilePath(domain);
  if (!existsSync(file)) {
    return null;
  }

  try {
    const text = readFileSync(file, 'utf-8');
    const parsed = JSON.parse(text);
    return normalizeProfile(parsed);
  } catch {
    // Best effort: an unreadable or malformed profile is treated as absent.
    return null;
  }
}
526
+
527
/**
 * Return the data-schema sources of a saved profile whose key starts with "__".
 * The original note on this function describes these as SSR-related
 * window.__*__PROPS sources, used by get_network_trace as a fallback when no
 * API match is found.
 *
 * @param {string} domain
 * @returns {Array<object>} Matching sources; empty when there is no profile or
 *   no data schema.
 */
export function getSavedSsrPaths(domain) {
  const sources = loadProfile(domain)?.dataSchema?.sources;
  if (!sources) {
    return [];
  }
  return sources.filter((source) => Boolean(source.key) && source.key.startsWith('__'));
}
536
+
537
/**
 * Summarize every profile file in the profiles directory.
 *
 * Files that cannot be read, parsed or normalized are left out. If the
 * directory itself cannot be listed, an empty array is returned.
 *
 * @returns {Array<object>} One summary per readable profile: domain,
 *   lastUpdated, version, entry counts and whether notes are present.
 */
export function listProfiles() {
  ensureProfilesDir();

  let fileNames;
  try {
    fileNames = readdirSync(PROFILES_DIR);
  } catch {
    return [];
  }

  const summaries = [];
  for (const fileName of fileNames) {
    if (!fileName.endsWith('.json')) continue;

    try {
      const text = readFileSync(join(PROFILES_DIR, fileName), 'utf-8');
      const profile = normalizeProfile(JSON.parse(text));
      summaries.push({
        domain: profile.domain,
        lastUpdated: profile.lastUpdated,
        version: profile.version,
        knownPathCount: asArray(profile.knownPaths).length,
        apiEndpointCount: asArray(profile.apiEndpoints).length,
        dataSourceCount: asArray(profile.dataSchema?.sources).length,
        hasNotes: Boolean(profile.notes && profile.notes.trim())
      });
    } catch {
      // Skip profiles that fail to load; the remaining ones are still listed.
    }
  }

  return summaries;
}