cbrowser 18.34.2 → 18.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/analysis/accessibility-empathy.d.ts.map +1 -1
  2. package/dist/analysis/accessibility-empathy.js +57 -8
  3. package/dist/analysis/accessibility-empathy.js.map +1 -1
  4. package/dist/analysis/page-understanding.d.ts +118 -0
  5. package/dist/analysis/page-understanding.d.ts.map +1 -0
  6. package/dist/analysis/page-understanding.js +940 -0
  7. package/dist/analysis/page-understanding.js.map +1 -0
  8. package/dist/browser/index.d.ts +2 -0
  9. package/dist/browser/index.d.ts.map +1 -1
  10. package/dist/browser/index.js +1 -0
  11. package/dist/browser/index.js.map +1 -1
  12. package/dist/browser/site-profile-manager.d.ts +116 -0
  13. package/dist/browser/site-profile-manager.d.ts.map +1 -0
  14. package/dist/browser/site-profile-manager.js +495 -0
  15. package/dist/browser/site-profile-manager.js.map +1 -0
  16. package/dist/cognitive/goal-decomposer.d.ts +127 -0
  17. package/dist/cognitive/goal-decomposer.d.ts.map +1 -0
  18. package/dist/cognitive/goal-decomposer.js +902 -0
  19. package/dist/cognitive/goal-decomposer.js.map +1 -0
  20. package/dist/cognitive/goal-types.d.ts +140 -0
  21. package/dist/cognitive/goal-types.d.ts.map +1 -0
  22. package/dist/cognitive/goal-types.js +136 -0
  23. package/dist/cognitive/goal-types.js.map +1 -0
  24. package/dist/cognitive/index.d.ts +2 -0
  25. package/dist/cognitive/index.d.ts.map +1 -1
  26. package/dist/cognitive/index.js +5 -0
  27. package/dist/cognitive/index.js.map +1 -1
  28. package/dist/mcp-tools/base/audit-tools.d.ts.map +1 -1
  29. package/dist/mcp-tools/base/audit-tools.js +5 -2
  30. package/dist/mcp-tools/base/audit-tools.js.map +1 -1
  31. package/dist/mcp-tools/base/cognitive-tools.d.ts.map +1 -1
  32. package/dist/mcp-tools/base/cognitive-tools.js +72 -2
  33. package/dist/mcp-tools/base/cognitive-tools.js.map +1 -1
  34. package/dist/mcp-tools/base/index.d.ts +4 -2
  35. package/dist/mcp-tools/base/index.d.ts.map +1 -1
  36. package/dist/mcp-tools/base/index.js +7 -2
  37. package/dist/mcp-tools/base/index.js.map +1 -1
  38. package/dist/mcp-tools/base/interaction-tools.d.ts.map +1 -1
  39. package/dist/mcp-tools/base/interaction-tools.js +23 -0
  40. package/dist/mcp-tools/base/interaction-tools.js.map +1 -1
  41. package/dist/mcp-tools/base/navigation-tools.d.ts.map +1 -1
  42. package/dist/mcp-tools/base/navigation-tools.js +13 -0
  43. package/dist/mcp-tools/base/navigation-tools.js.map +1 -1
  44. package/dist/mcp-tools/base/site-knowledge-tools.d.ts +15 -0
  45. package/dist/mcp-tools/base/site-knowledge-tools.d.ts.map +1 -0
  46. package/dist/mcp-tools/base/site-knowledge-tools.js +314 -0
  47. package/dist/mcp-tools/base/site-knowledge-tools.js.map +1 -0
  48. package/dist/mcp-tools/index.d.ts +6 -6
  49. package/dist/mcp-tools/index.d.ts.map +1 -1
  50. package/dist/mcp-tools/index.js +7 -7
  51. package/dist/mcp-tools/index.js.map +1 -1
  52. package/dist/personas.d.ts.map +1 -1
  53. package/dist/personas.js +369 -0
  54. package/dist/personas.js.map +1 -1
  55. package/dist/site-model/manager.d.ts +161 -0
  56. package/dist/site-model/manager.d.ts.map +1 -0
  57. package/dist/site-model/manager.js +825 -0
  58. package/dist/site-model/manager.js.map +1 -0
  59. package/dist/site-model/types.d.ts +108 -0
  60. package/dist/site-model/types.d.ts.map +1 -0
  61. package/dist/site-model/types.js +10 -0
  62. package/dist/site-model/types.js.map +1 -0
  63. package/dist/types.d.ts +18 -0
  64. package/dist/types.d.ts.map +1 -1
  65. package/dist/types.js.map +1 -1
  66. package/package.json +1 -1
@@ -0,0 +1,825 @@
1
+ /**
2
+ * CBrowser - Site Model Manager
3
+ * Persistent knowledge graph per site with incremental updates,
4
+ * write-coalescing, data decay, and size-capped storage.
5
+ *
6
+ * @copyright 2026 Alexandria Eden alexandria.shai.eden@gmail.com https://cbrowser.ai
7
+ * @license MIT
8
+ * @since v18.35.0
9
+ */
10
+ import * as fs from "fs/promises";
11
+ import * as path from "path";
12
+ import * as os from "os";
13
+ import * as crypto from "crypto";
14
/**
 * Per-domain "site model" store.
 *
 * Keeps one JSON-serialisable model per normalized domain in memory, records
 * navigation edges, element outcomes, goal paths, failure patterns and page
 * fingerprints into it, and persists dirty models to `<dataDir>/site-models`
 * with a debounced, atomic (temp file + rename) write.
 */
export class SiteModelManager {
    /** Directory holding one JSON model file per domain */
    modelDir;
    /** In-memory model cache keyed by normalized domain */
    models;
    /** Normalized domains whose in-memory model has unsaved changes */
    dirty;
    /** Handle of the scheduled debounce timer, or null when none is scheduled */
    writeTimer;
    /** In-flight disk hydrations keyed by normalized domain */
    pendingLoads;
    /** Singleton instance for process-wide sharing */
    static _instance = null;
    /**
     * Get or create the singleton instance.
     * `dataDir` is only honoured by the call that creates the instance.
     */
    static getInstance(dataDir) {
        if (!SiteModelManager._instance) {
            SiteModelManager._instance = new SiteModelManager(dataDir);
        }
        return SiteModelManager._instance;
    }
    /** Maximum serialized size per domain model file */
    static MAX_MODEL_SIZE = 500 * 1024; // 500KB
    /** Data older than this is subject to decay/pruning */
    static DECAY_THRESHOLD_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
    /** Write-coalescing debounce interval */
    static WRITE_DEBOUNCE_MS = 1000; // 1 second
    /** Minimum reliability/confidence before an entry is prunable */
    static MIN_CONFIDENCE = 0.1;
    /** Maximum goal paths stored per domain */
    static MAX_GOAL_PATHS = 50;
    /** Maximum failure patterns stored per domain */
    static MAX_FAILURE_PATTERNS = 100;
    /** Failure pattern expiry */
    static FAILURE_EXPIRY_MS = 90 * 24 * 60 * 60 * 1000; // 90 days
    /** Milliseconds in one day */
    static MS_PER_DAY = 24 * 60 * 60 * 1000;
    /** Words ignored when comparing goal descriptions */
    static STOP_WORDS = new Set([
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
        "to", "of", "in", "for", "on", "with", "at", "by", "from", "and",
        "or", "but", "not", "this", "that", "it", "its",
    ]);
    /**
     * @param {string} [dataDir] Base data directory. Falls back to
     *   `CBROWSER_DATA_DIR`, then to `~/.cbrowser`.
     */
    constructor(dataDir) {
        const baseDir = dataDir ||
            process.env.CBROWSER_DATA_DIR ||
            path.join(os.homedir(), ".cbrowser");
        this.modelDir = path.join(baseDir, "site-models");
        this.models = new Map();
        this.dirty = new Set();
        this.writeTimer = null;
        this.pendingLoads = new Map();
    }
    // ---------------------------------------------------------------------------
    // Public API: Loading
    // ---------------------------------------------------------------------------
    /**
     * Lazy-load a site model. If no readable file exists the model starts empty.
     * Stored data is merged into the in-memory model and stale data is pruned.
     *
     * Concurrent calls for the same domain (and record calls made while the
     * file is being read) all share one model object.
     *
     * @param {string} domain Domain, optionally with protocol/port.
     * @returns {Promise<object>} The cached model for the normalized domain.
     */
    async loadModel(domain) {
        const normalized = this.normalizeDomain(domain);
        if (!this.models.has(normalized)) {
            await fs.mkdir(this.modelDir, { recursive: true });
        }
        // Re-check after the await: a synchronous record call may have created
        // (and started hydrating) the model in the meantime.
        let model = this.models.get(normalized);
        if (!model) {
            model = this.createEmptyModel(normalized);
            this.models.set(normalized, model);
            this.startHydration(normalized, model);
        }
        const inFlight = this.pendingLoads.get(normalized);
        if (inFlight) {
            await inFlight;
        }
        return model;
    }
    /**
     * Get the cached model or create an empty one synchronously, so record
     * methods work without a prior loadModel() call.
     *
     * A newly created model is hydrated from disk in the background; until
     * that finishes it only contains what was recorded in this process.
     * flushDirty() waits for the hydration, so stored data is merged before
     * the file is rewritten.
     *
     * @param {string} domain
     * @returns {object} The cached model for the normalized domain.
     */
    getOrCreateModel(domain) {
        const normalized = this.normalizeDomain(domain);
        let model = this.models.get(normalized);
        if (!model) {
            model = this.createEmptyModel(normalized);
            this.models.set(normalized, model);
            // Best effort: if hydration fails the in-memory model stays usable.
            this.startHydration(normalized, model).catch(() => { });
        }
        return model;
    }
    // ---------------------------------------------------------------------------
    // Public API: Recording
    // ---------------------------------------------------------------------------
    /**
     * Record a successful navigation from one page to another via a specific element.
     * Both URLs are reduced to their path; both nodes get a visit counted.
     */
    recordNavigation(domain, fromUrl, toUrl, elementSelector, elementText) {
        const normalized = this.normalizeDomain(domain);
        const model = this.getOrCreateModel(normalized);
        const now = Date.now();
        const fromPath = this.normalizeUrl(fromUrl);
        const toPath = this.normalizeUrl(toUrl);
        this.touchNode(model, fromPath, now);
        this.touchNode(model, toPath, now);
        const edge = model.navigation.edges.find((e) => e.fromUrl === fromPath &&
            e.toUrl === toPath &&
            e.elementSelector === elementSelector);
        if (edge) {
            edge.successCount++;
            edge.lastUsed = now;
            edge.elementText = elementText; // Update text in case it changed
            edge.reliability =
                edge.successCount / (edge.successCount + edge.failureCount);
        }
        else {
            model.navigation.edges.push({
                fromUrl: fromPath,
                toUrl: toPath,
                elementSelector,
                elementText,
                successCount: 1,
                failureCount: 0,
                lastUsed: now,
                reliability: 1.0,
            });
        }
        model.lastUpdated = new Date().toISOString();
        this.markDirty(normalized);
    }
    /**
     * Record the result (success/failure) of interacting with a specific element.
     * Entries are keyed by `<page path>::<selector>`.
     */
    recordElementResult(domain, pageUrl, selector, success) {
        const normalized = this.normalizeDomain(domain);
        const model = this.getOrCreateModel(normalized);
        const pagePattern = this.normalizeUrl(pageUrl);
        const key = `${pagePattern}::${selector}`;
        let entry = model.elements[key];
        if (!entry) {
            entry = {
                selector,
                domain: normalized,
                pageUrlPattern: pagePattern,
                successRate: 0,
                totalAttempts: 0,
                alternatives: [],
                lastVerified: Date.now(),
                decayFactor: 1.0,
            };
            model.elements[key] = entry;
        }
        entry.totalAttempts++;
        // The success count is not stored; recover it from rate * previous attempts.
        const previousSuccesses = Math.round(entry.successRate * (entry.totalAttempts - 1));
        const newSuccesses = previousSuccesses + (success ? 1 : 0);
        entry.successRate = newSuccesses / entry.totalAttempts;
        entry.lastVerified = Date.now();
        entry.decayFactor = 1.0; // Reset decay on fresh interaction
        model.lastUpdated = new Date().toISOString();
        this.markDirty(normalized);
    }
    /**
     * Record a completed goal path attempt (success or failure).
     * Matches existing paths by goalType + keyword overlap before creating new entries.
     */
    recordGoalPath(domain, goalDescription, goalType, actions, success, steps, persona) {
        const normalized = this.normalizeDomain(domain);
        const model = this.getOrCreateModel(normalized);
        const now = Date.now();
        const existing = this.findSimilarGoalPath(model, goalDescription, goalType);
        if (existing) {
            existing.attemptCount++;
            const previousSuccesses = Math.round(existing.successRate * (existing.attemptCount - 1));
            existing.successRate =
                (previousSuccesses + (success ? 1 : 0)) / existing.attemptCount;
            existing.averageSteps =
                (existing.averageSteps * (existing.attemptCount - 1) + steps) /
                    existing.attemptCount;
            existing.lastUsed = now;
            // Only a successful attempt replaces the remembered action sequence.
            if (success) {
                existing.actionSequence = actions;
            }
            if (persona) {
                existing.personaPerformance[persona] = { success, steps };
            }
        }
        else {
            model.goalPaths.push({
                goalDescription,
                goalType,
                actionSequence: actions,
                successRate: success ? 1.0 : 0.0,
                attemptCount: 1,
                averageSteps: steps,
                personaPerformance: persona
                    ? { [persona]: { success, steps } }
                    : {},
                lastUsed: now,
            });
            if (model.goalPaths.length > SiteModelManager.MAX_GOAL_PATHS) {
                this.evictGoalPaths(model);
            }
        }
        model.lastUpdated = new Date().toISOString();
        this.markDirty(normalized);
    }
    /**
     * Record a failure pattern for a page/element combination.
     *
     * @param {string} domain
     * @param {string} pageUrl
     * @param {string} [selector]
     * @param {string} failureType
     * @param {string[]} [conditions] Defaults to no conditions.
     */
    recordFailure(domain, pageUrl, selector, failureType, conditions = []) {
        const normalized = this.normalizeDomain(domain);
        const model = this.getOrCreateModel(normalized);
        const now = Date.now();
        const pagePattern = this.normalizeUrl(pageUrl);
        const existing = model.failures.find((f) => f.pageUrlPattern === pagePattern &&
            f.failureType === failureType &&
            f.elementSelector === selector);
        if (existing) {
            existing.frequency++;
            existing.lastSeen = now;
            for (const cond of conditions) {
                if (!existing.conditions.includes(cond)) {
                    existing.conditions.push(cond);
                }
            }
        }
        else {
            model.failures.push({
                pageUrlPattern: pagePattern,
                elementSelector: selector,
                failureType,
                frequency: 1,
                conditions: [...conditions],
                lastSeen: now,
            });
            if (model.failures.length > SiteModelManager.MAX_FAILURE_PATTERNS) {
                this.evictFailurePatterns(model);
            }
        }
        // If this element is already tracked, count the failure as one more
        // unsuccessful attempt in its element statistics.
        if (selector) {
            const elem = model.elements[`${pagePattern}::${selector}`];
            if (elem) {
                elem.totalAttempts++;
                const previousSuccesses = Math.round(elem.successRate * (elem.totalAttempts - 1));
                elem.successRate = previousSuccesses / elem.totalAttempts;
                elem.lastVerified = now;
            }
        }
        model.lastUpdated = new Date().toISOString();
        this.markDirty(normalized);
    }
    /**
     * Update or create a page fingerprint for structural change detection.
     * When the heading hash of a known page changes, the decay factor of
     * every element tracked for that page is halved.
     */
    updateFingerprint(domain, urlPattern, headingHash, formCount, navLinkCount, ctaCount, pageType) {
        const normalized = this.normalizeDomain(domain);
        const model = this.getOrCreateModel(normalized);
        const normalizedPattern = this.normalizeUrl(urlPattern);
        const existing = model.fingerprints[normalizedPattern];
        if (existing && existing.headingStructureHash !== headingHash) {
            this.decayElementsForPage(model, normalizedPattern);
        }
        model.fingerprints[normalizedPattern] = {
            urlPattern: normalizedPattern,
            headingStructureHash: headingHash,
            formCount,
            navLinkCount,
            ctaCount,
            pageType,
            lastSeen: Date.now(),
        };
        model.lastUpdated = new Date().toISOString();
        this.markDirty(normalized);
    }
    // ---------------------------------------------------------------------------
    // Public API: Querying
    // ---------------------------------------------------------------------------
    /**
     * Find the best known path for a given goal type.
     * Weights by successRate * attemptCount to favor frequently successful paths.
     * Only consults the in-memory cache; returns null when nothing is cached.
     */
    queryBestPath(domain, goalType) {
        const model = this.models.get(this.normalizeDomain(domain));
        if (!model)
            return null;
        const matching = model.goalPaths.filter((p) => p.goalType === goalType);
        if (matching.length === 0)
            return null;
        const score = (p) => p.successRate * p.attemptCount;
        matching.sort((a, b) => score(b) - score(a));
        return matching[0];
    }
    /**
     * Get the reliability score for a specific selector on a domain.
     * Returns successRate * decayFactor of the first tracked entry with that
     * selector (whatever page it was seen on), or 0 if unknown.
     */
    queryElementReliability(domain, selector) {
        const normalized = this.normalizeDomain(domain);
        const model = this.models.get(normalized);
        if (!model)
            return 0;
        const match = Object.values(model.elements).find((entry) => entry.selector === selector && entry.domain === normalized);
        return match ? match.successRate * match.decayFactor : 0;
    }
    /**
     * Get all known navigation edges departing from a given URL.
     * Sorted by reliability descending.
     */
    queryNavigationTargets(domain, fromUrl) {
        const model = this.models.get(this.normalizeDomain(domain));
        if (!model)
            return [];
        const fromPath = this.normalizeUrl(fromUrl);
        return model.navigation.edges
            .filter((e) => e.fromUrl === fromPath)
            .sort((a, b) => b.reliability - a.reliability);
    }
    // ---------------------------------------------------------------------------
    // Public API: Maintenance
    // ---------------------------------------------------------------------------
    /**
     * Prune stale data from a domain's model. Can operate on a provided model
     * (used during load) or look up the cached model by domain.
     *
     * Decay is derived from age, so calling this repeatedly does not compound:
     * - a stale edge's reliability is its success ratio halved once per full
     *   DECAY_THRESHOLD_MS since it was last used; edges below 0.05 are removed
     * - a stale element's decayFactor is capped at `1 - 0.01 * staleDays`;
     *   elements below MIN_CONFIDENCE are removed
     *
     * @returns {{edgesRemoved: number, elementsRemoved: number, pathsRemoved: number,
     *   failuresRemoved: number, fingerprintsRemoved: number}}
     */
    pruneStaleData(domain, targetModel) {
        const normalized = this.normalizeDomain(domain);
        const model = targetModel || this.models.get(normalized);
        const result = {
            edgesRemoved: 0,
            elementsRemoved: 0,
            pathsRemoved: 0,
            failuresRemoved: 0,
            fingerprintsRemoved: 0,
        };
        if (!model)
            return result;
        const now = Date.now();
        const decayThreshold = now - SiteModelManager.DECAY_THRESHOLD_MS;
        const failureExpiry = now - SiteModelManager.FAILURE_EXPIRY_MS;
        // --- Edges: decay old ones, remove unreliable ---
        for (const edge of model.navigation.edges) {
            if (!(edge.lastUsed < decayThreshold))
                continue;
            const attempts = edge.successCount + edge.failureCount;
            if (!(attempts > 0))
                continue; // No usable counters to derive a base reliability from
            const periods = Math.floor((now - edge.lastUsed) / SiteModelManager.DECAY_THRESHOLD_MS);
            edge.reliability = (edge.successCount / attempts) * 0.5 ** periods;
        }
        const keptEdges = model.navigation.edges.filter((e) => e.reliability >= 0.05);
        result.edgesRemoved = model.navigation.edges.length - keptEdges.length;
        model.navigation.edges = keptEdges;
        // --- Elements: apply time-based decay, remove low-confidence ---
        for (const [key, elem] of Object.entries(model.elements)) {
            if (elem.lastVerified < decayThreshold) {
                const staleDays = (now - elem.lastVerified) / SiteModelManager.MS_PER_DAY;
                const timeFactor = Math.max(0, 1 - staleDays * 0.01);
                // min() keeps any stronger decay already applied for structural changes.
                elem.decayFactor = Math.min(elem.decayFactor, timeFactor);
            }
            if (elem.decayFactor < SiteModelManager.MIN_CONFIDENCE) {
                delete model.elements[key];
                result.elementsRemoved++;
            }
        }
        // --- Goal paths: remove persistently failing ones ---
        const keptPaths = model.goalPaths.filter((p) => !(p.successRate === 0 && p.attemptCount > 3));
        result.pathsRemoved = model.goalPaths.length - keptPaths.length;
        model.goalPaths = keptPaths;
        // --- Failure patterns: remove expired ---
        const keptFailures = model.failures.filter((f) => f.lastSeen >= failureExpiry);
        result.failuresRemoved = model.failures.length - keptFailures.length;
        model.failures = keptFailures;
        // --- Fingerprints: remove very old ones (use failure expiry as threshold) ---
        for (const [key, fingerprint] of Object.entries(model.fingerprints)) {
            if (fingerprint.lastSeen < failureExpiry) {
                delete model.fingerprints[key];
                result.fingerprintsRemoved++;
            }
        }
        this.pruneOrphanedNodes(model);
        // Only persist when working on the cached model and something was removed.
        if (!targetModel && Object.values(result).some((count) => count > 0)) {
            this.markDirty(normalized);
        }
        return result;
    }
    /**
     * Get comprehensive statistics for a domain's model (loading it if needed).
     * `modelSizeBytes` is the UTF-8 size of the compact JSON form;
     * `oldestData` is the oldest timestamp found on any node, edge, element,
     * goal path, failure pattern or fingerprint (the creation time if none).
     */
    async getModelStats(domain) {
        const normalized = this.normalizeDomain(domain);
        const model = await this.getModel(normalized);
        const sizeBytes = Buffer.byteLength(JSON.stringify(model), "utf-8");
        const timestamps = [
            ...Object.values(model.navigation.nodes).map((node) => node.lastVisited),
            ...model.navigation.edges.map((edge) => edge.lastUsed),
            ...Object.values(model.elements).map((elem) => elem.lastVerified),
            ...model.goalPaths.map((gp) => gp.lastUsed),
            ...model.failures.map((failure) => failure.lastSeen),
            ...Object.values(model.fingerprints).map((fp) => fp.lastSeen),
        ];
        let oldest = Infinity;
        for (const stamp of timestamps) {
            if (stamp < oldest)
                oldest = stamp;
        }
        return {
            domain: normalized,
            navigationNodes: Object.keys(model.navigation.nodes).length,
            navigationEdges: model.navigation.edges.length,
            trackedElements: Object.keys(model.elements).length,
            goalPaths: model.goalPaths.length,
            failurePatterns: model.failures.length,
            pageFingerprints: Object.keys(model.fingerprints).length,
            modelSizeBytes: sizeBytes,
            lastUpdated: model.lastUpdated,
            oldestData: oldest === Infinity
                ? model.created
                : new Date(oldest).toISOString(),
        };
    }
    /**
     * Flush all pending writes and clear the debounce timer.
     * Call this on process shutdown to avoid data loss.
     */
    async shutdown() {
        if (this.writeTimer) {
            clearTimeout(this.writeTimer);
            this.writeTimer = null;
        }
        await this.flushDirty();
    }
    // ---------------------------------------------------------------------------
    // Private: Write Coalescing
    // ---------------------------------------------------------------------------
    /**
     * Mark a domain's model as needing a write to disk.
     * Starts a debounce timer if one isn't already running.
     */
    markDirty(domain) {
        this.dirty.add(domain);
        if (this.writeTimer)
            return;
        this.writeTimer = setTimeout(() => {
            this.writeTimer = null;
            // A rejection here would otherwise be an unhandled rejection.
            // Domains that failed to flush stay dirty and are retried later.
            this.flushDirty().catch(() => { });
        }, SiteModelManager.WRITE_DEBOUNCE_MS);
    }
    /**
     * Write all dirty models to disk. Enforces size cap before writing.
     *
     * Waits for in-flight disk hydrations first so stored data is merged
     * before its file is replaced. Domains whose write fails are put back
     * into the dirty set so a later flush retries them.
     *
     * @throws if the model directory cannot be created.
     */
    async flushDirty() {
        if (this.pendingLoads.size > 0) {
            await Promise.allSettled([...this.pendingLoads.values()]);
        }
        const domains = [...this.dirty];
        if (domains.length === 0)
            return;
        this.dirty.clear();
        try {
            await fs.mkdir(this.modelDir, { recursive: true });
        }
        catch (err) {
            for (const domain of domains)
                this.dirty.add(domain);
            throw err;
        }
        const outcomes = await Promise.allSettled(domains.map((domain) => this.writeModelFile(domain)));
        outcomes.forEach((outcome, index) => {
            if (outcome.status === "rejected") {
                this.dirty.add(domains[index]);
            }
        });
    }
    /**
     * Write one cached model atomically: temp file first, then rename.
     * Does nothing when the domain is not cached.
     */
    async writeModelFile(domain) {
        const model = this.models.get(domain);
        if (!model)
            return;
        this.enforceModelSizeCap(model);
        const filePath = this.modelFilePath(domain);
        const tmpPath = `${filePath}.tmp.${crypto.randomBytes(4).toString("hex")}`;
        try {
            await fs.writeFile(tmpPath, this.serializeModel(model), "utf-8");
            await fs.rename(tmpPath, filePath);
        }
        catch (err) {
            // Best-effort removal of the temp file; the write error is what matters.
            await fs.unlink(tmpPath).catch(() => { });
            throw err;
        }
    }
    /** Serialize a model exactly as it is written to disk. */
    serializeModel(model) {
        return JSON.stringify(model, null, 2);
    }
    // ---------------------------------------------------------------------------
    // Private: Disk hydration
    // ---------------------------------------------------------------------------
    /**
     * Start merging the stored file for `normalized` into `model` and track
     * the promise in `pendingLoads` until it settles.
     */
    startHydration(normalized, model) {
        const task = this.hydrateFromDisk(normalized, model).finally(() => {
            if (this.pendingLoads.get(normalized) === task) {
                this.pendingLoads.delete(normalized);
            }
        });
        this.pendingLoads.set(normalized, task);
        return task;
    }
    /** Merge stored data (if any) into `model` in place, then prune stale data. */
    async hydrateFromDisk(normalized, model) {
        const stored = await this.readModelFile(normalized);
        if (stored) {
            this.mergeStoredModel(model, stored);
        }
        this.pruneStaleData(normalized, model);
    }
    /**
     * Read and parse a domain's model file.
     * A missing, unreadable or unparsable file yields null (treated as "no data").
     */
    async readModelFile(normalized) {
        let parsed;
        try {
            parsed = JSON.parse(await fs.readFile(this.modelFilePath(normalized), "utf-8"));
        }
        catch {
            return null;
        }
        return this.coerceStoredModel(parsed, normalized);
    }
    /**
     * Give parsed JSON the container shape the rest of the class relies on.
     * Missing or wrongly typed top-level containers become empty ones; the
     * domain is forced to `domain` (handles renames/moves).
     * Returns null when the parsed value is not an object at all.
     */
    coerceStoredModel(raw, domain) {
        const isRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
        if (!isRecord(raw))
            return null;
        const blank = this.createEmptyModel(domain);
        const navigation = isRecord(raw.navigation) ? raw.navigation : {};
        return {
            ...raw,
            domain,
            version: typeof raw.version === "number" ? raw.version : blank.version,
            created: typeof raw.created === "string" ? raw.created : blank.created,
            lastUpdated: typeof raw.lastUpdated === "string" ? raw.lastUpdated : blank.lastUpdated,
            navigation: {
                nodes: isRecord(navigation.nodes) ? navigation.nodes : {},
                edges: Array.isArray(navigation.edges) ? navigation.edges : [],
            },
            elements: isRecord(raw.elements) ? raw.elements : {},
            goalPaths: Array.isArray(raw.goalPaths) ? raw.goalPaths : [],
            failures: Array.isArray(raw.failures) ? raw.failures : [],
            fingerprints: isRecord(raw.fingerprints) ? raw.fingerprints : {},
        };
    }
    /**
     * Merge a stored model into the live in-memory model, in place.
     * Entries only present on disk are adopted as-is; entries present on both
     * sides have their counters combined and keep the newer timestamp.
     */
    mergeStoredModel(target, stored) {
        // Nothing recorded since creation: behave like a plain load.
        const untouched = !this.dirty.has(target.domain);
        target.version = stored.version;
        target.created = stored.created;
        if (untouched) {
            target.lastUpdated = stored.lastUpdated;
        }
        // Preserve top-level fields this class does not know about.
        for (const key of Object.keys(stored)) {
            if (key !== "__proto__" && !Object.prototype.hasOwnProperty.call(target, key)) {
                target[key] = stored[key];
            }
        }
        // Navigation nodes
        const nodes = target.navigation.nodes;
        for (const [url, storedNode] of Object.entries(stored.navigation.nodes)) {
            const live = nodes[url];
            if (!live) {
                nodes[url] = storedNode;
                continue;
            }
            live.visitCount += storedNode.visitCount || 0;
            live.lastVisited = Math.max(live.lastVisited, storedNode.lastVisited || 0);
        }
        // Navigation edges
        const edgeKey = (e) => JSON.stringify([e.fromUrl, e.toUrl, e.elementSelector]);
        const liveEdges = new Map(target.navigation.edges.map((e) => [edgeKey(e), e]));
        for (const storedEdge of stored.navigation.edges) {
            const live = liveEdges.get(edgeKey(storedEdge));
            if (!live) {
                target.navigation.edges.push(storedEdge);
                continue;
            }
            live.successCount += storedEdge.successCount || 0;
            live.failureCount += storedEdge.failureCount || 0;
            live.lastUsed = Math.max(live.lastUsed, storedEdge.lastUsed || 0);
            live.reliability =
                live.successCount / (live.successCount + live.failureCount);
        }
        // Elements
        for (const [key, storedElem] of Object.entries(stored.elements)) {
            const live = target.elements[key];
            if (!live) {
                target.elements[key] = storedElem;
                continue;
            }
            const storedAttempts = storedElem.totalAttempts || 0;
            const successes = Math.round(live.successRate * live.totalAttempts) +
                Math.round((storedElem.successRate || 0) * storedAttempts);
            live.totalAttempts += storedAttempts;
            live.successRate = live.totalAttempts > 0 ? successes / live.totalAttempts : 0;
            for (const alt of storedElem.alternatives || []) {
                if (!live.alternatives.includes(alt)) {
                    live.alternatives.push(alt);
                }
            }
        }
        // Goal paths: match stored paths against what was recorded in memory.
        const livePaths = { goalPaths: [...target.goalPaths] };
        for (const storedPath of stored.goalPaths) {
            const live = this.findSimilarGoalPath(livePaths, storedPath.goalDescription, storedPath.goalType);
            if (!live) {
                target.goalPaths.push(storedPath);
                continue;
            }
            const storedAttempts = storedPath.attemptCount || 0;
            const liveSuccesses = Math.round(live.successRate * live.attemptCount);
            const storedSuccesses = Math.round((storedPath.successRate || 0) * storedAttempts);
            const attempts = live.attemptCount + storedAttempts;
            live.averageSteps =
                (live.averageSteps * live.attemptCount +
                    (storedPath.averageSteps || 0) * storedAttempts) / attempts;
            live.successRate = (liveSuccesses + storedSuccesses) / attempts;
            live.attemptCount = attempts;
            live.lastUsed = Math.max(live.lastUsed, storedPath.lastUsed || 0);
            // Keep the stored description/sequence unless this process saw a success.
            live.goalDescription = storedPath.goalDescription;
            if (liveSuccesses === 0) {
                live.actionSequence = storedPath.actionSequence;
            }
            live.personaPerformance = {
                ...storedPath.personaPerformance,
                ...live.personaPerformance,
            };
        }
        if (target.goalPaths.length > SiteModelManager.MAX_GOAL_PATHS) {
            this.evictGoalPaths(target);
        }
        // Failure patterns
        const failureKey = (f) => JSON.stringify([f.pageUrlPattern, f.failureType, f.elementSelector]);
        const liveFailures = new Map(target.failures.map((f) => [failureKey(f), f]));
        for (const storedFailure of stored.failures) {
            const live = liveFailures.get(failureKey(storedFailure));
            if (!live) {
                target.failures.push(storedFailure);
                continue;
            }
            live.frequency += storedFailure.frequency || 0;
            live.lastSeen = Math.max(live.lastSeen, storedFailure.lastSeen || 0);
            for (const cond of storedFailure.conditions || []) {
                if (!live.conditions.includes(cond)) {
                    live.conditions.push(cond);
                }
            }
        }
        if (target.failures.length > SiteModelManager.MAX_FAILURE_PATTERNS) {
            this.evictFailurePatterns(target);
        }
        // Fingerprints: the in-memory one is newer, so it wins.
        for (const [pattern, storedFingerprint] of Object.entries(stored.fingerprints)) {
            if (!target.fingerprints[pattern]) {
                target.fingerprints[pattern] = storedFingerprint;
            }
        }
    }
    // ---------------------------------------------------------------------------
    // Private: Size Enforcement
    // ---------------------------------------------------------------------------
    /**
     * Enforce the per-domain size cap, measured on the form written to disk.
     * Drops the oldest 10% (at least one entry) at a time, in this order:
     * edges, elements, goal paths, failure patterns, fingerprints.
     * Navigation nodes are only removed afterwards via pruneOrphanedNodes().
     */
    enforceModelSizeCap(model) {
        const overCap = () => Buffer.byteLength(this.serializeModel(model), "utf-8") >
            SiteModelManager.MAX_MODEL_SIZE;
        if (!overCap())
            return;
        const batchSize = (length) => Math.max(1, Math.floor(length * 0.1));
        // Trim an array in place, oldest first by `field`.
        const trimList = (list, field) => {
            if (list.length === 0 || !overCap())
                return;
            list.sort((a, b) => a[field] - b[field]);
            do {
                list.splice(0, batchSize(list.length));
            } while (list.length > 0 && overCap());
        };
        // Trim a keyed record in place, oldest first by `field`.
        const trimRecord = (record, field) => {
            const entries = Object.entries(record);
            if (entries.length === 0 || !overCap())
                return;
            entries.sort(([, a], [, b]) => a[field] - b[field]);
            do {
                for (const [key] of entries.splice(0, batchSize(entries.length))) {
                    delete record[key];
                }
            } while (entries.length > 0 && overCap());
        };
        trimList(model.navigation.edges, "lastUsed");
        trimRecord(model.elements, "lastVerified");
        trimList(model.goalPaths, "lastUsed");
        trimList(model.failures, "lastSeen");
        trimRecord(model.fingerprints, "lastSeen");
        this.pruneOrphanedNodes(model);
    }
    // ---------------------------------------------------------------------------
    // Private: Helpers
    // ---------------------------------------------------------------------------
    /**
     * Create a fresh, empty site model for a domain.
     */
    createEmptyModel(domain) {
        const now = new Date().toISOString();
        return {
            domain,
            version: 1,
            created: now,
            lastUpdated: now,
            navigation: {
                nodes: {},
                edges: [],
            },
            elements: {},
            goalPaths: [],
            failures: [],
            fingerprints: {},
        };
    }
    /**
     * Get a model, loading from disk if necessary.
     * Also waits for a background hydration that is still in flight.
     */
    async getModel(domain) {
        return this.loadModel(domain);
    }
    /**
     * Build the file path for a domain's model JSON.
     * Every character outside [a-zA-Z0-9.-] is replaced by "_".
     */
    modelFilePath(domain) {
        const safeName = domain.replace(/[^a-zA-Z0-9.-]/g, "_");
        return path.join(this.modelDir, `${safeName}.json`);
    }
    /**
     * Normalize a domain string: lowercase, strip protocol, strip trailing
     * slashes, strip a trailing port.
     */
    normalizeDomain(domain) {
        return domain
            .toLowerCase()
            .replace(/^https?:\/\//, "")
            .replace(/\/+$/, "")
            .replace(/:\d+$/, ""); // Strip port for consistency
    }
    /**
     * Normalize a URL to just its path for consistent node keys.
     * Strips query strings, hashes, and trailing slashes; "" becomes "/".
     */
    normalizeUrl(url) {
        const trimTrailingSlashes = (value) => value.replace(/\/+$/, "") || "/";
        const stripQueryAndHash = (value) => trimTrailingSlashes(value.split("?")[0].split("#")[0]);
        if (url.startsWith("http://") || url.startsWith("https://")) {
            try {
                return trimTrailingSlashes(new URL(url).pathname);
            }
            catch {
                // Unparsable absolute URL: fall back to manual stripping.
                return stripQueryAndHash(url);
            }
        }
        return stripQueryAndHash(url);
    }
    /**
     * Insert a navigation node for `pagePath`, or bump its visit statistics.
     */
    touchNode(model, pagePath, now) {
        const node = model.navigation.nodes[pagePath];
        if (node) {
            node.lastVisited = now;
            node.visitCount++;
            return;
        }
        model.navigation.nodes[pagePath] = {
            url: pagePath,
            lastVisited: now,
            visitCount: 1,
        };
    }
    /**
     * Find a similar goal path using goalType + keyword overlap in descriptions.
     * Overlap is the Jaccard index of the two keyword sets
     * (|shared| / |union|); the best match above 0.5 is returned, else null.
     */
    findSimilarGoalPath(model, goalDescription, goalType) {
        const wanted = new Set(this.extractKeywords(goalDescription));
        if (wanted.size === 0)
            return null;
        let bestMatch = null;
        let bestOverlap = 0;
        for (const candidate of model.goalPaths) {
            if (candidate.goalType !== goalType)
                continue;
            const known = new Set(this.extractKeywords(candidate.goalDescription));
            if (known.size === 0)
                continue;
            let shared = 0;
            for (const word of wanted) {
                if (known.has(word))
                    shared++;
            }
            const overlap = shared / (wanted.size + known.size - shared);
            if (overlap > 0.5 && overlap > bestOverlap) {
                bestOverlap = overlap;
                bestMatch = candidate;
            }
        }
        return bestMatch;
    }
    /**
     * Extract meaningful keywords from a goal description for fuzzy matching:
     * lowercased, reduced to [a-z0-9] words longer than two characters that
     * are not stop words. Non-string input yields no keywords.
     */
    extractKeywords(text) {
        if (typeof text !== "string")
            return [];
        return text
            .toLowerCase()
            .replace(/[^a-z0-9\s]/g, "")
            .split(/\s+/)
            .filter((w) => w.length > 2 && !SiteModelManager.STOP_WORDS.has(w));
    }
    /**
     * Evict the least valuable goal paths when over the cap.
     * Score = successRate * attemptCount * recency_weight
     */
    evictGoalPaths(model) {
        const now = Date.now();
        const score = (p) => {
            const ageDays = (now - p.lastUsed) / SiteModelManager.MS_PER_DAY;
            return p.successRate * p.attemptCount * (1 / (1 + ageDays));
        };
        model.goalPaths.sort((a, b) => score(b) - score(a)); // Keep highest scores
        model.goalPaths = model.goalPaths.slice(0, SiteModelManager.MAX_GOAL_PATHS);
    }
    /**
     * Evict the lowest scoring failure patterns when over the cap.
     * Score = lastSeen * log2(frequency + 1)
     */
    evictFailurePatterns(model) {
        const score = (f) => f.lastSeen * Math.log2(f.frequency + 1);
        model.failures.sort((a, b) => score(b) - score(a));
        model.failures = model.failures.slice(0, SiteModelManager.MAX_FAILURE_PATTERNS);
    }
    /**
     * When a page's structure changes (fingerprint hash differs),
     * halve the decay factor of every element tracked for that page.
     */
    decayElementsForPage(model, pagePattern) {
        for (const elem of Object.values(model.elements)) {
            if (elem.pageUrlPattern === pagePattern) {
                elem.decayFactor *= 0.5; // 50% decay on structural change
            }
        }
    }
    /**
     * Remove navigation nodes that have no edges referencing them
     * and have not been visited within DECAY_THRESHOLD_MS.
     */
    pruneOrphanedNodes(model) {
        const referencedUrls = new Set();
        for (const edge of model.navigation.edges) {
            referencedUrls.add(edge.fromUrl);
            referencedUrls.add(edge.toUrl);
        }
        const now = Date.now();
        for (const [url, node] of Object.entries(model.navigation.nodes)) {
            const isStale = now - node.lastVisited > SiteModelManager.DECAY_THRESHOLD_MS;
            if (isStale && !referencedUrls.has(url)) {
                delete model.navigation.nodes[url];
            }
        }
    }
}
825
+ //# sourceMappingURL=manager.js.map