cbrowser 18.34.1 → 18.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/analysis/accessibility-empathy.d.ts.map +1 -1
  2. package/dist/analysis/accessibility-empathy.js +44 -8
  3. package/dist/analysis/accessibility-empathy.js.map +1 -1
  4. package/dist/analysis/page-understanding.d.ts +118 -0
  5. package/dist/analysis/page-understanding.d.ts.map +1 -0
  6. package/dist/analysis/page-understanding.js +940 -0
  7. package/dist/analysis/page-understanding.js.map +1 -0
  8. package/dist/browser/index.d.ts +2 -0
  9. package/dist/browser/index.d.ts.map +1 -1
  10. package/dist/browser/index.js +1 -0
  11. package/dist/browser/index.js.map +1 -1
  12. package/dist/browser/site-profile-manager.d.ts +116 -0
  13. package/dist/browser/site-profile-manager.d.ts.map +1 -0
  14. package/dist/browser/site-profile-manager.js +495 -0
  15. package/dist/browser/site-profile-manager.js.map +1 -0
  16. package/dist/cognitive/goal-decomposer.d.ts +127 -0
  17. package/dist/cognitive/goal-decomposer.d.ts.map +1 -0
  18. package/dist/cognitive/goal-decomposer.js +902 -0
  19. package/dist/cognitive/goal-decomposer.js.map +1 -0
  20. package/dist/cognitive/goal-types.d.ts +140 -0
  21. package/dist/cognitive/goal-types.d.ts.map +1 -0
  22. package/dist/cognitive/goal-types.js +136 -0
  23. package/dist/cognitive/goal-types.js.map +1 -0
  24. package/dist/cognitive/index.d.ts +2 -0
  25. package/dist/cognitive/index.d.ts.map +1 -1
  26. package/dist/cognitive/index.js +4 -0
  27. package/dist/cognitive/index.js.map +1 -1
  28. package/dist/mcp-tools/base/audit-tools.d.ts.map +1 -1
  29. package/dist/mcp-tools/base/audit-tools.js +5 -2
  30. package/dist/mcp-tools/base/audit-tools.js.map +1 -1
  31. package/dist/mcp-tools/base/cognitive-tools.d.ts.map +1 -1
  32. package/dist/mcp-tools/base/cognitive-tools.js +20 -0
  33. package/dist/mcp-tools/base/cognitive-tools.js.map +1 -1
  34. package/dist/mcp-tools/base/index.d.ts +4 -2
  35. package/dist/mcp-tools/base/index.d.ts.map +1 -1
  36. package/dist/mcp-tools/base/index.js +7 -2
  37. package/dist/mcp-tools/base/index.js.map +1 -1
  38. package/dist/mcp-tools/base/interaction-tools.d.ts.map +1 -1
  39. package/dist/mcp-tools/base/interaction-tools.js +23 -0
  40. package/dist/mcp-tools/base/interaction-tools.js.map +1 -1
  41. package/dist/mcp-tools/base/navigation-tools.d.ts.map +1 -1
  42. package/dist/mcp-tools/base/navigation-tools.js +13 -0
  43. package/dist/mcp-tools/base/navigation-tools.js.map +1 -1
  44. package/dist/mcp-tools/base/site-knowledge-tools.d.ts +15 -0
  45. package/dist/mcp-tools/base/site-knowledge-tools.d.ts.map +1 -0
  46. package/dist/mcp-tools/base/site-knowledge-tools.js +314 -0
  47. package/dist/mcp-tools/base/site-knowledge-tools.js.map +1 -0
  48. package/dist/mcp-tools/index.d.ts +6 -6
  49. package/dist/mcp-tools/index.d.ts.map +1 -1
  50. package/dist/mcp-tools/index.js +7 -7
  51. package/dist/mcp-tools/index.js.map +1 -1
  52. package/dist/site-model/manager.d.ts +161 -0
  53. package/dist/site-model/manager.d.ts.map +1 -0
  54. package/dist/site-model/manager.js +825 -0
  55. package/dist/site-model/manager.js.map +1 -0
  56. package/dist/site-model/types.d.ts +108 -0
  57. package/dist/site-model/types.d.ts.map +1 -0
  58. package/dist/site-model/types.js +10 -0
  59. package/dist/site-model/types.js.map +1 -0
  60. package/package.json +2 -2
@@ -0,0 +1,940 @@
1
+ /**
2
+ * CBrowser - Page Understanding Engine
3
+ * Real-time page understanding via accessibility tree + DOM analysis.
4
+ * Produces rich page models: type classification, affordances, structure, relationships.
5
+ *
6
+ * @copyright 2026 Alexandria Eden alexandria.shai.eden@gmail.com https://cbrowser.ai
7
+ * @license MIT
8
+ * @since v18.35.0
9
+ */
10
+ // ============================================================================
11
+ // Constants
12
+ // ============================================================================
13
/** Maximum age, in milliseconds, of a cached page understanding before it is recomputed. */
const CACHE_TTL_MS = 30 * 1000;
/** Element-count threshold above which DOM extraction is restricted to the viewport. */
const MAX_ELEMENTS_FULL_SCAN = 1_000;
/** Relationship cap; NOTE(review): its consumer is not visible in this part of the file. */
const MAX_RELATIONSHIPS = 100;
/** Maximum number of characters kept when element text is copied into results. */
const MAX_TEXT_LENGTH = 80;
17
+ // ============================================================================
18
+ // PageUnderstandingEngine
19
+ // ============================================================================
20
/**
 * Produces page models (type, affordances, structure, relationships) from a
 * live page object exposing `url()` and `evaluate(fn, arg)`.
 *
 * Full analyses are cached per URL; an entry is reused only while it is younger
 * than CACHE_TTL_MS and the page's lightweight content hash is unchanged.
 */
export class PageUnderstandingEngine {
    /** Map from page URL to `{ understanding, contentHash, timestamp }`. */
    cache;
    constructor() {
        this.cache = new Map();
    }
    /**
     * Full page understanding (lazy, cached).
     * Extracts interactive elements, headings, forms, nav landmarks, and semantic
     * regions in one in-page evaluation, then classifies, structures, and relates them.
     *
     * @param page Page handle providing `url()` and `evaluate()`.
     * @returns The understanding object: url, type, affordances, structure,
     *   relationships, computedAt and computeTimeMs.
     */
    async analyze(page) {
        const url = page.url();
        const start = Date.now();
        // Evict every expired entry first; otherwise entries for URLs that are
        // never revisited would stay in the map forever.
        this.pruneExpired(start);
        // Anything still cached is within its TTL; it is valid if the content hash matches.
        const cached = this.cache.get(url);
        if (cached) {
            const currentHash = await this.computeContentHash(page);
            if (currentHash === cached.contentHash) {
                return cached.understanding;
            }
        }
        const raw = await this.extractDOM(page);
        const type = classifyPageType(raw);
        const affordances = computeAffordances(raw);
        const structure = buildStructure(raw);
        const relationships = computeRelationships(raw, structure);
        const computeTimeMs = Date.now() - start;
        const understanding = {
            url,
            type,
            affordances,
            structure,
            relationships,
            computedAt: Date.now(),
            computeTimeMs,
        };
        // Hash is taken after extraction so it describes the DOM state that was analyzed.
        const contentHash = await this.computeContentHash(page);
        this.cache.set(url, { understanding, contentHash, timestamp: Date.now() });
        return understanding;
    }
    /**
     * Lightweight skeleton (cheap, can be called more often than analyze()).
     * Counts elements, forms, links, headings, and nav landmarks and gathers a few
     * boolean hints in a single evaluate call.
     *
     * @param page Page handle providing `url()` and `evaluate()`.
     * @returns `{ url, type, elementCount, formCount, linkCount, headingCount, navLandmarks, contentHash }`.
     */
    async skeleton(page) {
        const url = page.url();
        // The callback runs inside the page, so it cannot close over module constants.
        const data = await page.evaluate(() => {
            const allElements = document.querySelectorAll("*");
            const forms = document.querySelectorAll("form");
            const links = document.querySelectorAll("a[href]");
            const headings = document.querySelectorAll("h1, h2, h3, h4, h5, h6");
            const navs = document.querySelectorAll('nav, [role="navigation"]');
            const inputs = document.querySelectorAll("input, textarea, select");
            const submitBtns = document.querySelectorAll('button[type="submit"], input[type="submit"]');
            const headingTexts = [];
            headings.forEach((h) => {
                const text = (h.textContent || "").trim().slice(0, 80);
                if (text)
                    headingTexts.push(text);
            });
            const title = document.title || "";
            // Search input detection. The conventional short name is matched exactly
            // (name="q"): a substring match on "q" also hits "quantity", "frequency", etc.
            const hasSearchInput = !!document.querySelector('input[type="search"], input[name*="search" i], input[name*="query" i], input[name="q" i], input[aria-label*="search" i], [role="search"] input');
            // Sidebar detection (aside or complementary landmark)
            const asides = document.querySelectorAll("aside, [role='complementary']");
            const hasSidebar = asides.length > 0;
            // Hero detection (leading section or an element with a hero-like class)
            const firstSection = document.querySelector("main > section:first-child, body > section:first-child, .hero, [class*='hero' i]");
            const hasHero = !!firstSection;
            // Long text blocks (paragraphs with >200 chars)
            let longTextBlocks = 0;
            document.querySelectorAll("p").forEach((p) => {
                if ((p.textContent || "").trim().length > 200)
                    longTextBlocks++;
            });
            // Repeated structures: containers whose first children (up to 10 inspected)
            // include at least 2 siblings sharing the first child's tag.
            let repeatedStructures = 0;
            document.querySelectorAll("ul, ol, [role='list'], main, [role='main']").forEach((container) => {
                const children = container.children;
                if (children.length >= 3) {
                    const firstTag = children[0]?.tagName;
                    let similar = 0;
                    for (let i = 1; i < Math.min(children.length, 10); i++) {
                        if (children[i]?.tagName === firstTag)
                            similar++;
                    }
                    if (similar >= 2)
                        repeatedStructures++;
                }
            });
            // Error indicators in the title or first h1
            const titleLower = title.toLowerCase();
            const hasErrorIndicator = titleLower.includes("404") ||
                titleLower.includes("not found") ||
                titleLower.includes("error") ||
                !!document.querySelector("h1")?.textContent?.toLowerCase().match(/(?:404|not found|error|page not found|oops)/);
            // Payment keywords anywhere in a form's text
            let hasPaymentKeywords = false;
            forms.forEach((form) => {
                const formText = (form.textContent || "").toLowerCase();
                if (formText.includes("payment") ||
                    formText.includes("credit card") ||
                    formText.includes("billing") ||
                    formText.includes("card number") ||
                    formText.includes("cvv") ||
                    formText.includes("expir")) {
                    hasPaymentKeywords = true;
                }
            });
            // Settings keywords in the first h1 or the title
            const h1Text = (document.querySelector("h1")?.textContent || "").toLowerCase();
            const hasSettingsKeywords = h1Text.includes("settings") ||
                h1Text.includes("preferences") ||
                h1Text.includes("configuration") ||
                titleLower.includes("settings") ||
                titleLower.includes("preferences");
            return {
                elementCount: allElements.length,
                formCount: forms.length,
                linkCount: links.length,
                headingCount: headings.length,
                navLandmarks: navs.length,
                headingTexts,
                title,
                hasSearchInput,
                inputCount: inputs.length,
                hasSubmitButton: submitBtns.length > 0,
                hasSidebar,
                hasHero,
                longTextBlocks,
                repeatedStructures,
                hasErrorIndicator,
                hasPaymentKeywords,
                hasSettingsKeywords,
            };
        });
        const contentHash = computeHashFromSkeleton(data);
        const type = classifyPageTypeFromSkeleton(data);
        return {
            url,
            type,
            elementCount: data.elementCount,
            formCount: data.formCount,
            linkCount: data.linkCount,
            headingCount: data.headingCount,
            navLandmarks: data.navLandmarks,
            contentHash,
        };
    }
    /** Invalidate cache for a specific URL. */
    invalidate(url) {
        this.cache.delete(url);
    }
    /** Invalidate all cached entries. */
    invalidateAll() {
        this.cache.clear();
    }
    // --------------------------------------------------------------------------
    // Private helpers
    // --------------------------------------------------------------------------
    /**
     * Remove every cache entry whose age has reached CACHE_TTL_MS.
     *
     * @param now Reference time in epoch milliseconds (defaults to the current time).
     */
    pruneExpired(now = Date.now()) {
        for (const [cachedUrl, entry] of this.cache) {
            if (now - entry.timestamp >= CACHE_TTL_MS) {
                this.cache.delete(cachedUrl);
            }
        }
    }
    /**
     * Compute a lightweight content hash for cache invalidation.
     * Combines element/form/link counts with heading texts (first 40 characters each).
     *
     * @param page Page handle providing `evaluate()`.
     * @returns Whatever `simpleHash` produces for the combined string.
     */
    async computeContentHash(page) {
        const data = await page.evaluate(() => {
            const headings = document.querySelectorAll("h1, h2, h3, h4, h5, h6");
            const headingTexts = [];
            headings.forEach((h) => {
                const text = (h.textContent || "").trim().slice(0, 40);
                if (text)
                    headingTexts.push(text);
            });
            return {
                elementCount: document.querySelectorAll("*").length,
                formCount: document.querySelectorAll("form").length,
                linkCount: document.querySelectorAll("a[href]").length,
                headingTexts,
            };
        });
        // Simple hash input: concatenate counts and heading texts
        const raw = `${data.elementCount}:${data.formCount}:${data.linkCount}:${data.headingTexts.join("|")}`;
        return simpleHash(raw);
    }
    /**
     * Single page.evaluate() that extracts interactive elements, headings, forms,
     * nav landmarks and semantic regions.
     * When the page has more than MAX_ELEMENTS_FULL_SCAN elements, only elements near
     * the viewport, plus nav/header/footer landmark elements themselves, are kept.
     *
     * @param page Page handle providing `evaluate()`.
     * @returns `{ interactiveElements, headings, forms, navs, regions, totalElementCount, title, viewportHeight }`.
     */
    async extractDOM(page) {
        const raw = await page.evaluate((maxElements) => {
            const totalElementCount = document.querySelectorAll("*").length;
            const viewportHeight = window.innerHeight;
            const limitToViewport = totalElementCount > maxElements;
            function isInViewport(el) {
                if (!limitToViewport)
                    return true;
                const rect = el.getBoundingClientRect();
                // Keep elements from 100px above the viewport to 1.5 viewports down
                if (rect.top < viewportHeight * 1.5 && rect.bottom > -100)
                    return true;
                // Landmark elements themselves are always kept, wherever they are
                const tag = el.tagName.toLowerCase();
                if (tag === "nav" || tag === "header" || tag === "footer")
                    return true;
                const role = el.getAttribute("role") || "";
                if (role === "navigation" || role === "banner" || role === "contentinfo")
                    return true;
                return false;
            }
            function getVisibleText(el) {
                return (el.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80);
            }
            // 1-based position among preceding siblings with the same tag name
            function getNthOfType(el) {
                const tag = el.tagName;
                let nth = 1;
                let sibling = el.previousElementSibling;
                while (sibling) {
                    if (sibling.tagName === tag)
                        nth++;
                    sibling = sibling.previousElementSibling;
                }
                return nth;
            }
            // DOM properties such as `type`/`name` are not strings on every element kind.
            function asString(value) {
                return typeof value === "string" ? value : "";
            }
            // SVG <a> exposes `href` as an SVGAnimatedString, not a string; downstream
            // code calls string methods on href, so normalize to a string here.
            function getHref(el) {
                if (typeof el.href === "string")
                    return el.href;
                return asString(el.href?.baseVal);
            }
            // Form properties are read through attributes: a control named "id",
            // "action" or "method" shadows the same-named property on the form element.
            function getFormAction(form) {
                const attr = form.getAttribute("action");
                // Mirrors the `action` property: a missing/empty attribute yields the document URL.
                if (!attr)
                    return document.URL;
                try {
                    return new URL(attr, document.baseURI).href;
                }
                catch {
                    return attr;
                }
            }
            function getFormMethod(form) {
                const method = (form.getAttribute("method") || "").toUpperCase();
                // Mirrors the `method` property, which normalizes unknown values to "get".
                return method === "POST" || method === "DIALOG" ? method : "GET";
            }
            // -- Interactive elements --
            const interactiveSelectors = "a[href], button, input, textarea, select, [role='button'], [role='link'], [role='checkbox'], [role='radio'], [role='switch'], [role='tab'], [role='menuitem'], [tabindex]";
            const interactiveEls = document.querySelectorAll(interactiveSelectors);
            const interactiveElements = [];
            // Build a form-index map (index within ALL forms in document order)
            const allForms = document.querySelectorAll("form");
            const formMap = new Map();
            allForms.forEach((form, index) => formMap.set(form, index));
            interactiveEls.forEach((el) => {
                if (!isInViewport(el))
                    return;
                const rect = el.getBoundingClientRect();
                const elType = asString(el.type);
                // Skip zero-size elements, except those whose type mentions "hidden"
                if (rect.width === 0 && rect.height === 0 && !/hidden/i.test(elType))
                    return;
                const tag = el.tagName.toLowerCase();
                // Find enclosing form
                let formIndex = -1;
                let parent = el.parentElement;
                while (parent) {
                    if (parent.tagName === "FORM" && formMap.has(parent)) {
                        formIndex = formMap.get(parent);
                        break;
                    }
                    parent = parent.parentElement;
                }
                interactiveElements.push({
                    tag,
                    type: elType,
                    text: getVisibleText(el),
                    href: getHref(el),
                    name: asString(el.name),
                    id: el.id || "",
                    className: el.className?.toString()?.slice(0, 120) || "",
                    role: el.getAttribute("role") || "",
                    ariaLabel: el.getAttribute("aria-label") || "",
                    disabled: el.disabled || el.getAttribute("aria-disabled") === "true",
                    rect: { top: rect.top, left: rect.left, width: rect.width, height: rect.height },
                    formIndex,
                    nthOfType: getNthOfType(el),
                });
            });
            // -- Headings --
            const headingEls = document.querySelectorAll("h1, h2, h3, h4, h5, h6");
            const headings = [];
            headingEls.forEach((el) => {
                if (!isInViewport(el))
                    return;
                const level = parseInt(el.tagName.charAt(1), 10);
                headings.push({
                    level,
                    text: getVisibleText(el),
                    id: el.id || "",
                    nthOfType: getNthOfType(el),
                });
            });
            // -- Forms --
            const forms = [];
            allForms.forEach((form) => {
                if (!isInViewport(form))
                    return;
                const fields = [];
                form.querySelectorAll("input, textarea, select").forEach((field) => {
                    const type = field.type || field.tagName.toLowerCase();
                    // Skip hidden inputs from field list (they aren't user-interactable)
                    if (type === "hidden")
                        return;
                    // Find associated label
                    let labelText = "";
                    if (field.id) {
                        // Escape the id: a quote or backslash in it would otherwise make
                        // querySelector throw and abort the whole extraction.
                        const labelEl = document.querySelector(`label[for="${CSS.escape(field.id)}"]`);
                        if (labelEl)
                            labelText = getVisibleText(labelEl);
                    }
                    if (!labelText) {
                        // Fall back to a wrapping <label>
                        const parentLabel = field.closest("label");
                        if (parentLabel) {
                            labelText = getVisibleText(parentLabel);
                        }
                    }
                    if (!labelText) {
                        labelText = field.getAttribute("aria-label") || field.getAttribute("placeholder") || "";
                    }
                    fields.push({
                        tag: field.tagName.toLowerCase(),
                        type,
                        name: field.name || "",
                        id: field.id || "",
                        labelText: labelText.slice(0, 80),
                        required: field.required || field.getAttribute("aria-required") === "true",
                        nthOfType: getNthOfType(field),
                    });
                });
                // Find submit button
                let submitButton = null;
                const submitEl = form.querySelector('button[type="submit"], input[type="submit"]') ||
                    form.querySelector("button:not([type])"); // Default button type is submit
                if (submitEl) {
                    submitButton = {
                        tag: submitEl.tagName.toLowerCase(),
                        text: getVisibleText(submitEl) || submitEl.value || "Submit",
                        id: submitEl.id || "",
                        className: submitEl.className?.toString()?.slice(0, 120) || "",
                        nthOfType: getNthOfType(submitEl),
                    };
                }
                forms.push({
                    action: getFormAction(form),
                    method: getFormMethod(form),
                    id: form.getAttribute("id") || "",
                    className: form.className?.toString()?.slice(0, 120) || "",
                    fields,
                    submitButton,
                });
            });
            // -- Nav landmarks --
            const navEls = document.querySelectorAll('nav, [role="navigation"]');
            const navs = [];
            navEls.forEach((nav) => {
                if (!isInViewport(nav))
                    return;
                const links = [];
                nav.querySelectorAll("a[href]").forEach((a) => {
                    links.push({
                        text: getVisibleText(a),
                        href: getHref(a),
                        id: a.id || "",
                        className: a.className?.toString()?.slice(0, 120) || "",
                        nthOfType: getNthOfType(a),
                    });
                });
                navs.push({
                    ariaLabel: nav.getAttribute("aria-label") || "",
                    id: nav.id || "",
                    className: nav.className?.toString()?.slice(0, 120) || "",
                    links,
                    nthOfType: getNthOfType(nav),
                });
            });
            // -- Semantic regions --
            const regionEls = document.querySelectorAll('main, aside, footer, header, section, [role="main"], [role="complementary"], [role="contentinfo"], [role="banner"], [role="region"]');
            const regions = [];
            regionEls.forEach((el) => {
                if (!isInViewport(el))
                    return;
                regions.push({
                    tag: el.tagName.toLowerCase(),
                    role: el.getAttribute("role") || "",
                    ariaLabel: el.getAttribute("aria-label") || "",
                    id: el.id || "",
                    className: el.className?.toString()?.slice(0, 120) || "",
                    nthOfType: getNthOfType(el),
                    childCount: el.children.length,
                });
            });
            return {
                interactiveElements,
                headings,
                forms,
                navs,
                regions,
                totalElementCount,
                title: document.title || "",
                viewportHeight,
            };
        }, MAX_ELEMENTS_FULL_SCAN);
        return raw;
    }
}
417
+ // ============================================================================
418
+ // Page type classification
419
+ // ============================================================================
420
/**
 * Classify a page from the raw DOM extraction using ordered heuristics.
 * The first matching rule wins: error, checkout, search, settings, dashboard,
 * form, article, list; anything else is "landing".
 *
 * @param raw Extraction result with `interactiveElements`, `headings`, `forms`,
 *   `navs`, `regions` and `title`.
 * @returns One of "error", "checkout", "search", "settings", "dashboard",
 *   "form", "article", "list", "landing".
 */
function classifyPageType(raw) {
    const { interactiveElements, headings, forms, navs, regions, title } = raw;
    const titleLower = title.toLowerCase();
    // Error page: title or first h1 contains error keywords
    const h1 = headings.find((h) => h.level === 1);
    const h1Text = (h1?.text || "").toLowerCase();
    if (/\b(404|500|not found|error|oops)\b/.test(titleLower) ||
        /\b(404|500|not found|error|oops|page not found)\b/.test(h1Text)) {
        return "error";
    }
    const totalFormFields = forms.reduce((sum, f) => sum + f.fields.length, 0);
    // Checkout: any form whose field names/labels mention payment keywords
    const paymentKeywords = ["card", "payment", "billing", "cvv", "expir", "credit"];
    const hasPaymentForm = forms.some((f) => {
        const fieldNames = f.fields.map((field) => `${field.name} ${field.labelText}`.toLowerCase()).join(" ");
        return paymentKeywords.some((keyword) => fieldNames.includes(keyword));
    });
    if (hasPaymentForm)
        return "checkout";
    // Search: a search-like input plus a main region with many children
    const hasSearchInput = interactiveElements.some((el) => {
        const nameLower = el.name.toLowerCase();
        return el.type === "search" ||
            nameLower.includes("search") ||
            nameLower.includes("query") ||
            (nameLower === "q" && el.tag === "input") ||
            el.ariaLabel.toLowerCase().includes("search") ||
            el.role === "search";
    });
    const hasRepeatedContent = regions.some((r) => r.childCount >= 5 && (r.tag === "main" || r.role === "main"));
    if (hasSearchInput && hasRepeatedContent)
        return "search";
    // Settings: 3+ toggles/selects together with settings keywords
    const toggleCount = interactiveElements.filter((el) => el.type === "checkbox" ||
        el.type === "radio" ||
        el.role === "switch" ||
        el.role === "checkbox" ||
        el.tag === "select").length;
    const mentionsSettings = h1Text.includes("settings") ||
        h1Text.includes("preferences") ||
        h1Text.includes("configuration") ||
        titleLower.includes("settings") ||
        titleLower.includes("preferences");
    if (toggleCount >= 3 && mentionsSettings)
        return "settings";
    // Dashboard: sidebar or multiple navs, plus 3+ sections
    const hasSidebar = regions.some((r) => r.tag === "aside" || r.role === "complementary");
    const hasMultipleNavs = navs.length >= 2;
    const hasMultipleSections = regions.filter((r) => r.tag === "section" || r.role === "region").length >= 3;
    if ((hasSidebar || hasMultipleNavs) && hasMultipleSections)
        return "dashboard";
    // Form: 3+ input fields with a submit button
    if (totalFormFields >= 3 && forms.some((f) => f.submitButton))
        return "form";
    // Article: 3+ headings reaching level 3 or deeper, and at most 2 form fields
    const hasDeepHeadings = headings.some((h) => h.level >= 3);
    const manyHeadings = headings.length >= 3;
    if (manyHeadings && hasDeepHeadings && totalFormFields <= 2)
        return "article";
    // List: many links inside a main region with many children
    const linkCount = interactiveElements.filter((el) => el.tag === "a").length;
    if (linkCount >= 10 && hasRepeatedContent)
        return "list";
    // Landing is the fallback. The former "top buttons / has nav" check returned
    // "landing" on both branches, so it had no effect and was removed.
    return "landing";
}
495
/**
 * Classify a page from the cheap skeleton counters and flags.
 * Rules are evaluated in priority order and the first one that applies wins;
 * "landing" is the fallback.
 *
 * @param data Skeleton data (counts plus has* flags).
 * @returns The page type string.
 */
function classifyPageTypeFromSkeleton(data) {
    const orderedRules = [
        ["error", () => data.hasErrorIndicator],
        ["checkout", () => data.hasPaymentKeywords],
        ["search", () => data.hasSearchInput && data.repeatedStructures > 0],
        ["settings", () => data.hasSettingsKeywords && data.inputCount >= 3],
        ["dashboard", () => data.hasSidebar && data.navLandmarks >= 2],
        ["form", () => data.inputCount >= 3 && data.hasSubmitButton],
        ["article", () => data.headingCount >= 3 && data.longTextBlocks >= 2 && data.formCount <= 1],
        ["list", () => data.repeatedStructures >= 1 && data.linkCount >= 10],
    ];
    const winner = orderedRules.find(([, applies]) => applies());
    return winner ? winner[0] : "landing";
}
514
+ // ============================================================================
515
+ // Affordance computation
516
+ // ============================================================================
517
/**
 * Turn every enabled interactive element into an affordance record.
 * Disabled elements are left out. Each record carries the element's selector,
 * truncated text, action, predicted outcome, confidence and reversibility, plus
 * `ariaRole` when the element declares a role.
 *
 * @param raw Extraction result; only `raw.interactiveElements` is read.
 * @returns Array of affordance objects in element order.
 */
function computeAffordances(raw) {
    return raw.interactiveElements.flatMap((candidate) => {
        if (candidate.disabled) {
            return [];
        }
        const action = determineAction(candidate);
        const expectedOutcome = predictOutcome(candidate, action);
        const confidence = computeConfidence(candidate);
        const reversible = determineReversibility(candidate, action);
        const entry = {
            element: buildSelector(candidate),
            elementText: candidate.text.slice(0, MAX_TEXT_LENGTH),
            action,
            expectedOutcome,
            confidence,
            reversible,
        };
        if (candidate.role) {
            entry.ariaRole = candidate.role;
        }
        return [entry];
    });
}
539
/**
 * Decide which kind of interaction an element affords.
 *
 * @param el Extracted element; reads `tag`, `type`, `role` and `formIndex`.
 * @returns "submit", "toggle", "fill", "select" or "click".
 */
function determineAction(el) {
    const { tag, type, role } = el;
    // Submit buttons. <input type="image"> is a graphical submit button, so it must
    // be caught here rather than falling into the generic "fill" rule for inputs.
    if (type === "submit" ||
        (tag === "input" && type === "image") ||
        (tag === "button" && el.formIndex >= 0 && !type)) {
        return "submit";
    }
    // Toggle elements. ARIA radios are treated the same as native radio inputs.
    if (role === "switch" || role === "checkbox" || role === "radio" || type === "checkbox" || type === "radio")
        return "toggle";
    // Fill elements: inputs that are not buttons, toggles or hidden
    if (tag === "input" && !["submit", "button", "reset", "checkbox", "radio", "hidden"].includes(type))
        return "fill";
    if (tag === "textarea")
        return "fill";
    // Select elements
    if (tag === "select" || role === "listbox" || role === "combobox")
        return "select";
    // Everything else is a click
    return "click";
}
558
/**
 * Report whether an absolute http(s) link leaves the current site.
 * Returns false when the page hostname is unknown or the href cannot be parsed,
 * so that an unknown context never produces an "external" label.
 */
function isExternalHref(href, pageHostname) {
    if (!pageHostname || !href.startsWith("http"))
        return false;
    try {
        const linkHost = new URL(href).hostname;
        // Same host or a subdomain of it counts as internal.
        return linkHost !== pageHostname && !linkHost.endsWith(`.${pageHostname}`);
    }
    catch {
        return false;
    }
}
/**
 * Produce a short human-readable prediction of what an action on an element does.
 *
 * @param el Extracted element; reads `text`, `href`, `tag`, `role`, `ariaLabel`, `name`, `type`.
 * @param action Action string as returned by determineAction.
 * @param pageHostname Hostname of the page the element came from. Defaults to
 *   `globalThis.location?.hostname`, which is undefined outside a browser; in that
 *   case links are never labelled external.
 * @returns The predicted outcome text.
 */
function predictOutcome(el, action, pageHostname = globalThis.location?.hostname) {
    const text = el.text.toLowerCase().trim();
    const href = el.href;
    switch (action) {
        case "submit":
            return "Submit form data to server";
        case "fill": {
            const label = el.ariaLabel || el.name || el.type;
            return `Enter ${label || "text"} value`;
        }
        case "select":
            return `Select an option from ${el.ariaLabel || el.name || "dropdown"}`;
        case "toggle":
            return `Toggle ${el.ariaLabel || el.name || "option"} on/off`;
        case "click": {
            if (el.tag === "a" && href) {
                if (href.startsWith("mailto:"))
                    return `Open email to ${href.replace("mailto:", "")}`;
                if (href.startsWith("tel:"))
                    return `Initiate phone call`;
                if (href.includes("#"))
                    return "Scroll to page section";
                // Compare parsed hostnames: a substring test would treat
                // "https://evil.test/?next=<our host>" as an internal link.
                if (isExternalHref(href, pageHostname)) {
                    return "Navigate to external site";
                }
                return `Navigate to ${text || "linked page"}`;
            }
            if (el.role === "tab")
                return `Switch to ${text || "tab"} panel`;
            if (el.role === "menuitem")
                return `Select ${text || "menu"} option`;
            // Keyword rules, checked in order; the first match wins.
            const textRules = [
                [/\b(close|dismiss|cancel)\b/i, "Close or dismiss"],
                [/\b(delete|remove)\b/i, "Delete or remove item"],
                [/\b(save|update)\b/i, "Save changes"],
                [/\b(add|create|new)\b/i, "Create new item"],
                [/\b(edit|modify)\b/i, "Enter edit mode"],
                [/\b(open|show|view|expand)\b/i, "Show or reveal content"],
                [/\b(next|continue|proceed)\b/i, "Advance to next step"],
                [/\b(back|previous|prev)\b/i, "Return to previous step"],
                [/\b(search|find)\b/i, "Initiate search"],
                [/\b(sign in|log in|login)\b/i, "Navigate to sign in"],
                [/\b(sign up|register|join)\b/i, "Navigate to registration"],
            ];
            const matched = textRules.find(([pattern]) => pattern.test(text));
            if (matched)
                return matched[1];
            return `Activate ${text || el.ariaLabel || "element"}`;
        }
        default:
            return "Interact with element";
    }
}
617
/**
 * Score how confidently an element's purpose can be inferred from its attributes.
 * Signals are checked from strongest to weakest and the first present one sets the score.
 *
 * @param el Extracted element; reads `role`, `text`, `ariaLabel`, `type`, `name`, `id`.
 * @returns A score between 0.5 (nothing distinguishing) and 0.9 (explicit ARIA role).
 */
function computeConfidence(el) {
    // An explicit, non-decorative ARIA role is the strongest signal.
    const declaresPurpose = Boolean(el.role) && !["presentation", "none"].includes(el.role);
    if (declaresPurpose) {
        return 0.9;
    }
    // Short visible text, or an aria-label, both describe the purpose directly.
    const visibleLength = el.text.trim().length;
    const hasShortText = visibleLength > 0 && visibleLength <= 50;
    if (hasShortText || el.ariaLabel) {
        return 0.8;
    }
    // A specific (non-"text") type narrows the input category.
    if (el.type && el.type !== "text") {
        return 0.7;
    }
    // Name, then id, are progressively weaker hints.
    if (el.name) {
        return 0.65;
    }
    return el.id ? 0.6 : 0.5;
}
639
/**
 * Estimate whether the effect of an action on an element can be undone.
 *
 * @param el Extracted element; reads `tag`, `href`, `role` and `text`.
 * @param action Action string as returned by determineAction.
 * @returns true when the action is considered reversible.
 */
function determineReversibility(el, action) {
    // Toggles (click again), fills (clear and retype) and selects (choose another
    // option) are reversible.
    if (action === "toggle" || action === "fill" || action === "select")
        return true;
    // Tabs are reversible (click another tab)
    if (el.role === "tab")
        return true;
    // Delete/remove controls are NOT reversible. This is checked before the link
    // rule so that a "Delete" anchor is not reported as reversible just because
    // it is a link.
    if (/\b(delete|remove|destroy)\b/.test(el.text.toLowerCase()))
        return false;
    // Other links are reversible (browser back button)
    if (el.tag === "a" && el.href)
        return true;
    // Form submits are NOT reversible
    if (action === "submit")
        return false;
    // Default: clicks with no clear destructive intent are cautiously reversible
    return true;
}
664
+ // ============================================================================
665
+ // Structure building
666
+ // ============================================================================
667
/**
 * Assemble the structural view of a page by delegating each part to its builder.
 *
 * @param raw Extraction result; passed whole to each builder except the heading
 *   hierarchy, which receives `raw.headings`.
 * @returns `{ navigation, mainContent, forms, ctas, headingHierarchy }`.
 */
function buildStructure(raw) {
    return {
        navigation: buildNavigationGroups(raw),
        mainContent: buildMainContentGroups(raw),
        forms: buildFormGroups(raw),
        ctas: detectCTAs(raw),
        headingHierarchy: buildHeadingHierarchy(raw.headings),
    };
}
675
/**
 * Convert each extracted nav landmark into a labelled group of link entries.
 * A nav without an aria-label is labelled "Navigation".
 *
 * @param raw Extraction result; only `raw.navs` is read.
 * @returns One `{ label, elements }` group per nav, each element being
 *   `{ selector, text, role: "link" }`.
 */
function buildNavigationGroups(raw) {
    const groups = [];
    for (const nav of raw.navs) {
        // Link selectors are built relative to their nav's selector.
        const parentSelector = buildNavSelector(nav);
        const elements = [];
        for (const link of nav.links) {
            elements.push({
                selector: buildLinkSelector(link, parentSelector),
                text: link.text,
                role: "link",
            });
        }
        groups.push({ label: nav.ariaLabel || "Navigation", elements });
    }
    return groups;
}
688
/**
 * Collect anchor elements whose href does not appear in any nav landmark.
 * Nav membership is approximated by href: a content link that shares its href
 * with a nav link is excluded as well.
 *
 * @param raw Extraction result; reads `raw.navs` and `raw.interactiveElements`.
 * @returns An empty array when there are no such links, otherwise a single
 *   "Content links" group.
 */
function buildMainContentGroups(raw) {
    const navHrefs = new Set(raw.navs.flatMap((nav) => nav.links.map((link) => link.href)));
    const elements = [];
    for (const el of raw.interactiveElements) {
        const isContentLink = el.tag === "a" && el.href && !navHrefs.has(el.href);
        if (!isContentLink) {
            continue;
        }
        elements.push({
            selector: buildSelector(el),
            text: el.text.slice(0, MAX_TEXT_LENGTH),
            role: el.role || "link",
        });
    }
    return elements.length === 0 ? [] : [{ label: "Content links", elements }];
}
710
/**
 * Describe each extracted form: label, action, method, fields and submit button.
 * The label is the form's id when it has one, otherwise "Form N" (1-based position
 * in `raw.forms`). Empty action, method and field label values become undefined.
 *
 * @param raw Extraction result; only `raw.forms` is read.
 * @returns One descriptor per form, in `raw.forms` order.
 */
function buildFormGroups(raw) {
    const groups = [];
    raw.forms.forEach((form, index) => {
        const position = index + 1;
        const formSelector = form.id ? `#${cssEscape(form.id)}` : `form:nth-of-type(${position})`;
        const fields = [];
        for (const field of form.fields) {
            fields.push({
                selector: buildFieldSelector(field, formSelector),
                type: field.type,
                name: field.name,
                label: field.labelText || undefined,
                required: field.required,
            });
        }
        let submitButton;
        if (form.submitButton) {
            submitButton = {
                selector: buildSubmitSelector(form.submitButton, formSelector),
                text: form.submitButton.text,
            };
        }
        groups.push({
            label: form.id || `Form ${position}`,
            action: form.action || undefined,
            method: form.method || undefined,
            fields,
            submitButton,
        });
    });
    return groups;
}
735
/**
 * Find call-to-action candidates among the interactive elements.
 *
 * An element qualifies when it is not disabled, its trimmed text is 1-60
 * characters long and the text contains one of the CTA keywords.
 * Prominence is decided as follows:
 *   - top < 600 (treated as above the fold): "primary" if wider than 120 or
 *     the class name contains primary/cta/hero/main, otherwise "secondary";
 *   - further down: "secondary" if the class name contains primary/cta,
 *     otherwise "tertiary".
 *
 * @param raw - Extracted page data; only `raw.interactiveElements` is read.
 * @returns CTA descriptors ordered primary, then secondary, then tertiary.
 */
function detectCTAs(raw) {
    const keywordPattern = /\b(sign up|get started|try|start|buy|subscribe|download|join|register|book|order|contact|learn more|shop now|explore)\b/i;
    const strongClassPattern = /\b(primary|cta|hero|main)\b/;
    const weakClassPattern = /\b(primary|cta)\b/;
    const rank = { primary: 0, secondary: 1, tertiary: 2 };
    const found = [];
    raw.interactiveElements.forEach((el) => {
        if (el.disabled) {
            return;
        }
        const text = el.text.trim();
        const hasUsableLength = text.length > 0 && text.length <= 60;
        if (!hasUsableLength || !keywordPattern.test(text)) {
            return;
        }
        const selector = buildSelector(el);
        const type = el.tag === "a" ? "link" : "button";
        // Evaluated lazily so className is only touched when a branch needs it.
        const classMatches = (pattern) => pattern.test(el.className.toLowerCase());
        let prominence;
        if (el.rect.top < 600) {
            prominence = el.rect.width > 120 || classMatches(strongClassPattern) ? "primary" : "secondary";
        }
        else {
            prominence = classMatches(weakClassPattern) ? "secondary" : "tertiary";
        }
        found.push({ selector, text, prominence, type });
    });
    found.sort((left, right) => rank[left.prominence] - rank[right.prominence]);
    return found;
}
770
/**
 * Nest a flat, document-ordered list of headings into a tree.
 *
 * A heading becomes a child of the closest preceding heading with a strictly
 * smaller level; when there is none it becomes a root node.
 *
 * @param headings - Items with `level`, `text`, `id` and `nthOfType`.
 * @returns The root nodes, each shaped `{ level, text, selector, children }`.
 */
function buildHeadingHierarchy(headings) {
    const roots = [];
    // Chain of currently "open" ancestors, outermost first.
    const ancestors = [];
    headings.forEach((heading) => {
        const { level } = heading;
        const selector = heading.id
            ? `#${cssEscape(heading.id)}`
            : `h${level}:nth-of-type(${heading.nthOfType})`;
        const node = { level, text: heading.text, selector, children: [] };
        // Walk back to the nearest ancestor whose level is lower than ours.
        let parentIndex = ancestors.length - 1;
        while (parentIndex >= 0 && ancestors[parentIndex].level >= level) {
            parentIndex -= 1;
        }
        ancestors.length = parentIndex + 1;
        const siblings = parentIndex < 0 ? roots : ancestors[parentIndex].children;
        siblings.push(node);
        ancestors.push(node);
    });
    return roots;
}
796
+ // ============================================================================
797
+ // Relationship computation
798
+ // ============================================================================
799
/**
 * Derive pairwise relationships between page elements, capped at
 * MAX_RELATIONSHIPS entries.
 *
 * Three relationship types are produced, in this order:
 *   1. "same-form": every pair of fields within one form, followed by the
 *      form's submit button paired with each of its fields.
 *   2. "contains":  a nav container paired with each of its links.
 *   3. "adjacent":  consecutive (by top position) enabled, non-zero-width
 *      interactive elements whose vertical gap is under 50px and whose
 *      formIndex is -1 on both sides (presumably meaning "not inside a
 *      form" - confirm against the extractor).
 *
 * @param raw - Extracted page data; `raw.interactiveElements` is read.
 * @param structure - Page structure with `forms` and `navigation` groups.
 * @returns Array of `{ elementA, elementB, type }`.
 */
function computeRelationships(raw, structure) {
    const relationships = [];
    const hasCapacity = () => relationships.length < MAX_RELATIONSHIPS;
    // same-form: inputs sharing a form element
    for (const form of structure.forms) {
        const fieldSelectors = form.fields.map((f) => f.selector);
        for (let i = 0; i < fieldSelectors.length && hasCapacity(); i++) {
            for (let j = i + 1; j < fieldSelectors.length && hasCapacity(); j++) {
                relationships.push({
                    elementA: fieldSelectors[i],
                    elementB: fieldSelectors[j],
                    type: "same-form",
                });
            }
        }
        // The submit button shares the form with each of its fields
        if (form.submitButton) {
            for (const field of form.fields) {
                if (!hasCapacity()) {
                    break;
                }
                relationships.push({
                    elementA: form.submitButton.selector,
                    elementB: field.selector,
                    type: "same-form",
                });
            }
        }
    }
    // contains: nav containing links
    for (const navGroup of structure.navigation) {
        // The group only carries its label, so the container selector is
        // synthesized from it. The label is escaped so that quotes or
        // backslashes in an aria-label cannot produce a malformed selector.
        // NOTE(review): groups built from a nav without aria-label carry the
        // default label "Navigation"; this selector may not match such a nav.
        const navSelector = `nav[aria-label="${cssEscapeAttr(navGroup.label)}"]`;
        for (const el of navGroup.elements) {
            if (!hasCapacity()) {
                break;
            }
            relationships.push({
                elementA: navSelector,
                elementB: el.selector,
                type: "contains",
            });
        }
    }
    // adjacent: interactive elements near each other (within 50px vertical)
    const sorted = raw.interactiveElements
        .filter((el) => !el.disabled && el.rect.width > 0)
        .sort((a, b) => a.rect.top - b.rect.top);
    for (let i = 0; i < sorted.length - 1 && hasCapacity(); i++) {
        const a = sorted[i];
        const b = sorted[i + 1];
        // Distance between the bottom edge of `a` and the top edge of `b`.
        const verticalGap = Math.abs(b.rect.top - (a.rect.top + a.rect.height));
        if (verticalGap < 50 && a.formIndex === -1 && b.formIndex === -1) {
            relationships.push({
                elementA: buildSelector(a),
                elementB: buildSelector(b),
                type: "adjacent",
            });
        }
    }
    return relationships.slice(0, MAX_RELATIONSHIPS);
}
858
+ // ============================================================================
859
+ // Selector builders
860
+ // ============================================================================
861
/**
 * Build a CSS selector for an interactive element, trying the most specific
 * attribute first: id, role + aria-label, name, aria-label, href (anchors
 * only), and finally `tag:nth-of-type(n)`.
 *
 * Hrefs longer than 80 characters are shortened to keep selectors compact.
 * A shortened href is emitted as a prefix match (`[href^="..."]`), because
 * an exact match (`[href="..."]`) against a truncated value can never match
 * the element it was derived from.
 *
 * @param el - Element descriptor with `id`, `role`, `ariaLabel`, `name`,
 *             `tag`, `href` and `nthOfType`.
 * @returns A CSS selector string.
 */
function buildSelector(el) {
    const MAX_HREF_LENGTH = 80;
    if (el.id) {
        return `#${cssEscape(el.id)}`;
    }
    if (el.role && el.ariaLabel) {
        return `[role="${cssEscapeAttr(el.role)}"][aria-label="${cssEscapeAttr(el.ariaLabel)}"]`;
    }
    if (el.name) {
        return `${el.tag}[name="${cssEscapeAttr(el.name)}"]`;
    }
    if (el.ariaLabel) {
        return `${el.tag}[aria-label="${cssEscapeAttr(el.ariaLabel)}"]`;
    }
    if (el.href && el.tag === "a") {
        if (el.href.length <= MAX_HREF_LENGTH) {
            return `a[href="${cssEscapeAttr(el.href)}"]`;
        }
        // Do not cut between the two halves of a surrogate pair.
        const lastUnit = el.href.charCodeAt(MAX_HREF_LENGTH - 1);
        const cut = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? MAX_HREF_LENGTH - 1 : MAX_HREF_LENGTH;
        return `a[href^="${cssEscapeAttr(el.href.slice(0, cut))}"]`;
    }
    // Fallback to nth-of-type
    return `${el.tag}:nth-of-type(${el.nthOfType})`;
}
879
/**
 * Build a CSS selector for a nav element: by id when it has one, otherwise
 * by aria-label, otherwise by its nth-of-type position.
 *
 * @param nav - Nav descriptor with `id`, `ariaLabel` and `nthOfType`.
 * @returns A CSS selector string.
 */
function buildNavSelector(nav) {
    const { id, ariaLabel } = nav;
    if (id) {
        return `#${cssEscape(id)}`;
    }
    return ariaLabel
        ? `nav[aria-label="${cssEscapeAttr(ariaLabel)}"]`
        : `nav:nth-of-type(${nav.nthOfType})`;
}
886
/**
 * Build a CSS selector for a link that lives inside a known parent.
 *
 * A link with an id is addressed by the id alone; otherwise the selector is
 * scoped under `parentSelector` and uses the href, or the nth-of-type
 * position when the link has no href.
 *
 * @param link - Link descriptor with `id`, `href` and `nthOfType`.
 * @param parentSelector - Selector of the containing element.
 * @returns A CSS selector string.
 */
function buildLinkSelector(link, parentSelector) {
    if (link.id) {
        return `#${cssEscape(link.id)}`;
    }
    const anchorPart = link.href
        ? `a[href="${cssEscapeAttr(link.href)}"]`
        : `a:nth-of-type(${link.nthOfType})`;
    return `${parentSelector} ${anchorPart}`;
}
893
/**
 * Build a CSS selector for a form field.
 *
 * A field with an id is addressed by the id alone; otherwise the selector is
 * scoped under `formSelector` and uses the field's name, or its nth-of-type
 * position when it has no name.
 *
 * @param field - Field descriptor with `id`, `name`, `tag` and `nthOfType`.
 * @param formSelector - Selector of the owning form.
 * @returns A CSS selector string.
 */
function buildFieldSelector(field, formSelector) {
    if (field.id) {
        return `#${cssEscape(field.id)}`;
    }
    const qualifier = field.name
        ? `[name="${cssEscapeAttr(field.name)}"]`
        : `:nth-of-type(${field.nthOfType})`;
    return `${formSelector} ${field.tag}${qualifier}`;
}
900
/**
 * Build a CSS selector for a form's submit control.
 *
 * A control with an id is addressed by the id alone; otherwise the selector
 * is `input[type="submit"]` for input elements and `button[type="submit"]`
 * for everything else, scoped under `formSelector`.
 *
 * @param btn - Submit control descriptor with `id` and `tag`.
 * @param formSelector - Selector of the owning form.
 * @returns A CSS selector string.
 */
function buildSubmitSelector(btn, formSelector) {
    if (btn.id) {
        return `#${cssEscape(btn.id)}`;
    }
    const tagName = btn.tag === "input" ? "input" : "button";
    return `${formSelector} ${tagName}[type="submit"]`;
}
907
+ // ============================================================================
908
+ // Utility functions
909
+ // ============================================================================
910
/**
 * Lightweight, non-cryptographic string hash used for cache invalidation.
 *
 * Folds every UTF-16 code unit into a 32-bit accumulator
 * (`acc = acc * 31 + unit`, wrapping at 32 bits) and renders the unsigned
 * result in base 36 with an "h" prefix.
 *
 * @param input - The string to hash.
 * @returns The hash, e.g. "h0" for the empty string.
 */
function simpleHash(input) {
    let acc = 0;
    let index = 0;
    while (index < input.length) {
        // Math.imul keeps the multiplication within 32 bits.
        acc = (Math.imul(acc, 31) + input.charCodeAt(index)) | 0;
        index += 1;
    }
    const unsigned = acc >>> 0;
    return `h${unsigned.toString(36)}`;
}
922
/**
 * Hash a page skeleton summary.
 *
 * The fingerprint is `elementCount:formCount:linkCount:headings`, where the
 * heading texts are joined with "|"; the fingerprint is then passed to
 * simpleHash.
 *
 * @param data - Object with `elementCount`, `formCount`, `linkCount` and
 *               `headingTexts` (array of strings).
 * @returns The hash string produced by simpleHash.
 */
function computeHashFromSkeleton(data) {
    const { elementCount, formCount, linkCount, headingTexts } = data;
    const parts = [elementCount, formCount, linkCount, headingTexts.join("|")];
    // String() mirrors template-literal conversion for each part.
    const fingerprint = parts.map((part) => String(part)).join(":");
    return simpleHash(fingerprint);
}
926
/**
 * Escape a string for use as a CSS identifier (e.g. after `#` in an ID
 * selector), following the CSSOM "serialize an identifier" rules that
 * `CSS.escape` implements in browsers.
 *
 * Compared with a plain "backslash every non-word character" approach this
 * also handles the cases that would otherwise yield an invalid selector:
 *   - a leading digit, or a digit right after a leading "-", is written as a
 *     hex escape (`1abc` -> `\31 abc`);
 *   - a lone "-" is escaped;
 *   - control characters are written as hex escapes, NUL becomes U+FFFD;
 *   - non-ASCII characters are kept verbatim, so surrogate pairs stay intact.
 *
 * @param value - Raw identifier text (string).
 * @returns The escaped identifier.
 */
function cssEscape(value) {
    let escaped = "";
    for (let i = 0; i < value.length; i++) {
        const unit = value.charCodeAt(i);
        const ch = value.charAt(i);
        if (unit === 0x0000) {
            escaped += "\uFFFD";
            continue;
        }
        const isDigit = unit >= 0x30 && unit <= 0x39;
        const isControl = (unit >= 0x01 && unit <= 0x1f) || unit === 0x7f;
        // Digits may not start an identifier, nor directly follow a leading "-".
        const isLeadingDigit = isDigit && (i === 0 || (i === 1 && value.charCodeAt(0) === 0x2d));
        if (isControl || isLeadingDigit) {
            // Hex escapes are terminated by a single space.
            escaped += `\\${unit.toString(16)} `;
            continue;
        }
        if (i === 0 && value.length === 1 && unit === 0x2d) {
            escaped += `\\${ch}`;
            continue;
        }
        const isIdentChar = unit >= 0x80 ||
            unit === 0x2d ||
            unit === 0x5f ||
            isDigit ||
            (unit >= 0x41 && unit <= 0x5a) ||
            (unit >= 0x61 && unit <= 0x7a);
        escaped += isIdentChar ? ch : `\\${ch}`;
    }
    return escaped;
}
933
/**
 * Escape a string for use inside a double-quoted CSS attribute value
 * selector, e.g. `[aria-label="..."]`.
 *
 * Backslashes and double quotes are backslash-escaped. Line feed, carriage
 * return and form feed are written as hex escapes, because a raw newline
 * inside a quoted CSS string terminates it and invalidates the selector.
 *
 * @param value - Raw attribute value (string).
 * @returns The escaped text, without the surrounding quotes.
 */
function cssEscapeAttr(value) {
    return value.replace(/["\\\n\r\f]/g, (ch) => {
        if (ch === '"' || ch === "\\") {
            return `\\${ch}`;
        }
        // Hex escape followed by the single space that terminates it.
        return `\\${ch.charCodeAt(0).toString(16)} `;
    });
}
940
+ //# sourceMappingURL=page-understanding.js.map