@mcp-b/smart-dom-reader 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 mcp-b contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,441 @@
1
+ # Smart DOM Reader
2
+
3
+ A stateless, token-efficient TypeScript library for extracting DOM information, optimized for AI-powered userscript generation. It combines ideas from multiple DOM extraction approaches to provide intelligent, context-aware element extraction.
4
+
5
+ ## Key Features
6
+
7
+ - **Two extraction approaches**: Progressive (step-by-step) and Full (single-pass)
8
+ - **Stateless architecture**: All functions accept document/element parameters
9
+ - **Multiple selector strategies**: CSS, XPath, text-based, data-testid
10
+ - **Smart content detection**: Automatically identifies main content areas
11
+ - **Context preservation**: Maintains element relationships and semantic context
12
+ - **Shadow DOM & iframe support**: Traverses complex DOM structures
13
+ - **Token-efficient**: Optimized for LLM context windows
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ npm install @mcp-b/smart-dom-reader
19
+ ```
20
+
21
+ ## Two Extraction Approaches
22
+
23
+ ### 1. Full Extraction (SmartDOMReader)
24
+
25
+ **When to use:** You need all information upfront and have sufficient token budget for processing the complete output. Best for automation, testing, and scenarios where you know exactly what you need.
26
+
27
+ ```typescript
28
+ import { SmartDOMReader } from '@mcp-b/smart-dom-reader';
29
+
30
+ // Pass document explicitly - no window dependency
31
+ const doc = document; // or any Document object
32
+
33
+ // Interactive mode - extract only interactive elements
34
+ const interactiveData = SmartDOMReader.extractInteractive(doc);
35
+
36
+ // Full mode - extract interactive + semantic elements
37
+ const fullData = SmartDOMReader.extractFull(doc);
38
+
39
+ // Custom options
40
+ const customData = SmartDOMReader.extractInteractive(doc, {
41
+ mainContentOnly: true,
42
+ viewportOnly: true,
43
+ includeHidden: false
44
+ });
45
+ ```
46
+
47
+ ### 2. Progressive Extraction (ProgressiveExtractor)
48
+
49
+ **When to use:** Working with AI/LLMs where token efficiency is critical. Allows making intelligent decisions at each step rather than extracting everything upfront.
50
+
51
+ ```typescript
52
+ import { ProgressiveExtractor } from '@mcp-b/smart-dom-reader';
53
+
54
+ // Step 1: Get high-level page structure (minimal tokens)
55
+ // Structure can be extracted from the whole document or a specific container element
56
+ const structure = ProgressiveExtractor.extractStructure(document);
57
+ console.log(structure.summary); // Quick stats about the page
58
+ console.log(structure.regions); // Map of page regions
59
+ console.log(structure.suggestions); // AI-friendly hints
60
+
61
+ // Step 2: Extract details from specific region based on structure
62
+ const mainContent = ProgressiveExtractor.extractRegion(
63
+ structure.summary.mainContentSelector,
64
+ document,
65
+ { mode: 'interactive' }
66
+ );
67
+
68
+ // Step 3: Extract readable content from a region
69
+ const articleText = ProgressiveExtractor.extractContent(
70
+ 'article.main-article',
71
+ document,
72
+ { includeHeadings: true, includeLists: true }
73
+ );
74
+
75
+ // Structure scoped to a container (e.g., navigation only)
76
+ const nav = document.querySelector('nav');
77
+ if (nav) {
78
+ const navOutline = ProgressiveExtractor.extractStructure(nav);
79
+ // navOutline.regions will only include elements within <nav>
80
+ }
81
+ ```
82
+
83
+ ## Extraction Modes
84
+
85
+ ### Interactive Mode
86
+ Focuses on elements users can interact with:
87
+ - Buttons and button-like elements
88
+ - Links
89
+ - Form inputs (text, select, textarea)
90
+ - Clickable elements with handlers
91
+ - Form structures and associations
92
+
93
+ ### Full Mode
94
+ Includes everything from interactive mode plus:
95
+ - Semantic HTML elements (articles, sections, nav)
96
+ - Headings hierarchy
97
+ - Images with alt text
98
+ - Tables and lists
99
+ - Content structure and relationships
100
+
101
+ ## API Comparison
102
+
103
+ ### Full Extraction API
104
+
105
+ ```typescript
106
+ // Class-based with options
107
+ const reader = new SmartDOMReader({
108
+ mode: 'interactive',
109
+ mainContentOnly: true,
110
+ viewportOnly: false
111
+ });
112
+ const result = reader.extract(document);
113
+
114
+ // Static methods for convenience
115
+ SmartDOMReader.extractInteractive(document);
116
+ SmartDOMReader.extractFull(document);
117
+ SmartDOMReader.extractFromElement(element, 'interactive');
118
+ ```
119
+
120
+ ### Progressive Extraction API
121
+
122
+ ```typescript
123
+ // Step 1: Structure overview (Document or Element)
124
+ const overview = ProgressiveExtractor.extractStructure(document);
125
+ // Returns: regions, forms, summary, suggestions
126
+
127
+ // Step 2: Region extraction
128
+ const region = ProgressiveExtractor.extractRegion(
129
+ selector,
130
+ document,
131
+ options
132
+ );
133
+ // Returns: Full SmartDOMResult for that region
134
+
135
+ // Step 3: Content extraction
136
+ const content = ProgressiveExtractor.extractContent(
137
+ selector,
138
+ document,
139
+ { includeMedia: true }
140
+ );
141
+ // Returns: Text content, headings, lists, tables, media
142
+ ```
143
+
144
+ ## Output Structure
145
+
146
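+ Full extraction and `ProgressiveExtractor.extractRegion` return a `SmartDOMResult`, structured data optimized for AI processing (`extractStructure` and `extractContent` return their own shapes, listed in the API comparison above):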
147
+
148
+ ```typescript
149
+ interface SmartDOMResult {
150
+ mode: 'interactive' | 'full';
151
+ timestamp: number;
152
+
153
+ page: {
154
+ url: string;
155
+ title: string;
156
+ hasErrors: boolean;
157
+ isLoading: boolean;
158
+ hasModals: boolean;
159
+ hasFocus?: string;
160
+ };
161
+
162
+ landmarks: {
163
+ navigation: string[];
164
+ main: string[];
165
+ forms: string[];
166
+ headers: string[];
167
+ footers: string[];
168
+ articles: string[];
169
+ sections: string[];
170
+ };
171
+
172
+ interactive: {
173
+ buttons: ExtractedElement[];
174
+ links: ExtractedElement[];
175
+ inputs: ExtractedElement[];
176
+ forms: FormInfo[];
177
+ clickable: ExtractedElement[];
178
+ };
179
+
180
+ semantic?: { // Only in full mode
181
+ headings: ExtractedElement[];
182
+ images: ExtractedElement[];
183
+ tables: ExtractedElement[];
184
+ lists: ExtractedElement[];
185
+ articles: ExtractedElement[];
186
+ };
187
+
188
+ metadata?: { // Only in full mode
189
+ totalElements: number;
190
+ extractedElements: number;
191
+ mainContent?: string;
192
+ language?: string;
193
+ };
194
+ }
195
+ ```
196
+
197
+ ## Element Information
198
+
199
+ Each extracted element includes comprehensive selector strategies with ranking (stable-first):
200
+
201
+ ```typescript
202
+ interface ExtractedElement {
203
+ tag: string;
204
+ text: string;
205
+
206
+ selector: {
207
+ css: string; // Best CSS selector (ranked stable-first)
208
+ xpath: string; // XPath selector
209
+ textBased?: string; // Text-content based hint
210
+ dataTestId?: string; // data-testid if available
211
+ ariaLabel?: string; // ARIA label if available
212
+ candidates?: Array<{
213
+ type: 'id' | 'data-testid' | 'role-aria' | 'name' | 'class-path' | 'css-path' | 'xpath' | 'text';
214
+ value: string;
215
+ score: number; // Higher = more stable/robust
216
+ }>;
217
+ };
218
+
219
+ attributes: Record<string, string>;
220
+
221
+ context: {
222
+ nearestForm?: string;
223
+ nearestSection?: string;
224
+ nearestMain?: string;
225
+ nearestNav?: string;
226
+ parentChain: string[];
227
+ };
228
+
229
+ // Compact flags: only present when true to save tokens
230
+ interaction: {
231
+ click?: boolean;
232
+ change?: boolean;
233
+ submit?: boolean;
234
+ nav?: boolean;
235
+ disabled?: boolean;
236
+ hidden?: boolean;
237
+ role?: string; // aria role when present
238
+ form?: string; // associated form selector
239
+ };
240
+ }
241
+ ```
242
+
243
+ ## Options
244
+
245
+ | Option | Type | Default | Description |
246
+ |--------|------|---------|-------------|
247
+ | `mode` | `'interactive' \| 'full'` | `'interactive'` | Extraction mode |
248
+ | `maxDepth` | `number` | `5` | Maximum traversal depth |
249
+ | `includeHidden` | `boolean` | `false` | Include hidden elements |
250
+ | `includeShadowDOM` | `boolean` | `true` | Traverse shadow DOM |
251
+ | `includeIframes` | `boolean` | `false` | Traverse iframes |
252
+ | `viewportOnly` | `boolean` | `false` | Only extract elements that are inside the current viewport |
253
+ | `mainContentOnly` | `boolean` | `false` | Focus on main content area |
254
+ | `customSelectors` | `string[]` | `[]` | Additional selectors to extract |
255
+
256
+ ## Use Cases
257
+
258
+ ### AI Userscript Generation (Progressive Approach)
259
+ ```typescript
260
+ // First, understand the page structure
261
+ const structure = ProgressiveExtractor.extractStructure(document);
262
+
263
+ // AI decides which region to focus on based on structure
264
+ const targetRegion = structure.regions.main?.selector || 'body';
265
+
266
+ // Extract detailed information from chosen region
267
+ const details = ProgressiveExtractor.extractRegion(
268
+ targetRegion,
269
+ document,
270
+ { mode: 'interactive', viewportOnly: true }
271
+ );
272
+
273
+ // Generate userscript prompt with focused context
274
+ const prompt = `
275
+ Page: ${details.page.title}
276
+ Main form: ${details.interactive.forms[0]?.selector}
277
+ Submit button: ${details.interactive.buttons.find(b => b.text.includes('Submit'))?.selector.css}
278
+
279
+ Write a userscript to auto-fill and submit this form.
280
+ `;
281
+ ```
282
+
283
+ ### Test Automation (Full Extraction)
284
+ ```typescript
285
+ // Get all interactive elements at once
286
+ const testData = SmartDOMReader.extractInteractive(document, {
287
+ customSelectors: ['[data-test]', '[data-cy]']
288
+ });
289
+
290
+ // Use multiple selector strategies for robust testing
291
+ testData.interactive.buttons.forEach(button => {
292
+ console.log(`Button: ${button.text}`);
293
+ console.log(` CSS: ${button.selector.css}`);
294
+ console.log(` XPath: ${button.selector.xpath}`);
295
+ console.log(` TestID: ${button.selector.dataTestId}`);
296
+ console.log(` Ranked candidates:`, button.selector.candidates?.slice(0, 3));
297
+ });
298
+ ```
299
+
300
+ ### Content Analysis (Progressive Approach)
301
+ ```typescript
302
+ // Get structure first
303
+ const structure = ProgressiveExtractor.extractStructure(document);
304
+
305
+ // Extract readable content from main area
306
+ const content = ProgressiveExtractor.extractContent(
307
+ structure.summary.mainContentSelector || 'main',
308
+ document,
309
+ { includeHeadings: true, includeTables: true }
310
+ );
311
+
312
+ console.log(`Word count: ${content.metadata.wordCount}`);
313
+ console.log(`Headings: ${content.text.headings?.length}`);
314
+ console.log(`Has interactive elements: ${content.metadata.hasInteractive}`);
315
+ ```
316
+
317
+ ## Stateless Architecture
318
+
319
+ All methods are stateless and accept document/element parameters explicitly:
320
+
321
+ ```typescript
322
+ // No window or document globals required
323
+ function extractFromIframe(iframe: HTMLIFrameElement) {
324
+ const iframeDoc = iframe.contentDocument;
325
+ if (iframeDoc) {
326
+ return SmartDOMReader.extractInteractive(iframeDoc);
327
+ }
328
+ }
329
+
330
+ // Works with any document context
331
+ function extractFromShadowRoot(shadowRoot: ShadowRoot) {
332
+ const container = shadowRoot.querySelector('.container');
333
+ if (container) {
334
+ return SmartDOMReader.extractFromElement(container);
335
+ }
336
+ }
337
+
338
+ /**
339
+ * Stateless bundle string (for extensions / userScripts)
340
+ *
341
+ * The library also provides a self-contained IIFE bundle as a string
342
+ * export that can be injected and executed without touching window scope.
343
+ */
344
+ import { SMART_DOM_READER_BUNDLE } from '@mcp-b/smart-dom-reader/bundle-string';
345
+
346
+ function execute(method, args) {
347
+ const code = `(() => {\n${SMART_DOM_READER_BUNDLE}\nreturn SmartDOMReaderBundle.executeExtraction(${JSON.stringify(
348
+ 'extractStructure'
349
+ )}, ${JSON.stringify({ selector: undefined, formatOptions: { detail: 'summary' } })});\n})()`;
350
+ // inject `code` into the page (e.g., chrome.userScripts.execute)
351
+ }
352
+
353
+ // Note: The bundle contains guarded fallbacks (e.g., typeof require === 'function')
354
+ // that are no-ops in the browser; there are no runtime imports.
355
+ ```
356
+
357
+ ## Design Philosophy
358
+
359
+ This library is designed to provide:
360
+
361
+ 1. **Token Efficiency**: Progressive extraction minimizes token usage for AI applications
362
+ 2. **Flexibility**: Choose between complete extraction or step-by-step approach
363
+ 3. **Statelessness**: No hidden global state; documents and elements are passed in explicitly, so it works in any JavaScript environment that provides the standard DOM APIs
364
+ 4. **Multiple Selector Strategies**: Robust element targeting with fallbacks
365
+ 5. **Semantic Understanding**: Preserves meaning and relationships
366
+ 6. **Interactive Focus**: Prioritizes elements users interact with
367
+ 7. **Context Preservation**: Maintains element relationships
368
+ 8. **Framework Agnostic**: Works with any web application
369
+
370
+ ## Credits
371
+
372
+ Inspired by:
373
+ - [stacking-contexts-inspector](https://github.com/andreadev-it/stacking-contexts-inspector) - DOM traversal techniques
374
+ - [dom-to-semantic-markdown](https://github.com/romansky/dom-to-semantic-markdown) - Content scoring algorithms
375
+ - [z-context](https://github.com/gwwar/z-context) - Selector generation approaches
376
+
377
+ ## License
378
+
379
+ MIT
380
+
381
+ ## MCP Server (Golden Path)
382
+
383
+ For AI agents, use the bundled MCP server, which returns XML-wrapped Markdown instead of JSON. This keeps responses concise and readable for LLMs while providing clear structural boundaries.
384
+
385
+ - Output format: always an XML envelope with a single section tag containing Markdown in CDATA
386
+ - Structure: `<page title="..." url="...">\n <outline><![CDATA[ ...markdown... ]]></outline>\n</page>`
387
+ - Region: `<page ...>\n <section><![CDATA[ ...markdown... ]]></section>\n</page>`
388
+ - Content: `<page ...>\n <content><![CDATA[ ...markdown... ]]></content>\n</page>`
389
+ - Golden path sequence:
390
+ 1) `dom_extract_structure` → get page outline and pick a target
391
+ 2) `dom_extract_region` → get actionable selectors for that area
392
+ 3) Write a script; if unstable, re-run with higher detail or limits
393
+ 4) Optional: `dom_extract_content` for readable text context
394
+
395
+ ### Running the server
396
+
397
+ Ensure the library is built so the formatter is available:
398
+
399
+ ```
400
+ pnpm -w --filter @mcp-b/smart-dom-reader run build
401
+ ```
402
+
403
+ Build and update the embedded bundle, then start the MCP server (stdio):
404
+
405
+ ```
406
+ pnpm --filter @mcp-b/smart-dom-reader bundle:mcp
407
+ pnpm --filter @mcp-b/smart-dom-reader-server run start
408
+ ```
409
+
410
+ Or directly with tsx:
411
+
412
+ ```
413
+ tsx smart-dom-reader/mcp-server/src/index.ts
414
+ ```
415
+
416
+ ### Tool overview (inputs only)
417
+
418
+ - `browser_connect` → `{ headless?: boolean, executablePath?: string }`
419
+ - `browser_navigate` → `{ url: string }`
420
+ - `dom_extract_structure` → `{ selector?: string, detail?: 'summary'|'region'|'deep', maxTextLength?: number, maxElements?: number }`
421
+ - `dom_extract_region` → `{ selector: string, options?: { mode?: 'interactive'|'full', includeHidden?: boolean, maxDepth?: number, detail?: 'summary'|'region'|'deep', maxTextLength?: number, maxElements?: number } }`
422
+ - `dom_extract_content` → `{ selector: string, options?: { includeHeadings?: boolean, includeLists?: boolean, includeMedia?: boolean, maxTextLength?: number, detail?: 'summary'|'region'|'deep', maxElements?: number } }`
423
+ - `dom_extract_interactive` → `{ selector?: string, options?: { viewportOnly?: boolean, maxDepth?: number, detail?: 'summary'|'region'|'deep', maxTextLength?: number, maxElements?: number } }`
424
+ - `browser_screenshot` → `{ path?: string, fullPage?: boolean }`
425
+ - `browser_close` → `{}`
426
+
427
+ All extraction tools return XML-wrapped Markdown with a short “Next:” instruction at the bottom to guide the following step.
428
+
429
+ ## Local Testing (Playwright)
430
+
431
+ Run the library in a real browser against local HTML (no network):
432
+
433
+ ```
434
+ pnpm --filter @mcp-b/smart-dom-reader bundle:mcp
435
+ pnpm --filter @mcp-b/smart-dom-reader test:local
436
+ ```
437
+
438
+ What it validates:
439
+ - Stable selectors (ID, data-testid, role+aria, name/id)
440
+ - Semantic extraction (headings/images/tables/lists)
441
+ - Shadow DOM detection
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Auto-generated bundle module for smart-dom-reader
3
+ * DO NOT EDIT - Generated by generate-bundle-module.mjs
4
+ *
5
+ * This module exports the bundled smart-dom-reader library as a string
6
+ * that can be injected into web pages for stateless DOM extraction.
7
+ */
8
+ declare const SMART_DOM_READER_BUNDLE = "var SmartDOMReaderBundle = (function(exports) {\n class ContentDetection {\n /**\n * Find the main content area of a page\n * Inspired by dom-to-semantic-markdown's approach\n */\n static findMainContent(doc) {\n const mainElement = doc.querySelector('main, [role=\"main\"]');\n if (mainElement) {\n return mainElement;\n }\n if (!doc.body) {\n return doc.documentElement;\n }\n return this.detectMainContent(doc.body);\n }\n /**\n * Detect main content using scoring algorithm\n */\n static detectMainContent(rootElement) {\n const candidates = [];\n const minScore = 15;\n this.collectCandidates(rootElement, candidates, minScore);\n if (candidates.length === 0) {\n return rootElement;\n }\n candidates.sort((a, b) => this.calculateContentScore(b) - this.calculateContentScore(a));\n let bestCandidate = candidates[0];\n for (let i = 1; i < candidates.length; i++) {\n const isIndependent = !candidates.some(\n (other, j) => j !== i && other.contains(candidates[i])\n );\n if (isIndependent && this.calculateContentScore(candidates[i]) > this.calculateContentScore(bestCandidate)) {\n bestCandidate = candidates[i];\n }\n }\n return bestCandidate;\n }\n /**\n * Collect content candidates\n */\n static collectCandidates(element, candidates, minScore) {\n const score = this.calculateContentScore(element);\n if (score >= minScore) {\n candidates.push(element);\n }\n Array.from(element.children).forEach((child) => {\n this.collectCandidates(child, candidates, minScore);\n });\n }\n /**\n * Calculate content score for an element\n */\n static calculateContentScore(element) {\n let score = 0;\n const semanticClasses = [\n \"article\",\n \"content\",\n \"main-container\",\n \"main\",\n \"main-content\",\n \"post\",\n \"entry\"\n ];\n const semanticIds = [\"content\", \"main\", \"article\", \"post\", \"entry\"];\n semanticClasses.forEach((cls) => {\n if (element.classList.contains(cls)) {\n score += 10;\n }\n });\n semanticIds.forEach((id) => 
{\n if (element.id && element.id.toLowerCase().includes(id)) {\n score += 10;\n }\n });\n const tag = element.tagName.toLowerCase();\n const highValueTags = [\"article\", \"main\", \"section\"];\n if (highValueTags.includes(tag)) {\n score += 8;\n }\n const paragraphs = element.getElementsByTagName(\"p\").length;\n score += Math.min(paragraphs * 2, 10);\n const headings = element.querySelectorAll(\"h1, h2, h3\").length;\n score += Math.min(headings * 3, 9);\n const textLength = element.textContent?.trim().length || 0;\n if (textLength > 300) {\n score += Math.min(Math.floor(textLength / 300) * 2, 10);\n }\n const linkDensity = this.calculateLinkDensity(element);\n if (linkDensity < 0.3) {\n score += 5;\n } else if (linkDensity > 0.5) {\n score -= 5;\n }\n if (element.hasAttribute(\"data-main\") || element.hasAttribute(\"data-content\") || element.hasAttribute(\"itemprop\")) {\n score += 8;\n }\n const role = element.getAttribute(\"role\");\n if (role === \"main\" || role === \"article\") {\n score += 10;\n }\n if (element.matches(\n \"aside, nav, header, footer, .sidebar, .navigation, .menu, .ad, .advertisement\"\n )) {\n score -= 10;\n }\n const forms = element.getElementsByTagName(\"form\").length;\n if (forms > 2) {\n score -= 5;\n }\n return Math.max(0, score);\n }\n /**\n * Calculate link density in an element\n */\n static calculateLinkDensity(element) {\n const links = element.getElementsByTagName(\"a\");\n let linkTextLength = 0;\n for (const link of Array.from(links)) {\n linkTextLength += link.textContent?.length || 0;\n }\n const totalTextLength = element.textContent?.length || 1;\n return linkTextLength / totalTextLength;\n }\n /**\n * Check if an element is likely navigation\n */\n static isNavigation(element) {\n const tag = element.tagName.toLowerCase();\n if (tag === \"nav\" || element.getAttribute(\"role\") === \"navigation\") {\n return true;\n }\n const navPatterns = [/nav/i, /menu/i, /sidebar/i, /toolbar/i];\n const classesAndId = 
(element.className + \" \" + element.id).toLowerCase();\n return navPatterns.some((pattern) => pattern.test(classesAndId));\n }\n /**\n * Check if element is likely supplementary content\n */\n static isSupplementary(element) {\n const tag = element.tagName.toLowerCase();\n if (tag === \"aside\" || element.getAttribute(\"role\") === \"complementary\") {\n return true;\n }\n const supplementaryPatterns = [/sidebar/i, /widget/i, /related/i, /advertisement/i, /social/i];\n const classesAndId = (element.className + \" \" + element.id).toLowerCase();\n return supplementaryPatterns.some((pattern) => pattern.test(classesAndId));\n }\n /**\n * Detect page landmarks\n */\n static detectLandmarks(doc) {\n const landmarks = {\n navigation: [],\n main: [],\n complementary: [],\n contentinfo: [],\n banner: [],\n search: [],\n form: [],\n region: []\n };\n const landmarkSelectors = {\n navigation: 'nav, [role=\"navigation\"]',\n main: 'main, [role=\"main\"]',\n complementary: 'aside, [role=\"complementary\"]',\n contentinfo: 'footer, [role=\"contentinfo\"]',\n banner: 'header, [role=\"banner\"]',\n search: '[role=\"search\"]',\n form: 'form[aria-label], form[aria-labelledby], [role=\"form\"]',\n region: 'section[aria-label], section[aria-labelledby], [role=\"region\"]'\n };\n for (const [landmark, selector] of Object.entries(landmarkSelectors)) {\n const elements = doc.querySelectorAll(selector);\n landmarks[landmark] = Array.from(elements);\n }\n return landmarks;\n }\n }\n class SelectorGenerator {\n /**\n * Generate multiple selector strategies for an element\n */\n static generateSelectors(element) {\n const doc = element.ownerDocument || document;\n const candidates = [];\n if (element.id && this.isUniqueId(element.id, doc)) {\n candidates.push({ type: \"id\", value: `#${CSS.escape(element.id)}`, score: 100 });\n }\n const testId = this.getDataTestId(element);\n if (testId) {\n const v = `[data-testid=\"${CSS.escape(testId)}\"]`;\n candidates.push({\n type: 
\"data-testid\",\n value: v,\n score: 90 + (this.isUniqueSelectorSafe(v, doc) ? 5 : 0)\n });\n }\n const role = element.getAttribute(\"role\");\n const aria = element.getAttribute(\"aria-label\");\n if (role && aria) {\n const v = `[role=\"${CSS.escape(role)}\"][aria-label=\"${CSS.escape(aria)}\"]`;\n candidates.push({\n type: \"role-aria\",\n value: v,\n score: 85 + (this.isUniqueSelectorSafe(v, doc) ? 5 : 0)\n });\n }\n const nameAttr = element.getAttribute(\"name\");\n if (nameAttr) {\n const v = `[name=\"${CSS.escape(nameAttr)}\"]`;\n candidates.push({\n type: \"name\",\n value: v,\n score: 78 + (this.isUniqueSelectorSafe(v, doc) ? 5 : 0)\n });\n }\n const pathCss = this.generateCSSSelector(element, doc);\n const structuralPenalty = (pathCss.match(/:nth-child\\(/g) || []).length * 10;\n const classBonus = pathCss.includes(\".\") ? 8 : 0;\n const pathScore = Math.max(0, 70 + classBonus - structuralPenalty);\n candidates.push({ type: \"class-path\", value: pathCss, score: pathScore });\n const xpath = this.generateXPath(element, doc);\n candidates.push({ type: \"xpath\", value: xpath, score: 40 });\n const textBased = this.generateTextBasedSelector(element);\n if (textBased) candidates.push({ type: \"text\", value: textBased, score: 30 });\n candidates.sort((a, b) => b.score - a.score);\n const bestCss = candidates.find((c) => c.type !== \"xpath\" && c.type !== \"text\")?.value || pathCss;\n return {\n css: bestCss,\n xpath,\n textBased,\n dataTestId: testId || void 0,\n ariaLabel: aria || void 0,\n candidates\n };\n }\n /**\n * Generate a unique CSS selector for an element\n */\n static generateCSSSelector(element, doc) {\n if (element.id && this.isUniqueId(element.id, doc)) {\n return `#${CSS.escape(element.id)}`;\n }\n const testId = this.getDataTestId(element);\n if (testId) {\n return `[data-testid=\"${CSS.escape(testId)}\"]`;\n }\n const path = [];\n let current = element;\n while (current && current.nodeType === Node.ELEMENT_NODE) {\n let selector = 
current.nodeName.toLowerCase();\n if (current.id && this.isUniqueId(current.id, doc)) {\n selector = `#${CSS.escape(current.id)}`;\n path.unshift(selector);\n break;\n }\n const classes = this.getMeaningfulClasses(current);\n if (classes.length > 0) {\n selector += \".\" + classes.map((c) => CSS.escape(c)).join(\".\");\n }\n const siblings = current.parentElement?.children;\n if (siblings && siblings.length > 1) {\n const index = Array.from(siblings).indexOf(current);\n if (index > 0 || !this.isUniqueSelector(selector, current.parentElement)) {\n selector += `:nth-child(${index + 1})`;\n }\n }\n path.unshift(selector);\n current = current.parentElement;\n }\n return this.optimizePath(path, element, doc);\n }\n /**\n * Generate XPath for an element\n */\n static generateXPath(element, doc) {\n if (element.id && this.isUniqueId(element.id, doc)) {\n return `//*[@id=\"${element.id}\"]`;\n }\n const path = [];\n let current = element;\n while (current && current.nodeType === Node.ELEMENT_NODE) {\n const tagName = current.nodeName.toLowerCase();\n if (current.id && this.isUniqueId(current.id, doc)) {\n path.unshift(`//*[@id=\"${current.id}\"]`);\n break;\n }\n let xpath = tagName;\n const siblings = current.parentElement?.children;\n if (siblings) {\n const sameTagSiblings = Array.from(siblings).filter(\n (s) => s.nodeName.toLowerCase() === tagName\n );\n if (sameTagSiblings.length > 1) {\n const index = sameTagSiblings.indexOf(current) + 1;\n xpath += `[${index}]`;\n }\n }\n path.unshift(xpath);\n current = current.parentElement;\n }\n return \"//\" + path.join(\"/\");\n }\n /**\n * Generate a text-based selector for buttons and links\n */\n static generateTextBasedSelector(element) {\n const text = element.textContent?.trim();\n if (!text || text.length > 50) return void 0;\n const tag = element.nodeName.toLowerCase();\n if ([\"button\", \"a\", \"label\"].includes(tag)) {\n const escapedText = text.replace(/['\"\\\\]/g, \"\\\\$&\");\n return 
`${tag}:contains(\"${escapedText}\")`;\n }\n return void 0;\n }\n /**\n * Get data-testid or similar attributes\n */\n static getDataTestId(element) {\n return element.getAttribute(\"data-testid\") || element.getAttribute(\"data-test-id\") || element.getAttribute(\"data-test\") || element.getAttribute(\"data-cy\") || void 0;\n }\n /**\n * Check if an ID is unique in the document\n */\n static isUniqueId(id, doc) {\n return doc.querySelectorAll(`#${CSS.escape(id)}`).length === 1;\n }\n /**\n * Check if a selector is unique within a container\n */\n static isUniqueSelector(selector, container) {\n try {\n return container.querySelectorAll(selector).length === 1;\n } catch {\n return false;\n }\n }\n static isUniqueSelectorSafe(selector, doc) {\n try {\n return doc.querySelectorAll(selector).length === 1;\n } catch {\n return false;\n }\n }\n /**\n * Get meaningful classes (filtering out utility classes)\n */\n static getMeaningfulClasses(element) {\n const classes = Array.from(element.classList);\n const utilityPatterns = [\n /^(p|m|w|h|text|bg|border|flex|grid|col|row)-/,\n /^(xs|sm|md|lg|xl|2xl):/,\n /^(hover|focus|active|disabled|checked):/,\n /^js-/,\n /^is-/,\n /^has-/\n ];\n return classes.filter((cls) => {\n if (cls.length < 3) return false;\n return !utilityPatterns.some((pattern) => pattern.test(cls));\n }).slice(0, 2);\n }\n /**\n * Optimize the selector path by removing unnecessary parts\n */\n static optimizePath(path, element, doc) {\n for (let i = 0; i < path.length - 1; i++) {\n const shortPath = path.slice(i).join(\" > \");\n try {\n const matches = doc.querySelectorAll(shortPath);\n if (matches.length === 1 && matches[0] === element) {\n return shortPath;\n }\n } catch {\n }\n }\n return path.join(\" > \");\n }\n /**\n * Get a human-readable path description\n */\n static getContextPath(element) {\n const path = [];\n let current = element;\n let depth = 0;\n const maxDepth = 5;\n while (current && current !== element.ownerDocument?.body && depth < 
maxDepth) {\n const tag = current.nodeName.toLowerCase();\n let descriptor = tag;\n if (current.id) {\n descriptor = `${tag}#${current.id}`;\n } else if (current.className && typeof current.className === \"string\") {\n const firstClass = current.className.split(\" \")[0];\n if (firstClass) {\n descriptor = `${tag}.${firstClass}`;\n }\n }\n const role = current.getAttribute(\"role\");\n if (role) {\n descriptor += `[role=\"${role}\"]`;\n }\n path.unshift(descriptor);\n current = current.parentElement;\n depth++;\n }\n return path;\n }\n }\n class DOMTraversal {\n static INTERACTIVE_SELECTORS = [\n \"button\",\n \"a[href]\",\n 'input:not([type=\"hidden\"])',\n \"textarea\",\n \"select\",\n '[role=\"button\"]',\n \"[onclick]\",\n '[contenteditable=\"true\"]',\n \"summary\",\n '[tabindex]:not([tabindex=\"-1\"])'\n ];\n static SEMANTIC_SELECTORS = [\n \"h1\",\n \"h2\",\n \"h3\",\n \"h4\",\n \"h5\",\n \"h6\",\n \"article\",\n \"section\",\n \"nav\",\n \"aside\",\n \"main\",\n \"header\",\n \"footer\",\n \"form\",\n \"table\",\n \"ul\",\n \"ol\",\n \"img[alt]\",\n \"figure\",\n \"video\",\n \"audio\",\n '[role=\"navigation\"]',\n '[role=\"main\"]',\n '[role=\"complementary\"]',\n '[role=\"contentinfo\"]'\n ];\n /**\n * Check if element is visible\n */\n static isVisible(element, computedStyle) {\n const rect = element.getBoundingClientRect();\n const style = computedStyle || element.ownerDocument?.defaultView?.getComputedStyle(element);\n if (!style) return false;\n return !!(rect.width > 0 && rect.height > 0 && style.display !== \"none\" && style.visibility !== \"hidden\" && style.opacity !== \"0\" && element.offsetParent !== null);\n }\n /**\n * Check if element is in viewport\n */\n static isInViewport(element, viewport) {\n const rect = element.getBoundingClientRect();\n const view = viewport || {\n width: element.ownerDocument?.defaultView?.innerWidth || 0,\n height: element.ownerDocument?.defaultView?.innerHeight || 0\n };\n return rect.top < view.height && 
rect.bottom > 0 && rect.left < view.width && rect.right > 0;\n }\n /**\n * Check if element passes filter criteria\n */\n static passesFilter(element, filter) {\n if (!filter) return true;\n const htmlElement = element;\n if (filter.excludeSelectors?.length) {\n for (const selector of filter.excludeSelectors) {\n if (element.matches(selector)) return false;\n }\n }\n if (filter.includeSelectors?.length) {\n let matches = false;\n for (const selector of filter.includeSelectors) {\n if (element.matches(selector)) {\n matches = true;\n break;\n }\n }\n if (!matches) return false;\n }\n if (filter.tags?.length && !filter.tags.includes(element.tagName.toLowerCase())) {\n return false;\n }\n const textContent = htmlElement.textContent?.toLowerCase() || \"\";\n if (filter.textContains?.length) {\n let hasText = false;\n for (const text of filter.textContains) {\n if (textContent.includes(text.toLowerCase())) {\n hasText = true;\n break;\n }\n }\n if (!hasText) return false;\n }\n if (filter.textMatches?.length) {\n let matches = false;\n for (const pattern of filter.textMatches) {\n if (pattern.test(textContent)) {\n matches = true;\n break;\n }\n }\n if (!matches) return false;\n }\n if (filter.hasAttributes?.length) {\n for (const attr of filter.hasAttributes) {\n if (!element.hasAttribute(attr)) return false;\n }\n }\n if (filter.attributeValues) {\n for (const [attr, value] of Object.entries(filter.attributeValues)) {\n const attrValue = element.getAttribute(attr);\n if (!attrValue) return false;\n if (typeof value === \"string\") {\n if (attrValue !== value) return false;\n } else if (value instanceof RegExp) {\n if (!value.test(attrValue)) return false;\n }\n }\n }\n if (filter.withinSelectors?.length) {\n let isWithin = false;\n for (const selector of filter.withinSelectors) {\n if (element.closest(selector)) {\n isWithin = true;\n break;\n }\n }\n if (!isWithin) return false;\n }\n if (filter.interactionTypes?.length) {\n const interaction = 
this.getInteractionInfo(element);\n let hasInteraction = false;\n for (const type of filter.interactionTypes) {\n if (interaction[type]) {\n hasInteraction = true;\n break;\n }\n }\n if (!hasInteraction) return false;\n }\n if (filter.nearText) {\n const parent = element.parentElement;\n if (!parent || !parent.textContent?.toLowerCase().includes(filter.nearText.toLowerCase())) {\n return false;\n }\n }\n return true;\n }\n /**\n * Extract element information\n */\n static extractElement(element, options, depth = 0) {\n if (options.maxDepth && depth > options.maxDepth) {\n return null;\n }\n if (!options.includeHidden && !this.isVisible(element)) {\n return null;\n }\n if (options.viewportOnly && !this.isInViewport(element)) {\n return null;\n }\n if (!this.passesFilter(element, options.filter)) {\n return null;\n }\n const htmlElement = element;\n const extracted = {\n tag: element.tagName.toLowerCase(),\n text: this.getElementText(element, options),\n selector: SelectorGenerator.generateSelectors(element),\n attributes: this.getRelevantAttributes(element, options),\n context: this.getElementContext(element),\n interaction: this.getInteractionInfo(element)\n // bounds removed to save tokens\n };\n if (options.mode === \"full\" && this.isSemanticContainer(element)) {\n const children = [];\n if (options.includeShadowDOM && htmlElement.shadowRoot) {\n const shadowChildren = this.extractChildren(htmlElement.shadowRoot, options, depth + 1);\n children.push(...shadowChildren);\n }\n const regularChildren = this.extractChildren(element, options, depth + 1);\n children.push(...regularChildren);\n if (children.length > 0) {\n extracted.children = children;\n }\n }\n return extracted;\n }\n /**\n * Extract children elements\n */\n static extractChildren(container, options, depth) {\n const children = [];\n const elements = container.querySelectorAll(\"*\");\n for (const child of Array.from(elements)) {\n if (this.hasExtractedAncestor(child, elements)) {\n continue;\n }\n 
const extracted = this.extractElement(child, options, depth);\n if (extracted) {\n children.push(extracted);\n }\n }\n return children;\n }\n /**\n * Check if element has an ancestor that was already extracted\n */\n static hasExtractedAncestor(element, extractedElements) {\n let parent = element.parentElement;\n while (parent) {\n if (Array.from(extractedElements).includes(parent)) {\n return true;\n }\n parent = parent.parentElement;\n }\n return false;\n }\n /**\n * Get relevant attributes for an element\n */\n static getRelevantAttributes(element, options) {\n const relevant = [\n \"id\",\n \"class\",\n \"name\",\n \"type\",\n \"value\",\n \"placeholder\",\n \"href\",\n \"src\",\n \"alt\",\n \"title\",\n \"action\",\n \"method\",\n \"aria-label\",\n \"aria-describedby\",\n \"aria-controls\",\n \"role\",\n \"disabled\",\n \"readonly\",\n \"required\",\n \"checked\",\n \"min\",\n \"max\",\n \"pattern\",\n \"step\",\n \"autocomplete\",\n \"data-testid\",\n \"data-test\",\n \"data-cy\"\n ];\n const attributes = {};\n const attrTruncate = options.attributeTruncateLength ?? 100;\n const dataAttrTruncate = options.dataAttributeTruncateLength ?? 50;\n for (const attr of relevant) {\n const value = element.getAttribute(attr);\n if (value) {\n attributes[attr] = value.length > attrTruncate ? value.substring(0, attrTruncate) + \"...\" : value;\n }\n }\n for (const attr of element.attributes) {\n if (attr.name.startsWith(\"data-\") && !relevant.includes(attr.name)) {\n attributes[attr.name] = attr.value.length > dataAttrTruncate ? 
attr.value.substring(0, dataAttrTruncate) + \"...\" : attr.value;\n }\n }\n return attributes;\n }\n /**\n * Get element context information\n */\n static getElementContext(element) {\n const context = {\n parentChain: SelectorGenerator.getContextPath(element)\n };\n const form = element.closest(\"form\");\n if (form) {\n context.nearestForm = SelectorGenerator.generateSelectors(form).css;\n }\n const section = element.closest('section, [role=\"region\"]');\n if (section) {\n context.nearestSection = SelectorGenerator.generateSelectors(section).css;\n }\n const main = element.closest('main, [role=\"main\"]');\n if (main) {\n context.nearestMain = SelectorGenerator.generateSelectors(main).css;\n }\n const nav = element.closest('nav, [role=\"navigation\"]');\n if (nav) {\n context.nearestNav = SelectorGenerator.generateSelectors(nav).css;\n }\n return context;\n }\n /**\n * Get interaction information for an element (compact format)\n */\n static getInteractionInfo(element) {\n const htmlElement = element;\n const interaction = {};\n const hasClickHandler = !!(htmlElement.onclick || element.getAttribute(\"onclick\") || element.matches('button, a[href], [role=\"button\"], [tabindex]:not([tabindex=\"-1\"])'));\n if (hasClickHandler) interaction.click = true;\n const hasChangeHandler = !!(htmlElement.onchange || element.getAttribute(\"onchange\") || element.matches(\"input, select, textarea\"));\n if (hasChangeHandler) interaction.change = true;\n const hasSubmitHandler = !!(htmlElement.onsubmit || element.getAttribute(\"onsubmit\") || element.matches(\"form\"));\n if (hasSubmitHandler) interaction.submit = true;\n const triggersNavigation = element.matches('a[href], button[type=\"submit\"]');\n if (triggersNavigation) interaction.nav = true;\n const isDisabled = htmlElement.hasAttribute(\"disabled\") || htmlElement.getAttribute(\"aria-disabled\") === \"true\";\n if (isDisabled) interaction.disabled = true;\n const isHidden = !this.isVisible(element);\n if (isHidden) 
interaction.hidden = true;\n const ariaRole = element.getAttribute(\"role\");\n if (ariaRole) interaction.role = ariaRole;\n if (element.matches(\"input, textarea, select, button\")) {\n const form = element.form || element.closest(\"form\");\n if (form) {\n interaction.form = SelectorGenerator.generateSelectors(form).css;\n }\n }\n return interaction;\n }\n /**\n * Get text content of an element (limited length)\n */\n static getElementText(element, options) {\n if (element.matches(\"input, textarea\")) {\n const input = element;\n return input.value || input.placeholder || \"\";\n }\n if (element.matches(\"img\")) {\n return element.alt || \"\";\n }\n const text = element.textContent?.trim() || \"\";\n const maxLength = options?.textTruncateLength;\n if (maxLength && text.length > maxLength) {\n return text.substring(0, maxLength) + \"...\";\n }\n return text;\n }\n /**\n * Check if element is a semantic container\n */\n static isSemanticContainer(element) {\n return element.matches(\n 'article, section, nav, aside, main, header, footer, form, table, ul, ol, dl, figure, details, dialog, [role=\"region\"], [role=\"navigation\"], [role=\"main\"], [role=\"complementary\"]'\n );\n }\n /**\n * Get interactive elements\n */\n static getInteractiveElements(container = document, options) {\n const elements = [];\n const selector = this.INTERACTIVE_SELECTORS.join(\", \");\n const found = container.querySelectorAll(selector);\n for (const element of Array.from(found)) {\n const extracted = this.extractElement(element, options);\n if (extracted) {\n elements.push(extracted);\n }\n }\n if (options.customSelectors) {\n for (const customSelector of options.customSelectors) {\n try {\n const customFound = container.querySelectorAll(customSelector);\n for (const element of Array.from(customFound)) {\n const extracted = this.extractElement(element, options);\n if (extracted) {\n elements.push(extracted);\n }\n }\n } catch (e) {\n console.warn(`Invalid custom selector: 
${customSelector}`);\n }\n }\n }\n return elements;\n }\n /**\n * Get semantic elements (for full mode)\n */\n static getSemanticElements(container = document, options) {\n const elements = [];\n const selector = this.SEMANTIC_SELECTORS.join(\", \");\n const found = container.querySelectorAll(selector);\n for (const element of Array.from(found)) {\n const extracted = this.extractElement(element, options);\n if (extracted) {\n elements.push(extracted);\n }\n }\n return elements;\n }\n }\n function truncate(text, len) {\n const t = (text ?? \"\").trim();\n if (!len || t.length <= len) return t;\n const keywords = [\n \"login\",\n \"log in\",\n \"sign in\",\n \"sign up\",\n \"submit\",\n \"search\",\n \"filter\",\n \"add to cart\",\n \"next\",\n \"continue\"\n ];\n const lower = t.toLowerCase();\n const hit = keywords.map((k) => ({ k, i: lower.indexOf(k) })).find((x) => x.i > -1);\n const head = Math.max(0, Math.floor(len * 0.66));\n if (hit && hit.i > head) {\n const tailWindow = Math.max(12, len - head - 5);\n const start = Math.max(0, hit.i - Math.floor(tailWindow / 2));\n const end = Math.min(t.length, start + tailWindow);\n return t.slice(0, head).trimEnd() + \" \u2026 \" + t.slice(start, end).trim() + \"\u2026\";\n }\n const slice = t.slice(0, len);\n const lastSpace = slice.lastIndexOf(\" \");\n return (lastSpace > 32 ? 
slice.slice(0, lastSpace) : slice) + \"\u2026\";\n }\n function bestSelector(el) {\n return el.selector?.css || \"\";\n }\n function hashId(input) {\n let h = 5381;\n for (let i = 0; i < input.length; i++) h = h * 33 ^ input.charCodeAt(i);\n return \"sec-\" + (h >>> 0).toString(36);\n }\n function iconForRegion(key) {\n switch (key) {\n case \"header\":\n return \"\uD83E\uDDED\";\n case \"navigation\":\n return \"\uD83D\uDCD1\";\n case \"main\":\n return \"\uD83D\uDCC4\";\n case \"sections\":\n return \"\uD83D\uDDC2\uFE0F\";\n case \"sidebar\":\n return \"\uD83D\uDCDA\";\n case \"footer\":\n return \"\uD83D\uDD3B\";\n case \"modals\":\n return \"\uD83D\uDCAC\";\n default:\n return \"\uD83D\uDD39\";\n }\n }\n function elementLine(el, opts) {\n const txt = truncate(el.text || el.attributes?.ariaLabel, opts?.maxTextLength ?? 80);\n const sel = bestSelector(el);\n const tag = el.tag.toLowerCase();\n const action = el.interaction?.submit ? \"submit\" : el.interaction?.click ? \"click\" : el.interaction?.change ? \"change\" : void 0;\n const actionText = action ? 
` (${action})` : \"\";\n return `- ${tag.toUpperCase()}: ${txt || \"(no text)\"} \u2192 \\`${sel}\\`${actionText}`;\n }\n function selectorQualitySummary(inter) {\n const all = [];\n all.push(...inter.buttons.map((e) => e.selector?.css || \"\"));\n all.push(...inter.links.map((e) => e.selector?.css || \"\"));\n all.push(...inter.inputs.map((e) => e.selector?.css || \"\"));\n all.push(...inter.clickable.map((e) => e.selector?.css || \"\"));\n const total = all.length || 1;\n const idCount = all.filter((s) => s.startsWith(\"#\")).length;\n const testIdCount = all.filter((s) => /\\[data-testid=/.test(s)).length;\n const nthCount = all.filter((s) => /:nth-child\\(/.test(s)).length;\n const stable = idCount + testIdCount;\n const stablePct = Math.round(stable / total * 100);\n const nthPct = Math.round(nthCount / total * 100);\n return `Selector quality: ${stablePct}% stable (ID/data-testid), ${nthPct}% structural (:nth-child)`;\n }\n function renderInteractive(inter, opts) {\n const parts = [];\n const limit = (arr) => typeof opts?.maxElements === \"number\" ? arr.slice(0, opts.maxElements) : arr;\n if (inter.buttons.length) {\n parts.push(\"Buttons:\");\n for (const el of limit(inter.buttons)) parts.push(elementLine(el, opts));\n }\n if (inter.links.length) {\n parts.push(\"Links:\");\n for (const el of limit(inter.links)) parts.push(elementLine(el, opts));\n }\n if (inter.inputs.length) {\n parts.push(\"Inputs:\");\n for (const el of limit(inter.inputs)) parts.push(elementLine(el, opts));\n }\n if (inter.clickable.length) {\n parts.push(\"Other Clickable:\");\n for (const el of limit(inter.clickable)) parts.push(elementLine(el, opts));\n }\n if (inter.forms.length) {\n parts.push(\"Forms:\");\n for (const f of limit(inter.forms)) {\n parts.push(`- FORM: action=${f.action ?? \"-\"} method=${f.method ?? 
\"-\"} \u2192 \\`${f.selector}\\``);\n }\n }\n return parts.join(\"\\n\");\n }\n function renderRegionInfo(region) {\n const icon = iconForRegion(\"region\");\n const id = hashId(`${region.selector}|${region.label ?? \"\"}|${region.role ?? \"\"}`);\n const label = region.label ? ` ${region.label}` : \"\";\n const stats = [];\n if (region.buttonCount) stats.push(`${region.buttonCount} buttons`);\n if (region.linkCount) stats.push(`${region.linkCount} links`);\n if (region.inputCount) stats.push(`${region.inputCount} inputs`);\n if (region.textPreview) stats.push(`\u201C${truncate(region.textPreview, 80)}\u201D`);\n const statsLine = stats.length ? ` \u2014 ${stats.join(\", \")}` : \"\";\n return `${icon} ${label} \u2192 \\`${region.selector}\\` [${id}]${statsLine}`;\n }\n function wrapXml(body, meta, type = \"section\") {\n const attrs = [\n meta?.title ? `title=\"${escapeXml(meta.title)}\"` : null,\n meta?.url ? `url=\"${escapeXml(meta.url)}\"` : null\n ].filter(Boolean).join(\" \");\n return `<page ${attrs}>\n <${type}><![CDATA[\n${body}\n]]></${type}>\n</page>`;\n }\n function escapeXml(s) {\n return s.replace(/&/g, \"&amp;\").replace(/</g, \"&lt;\").replace(/>/g, \"&gt;\").replace(/\"/g, \"&quot;\");\n }\n class MarkdownFormatter {\n static structure(overview, _opts = {}, meta) {\n const lines = [];\n lines.push(`# Page Outline`);\n if (meta?.title || meta?.url) {\n lines.push(`Title: ${meta?.title ?? \"\"}`.trim());\n lines.push(`URL: ${meta?.url ?? 
\"\"}`.trim());\n }\n lines.push(\"\");\n const regions = overview.regions;\n const entries = [\n [\"header\", regions.header],\n [\"navigation\", regions.navigation],\n [\"main\", regions.main],\n [\"sections\", regions.sections],\n [\"sidebar\", regions.sidebar],\n [\"footer\", regions.footer],\n [\"modals\", regions.modals]\n ];\n for (const [key, value] of entries) {\n if (!value) continue;\n const icon = iconForRegion(key);\n if (Array.isArray(value)) {\n if (!value.length) continue;\n lines.push(`## ${icon} ${capitalize(key)}`);\n for (const region of value) lines.push(renderRegionInfo(region));\n } else {\n lines.push(`## ${icon} ${capitalize(key)}`);\n lines.push(renderRegionInfo(value));\n }\n lines.push(\"\");\n }\n if (overview.suggestions?.length) {\n lines.push(\"## Suggestions\");\n for (const s of overview.suggestions) lines.push(`- ${s}`);\n lines.push(\"\");\n }\n lines.push(\n \"Next: choose a region (by selector or [sectionId]) and call dom_extract_region for actionable details.\"\n );\n const body = lines.join(\"\\n\");\n return wrapXml(body, meta, \"outline\");\n }\n static region(result, opts = {}, meta) {\n const lines = [];\n lines.push(`# Region Details`);\n if (meta?.title || meta?.url) {\n lines.push(`Title: ${meta?.title ?? \"\"}`.trim());\n lines.push(`URL: ${meta?.url ?? \"\"}`.trim());\n }\n lines.push(\"\");\n const inter = result.interactive;\n if (result.page) {\n const ps = [\n result.page.hasErrors ? \"errors: yes\" : \"errors: no\",\n result.page.isLoading ? \"loading: yes\" : \"loading: no\",\n result.page.hasModals ? \"modals: yes\" : \"modals: no\"\n ];\n lines.push(`Page state: ${ps.join(\", \")}`);\n }\n const summary = [];\n const count = (arr) => arr ? 
arr.length : 0;\n summary.push(`${count(inter.buttons)} buttons`);\n summary.push(`${count(inter.links)} links`);\n summary.push(`${count(inter.inputs)} inputs`);\n if (inter.forms?.length) summary.push(`${count(inter.forms)} forms`);\n lines.push(`Summary: ${summary.join(\", \")}`);\n lines.push(selectorQualitySummary(inter));\n lines.push(\"\");\n lines.push(renderInteractive(inter, opts));\n lines.push(\"\");\n lines.push(\n \"Next: write a script using the most stable selectors above. If selectors look unstable, rerun dom_extract_region with higher detail or call dom_extract_content for text context.\"\n );\n const body = lines.join(\"\\n\");\n return wrapXml(body, meta, \"section\");\n }\n static content(content, opts = {}, meta) {\n const lines = [];\n lines.push(`# Content`);\n lines.push(`Selector: \\`${content.selector}\\``);\n lines.push(\"\");\n if (content.text.headings?.length) {\n lines.push(\"Headings:\");\n for (const h of content.text.headings)\n lines.push(`- H${h.level}: ${truncate(h.text, opts.maxTextLength ?? 120)}`);\n lines.push(\"\");\n }\n if (content.text.paragraphs?.length) {\n const limit = typeof opts.maxElements === \"number\" ? opts.maxElements : content.text.paragraphs.length;\n lines.push(\"Paragraphs:\");\n for (const p of content.text.paragraphs.slice(0, limit))\n lines.push(`- ${truncate(p, opts.maxTextLength ?? 200)}`);\n lines.push(\"\");\n }\n if (content.text.lists?.length) {\n lines.push(\"Lists:\");\n for (const list of content.text.lists) {\n lines.push(`- ${list.type.toUpperCase()}:`);\n const limit = typeof opts.maxElements === \"number\" ? opts.maxElements : list.items.length;\n for (const item of list.items.slice(0, limit))\n lines.push(` - ${truncate(item, opts.maxTextLength ?? 120)}`);\n }\n lines.push(\"\");\n }\n if (content.tables?.length) {\n lines.push(\"Tables:\");\n for (const t of content.tables) {\n lines.push(`- Headers: ${t.headers.join(\" | \")}`);\n const limit = typeof opts.maxElements === \"number\" ? 
opts.maxElements : t.rows.length;\n for (const row of t.rows.slice(0, limit)) lines.push(` - ${row.join(\" | \")}`);\n }\n lines.push(\"\");\n }\n if (content.media?.length) {\n lines.push(\"Media:\");\n const limit = typeof opts.maxElements === \"number\" ? opts.maxElements : content.media.length;\n for (const m of content.media.slice(0, limit)) {\n lines.push(`- ${m.type.toUpperCase()}: ${m.alt ?? \"\"} ${m.src ? `\u2192 ${m.src}` : \"\"}`.trim());\n }\n lines.push(\"\");\n }\n lines.push(\n \"Next: if text is insufficient for targeting, call dom_extract_region for interactive selectors.\"\n );\n const body = lines.join(\"\\n\");\n return wrapXml(body, meta, \"content\");\n }\n }\n function capitalize(s) {\n return s.charAt(0).toUpperCase() + s.slice(1);\n }\n function resolveSmartDomReader() {\n if (typeof window !== \"undefined\") {\n const globalWindow = window;\n const direct = globalWindow.SmartDOMReader;\n if (typeof direct === \"function\") {\n return direct;\n }\n const namespace = globalWindow.SmartDOMReaderNamespace;\n if (namespace && typeof namespace.SmartDOMReader === \"function\") {\n return namespace.SmartDOMReader;\n }\n }\n try {\n if (typeof require === \"function\") {\n const moduleExports = require(\"./index\");\n if (moduleExports && typeof moduleExports.SmartDOMReader === \"function\") {\n return moduleExports.SmartDOMReader;\n }\n if (moduleExports && typeof moduleExports.default === \"function\") {\n return moduleExports.default;\n }\n }\n } catch {\n }\n return void 0;\n }\n class ProgressiveExtractor {\n /**\n * Step 1: Extract high-level structural overview\n * This provides a \"map\" of the page for the AI to understand structure\n */\n static extractStructure(root) {\n const regions = {};\n const header = root.querySelector('header, [role=\"banner\"], .header, #header');\n if (header) {\n regions.header = this.analyzeRegion(header);\n }\n const navs = root.querySelectorAll('nav, [role=\"navigation\"], .nav, .navigation');\n if 
(navs.length > 0) {\n regions.navigation = Array.from(navs).map((nav) => this.analyzeRegion(nav));\n }\n if (root instanceof Document) {\n const main = ContentDetection.findMainContent(root);\n if (main) {\n regions.main = this.analyzeRegion(main);\n const sections = main.querySelectorAll('section, article, [role=\"region\"]');\n if (sections.length > 0) {\n regions.sections = Array.from(sections).filter((section) => !section.closest(\"nav, header, footer\")).map((section) => this.analyzeRegion(section));\n }\n }\n } else {\n regions.main = this.analyzeRegion(root);\n const sections = root.querySelectorAll('section, article, [role=\"region\"]');\n if (sections.length > 0) {\n regions.sections = Array.from(sections).filter((section) => !section.closest(\"nav, header, footer\")).map((section) => this.analyzeRegion(section));\n }\n }\n const sidebars = root.querySelectorAll('aside, [role=\"complementary\"], .sidebar, #sidebar');\n if (sidebars.length > 0) {\n regions.sidebar = Array.from(sidebars).map((sidebar) => this.analyzeRegion(sidebar));\n }\n const footer = root.querySelector('footer, [role=\"contentinfo\"], .footer, #footer');\n if (footer) {\n regions.footer = this.analyzeRegion(footer);\n }\n const modals = root.querySelectorAll('[role=\"dialog\"], .modal, .popup, .overlay');\n const visibleModals = Array.from(modals).filter((modal) => DOMTraversal.isVisible(modal));\n if (visibleModals.length > 0) {\n regions.modals = visibleModals.map((modal) => this.analyzeRegion(modal));\n }\n const forms = this.extractFormOverview(root);\n const summary = this.calculateSummary(root, regions, forms);\n const suggestions = this.generateSuggestions(regions, summary);\n return { regions, forms, summary, suggestions };\n }\n /**\n * Step 2: Extract detailed information from a specific region\n */\n static extractRegion(selector, doc, options = {}, smartDomReaderCtor) {\n const element = doc.querySelector(selector);\n if (!element) return null;\n const SmartDOMReaderCtor = 
smartDomReaderCtor ?? resolveSmartDomReader();\n if (!SmartDOMReaderCtor) {\n throw new Error(\n \"SmartDOMReader is unavailable. Ensure the Smart DOM Reader module is loaded before calling extractRegion.\"\n );\n }\n const reader = new SmartDOMReaderCtor(options);\n return reader.extract(element, options);\n }\n /**\n * Step 3: Extract readable content from a region\n */\n static extractContent(selector, doc, options = {}) {\n const element = doc.querySelector(selector);\n if (!element) return null;\n const result = {\n selector,\n text: {},\n metadata: {\n wordCount: 0,\n hasInteractive: false\n }\n };\n if (options.includeHeadings !== false) {\n const headings = element.querySelectorAll(\"h1, h2, h3, h4, h5, h6\");\n result.text.headings = Array.from(headings).map((h) => ({\n level: parseInt(h.tagName[1]),\n text: this.getTextContent(h, options.maxTextLength)\n }));\n }\n const paragraphs = element.querySelectorAll(\"p\");\n if (paragraphs.length > 0) {\n result.text.paragraphs = Array.from(paragraphs).map((p) => this.getTextContent(p, options.maxTextLength)).filter((text) => text.length > 0);\n }\n if (options.includeLists !== false) {\n const lists = element.querySelectorAll(\"ul, ol\");\n result.text.lists = Array.from(lists).map((list) => ({\n type: list.tagName.toLowerCase(),\n items: Array.from(list.querySelectorAll(\"li\")).map(\n (li) => this.getTextContent(li, options.maxTextLength)\n )\n }));\n }\n if (options.includeTables !== false) {\n const tables = element.querySelectorAll(\"table\");\n result.tables = Array.from(tables).map((table) => {\n const headers = Array.from(table.querySelectorAll(\"th\")).map(\n (th) => this.getTextContent(th)\n );\n const rows = Array.from(table.querySelectorAll(\"tr\")).filter((tr) => tr.querySelector(\"td\")).map((tr) => Array.from(tr.querySelectorAll(\"td\")).map((td) => this.getTextContent(td)));\n return { headers, rows };\n });\n }\n if (options.includeMedia !== false) {\n const images = 
element.querySelectorAll(\"img\");\n const videos = element.querySelectorAll(\"video\");\n const audios = element.querySelectorAll(\"audio\");\n result.media = [\n ...Array.from(images).map((img) => ({\n type: \"img\",\n alt: img.getAttribute(\"alt\") || void 0,\n src: img.getAttribute(\"src\") || void 0\n })),\n ...Array.from(videos).map((video) => ({\n type: \"video\",\n src: video.getAttribute(\"src\") || void 0\n })),\n ...Array.from(audios).map((audio) => ({\n type: \"audio\",\n src: audio.getAttribute(\"src\") || void 0\n }))\n ];\n }\n const allText = element.textContent || \"\";\n result.metadata.wordCount = allText.trim().split(/\\s+/).length;\n result.metadata.hasInteractive = element.querySelectorAll(\"button, a, input, textarea, select\").length > 0;\n return result;\n }\n /**\n * Analyze a region and extract summary information\n */\n static analyzeRegion(element) {\n const selector = SelectorGenerator.generateSelectors(element).css;\n const buttons = element.querySelectorAll('button, [role=\"button\"]');\n const links = element.querySelectorAll(\"a[href]\");\n const inputs = element.querySelectorAll(\"input, textarea, select\");\n const forms = element.querySelectorAll(\"form\");\n const lists = element.querySelectorAll(\"ul, ol\");\n const tables = element.querySelectorAll(\"table\");\n const media = element.querySelectorAll(\"img, video, audio\");\n const interactiveCount = buttons.length + links.length + inputs.length;\n let label;\n const ariaLabel = element.getAttribute(\"aria-label\");\n if (ariaLabel) {\n label = ariaLabel;\n } else if (element.getAttribute(\"aria-labelledby\")) {\n const labelId = element.getAttribute(\"aria-labelledby\");\n if (labelId) {\n const labelElement = element.ownerDocument?.getElementById(labelId);\n if (labelElement) {\n label = labelElement.textContent?.trim();\n }\n }\n } else {\n const heading = element.querySelector(\"h1, h2, h3\");\n if (heading) {\n label = heading.textContent?.trim();\n }\n }\n const 
textContent = element.textContent?.trim() || \"\";\n const textPreview = textContent.length > 50 ? textContent.substring(0, 50) + \"...\" : textContent;\n return {\n selector,\n label,\n role: element.getAttribute(\"role\") || void 0,\n interactiveCount,\n hasForm: forms.length > 0,\n hasList: lists.length > 0,\n hasTable: tables.length > 0,\n hasMedia: media.length > 0,\n buttonCount: buttons.length > 0 ? buttons.length : void 0,\n linkCount: links.length > 0 ? links.length : void 0,\n inputCount: inputs.length > 0 ? inputs.length : void 0,\n textPreview: textPreview.length > 0 ? textPreview : void 0\n };\n }\n /**\n * Extract overview of forms on the page\n */\n static extractFormOverview(root) {\n const forms = root.querySelectorAll(\"form\");\n return Array.from(forms).map((form) => {\n const inputs = form.querySelectorAll(\"input, textarea, select\");\n const selector = SelectorGenerator.generateSelectors(form).css;\n let location2 = \"unknown\";\n if (form.closest('header, [role=\"banner\"]')) {\n location2 = \"header\";\n } else if (form.closest('nav, [role=\"navigation\"]')) {\n location2 = \"navigation\";\n } else if (form.closest('main, [role=\"main\"]')) {\n location2 = \"main\";\n } else if (form.closest('aside, [role=\"complementary\"]')) {\n location2 = \"sidebar\";\n } else if (form.closest('footer, [role=\"contentinfo\"]')) {\n location2 = \"footer\";\n }\n let purpose;\n const formId = form.getAttribute(\"id\")?.toLowerCase();\n const formClass = form.getAttribute(\"class\")?.toLowerCase();\n const formAction = form.getAttribute(\"action\")?.toLowerCase();\n const hasEmail = form.querySelector('input[type=\"email\"]');\n const hasPassword = form.querySelector('input[type=\"password\"]');\n const hasSearch = form.querySelector('input[type=\"search\"]');\n if (hasSearch || formId?.includes(\"search\") || formClass?.includes(\"search\")) {\n purpose = \"search\";\n } else if (hasPassword && hasEmail) {\n purpose = \"login\";\n } else if (hasPassword) 
{\n purpose = \"authentication\";\n } else if (formId?.includes(\"contact\") || formClass?.includes(\"contact\")) {\n purpose = \"contact\";\n } else if (formId?.includes(\"subscribe\") || formClass?.includes(\"subscribe\")) {\n purpose = \"subscription\";\n } else if (formAction?.includes(\"checkout\") || formClass?.includes(\"checkout\")) {\n purpose = \"checkout\";\n }\n return {\n selector,\n location: location2,\n inputCount: inputs.length,\n purpose\n };\n });\n }\n /**\n * Calculate summary statistics\n */\n static calculateSummary(root, regions, forms) {\n const allInteractive = root.querySelectorAll(\"button, a[href], input, textarea, select\");\n const allSections = root.querySelectorAll('section, article, [role=\"region\"]');\n const hasModals = (regions.modals?.length || 0) > 0;\n const errorSelectors = [\".error\", \".alert-danger\", '[role=\"alert\"]'];\n const hasErrors = errorSelectors.some((sel) => {\n const element = root.querySelector(sel);\n return element ? DOMTraversal.isVisible(element) : false;\n });\n const loadingSelectors = [\".loading\", \".spinner\", '[aria-busy=\"true\"]'];\n const isLoading = loadingSelectors.some((sel) => {\n const element = root.querySelector(sel);\n return element ? 
DOMTraversal.isVisible(element) : false;\n });\n const mainContentSelector = regions.main?.selector;\n return {\n totalInteractive: allInteractive.length,\n totalForms: forms.length,\n totalSections: allSections.length,\n hasModals,\n hasErrors,\n isLoading,\n mainContentSelector\n };\n }\n /**\n * Generate AI-friendly suggestions\n */\n static generateSuggestions(regions, summary) {\n const suggestions = [];\n if (summary.hasErrors) {\n suggestions.push(\"Page has error indicators - check error messages before interacting\");\n }\n if (summary.isLoading) {\n suggestions.push(\"Page appears to be loading - wait or check loading state\");\n }\n if (summary.hasModals) {\n suggestions.push(\"Modal/dialog is open - may need to interact with or close it first\");\n }\n if (regions.main && regions.main.interactiveCount > 10) {\n suggestions.push(\n `Main content has ${regions.main.interactiveCount} interactive elements - consider filtering`\n );\n }\n if (summary.totalForms > 0) {\n suggestions.push(`Found ${summary.totalForms} form(s) on the page`);\n }\n if (!regions.main) {\n suggestions.push(\"No clear main content area detected - may need to explore regions\");\n }\n return suggestions;\n }\n /**\n * Get text content with optional truncation\n */\n static getTextContent(element, maxLength) {\n const text = element.textContent?.trim() || \"\";\n if (maxLength && text.length > maxLength) {\n return text.substring(0, maxLength) + \"...\";\n }\n return text;\n }\n }\n class SmartDOMReader {\n options;\n constructor(options = {}) {\n this.options = {\n mode: options.mode || \"interactive\",\n maxDepth: options.maxDepth || 5,\n includeHidden: options.includeHidden || false,\n includeShadowDOM: options.includeShadowDOM || true,\n includeIframes: options.includeIframes || false,\n viewportOnly: options.viewportOnly || false,\n mainContentOnly: options.mainContentOnly || false,\n customSelectors: options.customSelectors || [],\n attributeTruncateLength: 
options.attributeTruncateLength,\n dataAttributeTruncateLength: options.dataAttributeTruncateLength,\n textTruncateLength: options.textTruncateLength,\n filter: options.filter\n };\n }\n /**\n * Main extraction method - extracts all data in one pass\n * @param rootElement The document or element to extract from\n * @param runtimeOptions Options to override constructor options\n */\n extract(rootElement = document, runtimeOptions) {\n const startTime = Date.now();\n const doc = rootElement instanceof Document ? rootElement : rootElement.ownerDocument;\n const options = { ...this.options, ...runtimeOptions };\n let container = rootElement instanceof Document ? doc : rootElement;\n if (options.mainContentOnly && rootElement instanceof Document) {\n container = ContentDetection.findMainContent(doc);\n }\n const pageState = this.extractPageState(doc);\n const landmarks = this.extractLandmarks(doc);\n const interactive = this.extractInteractiveElements(container, options);\n const result = {\n mode: options.mode,\n timestamp: startTime,\n page: pageState,\n landmarks,\n interactive\n };\n if (options.mode === \"full\") {\n result.semantic = this.extractSemanticElements(container, options);\n result.metadata = this.extractMetadata(doc, container, options);\n }\n return result;\n }\n /**\n * Extract page state information\n */\n extractPageState(doc) {\n return {\n url: doc.location?.href || \"\",\n title: doc.title || \"\",\n hasErrors: this.detectErrors(doc),\n isLoading: this.detectLoading(doc),\n hasModals: this.detectModals(doc),\n hasFocus: this.getFocusedElement(doc)\n };\n }\n /**\n * Extract page landmarks\n */\n extractLandmarks(doc) {\n const detected = ContentDetection.detectLandmarks(doc);\n return {\n navigation: this.elementsToSelectors(detected.navigation || []),\n main: this.elementsToSelectors(detected.main || []),\n forms: this.elementsToSelectors(detected.form || []),\n headers: this.elementsToSelectors(detected.banner || []),\n footers: 
this.elementsToSelectors(detected.contentinfo || []),\n articles: this.elementsToSelectors(detected.region || []),\n sections: this.elementsToSelectors(detected.region || [])\n };\n }\n /**\n * Convert elements to selector strings\n */\n elementsToSelectors(elements) {\n return elements.map((el) => SelectorGenerator.generateSelectors(el).css);\n }\n /**\n * Extract interactive elements\n */\n extractInteractiveElements(container, options) {\n const buttons = [];\n const links = [];\n const inputs = [];\n const clickable = [];\n const buttonElements = container.querySelectorAll(\n 'button, [role=\"button\"], input[type=\"button\"], input[type=\"submit\"]'\n );\n buttonElements.forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) buttons.push(extracted);\n }\n });\n const linkElements = container.querySelectorAll(\"a[href]\");\n linkElements.forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) links.push(extracted);\n }\n });\n const inputElements = container.querySelectorAll(\n 'input:not([type=\"button\"]):not([type=\"submit\"]), textarea, select'\n );\n inputElements.forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) inputs.push(extracted);\n }\n });\n if (options.customSelectors) {\n options.customSelectors.forEach((selector) => {\n const elements = container.querySelectorAll(selector);\n elements.forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) clickable.push(extracted);\n }\n });\n });\n }\n const forms = this.extractForms(container, options);\n return {\n buttons,\n links,\n inputs,\n forms,\n clickable\n };\n }\n /**\n * Extract form information\n */\n extractForms(container, options) {\n 
const forms = [];\n const formElements = container.querySelectorAll(\"form\");\n formElements.forEach((form) => {\n if (!this.shouldIncludeElement(form, options)) return;\n const formInputs = [];\n const formButtons = [];\n const inputs = form.querySelectorAll(\n 'input:not([type=\"button\"]):not([type=\"submit\"]), textarea, select'\n );\n inputs.forEach((input) => {\n const extracted = DOMTraversal.extractElement(input, options);\n if (extracted) formInputs.push(extracted);\n });\n const buttons = form.querySelectorAll('button, input[type=\"button\"], input[type=\"submit\"]');\n buttons.forEach((button) => {\n const extracted = DOMTraversal.extractElement(button, options);\n if (extracted) formButtons.push(extracted);\n });\n forms.push({\n selector: SelectorGenerator.generateSelectors(form).css,\n action: form.getAttribute(\"action\") || void 0,\n method: form.getAttribute(\"method\") || void 0,\n inputs: formInputs,\n buttons: formButtons\n });\n });\n return forms;\n }\n /**\n * Extract semantic elements (full mode only)\n */\n extractSemanticElements(container, options) {\n const headings = [];\n const images = [];\n const tables = [];\n const lists = [];\n const articles = [];\n container.querySelectorAll(\"h1, h2, h3, h4, h5, h6\").forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) headings.push(extracted);\n }\n });\n container.querySelectorAll(\"img\").forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) images.push(extracted);\n }\n });\n container.querySelectorAll(\"table\").forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) tables.push(extracted);\n }\n });\n container.querySelectorAll(\"ul, ol\").forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const 
extracted = DOMTraversal.extractElement(el, options);\n if (extracted) lists.push(extracted);\n }\n });\n container.querySelectorAll('article, [role=\"article\"]').forEach((el) => {\n if (this.shouldIncludeElement(el, options)) {\n const extracted = DOMTraversal.extractElement(el, options);\n if (extracted) articles.push(extracted);\n }\n });\n return {\n headings,\n images,\n tables,\n lists,\n articles\n };\n }\n /**\n * Extract metadata\n */\n extractMetadata(doc, container, options) {\n const allElements = container.querySelectorAll(\"*\");\n const extractedElements = container.querySelectorAll(\n \"button, a, input, textarea, select, h1, h2, h3, h4, h5, h6, img, table, ul, ol, article\"\n ).length;\n return {\n totalElements: allElements.length,\n extractedElements,\n mainContent: options.mainContentOnly && container instanceof Element ? SelectorGenerator.generateSelectors(container).css : void 0,\n language: doc.documentElement.getAttribute(\"lang\") || void 0\n };\n }\n /**\n * Check if element should be included based on options\n */\n shouldIncludeElement(element, options) {\n if (!options.includeHidden && !DOMTraversal.isVisible(element)) {\n return false;\n }\n if (options.viewportOnly && !DOMTraversal.isInViewport(element)) {\n return false;\n }\n if (options.filter && !DOMTraversal.passesFilter(element, options.filter)) {\n return false;\n }\n return true;\n }\n /**\n * Detect errors on the page\n */\n detectErrors(doc) {\n const errorSelectors = [\".error\", \".alert-danger\", '[role=\"alert\"]', \".error-message\"];\n return errorSelectors.some((sel) => {\n const element = doc.querySelector(sel);\n return element ? DOMTraversal.isVisible(element) : false;\n });\n }\n /**\n * Detect if page is loading\n */\n detectLoading(doc) {\n const loadingSelectors = [\".loading\", \".spinner\", '[aria-busy=\"true\"]', \".loader\"];\n return loadingSelectors.some((sel) => {\n const element = doc.querySelector(sel);\n return element ? 
DOMTraversal.isVisible(element) : false;\n });\n }\n /**\n * Detect modal dialogs\n */\n detectModals(doc) {\n const modalSelectors = ['[role=\"dialog\"]', \".modal\", \".popup\", \".overlay\"];\n return modalSelectors.some((sel) => {\n const element = doc.querySelector(sel);\n return element ? DOMTraversal.isVisible(element) : false;\n });\n }\n /**\n * Get currently focused element\n */\n getFocusedElement(doc) {\n const focused = doc.activeElement;\n if (focused && focused !== doc.body) {\n return SelectorGenerator.generateSelectors(focused).css;\n }\n return void 0;\n }\n // ===== Static convenience methods =====\n /**\n * Quick extraction for interactive elements only\n * @param doc The document to extract from\n * @param options Extraction options\n */\n static extractInteractive(doc, options = {}) {\n const reader = new SmartDOMReader({\n ...options,\n mode: \"interactive\"\n });\n return reader.extract(doc);\n }\n /**\n * Quick extraction for full content\n * @param doc The document to extract from\n * @param options Extraction options\n */\n static extractFull(doc, options = {}) {\n const reader = new SmartDOMReader({\n ...options,\n mode: \"full\"\n });\n return reader.extract(doc);\n }\n /**\n * Extract from a specific element\n * @param element The element to extract from\n * @param mode The extraction mode\n * @param options Additional options\n */\n static extractFromElement(element, mode = \"interactive\", options = {}) {\n const reader = new SmartDOMReader({\n ...options,\n mode\n });\n return reader.extract(element);\n }\n }\n function executeExtraction(method, args) {\n try {\n let result;\n switch (method) {\n case \"extractStructure\": {\n const structureArgs = args;\n const { selector, frameSelector, formatOptions } = structureArgs;\n let doc = document;\n if (frameSelector) {\n const iframe = document.querySelector(frameSelector);\n if (!iframe || !(iframe instanceof HTMLIFrameElement) || !iframe.contentDocument) {\n return { error: `Cannot 
access iframe: ${frameSelector}` };\n }\n doc = iframe.contentDocument;\n }\n const target = selector ? doc.querySelector(selector) ?? doc : doc;\n const overview = ProgressiveExtractor.extractStructure(target);\n const meta = { title: document.title, url: location.href };\n result = MarkdownFormatter.structure(\n overview,\n formatOptions ?? { detail: \"summary\" },\n meta\n );\n break;\n }\n case \"extractRegion\": {\n const regionArgs = args;\n const { selector, mode, frameSelector, options, formatOptions } = regionArgs;\n let doc = document;\n if (frameSelector) {\n const iframe = document.querySelector(frameSelector);\n if (!iframe || !(iframe instanceof HTMLIFrameElement) || !iframe.contentDocument) {\n return { error: `Cannot access iframe: ${frameSelector}` };\n }\n doc = iframe.contentDocument;\n }\n const extractOptions = {\n ...options || {},\n mode: mode || \"interactive\"\n };\n const extractResult = ProgressiveExtractor.extractRegion(\n selector,\n doc,\n extractOptions,\n SmartDOMReader\n );\n if (!extractResult) {\n return { error: `No element found matching selector: ${selector}` };\n }\n const meta = { title: document.title, url: location.href };\n result = MarkdownFormatter.region(\n extractResult,\n formatOptions ?? 
{ detail: \"region\" },\n meta\n );\n break;\n }\n case \"extractContent\": {\n const contentArgs = args;\n const { selector, frameSelector, options, formatOptions } = contentArgs;\n let doc = document;\n if (frameSelector) {\n const iframe = document.querySelector(frameSelector);\n if (!iframe || !(iframe instanceof HTMLIFrameElement) || !iframe.contentDocument) {\n return { error: `Cannot access iframe: ${frameSelector}` };\n }\n doc = iframe.contentDocument;\n }\n const extractOptions = options || {};\n const extractResult = ProgressiveExtractor.extractContent(selector, doc, extractOptions);\n if (!extractResult) {\n return { error: `No element found matching selector: ${selector}` };\n }\n const meta = { title: document.title, url: location.href };\n result = MarkdownFormatter.content(\n extractResult,\n formatOptions ?? { detail: \"region\" },\n meta\n );\n break;\n }\n case \"extractInteractive\": {\n const interactiveArgs = args;\n const { selector, frameSelector, options, formatOptions } = interactiveArgs;\n let doc = document;\n if (frameSelector) {\n const iframe = document.querySelector(frameSelector);\n if (!iframe || !(iframe instanceof HTMLIFrameElement) || !iframe.contentDocument) {\n return { error: `Cannot access iframe: ${frameSelector}` };\n }\n doc = iframe.contentDocument;\n }\n const extractResult = selector ? SmartDOMReader.extractFromElement(\n doc.querySelector(selector),\n \"interactive\",\n options || {}\n ) : SmartDOMReader.extractInteractive(doc, options || {});\n const meta = { title: document.title, url: location.href };\n result = MarkdownFormatter.region(\n extractResult,\n formatOptions ?? 
{ detail: \"region\" },\n meta\n );\n break;\n }\n case \"extractFull\": {\n const fullArgs = args;\n const { selector, frameSelector, options, formatOptions } = fullArgs;\n let doc = document;\n if (frameSelector) {\n const iframe = document.querySelector(frameSelector);\n if (!iframe || !(iframe instanceof HTMLIFrameElement) || !iframe.contentDocument) {\n return { error: `Cannot access iframe: ${frameSelector}` };\n }\n doc = iframe.contentDocument;\n }\n const extractResult = selector ? SmartDOMReader.extractFromElement(doc.querySelector(selector), \"full\", options || {}) : SmartDOMReader.extractFull(doc, options || {});\n const meta = { title: document.title, url: location.href };\n result = MarkdownFormatter.region(extractResult, formatOptions ?? { detail: \"deep\" }, meta);\n break;\n }\n default:\n return { error: `Unknown method: ${method}` };\n }\n return result;\n } catch (error) {\n return {\n error: error instanceof Error ? error.message : String(error)\n };\n }\n }\n const SmartDOMReaderBundle2 = { executeExtraction };\n exports.SmartDOMReaderBundle = SmartDOMReaderBundle2;\n exports.executeExtraction = executeExtraction;\n Object.defineProperty(exports, Symbol.toStringTag, { value: \"Module\" });\n return exports;\n})({});\n";
9
+ declare const SMART_DOM_READER_VERSION = "1.0.0";
10
+
11
+ export { SMART_DOM_READER_BUNDLE, SMART_DOM_READER_VERSION };