@mcp-b/smart-dom-reader 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1823 @@
1
// Module-runtime helpers. NOTE(review): these look bundler-generated (esbuild-style);
// confirm before hand-editing.

// Short aliases for the reflection primitives used by the helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// `__require`: evaluates to the host `require` when one is defined at load time.
// Otherwise it is the fallback function below (wrapped in a Proxy when Proxy exists,
// so property reads forward to `require` if it becomes defined later). The fallback
// throws unless `require` is defined at call time.
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
  get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
}) : x)(function(x) {
  if (typeof require !== "undefined") return require.apply(this, arguments);
  throw Error('Dynamic require of "' + x + '" is not supported');
});
// `__esm`: wraps a one-entry object `{ "path"() {...} }` into a run-once initializer.
// The first call invokes the object's first own property (clearing `fn` to 0 so it
// cannot run again) and caches the result in `res`; later calls return `res`.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// `__export`: defines each entry of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// `__copyProps`: for every own property name of `from` that `to` lacks (and that is
// not `except`), defines a getter on `to` that reads through to `from`. Enumerability
// follows the source descriptor. Returns `to`.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// `__toCommonJS`: builds a fresh object flagged `__esModule: true` and copies the
// module's properties onto it via `__copyProps`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
27
+
28
+ // src/content-detection.ts
29
+ var ContentDetection;
30
+ var init_content_detection = __esm({
31
+ "src/content-detection.ts"() {
32
+ "use strict";
33
+ ContentDetection = class {
34
+ /**
35
+ * Find the main content area of a page
36
+ * Inspired by dom-to-semantic-markdown's approach
37
+ */
38
+ static findMainContent(doc) {
39
+ const mainElement = doc.querySelector('main, [role="main"]');
40
+ if (mainElement) {
41
+ return mainElement;
42
+ }
43
+ if (!doc.body) {
44
+ return doc.documentElement;
45
+ }
46
+ return this.detectMainContent(doc.body);
47
+ }
48
+ /**
49
+ * Detect main content using scoring algorithm
50
+ */
51
+ static detectMainContent(rootElement) {
52
+ const candidates = [];
53
+ const minScore = 15;
54
+ this.collectCandidates(rootElement, candidates, minScore);
55
+ if (candidates.length === 0) {
56
+ return rootElement;
57
+ }
58
+ candidates.sort((a, b) => this.calculateContentScore(b) - this.calculateContentScore(a));
59
+ let bestCandidate = candidates[0];
60
+ for (let i = 1; i < candidates.length; i++) {
61
+ const isIndependent = !candidates.some(
62
+ (other, j) => j !== i && other.contains(candidates[i])
63
+ );
64
+ if (isIndependent && this.calculateContentScore(candidates[i]) > this.calculateContentScore(bestCandidate)) {
65
+ bestCandidate = candidates[i];
66
+ }
67
+ }
68
+ return bestCandidate;
69
+ }
70
+ /**
71
+ * Collect content candidates
72
+ */
73
+ static collectCandidates(element, candidates, minScore) {
74
+ const score = this.calculateContentScore(element);
75
+ if (score >= minScore) {
76
+ candidates.push(element);
77
+ }
78
+ Array.from(element.children).forEach((child) => {
79
+ this.collectCandidates(child, candidates, minScore);
80
+ });
81
+ }
82
+ /**
83
+ * Calculate content score for an element
84
+ */
85
+ static calculateContentScore(element) {
86
+ let score = 0;
87
+ const semanticClasses = [
88
+ "article",
89
+ "content",
90
+ "main-container",
91
+ "main",
92
+ "main-content",
93
+ "post",
94
+ "entry"
95
+ ];
96
+ const semanticIds = ["content", "main", "article", "post", "entry"];
97
+ semanticClasses.forEach((cls) => {
98
+ if (element.classList.contains(cls)) {
99
+ score += 10;
100
+ }
101
+ });
102
+ semanticIds.forEach((id) => {
103
+ if (element.id && element.id.toLowerCase().includes(id)) {
104
+ score += 10;
105
+ }
106
+ });
107
+ const tag = element.tagName.toLowerCase();
108
+ const highValueTags = ["article", "main", "section"];
109
+ if (highValueTags.includes(tag)) {
110
+ score += 8;
111
+ }
112
+ const paragraphs = element.getElementsByTagName("p").length;
113
+ score += Math.min(paragraphs * 2, 10);
114
+ const headings = element.querySelectorAll("h1, h2, h3").length;
115
+ score += Math.min(headings * 3, 9);
116
+ const textLength = element.textContent?.trim().length || 0;
117
+ if (textLength > 300) {
118
+ score += Math.min(Math.floor(textLength / 300) * 2, 10);
119
+ }
120
+ const linkDensity = this.calculateLinkDensity(element);
121
+ if (linkDensity < 0.3) {
122
+ score += 5;
123
+ } else if (linkDensity > 0.5) {
124
+ score -= 5;
125
+ }
126
+ if (element.hasAttribute("data-main") || element.hasAttribute("data-content") || element.hasAttribute("itemprop")) {
127
+ score += 8;
128
+ }
129
+ const role = element.getAttribute("role");
130
+ if (role === "main" || role === "article") {
131
+ score += 10;
132
+ }
133
+ if (element.matches(
134
+ "aside, nav, header, footer, .sidebar, .navigation, .menu, .ad, .advertisement"
135
+ )) {
136
+ score -= 10;
137
+ }
138
+ const forms = element.getElementsByTagName("form").length;
139
+ if (forms > 2) {
140
+ score -= 5;
141
+ }
142
+ return Math.max(0, score);
143
+ }
144
+ /**
145
+ * Calculate link density in an element
146
+ */
147
+ static calculateLinkDensity(element) {
148
+ const links = element.getElementsByTagName("a");
149
+ let linkTextLength = 0;
150
+ for (const link of Array.from(links)) {
151
+ linkTextLength += link.textContent?.length || 0;
152
+ }
153
+ const totalTextLength = element.textContent?.length || 1;
154
+ return linkTextLength / totalTextLength;
155
+ }
156
+ /**
157
+ * Check if an element is likely navigation
158
+ */
159
+ static isNavigation(element) {
160
+ const tag = element.tagName.toLowerCase();
161
+ if (tag === "nav" || element.getAttribute("role") === "navigation") {
162
+ return true;
163
+ }
164
+ const navPatterns = [/nav/i, /menu/i, /sidebar/i, /toolbar/i];
165
+ const classesAndId = (element.className + " " + element.id).toLowerCase();
166
+ return navPatterns.some((pattern) => pattern.test(classesAndId));
167
+ }
168
+ /**
169
+ * Check if element is likely supplementary content
170
+ */
171
+ static isSupplementary(element) {
172
+ const tag = element.tagName.toLowerCase();
173
+ if (tag === "aside" || element.getAttribute("role") === "complementary") {
174
+ return true;
175
+ }
176
+ const supplementaryPatterns = [/sidebar/i, /widget/i, /related/i, /advertisement/i, /social/i];
177
+ const classesAndId = (element.className + " " + element.id).toLowerCase();
178
+ return supplementaryPatterns.some((pattern) => pattern.test(classesAndId));
179
+ }
180
+ /**
181
+ * Detect page landmarks
182
+ */
183
+ static detectLandmarks(doc) {
184
+ const landmarks = {
185
+ navigation: [],
186
+ main: [],
187
+ complementary: [],
188
+ contentinfo: [],
189
+ banner: [],
190
+ search: [],
191
+ form: [],
192
+ region: []
193
+ };
194
+ const landmarkSelectors = {
195
+ navigation: 'nav, [role="navigation"]',
196
+ main: 'main, [role="main"]',
197
+ complementary: 'aside, [role="complementary"]',
198
+ contentinfo: 'footer, [role="contentinfo"]',
199
+ banner: 'header, [role="banner"]',
200
+ search: '[role="search"]',
201
+ form: 'form[aria-label], form[aria-labelledby], [role="form"]',
202
+ region: 'section[aria-label], section[aria-labelledby], [role="region"]'
203
+ };
204
+ for (const [landmark, selector] of Object.entries(landmarkSelectors)) {
205
+ const elements = doc.querySelectorAll(selector);
206
+ landmarks[landmark] = Array.from(elements);
207
+ }
208
+ return landmarks;
209
+ }
210
+ };
211
+ }
212
+ });
213
+
214
+ // src/selectors.ts
215
+ var SelectorGenerator;
216
+ var init_selectors = __esm({
217
+ "src/selectors.ts"() {
218
+ "use strict";
219
+ SelectorGenerator = class {
220
+ /**
221
+ * Generate multiple selector strategies for an element
222
+ */
223
+ static generateSelectors(element) {
224
+ const doc = element.ownerDocument || document;
225
+ const candidates = [];
226
+ if (element.id && this.isUniqueId(element.id, doc)) {
227
+ candidates.push({ type: "id", value: `#${CSS.escape(element.id)}`, score: 100 });
228
+ }
229
+ const testId = this.getDataTestId(element);
230
+ if (testId) {
231
+ const v = `[data-testid="${CSS.escape(testId)}"]`;
232
+ candidates.push({
233
+ type: "data-testid",
234
+ value: v,
235
+ score: 90 + (this.isUniqueSelectorSafe(v, doc) ? 5 : 0)
236
+ });
237
+ }
238
+ const role = element.getAttribute("role");
239
+ const aria = element.getAttribute("aria-label");
240
+ if (role && aria) {
241
+ const v = `[role="${CSS.escape(role)}"][aria-label="${CSS.escape(aria)}"]`;
242
+ candidates.push({
243
+ type: "role-aria",
244
+ value: v,
245
+ score: 85 + (this.isUniqueSelectorSafe(v, doc) ? 5 : 0)
246
+ });
247
+ }
248
+ const nameAttr = element.getAttribute("name");
249
+ if (nameAttr) {
250
+ const v = `[name="${CSS.escape(nameAttr)}"]`;
251
+ candidates.push({
252
+ type: "name",
253
+ value: v,
254
+ score: 78 + (this.isUniqueSelectorSafe(v, doc) ? 5 : 0)
255
+ });
256
+ }
257
+ const pathCss = this.generateCSSSelector(element, doc);
258
+ const structuralPenalty = (pathCss.match(/:nth-child\(/g) || []).length * 10;
259
+ const classBonus = pathCss.includes(".") ? 8 : 0;
260
+ const pathScore = Math.max(0, 70 + classBonus - structuralPenalty);
261
+ candidates.push({ type: "class-path", value: pathCss, score: pathScore });
262
+ const xpath = this.generateXPath(element, doc);
263
+ candidates.push({ type: "xpath", value: xpath, score: 40 });
264
+ const textBased = this.generateTextBasedSelector(element);
265
+ if (textBased) candidates.push({ type: "text", value: textBased, score: 30 });
266
+ candidates.sort((a, b) => b.score - a.score);
267
+ const bestCss = candidates.find((c) => c.type !== "xpath" && c.type !== "text")?.value || pathCss;
268
+ return {
269
+ css: bestCss,
270
+ xpath,
271
+ textBased,
272
+ dataTestId: testId || void 0,
273
+ ariaLabel: aria || void 0,
274
+ candidates
275
+ };
276
+ }
277
+ /**
278
+ * Generate a unique CSS selector for an element
279
+ */
280
+ static generateCSSSelector(element, doc) {
281
+ if (element.id && this.isUniqueId(element.id, doc)) {
282
+ return `#${CSS.escape(element.id)}`;
283
+ }
284
+ const testId = this.getDataTestId(element);
285
+ if (testId) {
286
+ return `[data-testid="${CSS.escape(testId)}"]`;
287
+ }
288
+ const path = [];
289
+ let current = element;
290
+ while (current && current.nodeType === Node.ELEMENT_NODE) {
291
+ let selector = current.nodeName.toLowerCase();
292
+ if (current.id && this.isUniqueId(current.id, doc)) {
293
+ selector = `#${CSS.escape(current.id)}`;
294
+ path.unshift(selector);
295
+ break;
296
+ }
297
+ const classes = this.getMeaningfulClasses(current);
298
+ if (classes.length > 0) {
299
+ selector += "." + classes.map((c) => CSS.escape(c)).join(".");
300
+ }
301
+ const siblings = current.parentElement?.children;
302
+ if (siblings && siblings.length > 1) {
303
+ const index = Array.from(siblings).indexOf(current);
304
+ if (index > 0 || !this.isUniqueSelector(selector, current.parentElement)) {
305
+ selector += `:nth-child(${index + 1})`;
306
+ }
307
+ }
308
+ path.unshift(selector);
309
+ current = current.parentElement;
310
+ }
311
+ return this.optimizePath(path, element, doc);
312
+ }
313
+ /**
314
+ * Generate XPath for an element
315
+ */
316
+ static generateXPath(element, doc) {
317
+ if (element.id && this.isUniqueId(element.id, doc)) {
318
+ return `//*[@id="${element.id}"]`;
319
+ }
320
+ const path = [];
321
+ let current = element;
322
+ while (current && current.nodeType === Node.ELEMENT_NODE) {
323
+ const tagName = current.nodeName.toLowerCase();
324
+ if (current.id && this.isUniqueId(current.id, doc)) {
325
+ path.unshift(`//*[@id="${current.id}"]`);
326
+ break;
327
+ }
328
+ let xpath = tagName;
329
+ const siblings = current.parentElement?.children;
330
+ if (siblings) {
331
+ const sameTagSiblings = Array.from(siblings).filter(
332
+ (s) => s.nodeName.toLowerCase() === tagName
333
+ );
334
+ if (sameTagSiblings.length > 1) {
335
+ const index = sameTagSiblings.indexOf(current) + 1;
336
+ xpath += `[${index}]`;
337
+ }
338
+ }
339
+ path.unshift(xpath);
340
+ current = current.parentElement;
341
+ }
342
+ return "//" + path.join("/");
343
+ }
344
+ /**
345
+ * Generate a text-based selector for buttons and links
346
+ */
347
+ static generateTextBasedSelector(element) {
348
+ const text = element.textContent?.trim();
349
+ if (!text || text.length > 50) return void 0;
350
+ const tag = element.nodeName.toLowerCase();
351
+ if (["button", "a", "label"].includes(tag)) {
352
+ const escapedText = text.replace(/['"\\]/g, "\\$&");
353
+ return `${tag}:contains("${escapedText}")`;
354
+ }
355
+ return void 0;
356
+ }
357
+ /**
358
+ * Get data-testid or similar attributes
359
+ */
360
+ static getDataTestId(element) {
361
+ return element.getAttribute("data-testid") || element.getAttribute("data-test-id") || element.getAttribute("data-test") || element.getAttribute("data-cy") || void 0;
362
+ }
363
+ /**
364
+ * Check if an ID is unique in the document
365
+ */
366
+ static isUniqueId(id, doc) {
367
+ return doc.querySelectorAll(`#${CSS.escape(id)}`).length === 1;
368
+ }
369
+ /**
370
+ * Check if a selector is unique within a container
371
+ */
372
+ static isUniqueSelector(selector, container) {
373
+ try {
374
+ return container.querySelectorAll(selector).length === 1;
375
+ } catch {
376
+ return false;
377
+ }
378
+ }
379
+ static isUniqueSelectorSafe(selector, doc) {
380
+ try {
381
+ return doc.querySelectorAll(selector).length === 1;
382
+ } catch {
383
+ return false;
384
+ }
385
+ }
386
+ /**
387
+ * Get meaningful classes (filtering out utility classes)
388
+ */
389
+ static getMeaningfulClasses(element) {
390
+ const classes = Array.from(element.classList);
391
+ const utilityPatterns = [
392
+ /^(p|m|w|h|text|bg|border|flex|grid|col|row)-/,
393
+ /^(xs|sm|md|lg|xl|2xl):/,
394
+ /^(hover|focus|active|disabled|checked):/,
395
+ /^js-/,
396
+ /^is-/,
397
+ /^has-/
398
+ ];
399
+ return classes.filter((cls) => {
400
+ if (cls.length < 3) return false;
401
+ return !utilityPatterns.some((pattern) => pattern.test(cls));
402
+ }).slice(0, 2);
403
+ }
404
+ /**
405
+ * Optimize the selector path by removing unnecessary parts
406
+ */
407
+ static optimizePath(path, element, doc) {
408
+ for (let i = 0; i < path.length - 1; i++) {
409
+ const shortPath = path.slice(i).join(" > ");
410
+ try {
411
+ const matches = doc.querySelectorAll(shortPath);
412
+ if (matches.length === 1 && matches[0] === element) {
413
+ return shortPath;
414
+ }
415
+ } catch {
416
+ }
417
+ }
418
+ return path.join(" > ");
419
+ }
420
+ /**
421
+ * Get a human-readable path description
422
+ */
423
+ static getContextPath(element) {
424
+ const path = [];
425
+ let current = element;
426
+ let depth = 0;
427
+ const maxDepth = 5;
428
+ while (current && current !== element.ownerDocument?.body && depth < maxDepth) {
429
+ const tag = current.nodeName.toLowerCase();
430
+ let descriptor = tag;
431
+ if (current.id) {
432
+ descriptor = `${tag}#${current.id}`;
433
+ } else if (current.className && typeof current.className === "string") {
434
+ const firstClass = current.className.split(" ")[0];
435
+ if (firstClass) {
436
+ descriptor = `${tag}.${firstClass}`;
437
+ }
438
+ }
439
+ const role = current.getAttribute("role");
440
+ if (role) {
441
+ descriptor += `[role="${role}"]`;
442
+ }
443
+ path.unshift(descriptor);
444
+ current = current.parentElement;
445
+ depth++;
446
+ }
447
+ return path;
448
+ }
449
+ };
450
+ }
451
+ });
452
+
453
+ // src/traversal.ts
454
+ var DOMTraversal;
455
+ var init_traversal = __esm({
456
+ "src/traversal.ts"() {
457
+ "use strict";
458
+ init_selectors();
459
+ DOMTraversal = class {
460
// CSS selectors treated as interactive elements. getInteractiveElements joins
// them with ", " into one querySelectorAll call.
static INTERACTIVE_SELECTORS = [
  "button",
  "a[href]",
  'input:not([type="hidden"])',
  "textarea",
  "select",
  '[role="button"]',
  "[onclick]",
  '[contenteditable="true"]',
  "summary",
  '[tabindex]:not([tabindex="-1"])'
];
// CSS selectors for semantic/structural elements. getSemanticElements joins them
// with ", " into one querySelectorAll call.
static SEMANTIC_SELECTORS = [
  "h1",
  "h2",
  "h3",
  "h4",
  "h5",
  "h6",
  "article",
  "section",
  "nav",
  "aside",
  "main",
  "header",
  "footer",
  "form",
  "table",
  "ul",
  "ol",
  "img[alt]",
  "figure",
  "video",
  "audio",
  '[role="navigation"]',
  '[role="main"]',
  '[role="complementary"]',
  '[role="contentinfo"]'
];
499
+ /**
500
+ * Check if element is visible
501
+ */
502
+ static isVisible(element, computedStyle) {
503
+ const rect = element.getBoundingClientRect();
504
+ const style = computedStyle || element.ownerDocument?.defaultView?.getComputedStyle(element);
505
+ if (!style) return false;
506
+ return !!(rect.width > 0 && rect.height > 0 && style.display !== "none" && style.visibility !== "hidden" && style.opacity !== "0" && element.offsetParent !== null);
507
+ }
508
+ /**
509
+ * Check if element is in viewport
510
+ */
511
+ static isInViewport(element, viewport) {
512
+ const rect = element.getBoundingClientRect();
513
+ const view = viewport || {
514
+ width: element.ownerDocument?.defaultView?.innerWidth || 0,
515
+ height: element.ownerDocument?.defaultView?.innerHeight || 0
516
+ };
517
+ return rect.top < view.height && rect.bottom > 0 && rect.left < view.width && rect.right > 0;
518
+ }
519
+ /**
520
+ * Check if element passes filter criteria
521
+ */
522
+ static passesFilter(element, filter) {
523
+ if (!filter) return true;
524
+ const htmlElement = element;
525
+ if (filter.excludeSelectors?.length) {
526
+ for (const selector of filter.excludeSelectors) {
527
+ if (element.matches(selector)) return false;
528
+ }
529
+ }
530
+ if (filter.includeSelectors?.length) {
531
+ let matches = false;
532
+ for (const selector of filter.includeSelectors) {
533
+ if (element.matches(selector)) {
534
+ matches = true;
535
+ break;
536
+ }
537
+ }
538
+ if (!matches) return false;
539
+ }
540
+ if (filter.tags?.length && !filter.tags.includes(element.tagName.toLowerCase())) {
541
+ return false;
542
+ }
543
+ const textContent = htmlElement.textContent?.toLowerCase() || "";
544
+ if (filter.textContains?.length) {
545
+ let hasText = false;
546
+ for (const text of filter.textContains) {
547
+ if (textContent.includes(text.toLowerCase())) {
548
+ hasText = true;
549
+ break;
550
+ }
551
+ }
552
+ if (!hasText) return false;
553
+ }
554
+ if (filter.textMatches?.length) {
555
+ let matches = false;
556
+ for (const pattern of filter.textMatches) {
557
+ if (pattern.test(textContent)) {
558
+ matches = true;
559
+ break;
560
+ }
561
+ }
562
+ if (!matches) return false;
563
+ }
564
+ if (filter.hasAttributes?.length) {
565
+ for (const attr of filter.hasAttributes) {
566
+ if (!element.hasAttribute(attr)) return false;
567
+ }
568
+ }
569
+ if (filter.attributeValues) {
570
+ for (const [attr, value] of Object.entries(filter.attributeValues)) {
571
+ const attrValue = element.getAttribute(attr);
572
+ if (!attrValue) return false;
573
+ if (typeof value === "string") {
574
+ if (attrValue !== value) return false;
575
+ } else if (value instanceof RegExp) {
576
+ if (!value.test(attrValue)) return false;
577
+ }
578
+ }
579
+ }
580
+ if (filter.withinSelectors?.length) {
581
+ let isWithin = false;
582
+ for (const selector of filter.withinSelectors) {
583
+ if (element.closest(selector)) {
584
+ isWithin = true;
585
+ break;
586
+ }
587
+ }
588
+ if (!isWithin) return false;
589
+ }
590
+ if (filter.interactionTypes?.length) {
591
+ const interaction = this.getInteractionInfo(element);
592
+ let hasInteraction = false;
593
+ for (const type of filter.interactionTypes) {
594
+ if (interaction[type]) {
595
+ hasInteraction = true;
596
+ break;
597
+ }
598
+ }
599
+ if (!hasInteraction) return false;
600
+ }
601
+ if (filter.nearText) {
602
+ const parent = element.parentElement;
603
+ if (!parent || !parent.textContent?.toLowerCase().includes(filter.nearText.toLowerCase())) {
604
+ return false;
605
+ }
606
+ }
607
+ return true;
608
+ }
609
+ /**
610
+ * Extract element information
611
+ */
612
+ static extractElement(element, options, depth = 0) {
613
+ if (options.maxDepth && depth > options.maxDepth) {
614
+ return null;
615
+ }
616
+ if (!options.includeHidden && !this.isVisible(element)) {
617
+ return null;
618
+ }
619
+ if (options.viewportOnly && !this.isInViewport(element)) {
620
+ return null;
621
+ }
622
+ if (!this.passesFilter(element, options.filter)) {
623
+ return null;
624
+ }
625
+ const htmlElement = element;
626
+ const extracted = {
627
+ tag: element.tagName.toLowerCase(),
628
+ text: this.getElementText(element, options),
629
+ selector: SelectorGenerator.generateSelectors(element),
630
+ attributes: this.getRelevantAttributes(element, options),
631
+ context: this.getElementContext(element),
632
+ interaction: this.getInteractionInfo(element)
633
+ // bounds removed to save tokens
634
+ };
635
+ if (options.mode === "full" && this.isSemanticContainer(element)) {
636
+ const children = [];
637
+ if (options.includeShadowDOM && htmlElement.shadowRoot) {
638
+ const shadowChildren = this.extractChildren(htmlElement.shadowRoot, options, depth + 1);
639
+ children.push(...shadowChildren);
640
+ }
641
+ const regularChildren = this.extractChildren(element, options, depth + 1);
642
+ children.push(...regularChildren);
643
+ if (children.length > 0) {
644
+ extracted.children = children;
645
+ }
646
+ }
647
+ return extracted;
648
+ }
649
+ /**
650
+ * Extract children elements
651
+ */
652
+ static extractChildren(container, options, depth) {
653
+ const children = [];
654
+ const elements = container.querySelectorAll("*");
655
+ for (const child of Array.from(elements)) {
656
+ if (this.hasExtractedAncestor(child, elements)) {
657
+ continue;
658
+ }
659
+ const extracted = this.extractElement(child, options, depth);
660
+ if (extracted) {
661
+ children.push(extracted);
662
+ }
663
+ }
664
+ return children;
665
+ }
666
+ /**
667
+ * Check if element has an ancestor that was already extracted
668
+ */
669
+ static hasExtractedAncestor(element, extractedElements) {
670
+ let parent = element.parentElement;
671
+ while (parent) {
672
+ if (Array.from(extractedElements).includes(parent)) {
673
+ return true;
674
+ }
675
+ parent = parent.parentElement;
676
+ }
677
+ return false;
678
+ }
679
+ /**
680
+ * Get relevant attributes for an element
681
+ */
682
+ static getRelevantAttributes(element, options) {
683
+ const relevant = [
684
+ "id",
685
+ "class",
686
+ "name",
687
+ "type",
688
+ "value",
689
+ "placeholder",
690
+ "href",
691
+ "src",
692
+ "alt",
693
+ "title",
694
+ "action",
695
+ "method",
696
+ "aria-label",
697
+ "aria-describedby",
698
+ "aria-controls",
699
+ "role",
700
+ "disabled",
701
+ "readonly",
702
+ "required",
703
+ "checked",
704
+ "min",
705
+ "max",
706
+ "pattern",
707
+ "step",
708
+ "autocomplete",
709
+ "data-testid",
710
+ "data-test",
711
+ "data-cy"
712
+ ];
713
+ const attributes = {};
714
+ const attrTruncate = options.attributeTruncateLength ?? 100;
715
+ const dataAttrTruncate = options.dataAttributeTruncateLength ?? 50;
716
+ for (const attr of relevant) {
717
+ const value = element.getAttribute(attr);
718
+ if (value) {
719
+ attributes[attr] = value.length > attrTruncate ? value.substring(0, attrTruncate) + "..." : value;
720
+ }
721
+ }
722
+ for (const attr of element.attributes) {
723
+ if (attr.name.startsWith("data-") && !relevant.includes(attr.name)) {
724
+ attributes[attr.name] = attr.value.length > dataAttrTruncate ? attr.value.substring(0, dataAttrTruncate) + "..." : attr.value;
725
+ }
726
+ }
727
+ return attributes;
728
+ }
729
+ /**
730
+ * Get element context information
731
+ */
732
+ static getElementContext(element) {
733
+ const context = {
734
+ parentChain: SelectorGenerator.getContextPath(element)
735
+ };
736
+ const form = element.closest("form");
737
+ if (form) {
738
+ context.nearestForm = SelectorGenerator.generateSelectors(form).css;
739
+ }
740
+ const section = element.closest('section, [role="region"]');
741
+ if (section) {
742
+ context.nearestSection = SelectorGenerator.generateSelectors(section).css;
743
+ }
744
+ const main = element.closest('main, [role="main"]');
745
+ if (main) {
746
+ context.nearestMain = SelectorGenerator.generateSelectors(main).css;
747
+ }
748
+ const nav = element.closest('nav, [role="navigation"]');
749
+ if (nav) {
750
+ context.nearestNav = SelectorGenerator.generateSelectors(nav).css;
751
+ }
752
+ return context;
753
+ }
754
+ /**
755
+ * Get interaction information for an element (compact format)
756
+ */
757
+ static getInteractionInfo(element) {
758
+ const htmlElement = element;
759
+ const interaction = {};
760
+ const hasClickHandler = !!(htmlElement.onclick || element.getAttribute("onclick") || element.matches('button, a[href], [role="button"], [tabindex]:not([tabindex="-1"])'));
761
+ if (hasClickHandler) interaction.click = true;
762
+ const hasChangeHandler = !!(htmlElement.onchange || element.getAttribute("onchange") || element.matches("input, select, textarea"));
763
+ if (hasChangeHandler) interaction.change = true;
764
+ const hasSubmitHandler = !!(htmlElement.onsubmit || element.getAttribute("onsubmit") || element.matches("form"));
765
+ if (hasSubmitHandler) interaction.submit = true;
766
+ const triggersNavigation = element.matches('a[href], button[type="submit"]');
767
+ if (triggersNavigation) interaction.nav = true;
768
+ const isDisabled = htmlElement.hasAttribute("disabled") || htmlElement.getAttribute("aria-disabled") === "true";
769
+ if (isDisabled) interaction.disabled = true;
770
+ const isHidden = !this.isVisible(element);
771
+ if (isHidden) interaction.hidden = true;
772
+ const ariaRole = element.getAttribute("role");
773
+ if (ariaRole) interaction.role = ariaRole;
774
+ if (element.matches("input, textarea, select, button")) {
775
+ const form = element.form || element.closest("form");
776
+ if (form) {
777
+ interaction.form = SelectorGenerator.generateSelectors(form).css;
778
+ }
779
+ }
780
+ return interaction;
781
+ }
782
+ /**
783
+ * Get text content of an element (limited length)
784
+ */
785
+ static getElementText(element, options) {
786
+ if (element.matches("input, textarea")) {
787
+ const input = element;
788
+ return input.value || input.placeholder || "";
789
+ }
790
+ if (element.matches("img")) {
791
+ return element.alt || "";
792
+ }
793
+ const text = element.textContent?.trim() || "";
794
+ const maxLength = options?.textTruncateLength;
795
+ if (maxLength && text.length > maxLength) {
796
+ return text.substring(0, maxLength) + "...";
797
+ }
798
+ return text;
799
+ }
800
+ /**
801
+ * Check if element is a semantic container
802
+ */
803
+ static isSemanticContainer(element) {
804
+ return element.matches(
805
+ 'article, section, nav, aside, main, header, footer, form, table, ul, ol, dl, figure, details, dialog, [role="region"], [role="navigation"], [role="main"], [role="complementary"]'
806
+ );
807
+ }
808
+ /**
809
+ * Get interactive elements
810
+ */
811
+ static getInteractiveElements(container = document, options) {
812
+ const elements = [];
813
+ const selector = this.INTERACTIVE_SELECTORS.join(", ");
814
+ const found = container.querySelectorAll(selector);
815
+ for (const element of Array.from(found)) {
816
+ const extracted = this.extractElement(element, options);
817
+ if (extracted) {
818
+ elements.push(extracted);
819
+ }
820
+ }
821
+ if (options.customSelectors) {
822
+ for (const customSelector of options.customSelectors) {
823
+ try {
824
+ const customFound = container.querySelectorAll(customSelector);
825
+ for (const element of Array.from(customFound)) {
826
+ const extracted = this.extractElement(element, options);
827
+ if (extracted) {
828
+ elements.push(extracted);
829
+ }
830
+ }
831
+ } catch (e) {
832
+ console.warn(`Invalid custom selector: ${customSelector}`);
833
+ }
834
+ }
835
+ }
836
+ return elements;
837
+ }
838
+ /**
839
+ * Get semantic elements (for full mode)
840
+ */
841
+ static getSemanticElements(container = document, options) {
842
+ const elements = [];
843
+ const selector = this.SEMANTIC_SELECTORS.join(", ");
844
+ const found = container.querySelectorAll(selector);
845
+ for (const element of Array.from(found)) {
846
+ const extracted = this.extractElement(element, options);
847
+ if (extracted) {
848
+ elements.push(extracted);
849
+ }
850
+ }
851
+ return elements;
852
+ }
853
+ };
854
+ }
855
+ });
856
+
857
+ // src/markdown-formatter.ts
858
/**
 * Trim `text` and shorten it to roughly `len` characters.
 *
 * Text that fits (or a falsy `len`) is returned trimmed. If an action keyword
 * such as "login" or "submit" occurs past the first ~66% of the budget, the
 * result keeps the head and a window around the keyword, joined by an ellipsis.
 * Otherwise the text is cut at `len`, backing up to the last space when that
 * space lies beyond index 32, and an ellipsis is appended.
 *
 * @param text - Source text; null/undefined is treated as "".
 * @param len - Target length; falsy disables truncation.
 * @returns The shortened text.
 */
function truncate(text, len) {
  const cleaned = (text ?? "").trim();
  if (!len || cleaned.length <= len) return cleaned;

  const actionKeywords = [
    "login",
    "log in",
    "sign in",
    "sign up",
    "submit",
    "search",
    "filter",
    "add to cart",
    "next",
    "continue"
  ];
  const lowered = cleaned.toLowerCase();
  const headLength = Math.max(0, Math.floor(len * 0.66));

  // The first keyword in list order that occurs anywhere decides the position.
  const keyword = actionKeywords.find((candidate) => lowered.includes(candidate));
  const keywordAt = keyword === undefined ? -1 : lowered.indexOf(keyword);
  if (keywordAt > headLength) {
    const windowSize = Math.max(12, len - headLength - 5);
    const from = Math.max(0, keywordAt - Math.floor(windowSize / 2));
    const to = Math.min(cleaned.length, from + windowSize);
    return `${cleaned.slice(0, headLength).trimEnd()} \u2026 ${cleaned.slice(from, to).trim()}\u2026`;
  }

  const clipped = cleaned.slice(0, len);
  const breakAt = clipped.lastIndexOf(" ");
  const kept = breakAt > 32 ? clipped.slice(0, breakAt) : clipped;
  return `${kept}\u2026`;
}
886
/**
 * Return the element record's preferred CSS selector.
 *
 * @param el - Extracted element record.
 * @returns `el.selector.css`, or "" when it is missing or empty.
 */
function bestSelector(el) {
  const css = el.selector?.css;
  return css ? css : "";
}
889
/**
 * Derive a short, deterministic identifier from a string.
 *
 * Starts from 5381 and, per UTF-16 code unit, multiplies by 33 and XORs in the
 * unit. The unsigned 32-bit result is rendered in base 36 after "sec-".
 *
 * @param input - String to hash.
 * @returns Identifier of the form `sec-<base36>`.
 */
function hashId(input) {
  let hash = 5381;
  let index = 0;
  while (index < input.length) {
    hash = (hash * 33) ^ input.charCodeAt(index);
    index += 1;
  }
  return `sec-${(hash >>> 0).toString(36)}`;
}
894
/**
 * Map a region key to the emoji used as its marker in rendered output.
 *
 * @param key - Region key ("header", "navigation", "main", "sections",
 *   "sidebar", "footer" or "modals").
 * @returns The matching emoji, or a generic marker for any other key.
 */
function iconForRegion(key) {
  // A Map avoids accidental hits on inherited object properties.
  const icons = new Map([
    ["header", "\u{1F9ED}"],
    ["navigation", "\u{1F4D1}"],
    ["main", "\u{1F4C4}"],
    ["sections", "\u{1F5C2}\uFE0F"],
    ["sidebar", "\u{1F4DA}"],
    ["footer", "\u{1F53B}"],
    ["modals", "\u{1F4AC}"]
  ]);
  return icons.has(key) ? icons.get(key) : "\u{1F539}";
}
914
/**
 * Render one extracted element as a markdown list line:
 * `- TAG: text → \`selector\` (action)`.
 *
 * The text is the element text or, when empty, its aria-label; "(no text)" when
 * both are missing. The action suffix is the first of submit / click / change
 * flagged on the element, and is omitted when none applies.
 *
 * Fix: the aria-label fallback read `attributes.ariaLabel`, but extracted
 * attributes are keyed by their DOM name (`"aria-label"`), so the fallback never
 * fired. The DOM name is now read first; the old key is kept as a fallback.
 *
 * @param el - Extracted element record.
 * @param opts - Optional rendering options; `maxTextLength` defaults to 80.
 * @returns The formatted line.
 */
function elementLine(el, opts) {
  const ariaLabel = el.attributes?.["aria-label"] || el.attributes?.ariaLabel;
  const txt = truncate(el.text || ariaLabel, opts?.maxTextLength ?? 80);
  const sel = bestSelector(el);
  const tag = el.tag.toLowerCase();
  const action = el.interaction?.submit ? "submit" : el.interaction?.click ? "click" : el.interaction?.change ? "change" : void 0;
  const actionText = action ? ` (${action})` : "";
  return `- ${tag.toUpperCase()}: ${txt || "(no text)"} \u2192 \`${sel}\`${actionText}`;
}
922
/**
 * Summarize how stable the CSS selectors of the interactive elements are.
 *
 * Looks at buttons, links, inputs and other clickable elements. Reports the
 * percentage of selectors starting with "#" or containing `[data-testid=`
 * ("stable") and the percentage containing `:nth-child(` ("structural").
 *
 * @param inter - Object with `buttons`, `links`, `inputs` and `clickable` arrays.
 * @returns One-line summary string.
 */
function selectorQualitySummary(inter) {
  const selectors = [inter.buttons, inter.links, inter.inputs, inter.clickable].flatMap(
    (group) => group.map((entry) => entry.selector?.css || "")
  );
  // Avoid dividing by zero when there are no elements.
  const total = selectors.length || 1;
  const countWhere = (predicate) => selectors.filter(predicate).length;
  const percentOf = (count) => Math.round(count / total * 100);

  const idCount = countWhere((s) => s.startsWith("#"));
  const testIdCount = countWhere((s) => /\[data-testid=/.test(s));
  const nthCount = countWhere((s) => /:nth-child\(/.test(s));

  const stablePct = percentOf(idCount + testIdCount);
  const nthPct = percentOf(nthCount);
  return `Selector quality: ${stablePct}% stable (ID/data-testid), ${nthPct}% structural (:nth-child)`;
}
937
/**
 * Render the interactive elements of a region as grouped markdown lists.
 *
 * Each non-empty group gets a heading line followed by one bullet per element.
 * When `opts.maxElements` is a number, each group is capped to that many
 * entries independently.
 *
 * @param {object} inter - Result with `buttons`, `links`, `inputs`,
 *   `clickable` and `forms` arrays.
 * @param {object} [opts] - Formatting options (`maxElements`, plus whatever
 *   `elementLine` reads).
 * @returns {string} Newline-joined markdown; empty string when every group is empty.
 */
function renderInteractive(inter, opts) {
  const capped = (items) =>
    typeof opts?.maxElements === "number" ? items.slice(0, opts.maxElements) : items;
  const out = [];

  const elementGroups = [
    ["Buttons:", inter.buttons],
    ["Links:", inter.links],
    ["Inputs:", inter.inputs],
    ["Other Clickable:", inter.clickable]
  ];
  for (const [heading, items] of elementGroups) {
    if (!items.length) continue;
    out.push(heading);
    capped(items).forEach((el) => {
      out.push(elementLine(el, opts));
    });
  }

  if (inter.forms.length) {
    out.push("Forms:");
    capped(inter.forms).forEach((form) => {
      const action = form.action ?? "-";
      const method = form.method ?? "-";
      out.push(`- FORM: action=${action} method=${method} \u2192 \`${form.selector}\``);
    });
  }

  return out.join("\n");
}
964
/**
 * Render one region summary as a single outline line:
 * `<icon> <label> → \`selector\` [sectionId] — stats`.
 *
 * @param {object} region - Region info; reads `selector`, `label`, `role`,
 *   `buttonCount`, `linkCount`, `inputCount` and `textPreview`.
 * @returns {string} The formatted line.
 */
function renderRegionInfo(region) {
  // "region" is not a dedicated key, so this resolves to the generic icon.
  const icon = iconForRegion("region");
  const idSource = `${region.selector}|${region.label ?? ""}|${region.role ?? ""}`;
  const id = hashId(idSource);
  const label = region.label ? ` ${region.label}` : "";

  // Zero/undefined counts and empty previews are omitted.
  const stats = [
    region.buttonCount ? `${region.buttonCount} buttons` : null,
    region.linkCount ? `${region.linkCount} links` : null,
    region.inputCount ? `${region.inputCount} inputs` : null,
    region.textPreview ? `\u201C${truncate(region.textPreview, 80)}\u201D` : null
  ].filter((entry) => entry !== null);
  const statsLine = stats.length ? ` \u2014 ${stats.join(", ")}` : "";

  return `${icon} ${label} \u2192 \`${region.selector}\` [${id}]${statsLine}`;
}
976
/**
 * Wrap a markdown body in a small XML envelope:
 * `<page title=".." url=".."><TYPE><![CDATA[ body ]]></TYPE></page>`.
 *
 * The body comes from page text, so it may contain the CDATA terminator
 * `]]>`. Left as-is, that sequence would close the CDATA section early and
 * let the rest of the body be parsed as markup. Each occurrence is therefore
 * split across two adjacent CDATA sections, which parses back to the
 * original text.
 *
 * @param {string} body - Markdown content to embed.
 * @param {{title?: string, url?: string}} [meta] - Optional page metadata,
 *   emitted as escaped attributes on `<page>`.
 * @param {string} [type="section"] - Name of the inner element.
 * @returns {string} The XML document as a string.
 */
function wrapXml(body, meta, type = "section") {
  const attrs = [
    meta?.title ? `title="${escapeXml(meta.title)}"` : null,
    meta?.url ? `url="${escapeXml(meta.url)}"` : null
  ].filter(Boolean).join(" ");
  // "]]>" -> "]]" + end of section + start of new section + ">".
  const safeBody = String(body).split("]]>").join("]]]]><![CDATA[>");
  return [
    `<page ${attrs}>`,
    `<${type}><![CDATA[`,
    safeBody,
    `]]></${type}>`,
    `</page>`
  ].join("\n");
}
987
/**
 * Escape the characters `&`, `<`, `>` and `"` for use in XML text or
 * double-quoted attribute values.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with the four characters replaced by entities.
 */
function escapeXml(s) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  return s.replace(/[&<>"]/g, (ch) => entities[ch]);
}
990
/**
 * Upper-case the first UTF-16 code unit of a string, leaving the rest untouched.
 *
 * @param {string} s - Input text.
 * @returns {string} The text with its first character upper-cased.
 */
function capitalize(s) {
  const head = s.charAt(0);
  const tail = s.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
993
var MarkdownFormatter;
var init_markdown_formatter = __esm({
  "src/markdown-formatter.ts"() {
    "use strict";
    /**
     * Renders extraction results as markdown wrapped in an XML envelope
     * (see `wrapXml`). Every method is static and returns a string.
     */
    MarkdownFormatter = class {
      /**
       * Format a structural overview as a page outline.
       *
       * @param {object} overview - Overview with `regions` and optional `suggestions`.
       * @param {object} [_opts] - Accepted for signature symmetry; not read.
       * @param {{title?: string, url?: string}} [meta] - Optional page metadata.
       * @returns {string} XML-wrapped markdown with an `<outline>` element.
       */
      static structure(overview, _opts = {}, meta) {
        const out = ["# Page Outline"];
        if (meta?.title || meta?.url) {
          out.push(`Title: ${meta?.title ?? ""}`.trim(), `URL: ${meta?.url ?? ""}`.trim());
        }
        out.push("");

        const { regions } = overview;
        // Fixed display order of the outline sections.
        const orderedKeys = ["header", "navigation", "main", "sections", "sidebar", "footer", "modals"];
        for (const key of orderedKeys) {
          const value = regions[key];
          if (!value) continue;
          // Single regions and region lists render the same way.
          const group = Array.isArray(value) ? value : [value];
          if (group.length === 0) continue;
          out.push(`## ${iconForRegion(key)} ${capitalize(key)}`);
          group.forEach((region) => {
            out.push(renderRegionInfo(region));
          });
          out.push("");
        }

        if (overview.suggestions?.length) {
          out.push("## Suggestions");
          overview.suggestions.forEach((suggestion) => {
            out.push(`- ${suggestion}`);
          });
          out.push("");
        }

        out.push(
          "Next: choose a region (by selector or [sectionId]) and call dom_extract_region for actionable details."
        );
        return wrapXml(out.join("\n"), meta, "outline");
      }

      /**
       * Format a region extraction result: page state, counts, selector
       * quality and the interactive elements themselves.
       *
       * @param {object} result - Extraction result with `interactive` and optional `page`.
       * @param {object} [opts] - Formatting options forwarded to `renderInteractive`.
       * @param {{title?: string, url?: string}} [meta] - Optional page metadata.
       * @returns {string} XML-wrapped markdown with a `<section>` element.
       */
      static region(result, opts = {}, meta) {
        const out = ["# Region Details"];
        if (meta?.title || meta?.url) {
          out.push(`Title: ${meta?.title ?? ""}`.trim(), `URL: ${meta?.url ?? ""}`.trim());
        }
        out.push("");

        const inter = result.interactive;
        if (result.page) {
          const yesNo = (flag) => (flag ? "yes" : "no");
          const { hasErrors, isLoading, hasModals } = result.page;
          out.push(
            `Page state: errors: ${yesNo(hasErrors)}, loading: ${yesNo(isLoading)}, modals: ${yesNo(hasModals)}`
          );
        }

        const sizeOf = (items) => (items ? items.length : 0);
        const counts = [
          `${sizeOf(inter.buttons)} buttons`,
          `${sizeOf(inter.links)} links`,
          `${sizeOf(inter.inputs)} inputs`
        ];
        // Forms are only mentioned when at least one exists.
        if (inter.forms?.length) counts.push(`${sizeOf(inter.forms)} forms`);
        out.push(`Summary: ${counts.join(", ")}`);
        out.push(selectorQualitySummary(inter));
        out.push("");
        out.push(renderInteractive(inter, opts));
        out.push("");
        out.push(
          "Next: write a script using the most stable selectors above. If selectors look unstable, rerun dom_extract_region with higher detail or call dom_extract_content for text context."
        );
        return wrapXml(out.join("\n"), meta, "section");
      }

      /**
       * Format extracted readable content (headings, paragraphs, lists,
       * tables, media).
       *
       * `opts.maxElements`, when numeric, caps paragraphs, items per list,
       * rows per table and media entries; headings are not capped.
       *
       * @param {object} content - Content result with `selector`, `text`,
       *   and optional `tables` / `media`.
       * @param {object} [opts] - `maxElements` and `maxTextLength`.
       * @param {{title?: string, url?: string}} [meta] - Optional page metadata.
       * @returns {string} XML-wrapped markdown with a `<content>` element.
       */
      static content(content, opts = {}, meta) {
        const capped = (items) =>
          items.slice(0, typeof opts.maxElements === "number" ? opts.maxElements : items.length);
        const out = ["# Content", `Selector: \`${content.selector}\``, ""];

        if (content.text.headings?.length) {
          out.push("Headings:");
          content.text.headings.forEach((heading) => {
            out.push(`- H${heading.level}: ${truncate(heading.text, opts.maxTextLength ?? 120)}`);
          });
          out.push("");
        }

        if (content.text.paragraphs?.length) {
          const shown = capped(content.text.paragraphs);
          out.push("Paragraphs:");
          shown.forEach((paragraph) => {
            out.push(`- ${truncate(paragraph, opts.maxTextLength ?? 200)}`);
          });
          out.push("");
        }

        if (content.text.lists?.length) {
          out.push("Lists:");
          content.text.lists.forEach((list) => {
            out.push(`- ${list.type.toUpperCase()}:`);
            capped(list.items).forEach((item) => {
              out.push(` - ${truncate(item, opts.maxTextLength ?? 120)}`);
            });
          });
          out.push("");
        }

        if (content.tables?.length) {
          out.push("Tables:");
          content.tables.forEach((table) => {
            out.push(`- Headers: ${table.headers.join(" | ")}`);
            capped(table.rows).forEach((row) => {
              out.push(` - ${row.join(" | ")}`);
            });
          });
          out.push("");
        }

        if (content.media?.length) {
          out.push("Media:");
          capped(content.media).forEach((item) => {
            const target = item.src ? `\u2192 ${item.src}` : "";
            out.push(`- ${item.type.toUpperCase()}: ${item.alt ?? ""} ${target}`.trim());
          });
          out.push("");
        }

        out.push(
          "Next: if text is insufficient for targeting, call dom_extract_region for interactive selectors."
        );
        return wrapXml(out.join("\n"), meta, "content");
      }
    };
  }
});
1128
+
1129
+ // src/progressive.ts
1130
/**
 * Locate a SmartDOMReader constructor at runtime.
 *
 * Lookup order:
 *   1. `window.SmartDOMReader`
 *   2. `window.SmartDOMReaderNamespace.SmartDOMReader`
 *   3. this bundle's own index module (`SmartDOMReader`, then `default`)
 *
 * @returns {Function|undefined} The constructor, or `undefined` when none is found.
 */
function resolveSmartDomReader() {
  const isCtor = (candidate) => typeof candidate === "function";

  if (typeof window !== "undefined") {
    const host = window;
    const fromGlobal = host.SmartDOMReader;
    if (isCtor(fromGlobal)) {
      return fromGlobal;
    }
    const ns = host.SmartDOMReaderNamespace;
    if (ns && isCtor(ns.SmartDOMReader)) {
      return ns.SmartDOMReader;
    }
  }

  try {
    if (typeof __require === "function") {
      const bundled = (init_index(), __toCommonJS(index_exports));
      if (bundled) {
        if (isCtor(bundled.SmartDOMReader)) {
          return bundled.SmartDOMReader;
        }
        if (isCtor(bundled.default)) {
          return bundled.default;
        }
      }
    }
  } catch {
    // Best-effort lookup: a failure here just means "not available".
  }
  return void 0;
}
1156
var ProgressiveExtractor;
var init_progressive = __esm({
  "src/progressive.ts"() {
    "use strict";
    init_content_detection();
    init_selectors();
    init_traversal();

    // Elements reported as sub-sections of the main content.
    const SECTION_SELECTOR = 'section, article, [role="region"]';
    // Sections nested inside any of these ancestors are skipped.
    const SECTION_EXCLUDED_ANCESTORS = "nav, header, footer";
    // Ancestor selector -> reported form location, checked in this order.
    const FORM_LOCATIONS = [
      ['header, [role="banner"]', "header"],
      ['nav, [role="navigation"]', "navigation"],
      ['main, [role="main"]', "main"],
      ['aside, [role="complementary"]', "sidebar"],
      ['footer, [role="contentinfo"]', "footer"]
    ];

    /**
     * True when ANY element matching ANY of the selectors is visible.
     * Inspecting every match matters: a hidden first match (e.g. an unused
     * `.error` placeholder) must not mask a visible one later in the DOM.
     */
    const hasVisibleMatch = (root, selectors) =>
      selectors.some((sel) =>
        Array.from(root.querySelectorAll(sel)).some((el) => DOMTraversal.isVisible(el))
      );

    /**
     * Three-step progressive extraction: structure overview, region details,
     * readable content. All methods are static.
     */
    ProgressiveExtractor = class {
      /**
       * Step 1: Extract high-level structural overview
       * This provides a "map" of the page for the AI to understand structure
       *
       * @param {Document|Element} root - Document or element to analyze.
       * @returns {{regions: object, forms: object[], summary: object, suggestions: string[]}}
       */
      static extractStructure(root) {
        const regions = {};

        // Returns undefined when the scope has no candidate sections at all,
        // otherwise the analyzed sections that pass the ancestor filter.
        const analyzeSections = (scope) => {
          const found = scope.querySelectorAll(SECTION_SELECTOR);
          if (found.length === 0) return void 0;
          return Array.from(found)
            .filter((section) => !section.closest(SECTION_EXCLUDED_ANCESTORS))
            .map((section) => this.analyzeRegion(section));
        };

        const header = root.querySelector('header, [role="banner"], .header, #header');
        if (header) {
          regions.header = this.analyzeRegion(header);
        }

        const navs = root.querySelectorAll('nav, [role="navigation"], .nav, .navigation');
        if (navs.length > 0) {
          regions.navigation = Array.from(navs).map((nav) => this.analyzeRegion(nav));
        }

        // For a document, main content is detected heuristically; for an
        // element, the element itself is treated as the main region.
        const mainElement = root instanceof Document ? ContentDetection.findMainContent(root) : root;
        if (mainElement) {
          regions.main = this.analyzeRegion(mainElement);
          const sections = analyzeSections(mainElement);
          if (sections) {
            regions.sections = sections;
          }
        }

        const sidebars = root.querySelectorAll('aside, [role="complementary"], .sidebar, #sidebar');
        if (sidebars.length > 0) {
          regions.sidebar = Array.from(sidebars).map((sidebar) => this.analyzeRegion(sidebar));
        }

        const footer = root.querySelector('footer, [role="contentinfo"], .footer, #footer');
        if (footer) {
          regions.footer = this.analyzeRegion(footer);
        }

        // Only modals that are currently visible are reported.
        const visibleModals = Array.from(
          root.querySelectorAll('[role="dialog"], .modal, .popup, .overlay')
        ).filter((modal) => DOMTraversal.isVisible(modal));
        if (visibleModals.length > 0) {
          regions.modals = visibleModals.map((modal) => this.analyzeRegion(modal));
        }

        const forms = this.extractFormOverview(root);
        const summary = this.calculateSummary(root, regions, forms);
        const suggestions = this.generateSuggestions(regions, summary);
        return { regions, forms, summary, suggestions };
      }

      /**
       * Step 2: Extract detailed information from a specific region
       *
       * @param {string} selector - CSS selector of the region.
       * @param {Document} doc - Document (or any node with `querySelector`) to search.
       * @param {object} [options] - Passed to both the reader constructor and `extract`.
       * @param {Function} [smartDomReaderCtor] - Reader constructor; resolved
       *   via `resolveSmartDomReader()` when omitted.
       * @returns {object|null} Extraction result, or `null` when nothing matches.
       * @throws {Error} When no SmartDOMReader constructor can be resolved.
       */
      static extractRegion(selector, doc, options = {}, smartDomReaderCtor) {
        const element = doc.querySelector(selector);
        if (!element) return null;
        const SmartDOMReaderCtor = smartDomReaderCtor ?? resolveSmartDomReader();
        if (!SmartDOMReaderCtor) {
          throw new Error(
            "SmartDOMReader is unavailable. Ensure the Smart DOM Reader module is loaded before calling extractRegion."
          );
        }
        const reader = new SmartDOMReaderCtor(options);
        return reader.extract(element, options);
      }

      /**
       * Step 3: Extract readable content from a region
       *
       * Headings, lists, tables and media are included unless the matching
       * `include*` option is explicitly `false`; paragraphs are always included.
       *
       * @param {string} selector - CSS selector of the region.
       * @param {Document} doc - Document (or any node with `querySelector`) to search.
       * @param {object} [options] - `includeHeadings`, `includeLists`,
       *   `includeTables`, `includeMedia`, `maxTextLength`.
       * @returns {object|null} Content result, or `null` when nothing matches.
       */
      static extractContent(selector, doc, options = {}) {
        const element = doc.querySelector(selector);
        if (!element) return null;

        const result = {
          selector,
          text: {},
          metadata: {
            wordCount: 0,
            hasInteractive: false
          }
        };
        const textOf = (node) => this.getTextContent(node, options.maxTextLength);

        if (options.includeHeadings !== false) {
          result.text.headings = Array.from(
            element.querySelectorAll("h1, h2, h3, h4, h5, h6")
          ).map((h) => ({
            // tagName is "H1".."H6"; the second character is the level.
            level: Number.parseInt(h.tagName[1], 10),
            text: textOf(h)
          }));
        }

        const paragraphs = element.querySelectorAll("p");
        if (paragraphs.length > 0) {
          result.text.paragraphs = Array.from(paragraphs)
            .map(textOf)
            .filter((text) => text.length > 0);
        }

        if (options.includeLists !== false) {
          result.text.lists = Array.from(element.querySelectorAll("ul, ol")).map((list) => ({
            type: list.tagName.toLowerCase(),
            items: Array.from(list.querySelectorAll("li")).map(textOf)
          }));
        }

        if (options.includeTables !== false) {
          // Table cells are never truncated.
          result.tables = Array.from(element.querySelectorAll("table")).map((table) => {
            const headers = Array.from(table.querySelectorAll("th")).map(
              (th) => this.getTextContent(th)
            );
            const rows = Array.from(table.querySelectorAll("tr"))
              .filter((tr) => tr.querySelector("td"))
              .map((tr) =>
                Array.from(tr.querySelectorAll("td")).map((td) => this.getTextContent(td))
              );
            return { headers, rows };
          });
        }

        if (options.includeMedia !== false) {
          result.media = [
            ...Array.from(element.querySelectorAll("img")).map((img) => ({
              type: "img",
              alt: img.getAttribute("alt") || void 0,
              src: img.getAttribute("src") || void 0
            })),
            ...Array.from(element.querySelectorAll("video")).map((video) => ({
              type: "video",
              src: video.getAttribute("src") || void 0
            })),
            ...Array.from(element.querySelectorAll("audio")).map((audio) => ({
              type: "audio",
              src: audio.getAttribute("src") || void 0
            }))
          ];
        }

        // "".split(/\s+/) yields [""], so empty text must be special-cased
        // to report zero words instead of one.
        const trimmedText = (element.textContent || "").trim();
        result.metadata.wordCount = trimmedText.length > 0 ? trimmedText.split(/\s+/).length : 0;
        result.metadata.hasInteractive =
          element.querySelectorAll("button, a, input, textarea, select").length > 0;
        return result;
      }

      /**
       * Analyze a region and extract summary information
       *
       * The label is taken from `aria-label`, else from the element(s)
       * referenced by `aria-labelledby`, else from the first h1-h3 inside.
       *
       * @param {Element} element - Region root.
       * @returns {object} Region info: selector, label, role, counts, flags
       *   and a text preview of at most 50 characters plus "...".
       */
      static analyzeRegion(element) {
        const selector = SelectorGenerator.generateSelectors(element).css;
        const buttons = element.querySelectorAll('button, [role="button"]');
        const links = element.querySelectorAll("a[href]");
        const inputs = element.querySelectorAll("input, textarea, select");
        const forms = element.querySelectorAll("form");
        const lists = element.querySelectorAll("ul, ol");
        const tables = element.querySelectorAll("table");
        const media = element.querySelectorAll("img, video, audio");
        const interactiveCount = buttons.length + links.length + inputs.length;

        let label;
        const ariaLabel = element.getAttribute("aria-label");
        const labelledBy = element.getAttribute("aria-labelledby");
        if (ariaLabel) {
          label = ariaLabel;
        } else if (labelledBy) {
          // aria-labelledby is a whitespace-separated list of ids; join the
          // text of every referenced element that exists and has text.
          const owner = element.ownerDocument;
          const parts = labelledBy
            .trim()
            .split(/\s+/)
            .map((id) => owner?.getElementById(id)?.textContent?.trim())
            .filter(Boolean);
          if (parts.length > 0) {
            label = parts.join(" ");
          }
        }
        if (!label) {
          // Also reached when aria-labelledby could not be resolved.
          const heading = element.querySelector("h1, h2, h3");
          if (heading) {
            label = heading.textContent?.trim();
          }
        }

        const textContent = element.textContent?.trim() || "";
        const textPreview =
          textContent.length > 50 ? textContent.substring(0, 50) + "..." : textContent;

        return {
          selector,
          label,
          role: element.getAttribute("role") || void 0,
          interactiveCount,
          hasForm: forms.length > 0,
          hasList: lists.length > 0,
          hasTable: tables.length > 0,
          hasMedia: media.length > 0,
          buttonCount: buttons.length > 0 ? buttons.length : void 0,
          linkCount: links.length > 0 ? links.length : void 0,
          inputCount: inputs.length > 0 ? inputs.length : void 0,
          textPreview: textPreview.length > 0 ? textPreview : void 0
        };
      }

      /**
       * Extract overview of forms on the page
       *
       * @param {Document|Element} root - Scope to search.
       * @returns {object[]} One `{selector, location, inputCount, purpose}` per form;
       *   `purpose` is a heuristic guess and may be `undefined`.
       */
      static extractFormOverview(root) {
        return Array.from(root.querySelectorAll("form")).map((form) => {
          const inputs = form.querySelectorAll("input, textarea, select");
          const selector = SelectorGenerator.generateSelectors(form).css;

          const located = FORM_LOCATIONS.find(([ancestor]) => form.closest(ancestor));
          const location = located ? located[1] : "unknown";

          let purpose;
          const formId = form.getAttribute("id")?.toLowerCase();
          const formClass = form.getAttribute("class")?.toLowerCase();
          const formAction = form.getAttribute("action")?.toLowerCase();
          const hasEmail = form.querySelector('input[type="email"]');
          const hasPassword = form.querySelector('input[type="password"]');
          const hasSearch = form.querySelector('input[type="search"]');
          if (hasSearch || formId?.includes("search") || formClass?.includes("search")) {
            purpose = "search";
          } else if (hasPassword && hasEmail) {
            purpose = "login";
          } else if (hasPassword) {
            purpose = "authentication";
          } else if (formId?.includes("contact") || formClass?.includes("contact")) {
            purpose = "contact";
          } else if (formId?.includes("subscribe") || formClass?.includes("subscribe")) {
            purpose = "subscription";
          } else if (formAction?.includes("checkout") || formClass?.includes("checkout")) {
            purpose = "checkout";
          }

          return {
            selector,
            location,
            inputCount: inputs.length,
            purpose
          };
        });
      }

      /**
       * Calculate summary statistics
       *
       * @param {Document|Element} root - Scope to count in.
       * @param {object} regions - Regions produced by `extractStructure`.
       * @param {object[]} forms - Forms produced by `extractFormOverview`.
       * @returns {object} Totals plus `hasModals`, `hasErrors`, `isLoading`
       *   and `mainContentSelector`.
       */
      static calculateSummary(root, regions, forms) {
        const allInteractive = root.querySelectorAll("button, a[href], input, textarea, select");
        const allSections = root.querySelectorAll(SECTION_SELECTOR);
        const hasModals = (regions.modals?.length || 0) > 0;
        const hasErrors = hasVisibleMatch(root, [".error", ".alert-danger", '[role="alert"]']);
        const isLoading = hasVisibleMatch(root, [".loading", ".spinner", '[aria-busy="true"]']);
        const mainContentSelector = regions.main?.selector;
        return {
          totalInteractive: allInteractive.length,
          totalForms: forms.length,
          totalSections: allSections.length,
          hasModals,
          hasErrors,
          isLoading,
          mainContentSelector
        };
      }

      /**
       * Generate AI-friendly suggestions
       *
       * @param {object} regions - Regions produced by `extractStructure`.
       * @param {object} summary - Summary produced by `calculateSummary`.
       * @returns {string[]} Human-readable hints; may be empty.
       */
      static generateSuggestions(regions, summary) {
        const suggestions = [];
        if (summary.hasErrors) {
          suggestions.push("Page has error indicators - check error messages before interacting");
        }
        if (summary.isLoading) {
          suggestions.push("Page appears to be loading - wait or check loading state");
        }
        if (summary.hasModals) {
          suggestions.push("Modal/dialog is open - may need to interact with or close it first");
        }
        if (regions.main && regions.main.interactiveCount > 10) {
          suggestions.push(
            `Main content has ${regions.main.interactiveCount} interactive elements - consider filtering`
          );
        }
        if (summary.totalForms > 0) {
          suggestions.push(`Found ${summary.totalForms} form(s) on the page`);
        }
        if (!regions.main) {
          suggestions.push("No clear main content area detected - may need to explore regions");
        }
        return suggestions;
      }

      /**
       * Get text content with optional truncation
       *
       * @param {Element} element - Node to read `textContent` from.
       * @param {number} [maxLength] - When truthy, longer text is cut to this
       *   length and suffixed with "...".
       * @returns {string} Trimmed (and possibly truncated) text.
       */
      static getTextContent(element, maxLength) {
        const text = element.textContent?.trim() || "";
        if (maxLength && text.length > maxLength) {
          return text.substring(0, maxLength) + "...";
        }
        return text;
      }
    };
  }
});
1460
+
1461
+ // src/types.ts
1462
// Lazy initializer for src/types.ts; the module body contains no runtime code.
var init_types = __esm({
  "src/types.ts": function () {
    "use strict";
  }
});
1467
+
1468
+ // src/index.ts
1469
// Namespace object for src/index.ts. Each property is a getter, so it reads
// the current value of the corresponding module-level binding on access.
var index_exports = {};
{
  const liveBindings = {
    ContentDetection: () => ContentDetection,
    MarkdownFormatter: () => MarkdownFormatter,
    ProgressiveExtractor: () => ProgressiveExtractor,
    SelectorGenerator: () => SelectorGenerator,
    SmartDOMReader: () => SmartDOMReader,
    default: () => index_default
  };
  __export(index_exports, liveBindings);
}
1478
var SmartDOMReader, index_default;
var init_index = __esm({
  "src/index.ts"() {
    // Repeated initializer calls mirror the bundler output.
    init_content_detection();
    init_selectors();
    init_traversal();
    init_content_detection();
    init_markdown_formatter();
    init_progressive();
    init_selectors();
    init_types();

    /**
     * True when ANY element matching ANY of the selectors is visible.
     * Every match is inspected so that a hidden first match does not mask a
     * visible one further down the document.
     */
    const anyVisible = (doc, selectors) =>
      selectors.some((sel) =>
        Array.from(doc.querySelectorAll(sel)).some((el) => DOMTraversal.isVisible(el))
      );

    /**
     * Extract every element under `container` that matches `selector` and
     * passes `reader.shouldIncludeElement`. Elements for which
     * `DOMTraversal.extractElement` returns a falsy value are skipped.
     */
    const collectExtracted = (reader, container, selector, options) => {
      const collected = [];
      container.querySelectorAll(selector).forEach((el) => {
        if (!reader.shouldIncludeElement(el, options)) return;
        const extracted = DOMTraversal.extractElement(el, options);
        if (extracted) collected.push(extracted);
      });
      return collected;
    };

    /**
     * Single-pass DOM extractor: page state, landmarks and interactive
     * elements, plus semantic elements and metadata in "full" mode.
     */
    SmartDOMReader = class _SmartDOMReader {
      options;
      /**
       * @param {object} [options] - Extraction options; unspecified values
       *   fall back to the defaults below.
       */
      constructor(options = {}) {
        this.options = {
          mode: options.mode || "interactive",
          maxDepth: options.maxDepth || 5,
          includeHidden: options.includeHidden || false,
          // Defaults to true, but an explicit `false` must be honored;
          // `|| true` made this option impossible to turn off.
          includeShadowDOM: options.includeShadowDOM ?? true,
          includeIframes: options.includeIframes || false,
          viewportOnly: options.viewportOnly || false,
          mainContentOnly: options.mainContentOnly || false,
          customSelectors: options.customSelectors || [],
          attributeTruncateLength: options.attributeTruncateLength,
          dataAttributeTruncateLength: options.dataAttributeTruncateLength,
          textTruncateLength: options.textTruncateLength,
          filter: options.filter
        };
      }

      /**
       * Main extraction method - extracts all data in one pass
       * @param rootElement The document or element to extract from
       * @param runtimeOptions Options to override constructor options
       * @returns {object} Result with `mode`, `timestamp`, `page`,
       *   `landmarks`, `interactive`, and in "full" mode `semantic` and `metadata`.
       */
      extract(rootElement = document, runtimeOptions) {
        const startTime = Date.now();
        const rootIsDocument = rootElement instanceof Document;
        const doc = rootIsDocument ? rootElement : rootElement.ownerDocument;
        const options = { ...this.options, ...runtimeOptions };

        let container = rootElement;
        if (options.mainContentOnly && rootIsDocument) {
          // Main-content detection may find nothing; fall back to the whole
          // document instead of failing on a null container below.
          container = ContentDetection.findMainContent(doc) || doc;
        }

        const pageState = this.extractPageState(doc);
        const landmarks = this.extractLandmarks(doc);
        const interactive = this.extractInteractiveElements(container, options);
        const result = {
          mode: options.mode,
          timestamp: startTime,
          page: pageState,
          landmarks,
          interactive
        };
        if (options.mode === "full") {
          result.semantic = this.extractSemanticElements(container, options);
          result.metadata = this.extractMetadata(doc, container, options);
        }
        return result;
      }

      /**
       * Extract page state information
       * @param {Document} doc
       * @returns {object} URL, title, error/loading/modal flags and the
       *   selector of the focused element (if any).
       */
      extractPageState(doc) {
        return {
          url: doc.location?.href || "",
          title: doc.title || "",
          hasErrors: this.detectErrors(doc),
          isLoading: this.detectLoading(doc),
          hasModals: this.detectModals(doc),
          hasFocus: this.getFocusedElement(doc)
        };
      }

      /**
       * Extract page landmarks
       * @param {Document} doc
       * @returns {object} Selector lists keyed by landmark kind.
       */
      extractLandmarks(doc) {
        const detected = ContentDetection.detectLandmarks(doc);
        return {
          navigation: this.elementsToSelectors(detected.navigation || []),
          main: this.elementsToSelectors(detected.main || []),
          forms: this.elementsToSelectors(detected.form || []),
          headers: this.elementsToSelectors(detected.banner || []),
          footers: this.elementsToSelectors(detected.contentinfo || []),
          // NOTE(review): `articles` and `sections` are both built from
          // `detected.region` - confirm whether articles should use a
          // different landmark key.
          articles: this.elementsToSelectors(detected.region || []),
          sections: this.elementsToSelectors(detected.region || [])
        };
      }

      /**
       * Convert elements to selector strings
       * @param {Element[]} elements
       * @returns {string[]} CSS selector for each element.
       */
      elementsToSelectors(elements) {
        return elements.map((el) => SelectorGenerator.generateSelectors(el).css);
      }

      /**
       * Extract interactive elements
       * @param {Document|Element} container - Scope to search.
       * @param {object} options - Effective extraction options.
       * @returns {{buttons: object[], links: object[], inputs: object[], forms: object[], clickable: object[]}}
       */
      extractInteractiveElements(container, options) {
        const buttons = collectExtracted(
          this,
          container,
          'button, [role="button"], input[type="button"], input[type="submit"]',
          options
        );
        const links = collectExtracted(this, container, "a[href]", options);
        // Button-like inputs are excluded here because they are reported as buttons.
        const inputs = collectExtracted(
          this,
          container,
          'input:not([type="button"]):not([type="submit"]), textarea, select',
          options
        );
        // Caller-supplied selectors feed the generic "clickable" bucket.
        const clickable = (options.customSelectors || []).flatMap((selector) =>
          collectExtracted(this, container, selector, options)
        );
        const forms = this.extractForms(container, options);
        return {
          buttons,
          links,
          inputs,
          forms,
          clickable
        };
      }

      /**
       * Extract form information
       *
       * The form itself must pass `shouldIncludeElement`; its inputs and
       * buttons are extracted without that check.
       *
       * @param {Document|Element} container - Scope to search.
       * @param {object} options - Effective extraction options.
       * @returns {object[]} One `{selector, action, method, inputs, buttons}` per form.
       */
      extractForms(container, options) {
        const extractAll = (scope, selector) => {
          const collected = [];
          scope.querySelectorAll(selector).forEach((el) => {
            const extracted = DOMTraversal.extractElement(el, options);
            if (extracted) collected.push(extracted);
          });
          return collected;
        };

        const forms = [];
        container.querySelectorAll("form").forEach((form) => {
          if (!this.shouldIncludeElement(form, options)) return;
          const formInputs = extractAll(
            form,
            'input:not([type="button"]):not([type="submit"]), textarea, select'
          );
          const formButtons = extractAll(
            form,
            'button, input[type="button"], input[type="submit"]'
          );
          forms.push({
            selector: SelectorGenerator.generateSelectors(form).css,
            action: form.getAttribute("action") || void 0,
            method: form.getAttribute("method") || void 0,
            inputs: formInputs,
            buttons: formButtons
          });
        });
        return forms;
      }

      /**
       * Extract semantic elements (full mode only)
       * @param {Document|Element} container - Scope to search.
       * @param {object} options - Effective extraction options.
       * @returns {{headings: object[], images: object[], tables: object[], lists: object[], articles: object[]}}
       */
      extractSemanticElements(container, options) {
        return {
          headings: collectExtracted(this, container, "h1, h2, h3, h4, h5, h6", options),
          images: collectExtracted(this, container, "img", options),
          tables: collectExtracted(this, container, "table", options),
          lists: collectExtracted(this, container, "ul, ol", options),
          articles: collectExtracted(this, container, 'article, [role="article"]', options)
        };
      }

      /**
       * Extract metadata
       * @param {Document} doc
       * @param {Document|Element} container - Scope that was extracted.
       * @param {object} options - Effective extraction options.
       * @returns {object} Element totals, the main-content selector (only with
       *   `mainContentOnly` and an element container) and the document language.
       */
      extractMetadata(doc, container, options) {
        const allElements = container.querySelectorAll("*");
        const extractedElements = container.querySelectorAll(
          "button, a, input, textarea, select, h1, h2, h3, h4, h5, h6, img, table, ul, ol, article"
        ).length;
        return {
          totalElements: allElements.length,
          extractedElements,
          mainContent: options.mainContentOnly && container instanceof Element ? SelectorGenerator.generateSelectors(container).css : void 0,
          language: doc.documentElement.getAttribute("lang") || void 0
        };
      }

      /**
       * Check if element should be included based on options
       * @param {Element} element
       * @param {object} options - Reads `includeHidden`, `viewportOnly`, `filter`.
       * @returns {boolean} False as soon as one enabled check rejects the element.
       */
      shouldIncludeElement(element, options) {
        if (!options.includeHidden && !DOMTraversal.isVisible(element)) {
          return false;
        }
        if (options.viewportOnly && !DOMTraversal.isInViewport(element)) {
          return false;
        }
        if (options.filter && !DOMTraversal.passesFilter(element, options.filter)) {
          return false;
        }
        return true;
      }

      /**
       * Detect errors on the page
       * @param {Document} doc
       * @returns {boolean} True when any error-like element is visible.
       */
      detectErrors(doc) {
        return anyVisible(doc, [".error", ".alert-danger", '[role="alert"]', ".error-message"]);
      }

      /**
       * Detect if page is loading
       * @param {Document} doc
       * @returns {boolean} True when any loading indicator is visible.
       */
      detectLoading(doc) {
        return anyVisible(doc, [".loading", ".spinner", '[aria-busy="true"]', ".loader"]);
      }

      /**
       * Detect modal dialogs
       * @param {Document} doc
       * @returns {boolean} True when any modal-like element is visible.
       */
      detectModals(doc) {
        return anyVisible(doc, ['[role="dialog"]', ".modal", ".popup", ".overlay"]);
      }

      /**
       * Get currently focused element
       * @param {Document} doc
       * @returns {string|undefined} Selector of the active element, or
       *   `undefined` when nothing other than `<body>` has focus.
       */
      getFocusedElement(doc) {
        const focused = doc.activeElement;
        if (focused && focused !== doc.body) {
          return SelectorGenerator.generateSelectors(focused).css;
        }
        return void 0;
      }

      // ===== Static convenience methods =====
      /**
       * Quick extraction for interactive elements only
       * @param doc The document to extract from
       * @param options Extraction options
       */
      static extractInteractive(doc, options = {}) {
        const reader = new _SmartDOMReader({
          ...options,
          mode: "interactive"
        });
        return reader.extract(doc);
      }

      /**
       * Quick extraction for full content
       * @param doc The document to extract from
       * @param options Extraction options
       */
      static extractFull(doc, options = {}) {
        const reader = new _SmartDOMReader({
          ...options,
          mode: "full"
        });
        return reader.extract(doc);
      }

      /**
       * Extract from a specific element
       * @param element The element to extract from
       * @param mode The extraction mode
       * @param options Additional options
       */
      static extractFromElement(element, mode = "interactive", options = {}) {
        const reader = new _SmartDOMReader({
          ...options,
          mode
        });
        return reader.extract(element);
      }
    };
    index_default = SmartDOMReader;
  }
});
1814
+ init_index();
1815
+ export {
1816
+ ContentDetection,
1817
+ MarkdownFormatter,
1818
+ ProgressiveExtractor,
1819
+ SelectorGenerator,
1820
+ SmartDOMReader,
1821
+ index_default as default
1822
+ };
1823
+ //# sourceMappingURL=index.js.map