btcp-browser-agent 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,11 @@
3
3
  *
4
4
  * Generates a flat accessibility snapshot of the DOM.
5
5
  * Produces a compact, AI-friendly list of interactive elements.
6
+ *
7
+ * Supports three modes:
8
+ * - 'interactive': Find clickable elements (default)
9
+ * - 'outline': Understand page structure with xpaths + metadata
10
+ * - 'content': Extract text content from sections
6
11
  */
7
12
  /**
8
13
  * Get HTML element constructors from window (works in both browser and jsdom)
@@ -573,11 +578,634 @@ function buildSemanticXPath(element) {
573
578
  }
574
579
  return '/' + parts.join('/');
575
580
  }
581
// ARIA landmark roles that the outline and content modes treat as
// structural page sections.
const LANDMARK_ROLES = new Set([
    'banner',
    'main',
    'contentinfo',
    'navigation',
    'complementary',
    'region',
]);
585
/**
 * Count whitespace-separated words in a string.
 *
 * @param {string|null|undefined} text - Text to measure; nullish input is treated as empty.
 * @returns {number} Number of runs of non-whitespace characters (0 for empty or blank input).
 */
function countWords(text) {
    // Matching \S+ runs needs no trimming and no filtering of empty fragments.
    const words = String(text ?? '').match(/\S+/g);
    return words ? words.length : 0;
}
591
/**
 * Read an element's text content with all whitespace runs collapsed to a
 * single space and the ends trimmed.
 *
 * @param {{textContent?: string|null}|null|undefined} element - Node to read; a missing node yields ''.
 * @param {number} [maxLength] - Optional upper bound on the returned length.
 *   When the text is longer it is cut and terminated with '...'.
 * @returns {string} The normalized (and possibly truncated) text.
 */
function getCleanTextContent(element, maxLength) {
    const ELLIPSIS = '...';
    const text = (element?.textContent || '').replace(/\s+/g, ' ').trim();
    if (!(maxLength > 0) || text.length <= maxLength) {
        return text;
    }
    // With a budget smaller than the ellipsis, `maxLength - 3` would be
    // negative and slice() would count from the end, returning MORE than
    // maxLength characters. Hard-cut instead.
    if (maxLength < ELLIPSIS.length) {
        return text.slice(0, maxLength);
    }
    return text.slice(0, maxLength - ELLIPSIS.length) + ELLIPSIS;
}
601
/**
 * Count the descendants of an element whose tag name is in a given list.
 *
 * The element itself is never counted: only elements nested below it
 * (at any depth) are considered.
 *
 * @param {{children: Iterable<object>}} element - Root of the subtree to scan.
 * @param {string[]} tagNames - Tag names to count (case-insensitive).
 * @returns {number} Number of matching descendants.
 */
function countChildElements(element, tagNames) {
    const wanted = new Set(tagNames.map((tag) => tag.toUpperCase()));
    let count = 0;
    // Explicit stack instead of recursion so very deep DOM trees cannot
    // exhaust the call stack. Visit order is irrelevant for counting.
    const pending = [];
    for (const child of element.children) {
        pending.push(child);
    }
    while (pending.length > 0) {
        const current = pending.pop();
        if (wanted.has(current.tagName)) {
            count++;
        }
        for (const child of current.children) {
            pending.push(child);
        }
    }
    return count;
}
616
/**
 * Collect the text of the `<li>` elements found under an element.
 *
 * Each item's text is whitespace-normalized and capped at 100 characters.
 * Items whose text is empty are skipped and do not count toward the limit.
 *
 * @param {Element} element - List (or any container) to search with `querySelectorAll('li')`.
 * @param {number} [maxItems=10] - Maximum number of strings to return.
 * @returns {string[]} Up to `maxItems` non-empty item texts, in document order.
 */
function getListItems(element, maxItems = 10) {
    const items = [];
    for (const listItem of element.querySelectorAll('li')) {
        if (items.length >= maxItems) {
            break;
        }
        const text = getCleanTextContent(listItem, 100);
        if (text) {
            items.push(text);
        }
    }
    return items;
}
629
/**
 * Detect the programming language of a code element from `language-*` /
 * `lang-*` class names.
 *
 * Looks, in order, at the element itself, at the first `<code>` nested
 * inside it (the usual `<pre><code class="language-x">` markup), and at
 * the nearest enclosing `<pre>`/`<code>` ancestor.
 *
 * @param {Element} element - Code element to inspect.
 * @returns {string|null} Lower-cased language name, or null when no hint is found.
 */
function detectCodeLanguage(element) {
    // `+` and `#` are accepted so that hints such as "c++" or "c#" are not cut short.
    const languageHint = /(?:language-|lang-)([\w+#]+)/i;
    const languageOf = (node) => {
        const match = (node?.className?.toString() || '').match(languageHint);
        return match ? match[1].toLowerCase() : null;
    };
    const ownLanguage = languageOf(element);
    if (ownLanguage) {
        return ownLanguage;
    }
    const nestedLanguage = languageOf(element.querySelector('code'));
    if (nestedLanguage) {
        return nestedLanguage;
    }
    // closest() also tests the element it is called on, so start from the
    // parent; otherwise a <pre> or <code> would only ever find itself.
    return languageOf(element.parentElement?.closest('pre, code'));
}
648
/**
 * Build the bracketed metadata summary shown next to a section in outline
 * mode, e.g. `[120 words, 3 links, 4 paragraphs]`.
 *
 * Reports the word count, the number of `a[href]` links, paragraphs (only
 * when more than one), list items and code blocks found under the element.
 *
 * @param {Element} element - Section element to summarize.
 * @returns {string} The summary, or '' when there is nothing to report.
 */
function buildOutlineMetadata(element) {
    const parts = [];
    const wordCount = countWords(element.textContent || '');
    if (wordCount > 0) {
        parts.push(`${wordCount} words`);
    }
    const linkCount = element.querySelectorAll('a[href]').length;
    if (linkCount > 0) {
        parts.push(`${linkCount} links`);
    }
    const paragraphCount = countChildElements(element, ['P']);
    if (paragraphCount > 1) {
        parts.push(`${paragraphCount} paragraphs`);
    }
    const itemCount = countChildElements(element, ['LI']);
    if (itemCount > 0) {
        parts.push(`${itemCount} items`);
    }
    // A <code> nested in a <pre> is the same block as that <pre>; counting
    // both would report every fenced block twice.
    let codeCount = 0;
    for (const node of element.querySelectorAll('pre, code')) {
        if (node.tagName === 'PRE' || !node.closest('pre')) {
            codeCount++;
        }
    }
    if (codeCount > 0) {
        parts.push(`${codeCount} code`);
    }
    return parts.length > 0 ? `[${parts.join(', ')}]` : '';
}
672
/**
 * Derive a human-readable name for a section element.
 *
 * Sources are tried in this order and the first non-empty one wins:
 * `aria-label`, the text of the elements referenced by `aria-labelledby`,
 * the element id (shorter than 30 characters and not starting with a
 * digit), the semantic class reported by `getSemanticClass`, and finally
 * the first heading found inside the element.
 *
 * @param {Element} element - Section element to name.
 * @returns {string} The derived name, or '' when no source yields one.
 */
function getSectionName(element) {
    // Trim before testing so a whitespace-only label falls through to the
    // next source instead of producing an empty name.
    const ariaLabel = element.getAttribute('aria-label')?.trim();
    if (ariaLabel) {
        return ariaLabel;
    }
    const labelledBy = element.getAttribute('aria-labelledby')?.trim();
    if (labelledBy) {
        const doc = element.ownerDocument;
        const labels = labelledBy
            .split(/\s+/)
            .map((id) => doc.getElementById(id)?.textContent?.trim())
            .filter(Boolean);
        if (labels.length) {
            return labels.join(' ');
        }
    }
    // id and class names are only hints: separators are turned into spaces.
    const { id } = element;
    if (id && id.length < 30 && !/^\d/.test(id)) {
        return id.replace(/[-_]/g, ' ');
    }
    const semanticClass = getSemanticClass(element);
    if (semanticClass) {
        return semanticClass.replace(/[-_]/g, ' ');
    }
    const heading = element.querySelector('h1, h2, h3, h4, h5, h6');
    return heading ? getCleanTextContent(heading, 50) : '';
}
706
/**
 * Create an outline snapshot: a structural overview of the page in which
 * landmarks, headings, articles, named sections, content-heavy divs, lists
 * and code blocks each become one line.
 *
 * Section-like entries (landmarks, articles, named sections, semantic divs
 * with more than 50 words) receive an `@ref:N` marker that is registered in
 * `refMap` and described in the returned `refs` object.
 *
 * @param {Document} document - Document being snapshotted.
 * @param {Map<string, Element>} refMap - Ref registry; cleared and refilled by this call.
 * @param {object} options - Snapshot options.
 * @param {Element} [options.root=document.body] - Subtree to outline.
 * @param {number} [options.maxDepth=50] - Elements nested deeper than this are ignored.
 * @param {boolean} [options.includeHidden=false] - When false, elements failing `isVisible` are skipped with their subtree.
 * @param {string|object} [options.grep] - Pattern, or `{ pattern, ignoreCase, invert, fixedStrings }`, used to filter output lines.
 * @returns {{tree: string, refs: object, metadata: object}} Rendered outline, ref descriptions and counters.
 */
function createOutlineSnapshot(document, refMap, options) {
    const { root = document.body, maxDepth = 50, includeHidden = false, grep: grepPattern } = options;
    refMap.clear();
    // Only touch the global `window` when it exists, so a document without
    // a defaultView cannot raise a ReferenceError outside a browser.
    const win = document.defaultView || (typeof window !== 'undefined' ? window : undefined);
    const refs = {};
    const lines = [];
    let refCounter = 0;
    // Stats for the OUTLINE header
    let landmarkCount = 0;
    let sectionCount = 0;
    let headingCount = 0;

    // Allocates a ref for a section-like element and renders its outline line.
    function describeSection(element, label, refRole, indentStr) {
        const name = getSectionName(element);
        const metadata = buildOutlineMetadata(element);
        const xpath = buildSemanticXPath(element);
        const ref = `@ref:${refCounter++}`;
        refMap.set(ref, element);
        refs[ref] = {
            selector: generateSelector(element),
            role: refRole,
            name: name || undefined
        };
        sectionCount++;
        let line = `${indentStr}${label}`;
        if (name) {
            line += ` "${truncateByType(name, 'ELEMENT_NAME')}"`;
        }
        line += ` ${ref}`;
        if (metadata) {
            line += ` ${metadata}`;
        }
        return `${line} ${xpath}`;
    }

    // Renders the outline line for one element, or null when the element
    // does not appear in the outline. Branch order matters: a role-based
    // landmark or heading wins over tag-based classification.
    function describeElement(element, role, indentStr) {
        const tagName = element.tagName;
        if (role && LANDMARK_ROLES.has(role)) {
            return describeSection(element, role.toUpperCase(), role, indentStr);
        }
        if (role?.startsWith('heading')) {
            // Only <h1>-<h6> encode the level in the tag name. Other
            // heading-role elements carry it in aria-level, whose implicit
            // value is 2.
            const level = /^H[1-6]$/.test(tagName)
                ? tagName[1]
                : (element.getAttribute('aria-level') || '2');
            const text = getCleanTextContent(element, 60);
            const xpath = buildSemanticXPath(element);
            let line = `${indentStr}HEADING level=${level}`;
            if (text) {
                line += ` "${text}"`;
            }
            return `${line} ${xpath}`;
        }
        if (tagName === 'ARTICLE' || (tagName === 'SECTION' && (element.id || element.getAttribute('aria-label')))) {
            const label = tagName === 'ARTICLE' ? 'ARTICLE' : 'REGION';
            return describeSection(element, label, label.toLowerCase(), indentStr);
        }
        if (tagName === 'DIV' && (element.id || getSemanticClass(element))) {
            // Semantic divs only count as regions when they hold substantial text.
            return countWords(element.textContent || '') > 50
                ? describeSection(element, 'REGION', 'region', indentStr)
                : null;
        }
        if (tagName === 'UL' || tagName === 'OL') {
            const items = element.querySelectorAll(':scope > li').length;
            return items > 0
                ? `${indentStr}LIST [${items} items] ${buildSemanticXPath(element)}`
                : null;
        }
        if (tagName === 'PRE') {
            const lang = detectCodeLanguage(element);
            const lineCount = (element.textContent || '').split('\n').length;
            const xpath = buildSemanticXPath(element);
            let line = `${indentStr}CODE`;
            if (lang) {
                line += ` [${lang}]`;
            }
            return `${line} [${lineCount} lines] ${xpath}`;
        }
        // Everything else (including paragraphs, which are only reported
        // through their parent's metadata) is not listed on its own.
        return null;
    }

    // Depth-first walk; children of a listed element are indented one level deeper.
    function buildOutline(element, depth, indent) {
        if (depth > maxDepth) {
            return;
        }
        if (!includeHidden && !isVisible(element, false)) {
            return;
        }
        const role = getRole(element);
        if (role?.startsWith('heading')) {
            headingCount++;
        }
        if (LANDMARK_ROLES.has(role || '')) {
            landmarkCount++;
        }
        const line = describeElement(element, role, ' '.repeat(indent));
        const included = line !== null;
        if (included) {
            lines.push(line);
        }
        const nextIndent = included ? indent + 1 : indent;
        for (const child of element.children) {
            buildOutline(child, depth + 1, nextIndent);
        }
    }

    buildOutline(root, 0, 0);
    const totalWords = countWords(root.textContent || '');

    // Apply grep filter to the rendered lines
    let filteredLines = lines;
    let grepDisplayPattern = '';
    if (grepPattern) {
        const grepOpts = typeof grepPattern === 'string'
            ? { pattern: grepPattern }
            : grepPattern;
        const { pattern, ignoreCase = false, invert = false, fixedStrings = false } = grepOpts;
        grepDisplayPattern = pattern;
        const regexSource = fixedStrings
            ? pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
            : pattern;
        let isMatch;
        try {
            const regex = new RegExp(regexSource, ignoreCase ? 'i' : '');
            isMatch = (text) => regex.test(text);
        }
        catch {
            // The pattern is not a valid regular expression: deliberately
            // degrade to a plain substring search instead of failing.
            isMatch = (text) => (ignoreCase
                ? text.toLowerCase().includes(pattern.toLowerCase())
                : text.includes(pattern));
        }
        filteredLines = lines.filter((line) => {
            const matches = isMatch(line);
            return invert ? !matches : matches;
        });
    }

    // Build headers
    const pageHeader = `PAGE: ${document.location?.href || 'about:blank'} | ${document.title || 'Untitled'} | viewport=${win?.innerWidth ?? 0}x${win?.innerHeight ?? 0}`;
    let outlineHeader = `OUTLINE: landmarks=${landmarkCount} sections=${sectionCount} headings=${headingCount} words=${totalWords}`;
    if (grepPattern) {
        outlineHeader += ` grep=${grepDisplayPattern} matches=${filteredLines.length}`;
    }
    return {
        tree: [pageHeader, outlineHeader, '', ...filteredLines].join('\n'),
        refs,
        metadata: {
            totalInteractiveElements: sectionCount,
            capturedElements: refCounter,
            quality: 'high'
        }
    };
}
905
/**
 * Create a content snapshot: find the page's content sections and extract
 * their text, either as an indented tree (default) or as markdown.
 *
 * An element is a section when its role is a landmark or 'article', when
 * it is a `<section>` with an id or aria-label, or when it is a `<div>`
 * with an id or semantic class holding more than 30 words.
 *
 * @param {Document} document - Document being snapshotted.
 * @param {Map<string, Element>} refMap - Ref registry; cleared by this call.
 * @param {object} options - Snapshot options.
 * @param {Element} [options.root=document.body] - Subtree to scan.
 * @param {number} [options.maxDepth=50] - Elements nested deeper than this are ignored.
 * @param {boolean} [options.includeHidden=false] - When false, elements failing `isVisible` are skipped with their subtree.
 * @param {string} [options.format='tree'] - 'markdown' selects markdown output; anything else yields the tree format.
 * @param {string|object} [options.grep] - Pattern, or `{ pattern, ignoreCase, invert, fixedStrings }`, matched against each section's xpath.
 * @param {number} [options.maxLength=2000] - Maximum length of an extracted paragraph.
 * @param {boolean} [options.includeLinks=true] - Forwarded to the markdown generator.
 * @param {boolean} [options.includeImages=false] - Forwarded to the markdown generator.
 * @returns {{tree: string, refs: object, metadata: object}} Rendered content, ref descriptions and counters.
 */
function createContentSnapshot(document, refMap, options) {
    const { root = document.body, maxDepth = 50, includeHidden = false, format = 'tree', grep: grepPattern, maxLength = 2000, includeLinks = true, includeImages = false } = options;
    refMap.clear();
    const refs = {};
    // Forwarded to the extractors to keep their signatures; this function
    // never increments it.
    const refCounter = 0;
    const sections = [];

    // Decides whether an element starts a content section.
    function isContentSection(element) {
        const role = getRole(element);
        if (role && (LANDMARK_ROLES.has(role) || role === 'article')) {
            return true;
        }
        const tagName = element.tagName;
        if (tagName === 'SECTION') {
            return Boolean(element.id || element.getAttribute('aria-label'));
        }
        if (tagName === 'DIV' && (element.id || getSemanticClass(element))) {
            return countWords(element.textContent || '') > 30;
        }
        return false;
    }

    function collectSections(element, depth) {
        if (depth > maxDepth) {
            return;
        }
        if (!includeHidden && !isVisible(element, false)) {
            return;
        }
        if (isContentSection(element)) {
            const heading = element.querySelector('h1, h2, h3, h4, h5, h6');
            sections.push({
                // The xpath is only needed for sections, so it is built
                // here rather than for every visited element.
                xpath: buildSemanticXPath(element),
                element,
                heading: heading ? getCleanTextContent(heading, 100) : undefined,
                headingLevel: heading ? Number.parseInt(heading.tagName[1], 10) : undefined
            });
        }
        for (const child of element.children) {
            collectSections(child, depth + 1);
        }
    }

    collectSections(root, 0);

    // Filter sections by grep pattern (matched against the section xpath)
    let filteredSections = sections;
    let grepDisplayPattern = '';
    if (grepPattern) {
        const grepOpts = typeof grepPattern === 'string'
            ? { pattern: grepPattern }
            : grepPattern;
        const { pattern, ignoreCase = false, invert = false, fixedStrings = false } = grepOpts;
        grepDisplayPattern = pattern;
        const regexSource = fixedStrings
            ? pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
            : pattern;
        let isMatch;
        try {
            const regex = new RegExp(regexSource, ignoreCase ? 'i' : '');
            isMatch = (text) => regex.test(text);
        }
        catch {
            // The pattern is not a valid regular expression: deliberately
            // degrade to a plain substring search instead of failing.
            isMatch = (text) => (ignoreCase
                ? text.toLowerCase().includes(pattern.toLowerCase())
                : text.includes(pattern));
        }
        filteredSections = sections.filter((section) => {
            const matches = isMatch(section.xpath);
            return invert ? !matches : matches;
        });
    }

    if (format === 'markdown') {
        return generateMarkdownContent(document, filteredSections, refs, refMap, refCounter, {
            maxLength,
            includeLinks,
            includeImages
        });
    }

    // Tree format (default for content mode)
    const lines = [];
    let totalWords = 0;
    for (const section of filteredSections) {
        const sectionWords = countWords(section.element.textContent || '');
        totalWords += sectionWords;
        lines.push(`SECTION ${section.xpath} [${sectionWords} words]`);
        extractContentLines(section.element, lines, ' ', maxLength, refMap, refs, refCounter);
        lines.push('');
    }

    // Build headers
    const pageHeader = `PAGE: ${document.location?.href || 'about:blank'} | ${document.title || 'Untitled'}`;
    let contentHeader = `CONTENT: sections=${filteredSections.length} words=${totalWords}`;
    if (grepPattern) {
        contentHeader += ` grep=${grepDisplayPattern}`;
    }
    return {
        tree: [pageHeader, contentHeader, '', ...lines].join('\n'),
        refs,
        metadata: {
            totalInteractiveElements: filteredSections.length,
            capturedElements: Object.keys(refs).length,
            quality: 'high'
        }
    };
}
1020
/**
 * Append the tree-format content lines for an element to `lines`.
 *
 * Headings, paragraphs, lists and `<pre>` blocks are rendered directly and
 * end the descent; any other element is traversed recursively.
 *
 * @param {Element} element - Element to render.
 * @param {string[]} lines - Output buffer; mutated in place.
 * @param {string} indent - Prefix put in front of every emitted line.
 * @param {number} maxLength - Maximum length of a paragraph's text.
 * @param {Map<string, Element>} refMap - Not used here; passed through on recursion.
 * @param {object} refs - Not used here; passed through on recursion.
 * @param {number} refCounter - Not used here; passed through on recursion.
 * @returns {void}
 */
function extractContentLines(element, lines, indent, maxLength, refMap, refs, refCounter) {
    const PREVIEW_LINES = 5;
    const tagName = element.tagName;
    const role = getRole(element);
    // Headings
    if (role?.startsWith('heading')) {
        // Only <h1>-<h6> encode the level in the tag name. Other
        // heading-role elements carry it in aria-level, whose implicit
        // value is 2.
        const level = /^H[1-6]$/.test(tagName)
            ? tagName[1]
            : (element.getAttribute('aria-level') || '2');
        lines.push(`${indent}HEADING level=${level} "${getCleanTextContent(element, 100)}"`);
        return;
    }
    // Paragraphs
    if (tagName === 'P') {
        const text = getCleanTextContent(element, maxLength);
        if (text) {
            lines.push(`${indent}TEXT "${text}"`);
        }
        return;
    }
    // Lists (at most 10 items are shown)
    if (tagName === 'UL' || tagName === 'OL') {
        const items = getListItems(element, 10);
        if (items.length > 0) {
            lines.push(`${indent}LIST [${items.length} items]`);
            for (const item of items) {
                lines.push(`${indent} - "${item}"`);
            }
        }
        return;
    }
    // Code blocks: summary line followed by a short preview
    if (tagName === 'PRE') {
        const lang = detectCodeLanguage(element);
        const codeLines = (element.textContent || '').trim().split('\n');
        const summary = lang
            ? `${lang}, ${codeLines.length} lines`
            : `${codeLines.length} lines`;
        lines.push(`${indent}CODE [${summary}]`);
        for (const codeLine of codeLines.slice(0, PREVIEW_LINES)) {
            lines.push(`${indent} ${codeLine}`);
        }
        if (codeLines.length > PREVIEW_LINES) {
            lines.push(`${indent} ...`);
        }
        return;
    }
    // Any other element: descend into its children
    for (const child of element.children) {
        extractContentLines(child, lines, indent, maxLength, refMap, refs, refCounter);
    }
}
1078
/**
 * Render the given content sections as one markdown document.
 *
 * The output opens with a source-URL comment, then for every section an
 * xpath comment followed by that section's markdown, and closes with a
 * comment stating the total number of words.
 *
 * @param {Document} document - Document the sections come from.
 * @param {Array<{xpath: string, element: Element}>} sections - Sections to render, in order.
 * @param {object} refs - Ref descriptions; returned as-is.
 * @param {Map<string, Element>} refMap - Forwarded to the per-element extractor.
 * @param {number} refCounter - Forwarded to the per-element extractor.
 * @param {{maxLength: number, includeLinks: boolean, includeImages: boolean}} options - Extraction options.
 * @returns {{tree: string, refs: object, metadata: object}} Markdown text, refs and counters.
 */
function generateMarkdownContent(document, sections, refs, refMap, refCounter, options) {
    const { maxLength, includeLinks, includeImages } = options;
    const sourceUrl = document.location?.href || 'about:blank';
    const lines = [`<!-- source: ${sourceUrl} -->`];
    let totalWords = 0;
    sections.forEach((section) => {
        totalWords += countWords(section.element.textContent || '');
        lines.push(`<!-- xpath: ${section.xpath} -->`, '');
        extractMarkdownContent(section.element, lines, maxLength, includeLinks, includeImages, refMap, refs, refCounter);
        lines.push('');
    });
    lines.push(`<!-- end: ${totalWords} words extracted -->`);
    return {
        tree: lines.join('\n'),
        refs,
        metadata: {
            totalInteractiveElements: sections.length,
            capturedElements: Object.keys(refs).length,
            quality: 'high'
        }
    };
}
1110
/**
 * Append the markdown rendering of an element to `lines`.
 *
 * Headings, paragraphs, lists, `<pre>` blocks, blockquotes and (when
 * requested) images are rendered directly and end the descent; any other
 * element is traversed recursively.
 *
 * @param {Element} element - Element to render.
 * @param {string[]} lines - Output buffer; mutated in place.
 * @param {number} maxLength - Maximum length of paragraph and blockquote text.
 * @param {boolean} includeLinks - Not used here; passed through on recursion.
 * @param {boolean} includeImages - When true, `<img>` elements are emitted as markdown images.
 * @param {Map<string, Element>} refMap - Not used here; passed through on recursion.
 * @param {object} refs - Not used here; passed through on recursion.
 * @param {number} refCounter - Not used here; passed through on recursion.
 * @returns {void}
 */
function extractMarkdownContent(element, lines, maxLength, includeLinks, includeImages, refMap, refs, refCounter) {
    const tagName = element.tagName;
    const role = getRole(element);
    // Headings
    if (role?.startsWith('heading')) {
        // Only <h1>-<h6> encode the level in the tag name. Other
        // heading-role elements carry it in aria-level, whose implicit
        // value is 2. Markdown supports levels 1 to 6 only.
        const rawLevel = /^H[1-6]$/.test(tagName)
            ? tagName[1]
            : element.getAttribute('aria-level');
        const parsedLevel = Number.parseInt(rawLevel ?? '', 10);
        const level = Number.isNaN(parsedLevel)
            ? 2
            : Math.min(Math.max(parsedLevel, 1), 6);
        lines.push(`${'#'.repeat(level)} ${getCleanTextContent(element, 100)}`, '');
        return;
    }
    // Paragraphs
    if (tagName === 'P') {
        const text = getCleanTextContent(element, maxLength);
        if (text) {
            lines.push(text, '');
        }
        return;
    }
    // Lists: only direct <li> children; numbering follows source position
    if (tagName === 'UL' || tagName === 'OL') {
        const ordered = tagName === 'OL';
        let position = 0;
        for (const item of element.querySelectorAll(':scope > li')) {
            position++;
            const text = getCleanTextContent(item, 200);
            if (text) {
                lines.push(ordered ? `${position}. ${text}` : `- ${text}`);
            }
        }
        lines.push('');
        return;
    }
    // Code blocks
    if (tagName === 'PRE') {
        const lang = detectCodeLanguage(element) || '';
        const code = (element.textContent || '').trim();
        // The fence must be longer than any backtick run inside the code,
        // otherwise a ``` line in the content would close the block early.
        const longestRun = (code.match(/`+/g) || [])
            .reduce((longest, run) => Math.max(longest, run.length), 0);
        const fence = '`'.repeat(Math.max(3, longestRun + 1));
        lines.push(fence + lang, code, fence, '');
        return;
    }
    // Blockquotes
    if (tagName === 'BLOCKQUOTE') {
        // The cleaned text has all whitespace collapsed, so it is always a
        // single line.
        const text = getCleanTextContent(element, maxLength);
        if (text) {
            lines.push(`> ${text}`, '');
        }
        return;
    }
    // Images (if requested)
    if (includeImages && tagName === 'IMG') {
        const alt = element.getAttribute('alt') || 'image';
        const src = element.getAttribute('src') || '';
        lines.push(`![${alt}](${src})`, '');
        return;
    }
    // Any other element: descend into its children
    for (const child of element.children) {
        extractMarkdownContent(child, lines, maxLength, includeLinks, includeImages, refMap, refs, refCounter);
    }
}
576
1190
  /**
577
1191
  * Generate flat snapshot of the DOM
1192
+ *
1193
+ * Supports three modes:
1194
+ * - 'interactive' (default): Find clickable elements with @ref markers
1195
+ * - 'outline': Structural overview with xpaths and metadata
1196
+ * - 'content': Extract text content from sections
578
1197
  */
579
1198
  export function createSnapshot(document, refMap, options = {}) {
580
- const { root = document.body, maxDepth = 50, includeHidden = false, interactive = true, all = false, format = 'tree', grep: grepPattern } = options;
1199
+ const { root = document.body, maxDepth = 50, includeHidden = false, mode = 'interactive', format = 'tree', grep: grepPattern } = options;
1200
+ // Dispatch based on mode
1201
+ const effectiveMode = mode;
1202
+ if (effectiveMode === 'outline') {
1203
+ return createOutlineSnapshot(document, refMap, { ...options, root });
1204
+ }
1205
+ if (effectiveMode === 'content') {
1206
+ return createContentSnapshot(document, refMap, { ...options, root });
1207
+ }
1208
+ // Default: interactive mode (original behavior)
581
1209
  // Fast path for HTML format - return raw body HTML without processing
582
1210
  if (format === 'html') {
583
1211
  const bodyHTML = document.body?.outerHTML || '';
@@ -619,11 +1247,11 @@ export function createSnapshot(document, refMap, options = {}) {
619
1247
  const isInteractiveElement = isInteractive(element);
620
1248
  if (isInteractiveElement)
621
1249
  totalInteractive++;
622
- // Skip non-interactive in interactive mode
623
- if (interactive && !isInteractiveElement)
1250
+ // Skip non-interactive elements in interactive mode
1251
+ if (!isInteractiveElement)
624
1252
  continue;
625
- // Skip elements without role in non-all mode
626
- if (!all && !role)
1253
+ // Skip elements without role
1254
+ if (!role)
627
1255
  continue;
628
1256
  const name = getAccessibleName(element);
629
1257
  // Build line