afdocs 0.3.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +53 -30
  2. package/dist/checks/agent-discoverability/llms-txt-directive.js +206 -5
  3. package/dist/checks/agent-discoverability/llms-txt-directive.js.map +1 -1
  4. package/dist/checks/authentication/auth-alternative-access.js +109 -6
  5. package/dist/checks/authentication/auth-alternative-access.js.map +1 -1
  6. package/dist/checks/authentication/auth-gate-detection.js +8 -3
  7. package/dist/checks/authentication/auth-gate-detection.js.map +1 -1
  8. package/dist/checks/content-structure/markdown-code-fence-validity.js +1 -1
  9. package/dist/checks/content-structure/markdown-code-fence-validity.js.map +1 -1
  10. package/dist/checks/content-structure/section-header-quality.js +240 -6
  11. package/dist/checks/content-structure/section-header-quality.js.map +1 -1
  12. package/dist/checks/content-structure/tabbed-content-serialization.js +200 -5
  13. package/dist/checks/content-structure/tabbed-content-serialization.js.map +1 -1
  14. package/dist/checks/index.d.ts +1 -0
  15. package/dist/checks/index.d.ts.map +1 -1
  16. package/dist/checks/index.js +1 -0
  17. package/dist/checks/index.js.map +1 -1
  18. package/dist/checks/llms-txt/llms-txt-exists.js +17 -10
  19. package/dist/checks/llms-txt/llms-txt-exists.js.map +1 -1
  20. package/dist/checks/observability/llms-txt-freshness.d.ts +24 -1
  21. package/dist/checks/observability/llms-txt-freshness.d.ts.map +1 -1
  22. package/dist/checks/observability/llms-txt-freshness.js +391 -5
  23. package/dist/checks/observability/llms-txt-freshness.js.map +1 -1
  24. package/dist/checks/observability/markdown-content-parity.js +599 -5
  25. package/dist/checks/observability/markdown-content-parity.js.map +1 -1
  26. package/dist/checks/page-size/content-start-position.js +3 -7
  27. package/dist/checks/page-size/content-start-position.js.map +1 -1
  28. package/dist/checks/page-size/page-size-html.js +4 -8
  29. package/dist/checks/page-size/page-size-html.js.map +1 -1
  30. package/dist/checks/page-size/rendering-strategy.d.ts +2 -0
  31. package/dist/checks/page-size/rendering-strategy.d.ts.map +1 -0
  32. package/dist/checks/page-size/rendering-strategy.js +154 -0
  33. package/dist/checks/page-size/rendering-strategy.js.map +1 -0
  34. package/dist/checks/url-stability/redirect-behavior.js +127 -5
  35. package/dist/checks/url-stability/redirect-behavior.js.map +1 -1
  36. package/dist/cli/commands/check.d.ts.map +1 -1
  37. package/dist/cli/commands/check.js +9 -0
  38. package/dist/cli/commands/check.js.map +1 -1
  39. package/dist/cli/formatters/text.d.ts.map +1 -1
  40. package/dist/cli/formatters/text.js +13 -3
  41. package/dist/cli/formatters/text.js.map +1 -1
  42. package/dist/cli/index.d.ts.map +1 -1
  43. package/dist/cli/index.js +4 -1
  44. package/dist/cli/index.js.map +1 -1
  45. package/dist/constants.d.ts.map +1 -1
  46. package/dist/constants.js +1 -0
  47. package/dist/constants.js.map +1 -1
  48. package/dist/helpers/detect-rendering.d.ts +31 -0
  49. package/dist/helpers/detect-rendering.d.ts.map +1 -0
  50. package/dist/helpers/detect-rendering.js +85 -0
  51. package/dist/helpers/detect-rendering.js.map +1 -0
  52. package/dist/helpers/detect-tabs.d.ts +12 -0
  53. package/dist/helpers/detect-tabs.d.ts.map +1 -0
  54. package/dist/helpers/detect-tabs.js +309 -0
  55. package/dist/helpers/detect-tabs.js.map +1 -0
  56. package/dist/helpers/fetch-page.d.ts +8 -0
  57. package/dist/helpers/fetch-page.d.ts.map +1 -0
  58. package/dist/helpers/fetch-page.js +20 -0
  59. package/dist/helpers/fetch-page.js.map +1 -0
  60. package/dist/helpers/get-page-urls.d.ts +9 -0
  61. package/dist/helpers/get-page-urls.d.ts.map +1 -1
  62. package/dist/helpers/get-page-urls.js +153 -21
  63. package/dist/helpers/get-page-urls.js.map +1 -1
  64. package/dist/helpers/index.d.ts +5 -0
  65. package/dist/helpers/index.d.ts.map +1 -1
  66. package/dist/helpers/index.js +3 -0
  67. package/dist/helpers/index.js.map +1 -1
  68. package/dist/helpers/to-md-urls.d.ts +4 -0
  69. package/dist/helpers/to-md-urls.d.ts.map +1 -1
  70. package/dist/helpers/to-md-urls.js +13 -0
  71. package/dist/helpers/to-md-urls.js.map +1 -1
  72. package/dist/runner.d.ts.map +1 -1
  73. package/dist/runner.js +1 -0
  74. package/dist/runner.js.map +1 -1
  75. package/dist/types.d.ts +19 -0
  76. package/dist/types.d.ts.map +1 -1
  77. package/package.json +2 -1
@@ -0,0 +1,31 @@
1
+ export interface RenderingAnalysis {
2
+ /** Whether the page is judged to carry real content; always true when no SPA marker is found. */
3
+ hasContent: boolean;
4
+ /** Whether known SPA framework markers were found. */
5
+ hasSpaMarkers: boolean;
6
+ /** The first SPA marker string found in the raw HTML, or null when none is present. */
7
+ spaMarker: string | null;
8
+ /** Number of h1-h6 headings whose trimmed text is longer than 3 characters. */
9
+ contentHeadings: number;
10
+ /** Number of <p> elements whose trimmed text is longer than 30 characters. */
11
+ contentParagraphs: number;
12
+ /** Number of <pre> and <code> elements found (a <pre><code> pair counts as two). */
13
+ codeBlocks: number;
14
+ /** Whether a <main> or [role="main"] element holds at least two prose paragraphs (>30 chars) or at least one <pre>/<code> element. */
15
+ hasMainContent: boolean;
16
+ /** Visible body text length after removing script/style/noscript/svg and collapsing whitespace runs. */
17
+ visibleTextLength: number;
18
+ /** Total HTML length. */
19
+ htmlLength: number;
20
+ }
21
+ /**
22
+ * Analyze whether an HTML page contains server-rendered content or is
23
+ * a client-side-rendered SPA shell.
24
+ *
25
+ * Unlike a simple text-ratio heuristic, this checks for concrete content
26
+ * signals: headings, paragraphs with prose, code blocks, and main content
27
+ * regions. SSR sites with heavy bundled assets (low text ratio but real
28
+ * content) will pass; true SPA shells (framework marker + no content) will fail.
29
+ */
30
+ export declare function analyzeRendering(html: string): RenderingAnalysis;
31
+ //# sourceMappingURL=detect-rendering.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect-rendering.d.ts","sourceRoot":"","sources":["../../src/helpers/detect-rendering.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,iBAAiB;IAChC,wEAAwE;IACxE,UAAU,EAAE,OAAO,CAAC;IACpB,sDAAsD;IACtD,aAAa,EAAE,OAAO,CAAC;IACvB,0CAA0C;IAC1C,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,sEAAsE;IACtE,eAAe,EAAE,MAAM,CAAC;IACxB,+DAA+D;IAC/D,iBAAiB,EAAE,MAAM,CAAC;IAC1B,mCAAmC;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,sEAAsE;IACtE,cAAc,EAAE,OAAO,CAAC;IACxB,iEAAiE;IACjE,iBAAiB,EAAE,MAAM,CAAC;IAC1B,yBAAyB;IACzB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,CAgFhE"}
@@ -0,0 +1,85 @@
1
+ import { parse } from 'node-html-parser';
2
/** Attribute snippets searched for in the raw HTML to recognise an SPA mount node. */
const SPA_MARKERS = ['id="___gatsby"', 'id="__next"', 'id="__nuxt"', 'id="root"'];
/**
 * Decide whether an HTML document ships real, server-rendered content or is
 * only the shell of a client-side-rendered application.
 *
 * Rather than relying on a text-to-markup ratio, the verdict is built from
 * concrete signals: headings, prose paragraphs, code elements and a populated
 * main region. A page with no SPA marker at all is always treated as having
 * content.
 *
 * @param html Raw HTML of the page.
 * @returns The individual signal counts plus the overall `hasContent` verdict.
 */
export function analyzeRendering(html) {
    // First marker present in the raw markup, or null when none appears.
    const spaMarker = SPA_MARKERS.find((candidate) => html.includes(candidate)) ?? null;
    const hasSpaMarkers = spaMarker !== null;

    // Work on <body> when there is one; drop elements that never hold visible prose.
    const document = parse(html);
    const body = document.querySelector('body') ?? document;
    body.querySelectorAll('script, style, noscript, svg').forEach((node) => node.remove());

    // Counts the elements whose trimmed text is longer than `minChars`.
    const countLongerThan = (nodes, minChars) =>
        nodes.filter((node) => node.textContent.trim().length > minChars).length;

    const visibleTextLength = body.textContent.replace(/\s+/g, ' ').trim().length;
    // Headings of 3 characters or fewer are assumed to be navigation labels.
    const contentHeadings = countLongerThan(body.querySelectorAll('h1, h2, h3, h4, h5, h6'), 3);
    const contentParagraphs = countLongerThan(body.querySelectorAll('p'), 30);
    // Both <pre> and <code> elements are counted, so <pre><code> contributes two.
    const codeBlocks = body.querySelectorAll('pre, code').length;

    // A bare <main> with only a title or breadcrumbs is not enough: require
    // prose paragraphs or code inside the main region itself.
    const main = body.querySelector('main, [role="main"]');
    const hasMainContent = main
        ? countLongerThan(main.querySelectorAll('p'), 30) >= 2 ||
            main.querySelectorAll('pre, code').length >= 1
        : false;

    // Any single strong signal is sufficient; without SPA markers the page is
    // assumed to be traditionally server-rendered.
    const hasContent = !hasSpaMarkers ||
        contentHeadings >= 3 ||
        contentParagraphs >= 5 ||
        (hasMainContent && contentHeadings >= 1) ||
        codeBlocks >= 3;

    return {
        hasContent,
        hasSpaMarkers,
        spaMarker,
        contentHeadings,
        contentParagraphs,
        codeBlocks,
        hasMainContent,
        visibleTextLength,
        htmlLength: html.length,
    };
}
85
+ //# sourceMappingURL=detect-rendering.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect-rendering.js","sourceRoot":"","sources":["../../src/helpers/detect-rendering.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzC,MAAM,WAAW,GAAG,CAAC,gBAAgB,EAAE,aAAa,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;AAuBlF;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC;IAE/B,kCAAkC;IAClC,IAAI,SAAS,GAAkB,IAAI,CAAC;IACpC,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;QACjC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,SAAS,GAAG,MAAM,CAAC;YACnB,MAAM;QACR,CAAC;IACH,CAAC;IACD,MAAM,aAAa,GAAG,SAAS,KAAK,IAAI,CAAC;IAEzC,uCAAuC;IACvC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC;IACzB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC;IAEhD,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,gBAAgB,CAAC,8BAA8B,CAAC,EAAE,CAAC;QACvE,EAAE,CAAC,MAAM,EAAE,CAAC;IACd,CAAC;IAED,eAAe;IACf,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACjE,MAAM,iBAAiB,GAAG,WAAW,CAAC,MAAM,CAAC;IAE7C,kDAAkD;IAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC;IACjE,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAClC,sDAAsD;QACtD,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,eAAe,EAAE,CAAC;IACzC,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;IAC9C,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAClC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;YAAE,iBAAiB,EAAE,CAAC;IAC5C,CAAC;IAED,+BAA+B;IAC/B,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC;IAE7D,2EAA2E;IAC3E,iFAAiF;IACjF,8EAA8E;IAC9E,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;IACvD,IAAI,cAAc,GAAG,KAAK,CAAC;IAC3B,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;QAC7C,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;YAC1B,IAAI,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,EAAE;gBAAE,cAAc,EAAE,CAAC;QACzD,CAAC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CA
AC,WAAW,CAAC,CAAC,MAAM,CAAC;QAC3D,cAAc,GAAG,cAAc,IAAI,CAAC,IAAI,QAAQ,IAAI,CAAC,CAAC;IACxD,CAAC;IAED,yCAAyC;IACzC,gFAAgF;IAChF,MAAM,UAAU,GACd,eAAe,IAAI,CAAC;QACpB,iBAAiB,IAAI,CAAC;QACtB,CAAC,cAAc,IAAI,eAAe,IAAI,CAAC,CAAC;QACxC,UAAU,IAAI,CAAC;QACf,CAAC,aAAa,CAAC,CAAC,+DAA+D;IAEjF,OAAO;QACL,UAAU;QACV,aAAa;QACb,SAAS;QACT,eAAe;QACf,iBAAiB;QACjB,UAAU;QACV,cAAc;QACd,iBAAiB;QACjB,UAAU;KACX,CAAC;AACJ,CAAC"}
@@ -0,0 +1,12 @@
1
+ /** One tab of a detected tab group: its label paired with its panel markup. */ export interface TabPanel {
2
+ /** Label text of the tab at this position, or null when no tab/label attribute lines up with the panel. */ label: string | null;
3
+ /** Markup of the panel (outerHTML, or raw source for MDX tabs); an empty string when no panel exists at this position. */ html: string;
4
+ }
5
+ /** A tab group found in a page, as reported by one of the detectors. */ export interface DetectedTabGroup {
6
+ /** Detector that produced the group: 'docusaurus', 'mkdocs', 'sphinx', 'microsoft-learn', 'mdx' or 'generic-aria'. */ framework: string;
7
+ /** Number of tabs in the group as counted by the detector. */ tabCount: number;
8
+ /** Markup of the whole group: the container's outerHTML, or the raw <Tabs>...</Tabs> source text for MDX. */ htmlSlice: string;
9
+ /** Per-tab label and content, in document order. */ panels: TabPanel[];
10
+ }
11
+ /** Detect tab groups in `html`; framework-specific detectors run first, followed by a generic ARIA fallback. */ export declare function detectTabGroups(html: string): DetectedTabGroup[];
12
+ //# sourceMappingURL=detect-tabs.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect-tabs.d.ts","sourceRoot":"","sources":["../../src/helpers/detect-tabs.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,QAAQ,EAAE,CAAC;CACpB;AAqUD,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,gBAAgB,EAAE,CAgBhE"}
@@ -0,0 +1,309 @@
1
+ import { parse } from 'node-html-parser';
2
+ function isDescendantOf(node, ancestor) {
3
+ let current = node.parentNode;
4
+ while (current) {
5
+ if (current === ancestor)
6
+ return true;
7
+ current = current.parentNode;
8
+ }
9
+ return false;
10
+ }
11
+ function isInsideClaimed(node, claimed) {
12
+ for (const container of claimed) {
13
+ if (container === node || isDescendantOf(node, container))
14
+ return true;
15
+ }
16
+ return false;
17
+ }
18
+ function textOf(el) {
19
+ // Clone to avoid mutating the original DOM, then strip <style> elements
20
+ // that some component libraries (e.g. LeafyGreen) embed inside tab buttons.
21
+ const clone = el.clone();
22
+ for (const style of clone.querySelectorAll('style')) {
23
+ style.remove();
24
+ }
25
+ return clone.textContent.trim();
26
+ }
27
/**
 * Climb from `start` (exclusive) to the nearest ancestor that has
 * `[role="tabpanel"]` descendants. Some component libraries (e.g. LeafyGreen)
 * render the tablist and its panels as siblings, so the tablist's direct
 * parent is not always the element that holds the panels.
 *
 * @param start Element whose ancestors are inspected.
 * @param claimed Containers already attributed to another tab group.
 * @param maxDepth Maximum number of ancestors to inspect (default 4), so the
 *   search does not end up scanning the whole document.
 * @returns `{ container, panels }` for the first matching ancestor, or null
 *   when none is found within `maxDepth` or a claimed region is reached first.
 */
function findContainerWithPanels(start, claimed, maxDepth = 4) {
    let candidate = start.parentNode;
    let climbed = 0;
    while (candidate && climbed < maxDepth) {
        // Reaching a claimed region means the panels belong to another group.
        if (isInsideClaimed(candidate, claimed)) {
            return null;
        }
        const panels = candidate.querySelectorAll('[role="tabpanel"]');
        if (panels.length > 0) {
            return { container: candidate, panels: [...panels] };
        }
        candidate = candidate.parentNode;
        climbed += 1;
    }
    return null;
}
46
/**
 * Detector for Docusaurus tabs: a `[role="tablist"]` whose tabs carry the
 * `tabs__item` class. Each detected container is added to `claimed` so later
 * detectors skip it.
 */
const docusaurus = (root, claimed) => {
    const groups = [];
    for (const tablist of root.querySelectorAll('[role="tablist"]')) {
        if (isInsideClaimed(tablist, claimed)) {
            continue;
        }
        const tabItems = tablist.querySelectorAll('.tabs__item');
        if (tabItems.length === 0) {
            continue;
        }
        // The panels may be siblings of the tablist rather than children, so
        // search upwards for the element that wraps both.
        const located = findContainerWithPanels(tablist, claimed);
        if (!located) {
            continue;
        }
        const tabLabels = tabItems.map((item) => textOf(item));
        // Panels are paired with labels by position; surplus panels get a null label.
        const panels = located.panels.map((panel, index) => ({
            label: tabLabels[index] ?? null,
            html: panel.outerHTML,
        }));
        claimed.add(located.container);
        groups.push({
            framework: 'docusaurus',
            tabCount: tabItems.length,
            htmlSlice: located.container.outerHTML,
            panels,
        });
    }
    return groups;
};
78
/**
 * Detector for MkDocs (Material) tabs: `.tabbed-set` containers with
 * `.tabbed-labels` and `.tabbed-content` children. Labels and panels are
 * paired by position; each detected container is added to `claimed`.
 */
const mkdocs = (root, claimed) => {
    const groups = [];
    for (const tabbedSet of root.querySelectorAll('.tabbed-set')) {
        if (isInsideClaimed(tabbedSet, claimed)) {
            continue;
        }
        const labelEls = tabbedSet.querySelectorAll('.tabbed-labels label, .tabbed-labels > *');
        const panelEls = tabbedSet.querySelectorAll('.tabbed-content > .tabbed-block, .tabbed-content > *');
        // The longer of the two lists decides how many tabs are reported.
        const count = Math.max(labelEls.length, panelEls.length);
        if (count === 0) {
            continue;
        }
        const panels = Array.from({ length: count }, (_unused, index) => {
            const labelEl = labelEls[index];
            const panelEl = panelEls[index];
            return {
                label: labelEl ? textOf(labelEl) : null,
                html: panelEl ? panelEl.outerHTML : '',
            };
        });
        claimed.add(tabbedSet);
        groups.push({
            framework: 'mkdocs',
            tabCount: count,
            htmlSlice: tabbedSet.outerHTML,
            panels,
        });
    }
    return groups;
};
107
/**
 * Detector for sphinx-tabs: `.sphinx-tabs` containers holding
 * `.sphinx-tabs-tab` buttons and `.sphinx-tabs-panel` panels. Tabs and panels
 * are paired by position; each detected container is added to `claimed`.
 */
const sphinx = (root, claimed) => {
    const groups = [];
    for (const tabsRoot of root.querySelectorAll('.sphinx-tabs')) {
        if (isInsideClaimed(tabsRoot, claimed)) {
            continue;
        }
        const tabEls = tabsRoot.querySelectorAll('.sphinx-tabs-tab');
        const panelEls = tabsRoot.querySelectorAll('.sphinx-tabs-panel');
        // The longer of the two lists decides how many tabs are reported.
        const count = Math.max(tabEls.length, panelEls.length);
        if (count === 0) {
            continue;
        }
        const panels = Array.from({ length: count }, (_unused, index) => {
            const tabEl = tabEls[index];
            const panelEl = panelEls[index];
            return {
                label: tabEl ? textOf(tabEl) : null,
                html: panelEl ? panelEl.outerHTML : '',
            };
        });
        claimed.add(tabsRoot);
        groups.push({
            framework: 'sphinx',
            tabCount: count,
            htmlSlice: tabsRoot.outerHTML,
            panels,
        });
    }
    return groups;
};
136
/**
 * Detector for Microsoft Learn tabs: `.tabGroup` containers with
 * `[role="tab"][data-tab]` tabs and `[role="tabpanel"]` / `.tabPanel` panels.
 * Unlike the other class-based detectors, a group is skipped when EITHER the
 * tabs or the panels are missing. Each detected container is added to `claimed`.
 */
const msLearn = (root, claimed) => {
    const groups = [];
    for (const tabGroup of root.querySelectorAll('.tabGroup')) {
        if (isInsideClaimed(tabGroup, claimed)) {
            continue;
        }
        const tabEls = tabGroup.querySelectorAll('[role="tab"][data-tab]');
        const panelEls = tabGroup.querySelectorAll('[role="tabpanel"], .tabPanel');
        if (tabEls.length === 0 || panelEls.length === 0) {
            continue;
        }
        const count = Math.max(tabEls.length, panelEls.length);
        const panels = Array.from({ length: count }, (_unused, index) => {
            const tabEl = tabEls[index];
            const panelEl = panelEls[index];
            return {
                label: tabEl ? textOf(tabEl) : null,
                html: panelEl ? panelEl.outerHTML : '',
            };
        });
        claimed.add(tabGroup);
        groups.push({
            framework: 'microsoft-learn',
            tabCount: count,
            htmlSlice: tabGroup.outerHTML,
            panels,
        });
    }
    return groups;
};
165
/**
 * Fallback detector for any `[role="tablist"]` that has `[role="tabpanel"]`
 * elements under a nearby ancestor. Tablists without panels are skipped:
 * they are typically site navigation rather than content tabs, and there is
 * no panel content to measure.
 */
const genericAria = (root, claimed) => {
    const groups = [];
    for (const tablist of root.querySelectorAll('[role="tablist"]')) {
        if (isInsideClaimed(tablist, claimed)) {
            continue;
        }
        const located = findContainerWithPanels(tablist, claimed);
        const tabEls = tablist.querySelectorAll('[role="tab"]');
        if (!located) {
            continue;
        }
        const panelEls = located.panels;
        // Defensive guard; a located container always carries at least one panel.
        if (tabEls.length === 0 && panelEls.length === 0) {
            continue;
        }
        // The tab count is authoritative. The container may also hold panels
        // of sibling tab groups, so capping at the number of tabs avoids
        // attributing those foreign panels to this group.
        const count = tabEls.length || panelEls.length;
        const panels = Array.from({ length: count }, (_unused, index) => {
            const tabEl = tabEls[index];
            const panelEl = panelEls[index];
            return {
                label: tabEl ? textOf(tabEl) : null,
                html: panelEl ? panelEl.outerHTML : '',
            };
        });
        claimed.add(located.container);
        groups.push({
            framework: 'generic-aria',
            tabCount: count,
            htmlSlice: located.container.outerHTML,
            panels,
        });
    }
    return groups;
};
204
/**
 * Find every top-level `<Tabs>...</Tabs>` block in raw source text, honouring
 * nesting, and return each block's text including its own tags.
 *
 * A regex scan is used instead of node-html-parser because the DOM parser
 * cannot reliably handle `</Tabs>` followed by markdown followed by `<Tabs>`:
 * it merges them into a single element.
 *
 * Nested tags are recognised with the same rule as the opening tag: the name
 * is matched case-insensitively and must end at a word boundary. Child
 * components such as `<TabsList>` or `<TabsContent>` are therefore NOT
 * counted as nested `<Tabs>`, and `<tabs>...</tabs>` is closed correctly.
 *
 * @param source Raw page source (HTML or MDX).
 * @returns The matched blocks in document order. An opening tag that is never
 *   closed yields no block; scanning resumes right after it, so complete
 *   blocks further on (or nested inside it) are still found.
 */
function findTabsBlocks(source) {
    const blocks = [];
    const openRe = /<Tabs\b[^>]*>/gi;
    // Matches an opening or closing Tabs tag; group 1 is "/" for a closing tag.
    const tagRe = /<(\/?)Tabs\b[^>]*>/gi;
    let open;
    while ((open = openRe.exec(source)) !== null) {
        const startIdx = open.index;
        let depth = 1;
        // Walk the following Tabs tags until the one that balances this opener.
        tagRe.lastIndex = startIdx + open[0].length;
        let tag;
        while (depth > 0 && (tag = tagRe.exec(source)) !== null) {
            depth += tag[1] === '/' ? -1 : 1;
            if (depth === 0) {
                const endIdx = tag.index + tag[0].length;
                blocks.push(source.substring(startIdx, endIdx));
                // Resume the outer scan after this block so nested openers
                // are not reported a second time as top-level blocks.
                openRe.lastIndex = endIdx;
            }
        }
    }
    return blocks;
}
242
/**
 * Matches the opening tag of a `<Tab ...>` or `<TabItem ...>` element.
 * Group 1 is the tag name, group 2 the raw attribute text.
 */
const TAB_OPEN_RE = /<(Tab|TabItem)\b([^>]*)>/gi;
/**
 * Matches a `name`, `label` or `value` attribute and captures its value
 * (group 1 for double quotes, group 2 for single quotes).
 *
 * The attribute name must start the string or follow whitespace. Without
 * that boundary the case-insensitive match would also hit the tail of
 * unrelated attributes such as `className="..."` or `data-value="..."`.
 */
const ATTR_RE = /(?:^|\s)(?:name|label|value)\s*=\s*(?:"([^"]*)"|'([^']*)')/i;
/**
 * MDX-style tabs: a `<Tabs>` container with `<Tab name="...">` or
 * `<TabItem label="...">` children. Used by MongoDB docs, Docusaurus MDX, and others.
 *
 * Works on the raw source text rather than the parsed DOM, because
 * node-html-parser cannot reliably parse `</Tabs>` + markdown + `<Tabs>`
 * as separate elements in mixed markdown/HTML content.
 *
 * @param _root Unused; present to match the detector signature.
 * @param _claimed Unused; present to match the detector signature.
 * @param source Raw page source. When falsy, no groups are returned.
 * @returns One group per top-level `<Tabs>` block that has at least one
 *   direct `<Tab>`/`<TabItem>` child.
 */
const mdxTabs = (_root, _claimed, source) => {
    if (!source)
        return [];
    const groups = [];
    for (const block of findTabsBlocks(source)) {
        // Text between the block's own opening and closing tags.
        const inner = block.substring(block.indexOf('>') + 1, block.lastIndexOf('</'));
        const panels = [];
        // The regex is shared and global, so rewind it for every block.
        TAB_OPEN_RE.lastIndex = 0;
        let tabMatch;
        while ((tabMatch = TAB_OPEN_RE.exec(inner)) !== null) {
            // A surplus of <Tabs> openers before this tag means it belongs to
            // a nested group and is not a direct child of this block.
            const before = inner.substring(0, tabMatch.index);
            const opensInBefore = (before.match(/<Tabs\b/gi) || []).length;
            const closesInBefore = (before.match(/<\/Tabs>/gi) || []).length;
            if (opensInBefore - closesInBefore > 0)
                continue;
            const labelMatch = tabMatch[2].match(ATTR_RE);
            const label = labelMatch ? (labelMatch[1] ?? labelMatch[2]) : null;
            // The panel runs to the first closing tag with the same name; if
            // none exists it extends to the end of the block's inner text.
            const closeTag = `</${tabMatch[1]}>`;
            const closeIdx = inner.indexOf(closeTag, tabMatch.index + tabMatch[0].length);
            const tabContent = closeIdx !== -1
                ? inner.substring(tabMatch.index, closeIdx + closeTag.length)
                : inner.substring(tabMatch.index);
            panels.push({ label, html: tabContent });
        }
        if (panels.length === 0)
            continue;
        groups.push({
            framework: 'mdx',
            tabCount: panels.length,
            htmlSlice: block,
            panels,
        });
    }
    return groups;
};
294
/** Framework-specific detectors, in the order they get to claim containers. */
const frameworkDetectors = [docusaurus, mkdocs, sphinx, msLearn, mdxTabs];
/**
 * Detect tab groups in a page.
 *
 * The framework-specific detectors run first, in order, sharing one set of
 * claimed containers. The generic ARIA detector runs last and only picks up
 * tablists that lie outside every claimed container.
 *
 * @param html Raw page source; it is parsed for the DOM-based detectors and
 *   also handed unchanged to detectors that work on source text.
 * @returns All detected groups, framework-specific ones first.
 */
export function detectTabGroups(html) {
    const root = parse(html);
    const claimed = new Set();
    const frameworkGroups = frameworkDetectors.flatMap((detect) => detect(root, claimed, html));
    return [...frameworkGroups, ...genericAria(root, claimed)];
}
309
+ //# sourceMappingURL=detect-tabs.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect-tabs.js","sourceRoot":"","sources":["../../src/helpers/detect-tabs.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAoB,MAAM,kBAAkB,CAAC;AAoB3D,SAAS,cAAc,CAAC,IAAiB,EAAE,QAAqB;IAC9D,IAAI,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;IAC9B,OAAO,OAAO,EAAE,CAAC;QACf,IAAI,OAAO,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC;IAC/B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,eAAe,CAAC,IAAiB,EAAE,OAAyB;IACnE,KAAK,MAAM,SAAS,IAAI,OAAO,EAAE,CAAC;QAChC,IAAI,SAAS,KAAK,IAAI,IAAI,cAAc,CAAC,IAAI,EAAE,SAAS,CAAC;YAAE,OAAO,IAAI,CAAC;IACzE,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,MAAM,CAAC,EAAe;IAC7B,wEAAwE;IACxE,4EAA4E;IAC5E,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,EAAiB,CAAC;IACxC,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;QACpD,KAAK,CAAC,MAAM,EAAE,CAAC;IACjB,CAAC;IACD,OAAO,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,uBAAuB,CAC9B,KAAkB,EAClB,OAAyB,EACzB,QAAQ,GAAG,CAAC;IAEZ,IAAI,OAAO,GAAG,KAAK,CAAC,UAAgC,CAAC;IACrD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,OAAO,IAAI,KAAK,GAAG,QAAQ,EAAE,KAAK,EAAE,EAAE,CAAC;QACzD,IAAI,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC;YAAE,OAAO,IAAI,CAAC;QACnD,MAAM,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,mBAAmB,CAA6B,CAAC;QACzF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC;QAC1E,OAAO,GAAG,OAAO,CAAC,UAAgC,CAAC;IACrD,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,GAAa,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;IAC7C,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,gEAAgE;IAChE,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAC3D,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,IAAI,eAAe,CAAC,OAAsB,EAAE,OAAO,CAAC;YAAE,SAAS;QAC/D,MAAM,IAAI,GAAG,OAAO,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;QACrD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEhC,sEAAsE;QACtE,wEAAwE;QACxE,gEAAgE;QAChE,MAAM,KAAK,GAAG,uBAAuB,CAAC,OAAsB,EAAE,OAAO,CAAC,CAAC;QACvE,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;QAEpC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAgB,CAAC,CAAC,C
AAC;QACzD,MAAM,SAAS,GAAe,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YAClD,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI;YACxB,IAAI,EAAE,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC,CAAC;QAEJ,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC;YACV,SAAS,EAAE,YAAY;YACvB,QAAQ,EAAE,IAAI,CAAC,MAAM;YACrB,SAAS,EAAE,SAAS,CAAC,SAAS;YAC9B,MAAM,EAAE,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,MAAM,GAAa,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;IACzC,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;IACxD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,EAAE,GAAG,SAAwB,CAAC;QACpC,IAAI,eAAe,CAAC,EAAE,EAAE,OAAO,CAAC;YAAE,SAAS;QAE3C,MAAM,MAAM,GAAG,EAAE,CAAC,gBAAgB,CAAC,0CAA0C,CAAC,CAAC;QAC/E,MAAM,MAAM,GAAG,EAAE,CAAC,gBAAgB,CAAC,sDAAsD,CAAC,CAAC;QAE3F,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEzD,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;QACrD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC;gBACb,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAgB,CAAC,CAAC,CAAC,CAAC,IAAI;gBAC1D,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,MAAM,CAAC,CAAC,CAAiB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;aAC5D,CAAC,CAAC;QACL,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChB,MAAM,CAAC,IAAI,CAAC;YACV,SAAS,EAAE,QAAQ;YACnB,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,EAAE,CAAC,SAAS;YACvB,MAAM,EAAE,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,MAAM,GAAa,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;IACzC,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,cAAc,CAAC,CAAC;IACzD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,EAAE,GAAG,SAAwB,CAAC;QACpC,IAAI,eAAe,CAAC,EAAE,EAAE,OAAO,CAAC;YAAE,SAAS;QAE3C,MAAM,IAAI,GAAG,EAAE,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,EAAE,CAAC,gBAAgB,CAAC,oBAAoB,CAAC,CAAC;QACzD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;Y
AAE,SAAS;QAEvD,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;QACnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC;gBACb,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAgB,CAAC,CAAC,CAAC,CAAC,IAAI;gBACtD,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,MAAM,CAAC,CAAC,CAAiB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;aAC5D,CAAC,CAAC;QACL,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChB,MAAM,CAAC,IAAI,CAAC;YACV,SAAS,EAAE,QAAQ;YACnB,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,EAAE,CAAC,SAAS;YACvB,MAAM,EAAE,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,OAAO,GAAa,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;IAC1C,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,WAAW,CAAC,CAAC;IACtD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,EAAE,GAAG,SAAwB,CAAC;QACpC,IAAI,eAAe,CAAC,EAAE,EAAE,OAAO,CAAC;YAAE,SAAS;QAE3C,MAAM,IAAI,GAAG,EAAE,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC;QAC3D,MAAM,MAAM,GAAG,EAAE,CAAC,gBAAgB,CAAC,8BAA8B,CAAC,CAAC;QACnE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEvD,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;QACnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC;gBACb,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAgB,CAAC,CAAC,CAAC,CAAC,IAAI;gBACtD,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,MAAM,CAAC,CAAC,CAAiB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;aAC5D,CAAC,CAAC;QACL,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChB,MAAM,CAAC,IAAI,CAAC;YACV,SAAS,EAAE,iBAAiB;YAC5B,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,EAAE,CAAC,SAAS;YACvB,MAAM,EAAE,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,WAAW,GAAa,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;IAC9C,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAC3D,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,C
AAC;QAC/B,MAAM,EAAE,GAAG,OAAsB,CAAC;QAClC,IAAI,eAAe,CAAC,EAAE,EAAE,OAAO,CAAC;YAAE,SAAS;QAE3C,MAAM,KAAK,GAAG,uBAAuB,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,IAAI,GAAG,EAAE,CAAC,gBAAgB,CAAC,cAAc,CAAC,CAAC;QAEjD,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,iEAAiE;YACjE,mEAAmE;YACnE,iEAAiE;YACjE,SAAS;QACX,CAAC;QAED,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;QACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEvD,kEAAkE;QAClE,oEAAoE;QACpE,mDAAmD;QACnD,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;QAC5D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC;gBACb,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAgB,CAAC,CAAC,CAAC,CAAC,IAAI;gBACtD,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,MAAM,CAAC,CAAC,CAAiB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;aAC5D,CAAC,CAAC;QACL,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC;YACV,SAAS,EAAE,cAAc;YACzB,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,SAAS,CAAC,SAAS;YAC9B,MAAM,EAAE,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,MAAc;IACpC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,iBAAiB,CAAC;IACjC,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC;QAC7B,6CAA6C;QAC7C,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QACrC,OAAO,GAAG,GAAG,MAAM,CAAC,MAAM,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;YACjD,IAAI,SAAS,KAAK,CAAC,CAAC;gBAAE,MAAM,CAAC,eAAe;YAC5C,IAAI,QAAQ,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,SAAS,EAAE,CAAC;gBAC5C,KAAK,EAAE,CAAC;gBACR,GAAG,GAAG,QAAQ,GAAG,CAAC,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,KAAK,EAAE,CAAC;gBACR,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;oBAChB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,
SAAS,CAAC,QAAQ,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC;oBACvD,+DAA+D;oBAC/D,MAAM,CAAC,SAAS,GAAG,SAAS,GAAG,CAAC,CAAC;gBACnC,CAAC;gBACD,GAAG,GAAG,SAAS,GAAG,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,WAAW,GAAG,4BAA4B,CAAC;AACjD,MAAM,OAAO,GAAG,uCAAuC,CAAC;AAExD;;;;;;;GAOG;AACH,MAAM,OAAO,GAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,MAAe,EAAE,EAAE;IAC7D,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,CAAC;IACvB,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;IAEtC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,oEAAoE;QACpE,6DAA6D;QAC7D,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/E,MAAM,MAAM,GAAe,EAAE,CAAC;QAE9B,WAAW,CAAC,SAAS,GAAG,CAAC,CAAC;QAC1B,IAAI,QAAgC,CAAC;QACrC,OAAO,CAAC,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACrD,8DAA8D;YAC9D,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;YAClD,MAAM,aAAa,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YAC/D,MAAM,cAAc,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YACjE,MAAM,KAAK,GAAG,aAAa,GAAG,cAAc,CAAC;YAE7C,IAAI,KAAK,GAAG,CAAC;gBAAE,SAAS,CAAC,wCAAwC;YAEjE,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACxC,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAEhD,+CAA+C;YAC/C,MAAM,QAAQ,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;YACrC,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAC9E,MAAM,UAAU,GACd,QAAQ,KAAK,CAAC,CAAC;gBACb,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC;gBAC7D,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAEtC,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAElC,MAAM,CAAC,IAAI,CAAC;YACV,SAAS,EAAE,KAAK;YAChB,QAAQ,EAAE,MAAM,CAAC,MAAM;YACvB,SAAS,EAAE,KAAK;YAChB,MAAM;SACP,CAAC,C
AAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAe,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;AAEtF,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC;IACzB,MAAM,OAAO,GAAG,IAAI,GAAG,EAAe,CAAC;IACvC,MAAM,MAAM,GAAuB,EAAE,CAAC;IAEtC,KAAK,MAAM,QAAQ,IAAI,kBAAkB,EAAE,CAAC;QAC1C,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;YAClD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { CheckContext, FetchedPage } from '../types.js';
2
+ /**
3
+ * Fetch a page URL, returning the body and content-type metadata.
4
+ * Results are cached on `ctx.htmlCache` so that multiple checks
5
+ * within the same run avoid redundant HTTP requests.
6
+ */
7
+ export declare function fetchPage(ctx: CheckContext, url: string): Promise<FetchedPage>;
8
+ //# sourceMappingURL=fetch-page.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-page.d.ts","sourceRoot":"","sources":["../../src/helpers/fetch-page.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE7D;;;;GAIG;AACH,wBAAsB,SAAS,CAAC,GAAG,EAAE,YAAY,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAcpF"}
@@ -0,0 +1,20 @@
1
+ import { looksLikeHtml } from './detect-markdown.js';
2
+ /**
3
+ * Fetch a page URL, returning the body and content-type metadata.
4
+ * Results are cached on `ctx.htmlCache` so that multiple checks
5
+ * within the same run avoid redundant HTTP requests.
6
+ */
7
+ export async function fetchPage(ctx, url) {
8
+ const cached = ctx.htmlCache.get(url);
9
+ if (cached)
10
+ return cached;
11
+ const response = await ctx.http.fetch(url);
12
+ const body = await response.text();
13
+ const contentType = response.headers.get('content-type') ?? '';
14
+ const isMarkdownType = contentType.includes('text/markdown') || contentType.includes('text/plain');
15
+ const isHtml = !isMarkdownType && (contentType.includes('text/html') || looksLikeHtml(body));
16
+ const result = { url, status: response.status, body, contentType, isHtml };
17
+ ctx.htmlCache.set(url, result);
18
+ return result;
19
+ }
20
+ //# sourceMappingURL=fetch-page.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-page.js","sourceRoot":"","sources":["../../src/helpers/fetch-page.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAGrD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,GAAiB,EAAE,GAAW;IAC5D,MAAM,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACtC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAE1B,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IAC/D,MAAM,cAAc,GAClB,WAAW,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IAC9E,MAAM,MAAM,GAAG,CAAC,cAAc,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC;IAE7F,MAAM,MAAM,GAAgB,EAAE,GAAG,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;IACxF,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAC/B,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -7,6 +7,7 @@ export declare function parseSitemapUrls(xml: string): {
7
7
  urls: string[];
8
8
  sitemapIndexUrls: string[];
9
9
  };
10
+ export declare function getUrlsFromCachedLlmsTxt(ctx: CheckContext): Promise<string[]>;
10
11
  /**
11
12
  * Parse `Sitemap:` directives from a robots.txt body.
12
13
  * Returns an array of sitemap URLs found.
@@ -16,6 +17,7 @@ export interface PageUrlResult {
16
17
  urls: string[];
17
18
  warnings: string[];
18
19
  }
20
+ export declare function getUrlsFromSitemap(ctx: CheckContext, warnings: string[], maxUrls?: number, originOverride?: string): Promise<string[]>;
19
21
  /**
20
22
  * Discover page URLs from llms.txt links, sitemap, or fall back to baseUrl.
21
23
  *
@@ -37,6 +39,13 @@ export interface SampledPages {
37
39
  *
38
40
  * The result is cached on ctx so that all checks within a single run
39
41
  * share the same sampled page list, avoiding inconsistent results.
42
+ *
43
+ * Sampling strategies:
44
+ * - `random`: Fisher-Yates shuffle, then take the first maxLinksToTest. (Default.)
45
+ * - `deterministic`: Sort URLs lexicographically, then pick every Nth URL
46
+ * so that the result is reproducible across runs (as long as the discovered
47
+ * URL set is stable).
48
+ * - `none`: Skip discovery entirely; return only the baseUrl.
40
49
  */
41
50
  export declare function discoverAndSamplePages(ctx: CheckContext): Promise<SampledPages>;
42
51
  //# sourceMappingURL=get-page-urls.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"get-page-urls.d.ts","sourceRoot":"","sources":["../../src/helpers/get-page-urls.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAkB,MAAM,aAAa,CAAC;AAEhE;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAAC,gBAAgB,EAAE,MAAM,EAAE,CAAA;CAAE,CA2B5F;AAyDD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CASlE;AAsBD,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAyED;;;;;;;GAOG;AACH,wBAAsB,WAAW,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAmB3E;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;GAMG;AACH,wBAAsB,sBAAsB,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAkBrF"}
1
+ {"version":3,"file":"get-page-urls.d.ts","sourceRoot":"","sources":["../../src/helpers/get-page-urls.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,YAAY,EAAkB,MAAM,aAAa,CAAC;AAEhE;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAAC,gBAAgB,EAAE,MAAM,EAAE,CAAA;CAAE,CA2B5F;AAED,wBAAsB,wBAAwB,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAMnF;AAiID;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CASlE;AAoCD,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AA0BD,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,YAAY,EACjB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,GAAE,MAAyB,EAClC,cAAc,CAAC,EAAE,MAAM,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC,CA8CnB;AAED;;;;;;;GAOG;AACH,wBAAsB,WAAW,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAmB3E;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,sBAAsB,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA2CrF"}