@redpanda-data/docs-extensions-and-macros 4.15.10 → 4.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,16 +23,31 @@ function formatLlmsDirective(componentName) {
23
23
  return `> ${LLMS_DIRECTIVE_BASE}`;
24
24
  }
25
25
 
26
/**
 * Escape every regular-expression metacharacter in a string so that the
 * result matches the original text literally when used as RegExp source.
 * @param {string} str - Raw text to escape
 * @returns {string} Copy of `str` with each metacharacter prefixed by a backslash
 */
function escapeRegExp(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character after the added backslash
  return str.replace(metaCharacters, '\\$&');
}
34
+
26
35
/**
 * Pattern that locates the llms directive blockquote line in markdown content
 * so that it can be stripped.
 * The pattern source is built from LLMS_DIRECTIVE_BASE, so it follows any change
 * made to that text. The trailing `.*` also consumes an optional
 * component-specific suffix on the same line.
 */
const LLMS_DIRECTIVE_REGEX = new RegExp(`^> ${escapeRegExp(LLMS_DIRECTIVE_BASE)}.*$`, 'gm');
31
44
 
32
45
/**
 * Pattern for the metadata HTML comments injected into markdown content.
 * A comment is matched only when its text begins with one of the known markers,
 * "Source:" or "Note for AI:", so HTML comments written by authors are left alone.
 * Whitespace following the closing `-->` is consumed together with the comment.
 */
const SOURCE_COMMENT_REGEX = /^<!--\s*(?:Source:|Note for AI:)[\s\S]*?-->\s*/gm;
36
51
 
37
52
  /**
38
53
  * Strip metadata added by convert-to-markdown extension from page content.
@@ -87,6 +87,13 @@ IMPORTANT: Extensions must be registered under the `antora.extensions` key in yo
87
87
  * **process-context-switcher** - Handle context-dependent content
88
88
  * **validate-attributes** - Validate AsciiDoc attributes
89
89
 
90
+ === AI-friendly documentation
91
+ * **convert-to-markdown** - Export documentation pages as markdown with frontmatter
92
+ * **convert-llms-to-txt** - Generate llms.txt and component-specific exports for AI agents
93
+ * **add-llms-directive** - Inject llms.txt discovery directive into HTML pages
94
+ * **add-markdown-urls-to-sitemap** - Add markdown URL entries to sitemap.xml
95
+ * **convert-sitemap-to-markdown** - Generate markdown sitemap indexes
96
+
90
97
  See link:REFERENCE.adoc[Reference documentation] for complete details on each extension.
91
98
 
92
99
  == Common use cases
@@ -0,0 +1,82 @@
1
+ 'use strict'
2
+
3
+ /**
4
+ * Adds llms.txt directive to HTML pages for agent-friendly documentation.
5
+ *
6
+ * This extension injects a blockquote directive pointing to /llms.txt immediately
7
+ * after the <body> tag of each documentation page. This helps AI agents discover
8
+ * the documentation index according to the Agent-Friendly Docs spec.
9
+ *
10
+ * The directive is styled to be visually hidden but remains in the HTML for agents
11
+ * to discover when they fetch pages.
12
+ *
13
+ * @see https://agentdocsspec.com/spec/#llms-txt-directive
14
+ */
15
+
16
+ const { formatLlmsDirective } = require('../extension-utils/llms-utils')
17
+
18
/**
 * Register the add-llms-directive extension.
 *
 * On `pagesComposed`, each page that has contents and an opening <body> tag gets
 * a visually hidden blockquote, built from `formatLlmsDirective`, inserted
 * immediately after that tag. A small <style> rule that hides the blockquote is
 * added before `</head>` unless the page already contains it. Pages that already
 * carry the directive are left untouched so a repeated run does not duplicate it.
 */
module.exports.register = function () {
  const logger = this.getLogger('add-llms-directive-extension')

  // Screen-reader-only style pattern: keeps the directive in the HTML for agents
  // while moving it out of sight for visual users
  const cssTag = `<style>.llms-directive{position:absolute;left:-10000px;width:1px;height:1px;overflow:hidden}</style>`

  this.on('pagesComposed', ({ contentCatalog }) => {
    const pages = contentCatalog.getPages()
    let processedCount = 0

    pages.forEach(page => {
      if (!page.contents) return

      try {
        const html = page.contents.toString('utf8')

        // Do not inject a second copy when the page was already processed
        if (html.includes('class="llms-directive"')) {
          logger.debug(`llms directive already present in ${page.src?.path}`)
          return
        }

        // Locate the opening <body> tag; the directive goes right after it so it
        // appears early in the HTML for better agent discovery
        const bodyMatch = html.match(/<body[^>]*>/i)

        if (!bodyMatch) {
          logger.debug(`No <body> tag found in ${page.src?.path}`)
          return
        }

        const insertAt = bodyMatch.index + bodyMatch[0].length

        // Get component name for component-specific link
        const componentName = page.src?.component || ''

        // Generate the directive in markdown blockquote format
        const directiveMarkdown = formatLlmsDirective(componentName)

        // Convert markdown blockquote to HTML: drop the leading '> ' marker
        let directiveText = directiveMarkdown.replace(/^>\s*/, '')

        // Convert markdown links [text](url) to HTML <a> tags
        // tabindex="-1" and aria-hidden="true" remove the link from tab order and hide it from assistive tech
        directiveText = directiveText.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2" tabindex="-1" aria-hidden="true">$1</a>')

        // tabindex="-1" and aria-hidden="true" on the blockquote hide it fully from assistive tech
        const directiveHtml = `\n<blockquote class="llms-directive" tabindex="-1" aria-hidden="true">\n<p>${directiveText}</p>\n</blockquote>\n`

        // Splice by index instead of String.replace with a string replacement:
        // a `$` sequence in the body tag or directive would otherwise be expanded
        // as a replacement pattern
        let newHtml = html.slice(0, insertAt) + directiveHtml + html.slice(insertAt)

        // Inject CSS before </head> if not already present
        if (!html.includes('.llms-directive{')) {
          newHtml = newHtml.replace('</head>', () => `${cssTag}\n</head>`)
        }

        // Update page contents
        page.contents = Buffer.from(newHtml, 'utf8')
        processedCount++
      } catch (err) {
        logger.error(`Error adding llms directive to ${page.src?.path}: ${err.message}`)
      }
    })

    logger.info(`Added llms.txt directive to ${processedCount} HTML pages`)
  })
}
@@ -0,0 +1,167 @@
1
+ 'use strict'
2
+
3
+ /**
4
+ * Adds markdown URL entries to sitemap.xml files for AI-friendly documentation.
5
+ *
6
+ * This extension enhances Antora's generated sitemaps by adding <url> entries
7
+ * for markdown versions of pages alongside the HTML versions. This improves
8
+ * compatibility with agent-friendly documentation tools that expect markdown
9
+ * URLs to be discoverable in sitemaps.
10
+ *
11
+ * The extension:
12
+ * - Finds all sitemap XML files in the site catalog
13
+ * - For each HTML URL entry, adds a corresponding .md URL entry
14
+ * - Preserves lastmod dates from the HTML versions
15
+ * - Works with both main sitemaps and component-specific sitemaps
16
+ *
17
+ * @see https://agentdocsspec.com/spec/#llms-txt-freshness
18
+ */
19
+
20
+ const { parseStringPromise } = require('xml2js')
21
+ const { toMarkdownUrl } = require('../extension-utils/url-utils')
22
+
23
/**
 * Register the add-markdown-urls-to-sitemap extension.
 *
 * On `beforePublish`, every sitemap XML file in the site catalog is passed to
 * `addMarkdownUrlsToSitemap`, one after another, and the number of added entries
 * is logged. Any failure is logged and swallowed because sitemap enhancement is
 * not critical to the build.
 */
module.exports.register = function () {
  const logger = this.getLogger('add-markdown-urls-to-sitemap-extension')

  // Accepts sitemap.xml (single component) and sitemap-*.xml (multiple components)
  const isSitemapXml = (file) => {
    const outPath = file.out.path
    const hasSitemapName = outPath === 'sitemap.xml' || outPath.startsWith('sitemap-')
    return hasSitemapName && outPath.endsWith('.xml')
  }

  this.on('beforePublish', async ({ siteCatalog }) => {
    try {
      const sitemapFiles = siteCatalog.getFiles().filter(isSitemapXml)
      const sitemapCount = sitemapFiles.length

      if (sitemapCount === 0) {
        logger.info('No component sitemap files found')
        return
      }

      logger.info(`Processing ${sitemapCount} sitemap file(s)...`)

      // Files are handled sequentially; each call reports how many entries it added
      let totalAdded = 0
      for (const sitemapFile of sitemapFiles) {
        totalAdded += await addMarkdownUrlsToSitemap(sitemapFile, logger)
      }

      logger.info(`Added ${totalAdded} markdown URL entries across ${sitemapCount} sitemap(s)`)
    } catch (error) {
      // Deliberately not rethrown - sitemap enhancement is not critical
      logger.error(`Failed to add markdown URLs to sitemaps: ${error.message}`)
    }
  })
}
56
+
57
/**
 * Unwrap the first <loc> value of a parsed sitemap <url> entry.
 * xml2js may yield the text directly or as an object carrying it under `_`.
 * @param {Object} entry - Parsed <url> entry
 * @returns {*} The unwrapped value, or undefined when the entry has no <loc>
 */
function unwrapLoc(entry) {
  if (!entry.loc || !entry.loc[0]) return undefined
  const first = entry.loc[0]
  return typeof first === 'object' && first._ ? first._ : first
}

/**
 * Add markdown URL entries to a single sitemap file.
 *
 * For every <url> whose <loc> does not already end in .md, .txt or .xml, a new
 * <url> with the markdown location (and the same <lastmod>, when present) is
 * appended, unless that location is already listed. The file contents are
 * rewritten only when at least one entry was added. A <loc> that cannot be
 * parsed as a URL is skipped on its own instead of aborting the whole sitemap.
 *
 * @param {Object} sitemapFile - The sitemap file from site catalog
 * @param {Object} logger - Logger instance
 * @returns {number} Number of markdown URLs added (0 on any error)
 */
async function addMarkdownUrlsToSitemap(sitemapFile, logger) {
  try {
    const xmlContent = sitemapFile.contents.toString('utf8')
    const parsed = await parseStringPromise(xmlContent, {
      explicitArray: true,
      xmlns: false, // Don't create namespace objects
      tagNameProcessors: [], // Keep tag names as-is
    })

    if (!parsed || !parsed.urlset || !parsed.urlset.url) {
      logger.debug(`No URLs found in ${sitemapFile.out.path}`)
      return 0
    }

    const urlEntries = parsed.urlset.url
    const newEntries = []

    // Collect all existing loc strings to prevent duplicates
    const existingLocs = new Set()
    for (const entry of urlEntries) {
      const loc = unwrapLoc(entry)
      if (typeof loc === 'string') {
        existingLocs.add(loc)
      }
    }

    // For each HTML URL, create a markdown URL entry
    for (const entry of urlEntries) {
      const htmlUrl = unwrapLoc(entry)
      if (htmlUrl === undefined) continue

      if (typeof htmlUrl !== 'string') {
        logger.debug(`Skipping non-string URL: ${JSON.stringify(htmlUrl)}`)
        continue
      }

      // Skip if it's already a markdown URL or special file
      if (htmlUrl.endsWith('.md') || htmlUrl.endsWith('.txt') || htmlUrl.endsWith('.xml')) {
        continue
      }

      // Convert HTML URL to markdown URL; the URL constructor throws on a
      // malformed value, which must only cost this one entry
      let mdUrl
      try {
        const urlObj = new URL(htmlUrl)
        const mdPath = toMarkdownUrl(urlObj.pathname)
        mdUrl = `${urlObj.origin}${mdPath}`
      } catch (error) {
        logger.debug(`Skipping unparseable URL ${htmlUrl}: ${error.message}`)
        continue
      }

      // Skip if this markdown URL already exists in the sitemap or was already added
      if (existingLocs.has(mdUrl)) {
        logger.debug(`Skipping duplicate markdown URL: ${mdUrl}`)
        continue
      }

      // Create new entry for markdown URL with same lastmod
      const mdEntry = {
        loc: [mdUrl],
      }

      if (entry.lastmod && entry.lastmod[0]) {
        mdEntry.lastmod = entry.lastmod
      }

      newEntries.push(mdEntry)
      existingLocs.add(mdUrl) // Track this URL to prevent duplicates within this run
    }

    if (newEntries.length === 0) {
      logger.debug(`No markdown URLs to add for ${sitemapFile.out.path}`)
      return 0
    }

    // Add markdown entries to the sitemap
    parsed.urlset.url.push(...newEntries)

    // Rebuild XML with xml2js builder
    const builder = new (require('xml2js')).Builder({
      xmldec: { version: '1.0', encoding: 'UTF-8' },
      xmlns: true,
      renderOpts: {
        pretty: true,
        indent: ' ',
      },
    })

    const newXml = builder.buildObject(parsed)

    // Update the file contents
    sitemapFile.contents = Buffer.from(newXml, 'utf8')

    logger.debug(`Added ${newEntries.length} markdown URLs to ${sitemapFile.out.path}`)
    return newEntries.length
  } catch (error) {
    logger.error(`Error processing ${sitemapFile.out.path}: ${error.message}`)
    return 0
  }
}
@@ -291,7 +291,7 @@ module.exports.register = function () {
291
291
  }
292
292
 
293
293
  // Generate navigation section with component sitemaps and key sections
294
- const navSection = generateNavigationSection(siteUrl);
294
+ const navSection = generateNavigationSection(siteUrl, contentCatalog, components);
295
295
 
296
296
  // Calculate available space for navigation section
297
297
  const availableSpace = MAX_LLMS_TXT_CHARS - llmsTxtContent.length - 2; // -2 for \n\n separator
@@ -592,45 +592,113 @@ function truncateAtNewline(content, maxLength) {
592
592
  * Generate a comprehensive navigation section for llms.txt
593
593
  * This improves llms-txt-freshness score by providing pathways to all documentation
594
594
  *
595
- * NOTE: The section URLs below are hardcoded. If pages are renamed, moved, or removed,
596
- * these links will 404. When restructuring documentation, update these URLs accordingly.
597
- * Future improvement: Generate these from the content catalog at build time.
595
+ * Dynamically generates navigation from the content catalog - no hardcoded URLs.
598
596
  *
599
597
  * @param {string} siteUrl - Base site URL
598
+ * @param {Object} contentCatalog - Antora content catalog
599
+ * @param {Array} components - Array of component objects
600
600
  * @returns {string} Markdown navigation section
601
601
  */
602
function generateNavigationSection(siteUrl, contentCatalog, components) {
  // Components that get no "key documentation sections" entry
  const internalComponents = ['home', 'shared', 'search', 'api'];
  const parts = [];

  parts.push(`## Complete documentation index\n\n`);
  parts.push(`For comprehensive page listings, use the sitemaps:\n\n`);
  parts.push(`- [sitemap.md](${siteUrl}/sitemap.md) - Main sitemap index with all documentation\n`);
  parts.push(`- [sitemap-all.md](${siteUrl}/sitemap-all.md) - Combined listing of all documentation pages\n\n`);

  // One sitemap link per component; only 'home' is left out here
  parts.push(`### Component sitemaps\n\n`);
  for (const component of components) {
    if (component.name === 'home') continue;
    parts.push(`- [${component.title}](${siteUrl}/sitemap-${component.name}.md)\n`);
  }

  // Key sections come from each component's latest version
  parts.push(`\n### Key documentation sections\n\n`);
  for (const component of components) {
    if (internalComponents.includes(component.name)) continue;

    const latest = component.latest || component.versions[0];
    if (!latest) continue;

    // Navigation tree entries, or a page listing when the component has no nav
    const navItems = getTopLevelNavItems(contentCatalog, component, latest);
    if (navItems.length === 0) continue;

    parts.push(`**${component.title}:**\n`);
    for (const item of navItems) {
      if (!item.url) continue;
      const mdUrl = toMarkdownUrl(item.url);
      parts.push(`- [${item.content}](${siteUrl}${mdUrl})\n`);
    }
    parts.push(`\n`);
  }

  return parts.join('');
}
645
+
646
/**
 * Collect up to ten top-level navigation entries for a component version.
 *
 * The version's navigation trees are used when they yield at least one linked
 * entry. An entry without its own URL is represented by the URL of its first
 * child. Otherwise the component's pages are listed instead, sorted by title.
 *
 * @param {Object} contentCatalog - Antora content catalog
 * @param {Object} component - Component object
 * @param {Object} componentVersion - Component version object
 * @returns {Array} Array of {content, url} objects, at most ten
 */
function getTopLevelNavItems(contentCatalog, component, componentVersion) {
  const { navigation } = componentVersion;

  if (Array.isArray(navigation) && navigation.length > 0) {
    const collected = [];

    for (const tree of navigation) {
      if (!Array.isArray(tree.items)) continue;

      for (const entry of tree.items) {
        if (entry.url) {
          collected.push({ content: entry.content || 'Untitled', url: entry.url });
          continue;
        }

        // Unlinked heading: borrow the link of its first child, if it has one
        const hasChildren = entry.content && entry.items && entry.items.length > 0;
        const firstChild = hasChildren ? entry.items[0] : undefined;
        if (firstChild && firstChild.url) {
          collected.push({ content: entry.content, url: firstChild.url });
        }
      }
    }

    if (collected.length > 0) {
      return collected.slice(0, 10);
    }
  }

  // Fallback: list pages straight from the content catalog (for components like Labs)
  const candidates = contentCatalog.findBy({
    component: component.name,
    version: componentVersion.version,
    family: 'page',
  });

  const linked = [];
  for (const page of candidates) {
    if (!page.pub?.url) continue;
    linked.push({
      content: page.asciidoc?.navtitle || page.asciidoc?.doctitle || page.src.stem,
      url: page.pub.url,
    });
  }

  // Sorted by title, then capped at the first ten
  linked.sort((a, b) => a.content.localeCompare(b.content));
  return linked.slice(0, 10);
}
@@ -0,0 +1,228 @@
1
+ 'use strict'
2
+
3
+ /**
4
+ * Resolves AsciiDoc xrefs in JSON attachment files to HTML links.
5
+ *
6
+ * This extension automatically processes ALL JSON attachments across all
7
+ * components and converts xref syntax to HTML anchor tags using Antora's
8
+ * content catalog for proper URL resolution.
9
+ *
10
+ * No configuration needed - just add to playbook:
11
+ * antora:
12
+ * extensions:
13
+ * - require: ./extensions/resolve-xrefs-in-attachments.js
14
+ *
15
+ * Xref patterns handled:
16
+ * - xref:./relative-path.adoc#anchor[display text]
17
+ * - xref:module:path/to/file.adoc#anchor[display text]
18
+ * - xref:path.adoc[] (empty display uses page title or path)
19
+ */
20
+
21
/**
 * Register the resolve-xrefs-in-attachments extension.
 *
 * On `contentClassified`, every attachment whose source (or output) path ends in
 * `.json` is passed to `processJsonAttachment`. A failure on one attachment is
 * logged as a warning and does not stop the others. A summary is logged when at
 * least one attachment was modified.
 */
module.exports.register = function () {
  const logger = this.getLogger('resolve-xrefs-in-attachments')

  const isJsonAttachment = (file) => (file.src?.path || file.out?.path || '').endsWith('.json')

  this.on('contentClassified', ({ contentCatalog }) => {
    // Process all JSON attachments across all components
    const jsonAttachments = contentCatalog.findBy({ family: 'attachment' }).filter(isJsonAttachment)

    if (jsonAttachments.length === 0) {
      logger.debug('No JSON attachments found')
      return
    }

    const totals = { files: 0, xrefs: 0 }

    for (const attachment of jsonAttachments) {
      try {
        const { modified, xrefCount } = processJsonAttachment(attachment, contentCatalog, logger)
        if (!modified) continue
        totals.files += 1
        totals.xrefs += xrefCount
      } catch (err) {
        logger.warn(`Error processing ${attachment.src?.path}: ${err.message}`)
      }
    }

    if (totals.files > 0) {
      logger.info(`Resolved ${totals.xrefs} xrefs in ${totals.files} JSON attachments`)
    }
  })
}
57
+
58
/**
 * Resolve xrefs in every string value of one JSON attachment and write the
 * result back to `attachment.contents` as 2-space-indented JSON.
 *
 * Attachments without the text `xref:` and attachments that are not valid JSON
 * are left untouched.
 *
 * @param {Object} attachment - Attachment file; `contents` and `src` are read
 * @param {Object} contentCatalog - Antora content catalog used for resolution
 * @param {Object} logger - Logger instance
 * @returns {{ modified: boolean, xrefCount: number }}
 */
function processJsonAttachment (attachment, contentCatalog, logger) {
  const untouched = () => ({ modified: false, xrefCount: 0 })

  const raw = attachment.contents.toString('utf8')

  // Cheap pre-check: nothing to do when the text has no xref macro at all
  if (raw.indexOf('xref:') === -1) return untouched()

  let parsed
  try {
    parsed = JSON.parse(raw)
  } catch (err) {
    logger.debug(`Skipping invalid JSON: ${attachment.src?.path}`)
    return untouched()
  }

  // Xrefs are resolved relative to where the attachment itself lives
  const { component, version, module: moduleName } = attachment.src
  const context = {
    component,
    version,
    module: moduleName || 'ROOT',
    xrefCount: 0,
  }

  const resolved = processValue(parsed, contentCatalog, context, logger)
  attachment.contents = Buffer.from(JSON.stringify(resolved, null, 2), 'utf8')

  return { modified: true, xrefCount: context.xrefCount }
}
94
+
95
/**
 * Recursively copy a parsed JSON value, resolving xrefs in every string.
 *
 * Strings go through `resolveXrefsInString`, arrays and objects are rebuilt
 * with their members processed, and any other value is returned as is.
 *
 * @param {*} value - Parsed JSON value
 * @param {Object} contentCatalog - Antora content catalog
 * @param {Object} context - Resolution context (component, version, module, xrefCount)
 * @param {Object} logger - Logger instance
 * @returns {*} The processed value
 */
function processValue (value, contentCatalog, context, logger) {
  if (typeof value === 'string') {
    return resolveXrefsInString(value, contentCatalog, context, logger)
  }

  if (Array.isArray(value)) {
    return value.map((item) => processValue(item, contentCatalog, context, logger))
  }

  if (value && typeof value === 'object') {
    // Object.fromEntries defines own properties, so a JSON key named "__proto__"
    // is kept as data instead of replacing the copy's prototype (which is what
    // plain `result[key] = ...` assignment would do, dropping the key on output)
    return Object.fromEntries(
      Object.entries(value).map(([key, val]) => [key, processValue(val, contentCatalog, context, logger)])
    )
  }

  return value
}
117
+
118
/**
 * Replace each `xref:target[link text]` macro in a string with an HTML anchor.
 *
 * A target that resolves to a published page becomes `<a href="...">...</a>`.
 * A target that cannot be resolved is replaced by its link text, or by the
 * target's last path segment without `.adoc` when the link text is empty.
 * `context.xrefCount` is incremented once per macro found.
 *
 * @param {string} text - Text containing xref macros
 * @param {Object} contentCatalog - Antora content catalog
 * @param {Object} context - Context with component, version, module, xrefCount
 * @param {Object} logger - Logger instance
 * @returns {string} Text with xrefs resolved to HTML links
 */
function resolveXrefsInString (text, contentCatalog, context, logger) {
  if (!text || !text.includes('xref:')) return text

  // Last path segment of a target, with a trailing .adoc removed
  const bareName = (value) => value.replace(/\.adoc$/, '').replace(/^.*\//, '')

  // Resolve a page spec relative to the attachment's component/version and the given module
  const lookupPage = (spec, moduleName) => contentCatalog.resolveResource(spec, {
    component: context.component,
    version: context.version,
    module: moduleName,
  }, 'page')

  const findPage = (spec) => {
    // Already module-qualified
    if (spec.includes(':')) return lookupPage(spec, 'ROOT')

    // Try direct resolution first
    let page = lookupPage(spec, context.module)

    // If not found and looks like a properties file, try reference module
    const looksLikeProperties = spec.includes('properties') || spec.includes('cluster-') || spec.includes('topic-')
    if (!page && looksLikeProperties) {
      page = lookupPage(`reference:properties/${spec}`, 'ROOT')
    }

    // Last resort: search the component version for a page with the same stem
    if (!page) {
      const wantedStem = spec.replace(/\.adoc$/, '')
      const pages = contentCatalog.findBy({
        component: context.component,
        version: context.version,
        family: 'page',
      })
      page = pages.find((candidate) => {
        const stem = candidate.src.stem || candidate.src.basename?.replace(/\.adoc$/, '')
        return stem === wantedStem
      })
    }

    return page
  }

  // Match xref:target[link text] pattern - link text may be empty
  return text.replace(/xref:([^\[]+)\[([^\]]*)\]/g, (match, target, linkText) => {
    context.xrefCount++

    try {
      // Split off the anchor; it is re-attached to the resolved URL below
      const [targetPath, anchor = ''] = target.split('#')

      // ./file.adoc and file.adoc are treated alike; module:path.adoc stays as is
      const page = findPage(targetPath.replace(/^\.\//, ''))

      if (!(page && page.pub && page.pub.url)) {
        // Resource not found - log and fall back to plain text
        logger.debug(`Could not resolve xref: ${target} from ${context.component}:${context.module}`)
        return linkText || bareName(target)
      }

      // Convert anchor: underscores to hyphens for URL
      const url = anchor ? page.pub.url + '#' + anchor.replace(/_/g, '-') : page.pub.url

      // Use link text if provided, otherwise use page title or target path
      const display = linkText || page.asciidoc?.doctitle || bareName(targetPath)

      return `<a href="${escapeHtml(url)}">${escapeHtml(display)}</a>`
    } catch (error) {
      logger.debug(`Xref resolution error for ${target}: ${error.message}`)
      return linkText || bareName(target)
    }
  })
}
217
+
218
/**
 * Replace the HTML-significant characters `&`, `<`, `>` and `"` with their
 * entities so the text can be placed in element content or a double-quoted
 * attribute. Any falsy input yields an empty string.
 */
function escapeHtml (text) {
  if (!text) return ''
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' }
  return String(text).replace(/[&<>"]/g, (ch) => entities[ch])
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redpanda-data/docs-extensions-and-macros",
3
- "version": "4.15.10",
3
+ "version": "4.16.1",
4
4
  "description": "Antora extensions and macros developed for Redpanda documentation.",
5
5
  "keywords": [
6
6
  "antora",
@@ -62,6 +62,8 @@
62
62
  "./extensions/git-full-clone": "./extensions/git-full-clone.js",
63
63
  "./extensions/add-git-dates": "./extensions/add-git-dates.js",
64
64
  "./extensions/add-faq-structured-data": "./extensions/add-faq-structured-data.js",
65
+ "./extensions/add-llms-directive": "./extensions/add-llms-directive.js",
66
+ "./extensions/add-markdown-urls-to-sitemap": "./extensions/add-markdown-urls-to-sitemap.js",
65
67
  "./extensions/version-fetcher/set-latest-version": "./extensions/version-fetcher/set-latest-version.js",
66
68
  "./extensions/modify-connect-tag-playbook": "./extensions/modify-connect-tag-playbook.js",
67
69
  "./extensions/validate-attributes": "./extensions/validate-attributes.js",
@@ -73,6 +75,7 @@
73
75
  "./extensions/modify-redirects": "./extensions/produce-redirects.js",
74
76
  "./extensions/algolia-indexer/index": "./extensions/algolia-indexer/index.js",
75
77
  "./extensions/aggregate-terms": "./extensions/aggregate-terms.js",
78
+ "./extensions/resolve-xrefs-in-attachments": "./extensions/resolve-xrefs-in-attachments.js",
76
79
  "./macros/glossary": "./macros/glossary.js",
77
80
  "./macros/rp-connect-components": "./macros/rp-connect-components.js",
78
81
  "./macros/config-ref": "./macros/config-ref.js",
@@ -248,6 +248,55 @@ function createEntrypoint(tempDir, apiSurface) {
248
248
  return fragmentFiles;
249
249
  }
250
250
 
251
/**
 * Move a `$ref` that has sibling properties into a single-element `allOf`.
 *
 * In OpenAPI 3.0, properties placed next to `$ref` are ignored per spec, and
 * renderers that follow the spec (e.g. Bump.sh) then show the referenced
 * schema's generic description instead of the field-level one. Rewriting
 * `{ $ref, description, ... }` as `{ allOf: [{ $ref }], description, ... }`
 * lets such renderers pick up the field-level values.
 *
 * Objects that already have an `allOf` are not rewritten. The whole tree is
 * walked recursively.
 *
 * @param {*} node - Any value from the parsed OpenAPI spec.
 * @returns {*} The same value; objects and arrays are mutated in place.
 */
function wrapRefSiblings(node) {
  const isContainer = node !== null && typeof node === 'object';
  if (!isContainer) {
    return node;
  }

  if (Array.isArray(node)) {
    node.forEach((element, index) => {
      node[index] = wrapRefSiblings(element);
    });
    return node;
  }

  const ref = node['$ref'];
  const isStringRef = Boolean(ref) && typeof ref === 'string';
  const hasSiblings = Object.keys(node).length > 1;

  // A pre-existing allOf is assumed to be intentional structure from the
  // source spec, so such objects are left as they are
  if (isStringRef && hasSiblings && !node.allOf) {
    delete node['$ref'];
    node.allOf = [{ '$ref': ref }];
  }

  // Descend into every property, including a freshly created allOf
  Object.keys(node).forEach((key) => {
    node[key] = wrapRefSiblings(node[key]);
  });

  return node;
}
299
+
251
300
  /**
252
301
  * Bundle one or more OpenAPI fragment files into a single bundled YAML using a selected external bundler.
253
302
  *
@@ -529,6 +578,9 @@ function postProcessBundle(filePath, options, quiet = false) {
529
578
  bundle.info['x-generated-at'] = new Date().toISOString();
530
579
  bundle.info['x-generator'] = 'redpanda-docs-openapi-bundler';
531
580
 
581
+ // Wrap $ref siblings into allOf so renderers display field descriptions
582
+ bundle = wrapRefSiblings(bundle);
583
+
532
584
  // Sort keys for deterministic output
533
585
  const sortedBundle = sortObjectKeys(bundle);
534
586
 
@@ -776,6 +828,7 @@ module.exports = {
776
828
  normalizeTag,
777
829
  getMajorMinor,
778
830
  sortObjectKeys,
831
+ wrapRefSiblings,
779
832
  detectBundler,
780
833
  createEntrypoint,
781
834
  postProcessBundle
@@ -9,12 +9,13 @@ function bloblangExample(example) {
9
9
  let summary = example.summary.trim();
10
10
 
11
11
  // Convert Markdown headings to AsciiDoc
12
- // ##### Heading -> ==== Heading (H5 -> H4 in AsciiDoc)
13
- summary = summary.replace(/^#####\s+(.+)$/gm, '==== $1');
14
- // #### Heading -> === Heading (H4 -> H3 in AsciiDoc)
15
- summary = summary.replace(/^####\s+(.+)$/gm, '=== $1');
16
- // ### Heading -> == Heading (H3 -> H2 in AsciiDoc)
17
- summary = summary.replace(/^###\s+(.+)$/gm, '== $1');
12
+ // ##### Heading -> === Heading (H5 -> H3 in AsciiDoc)
13
+ // H3 fits correctly under the H2 "Examples" section
14
+ summary = summary.replace(/^#####\s+(.+)$/gm, '=== $1');
15
+ // #### Heading -> == Heading (H4 -> H2 in AsciiDoc)
16
+ summary = summary.replace(/^####\s+(.+)$/gm, '== $1');
17
+ // ### Heading -> = Heading (H3 -> H1 in AsciiDoc)
18
+ summary = summary.replace(/^###\s+(.+)$/gm, '= $1');
18
19
 
19
20
  // Ensure lead-in ends with a colon (replace period/exclamation/question mark if present)
20
21
  if (summary.endsWith('.') || summary.endsWith('!') || summary.endsWith('?')) {
@@ -983,45 +983,6 @@ async function handleRpcnConnectorDocs (options) {
983
983
  console.log(' Skipping diff generation, but will run binary analysis.\n')
984
984
  }
985
985
 
986
- // Publish merged version
987
- if (options.overrides && fs.existsSync(options.overrides)) {
988
- try {
989
- const { mergeOverrides, resolveReferences } = require('./generate-rpcn-connector-docs.js')
990
-
991
- const mergedData = JSON.parse(JSON.stringify(newIndex))
992
- const ovRaw = fs.readFileSync(options.overrides, 'utf8')
993
- const ovObj = JSON.parse(ovRaw)
994
- const resolvedOverrides = resolveReferences(ovObj, ovObj)
995
- mergeOverrides(mergedData, resolvedOverrides)
996
-
997
- const attachmentsRoot = path.resolve(process.cwd(), 'modules/components/attachments')
998
- fs.mkdirSync(attachmentsRoot, { recursive: true })
999
-
1000
- const existingVersions = fs.readdirSync(attachmentsRoot)
1001
- .filter(f => /^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/.test(f))
1002
- .map(f => {
1003
- const match = f.match(/^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/)
1004
- return match ? match[1] : null
1005
- })
1006
- .filter(v => v && semver.valid(v))
1007
-
1008
- const sortedVersions = semver.sort(existingVersions) // ascending order
1009
-
1010
- for (const version of sortedVersions) {
1011
- const oldFile = `connect-${version}.json`
1012
- const oldFilePath = path.join(attachmentsRoot, oldFile)
1013
- fs.unlinkSync(oldFilePath)
1014
- console.log(`🧹 Deleted old version: ${oldFile}`)
1015
- }
1016
-
1017
- const destFile = path.join(attachmentsRoot, `connect-${newVersion}.json`)
1018
- fs.writeFileSync(destFile, JSON.stringify(mergedData, null, 2), 'utf8')
1019
- console.log(`Done: Published merged version to: ${path.relative(process.cwd(), destFile)}`)
1020
- } catch (err) {
1021
- console.error(`Error: Failed to publish merged version: ${err.message}`)
1022
- }
1023
- }
1024
-
1025
986
  // Binary analysis
1026
987
  let oldBinaryAnalysis = null
1027
988
 
@@ -1248,6 +1209,49 @@ async function handleRpcnConnectorDocs (options) {
1248
1209
  }
1249
1210
  }
1250
1211
 
1212
+ // Publish merged version to attachments
1213
+ // IMPORTANT: This must run AFTER binary analysis and augmentation to include CGO-only connectors
1214
+ if (options.overrides && fs.existsSync(options.overrides)) {
1215
+ try {
1216
+ const { mergeOverrides, resolveReferences } = require('./generate-rpcn-connector-docs.js')
1217
+
1218
+ // Use the augmented newIndex which now includes CGO-only and cloud-only connectors
1219
+ const mergedData = JSON.parse(JSON.stringify(newIndex))
1220
+
1221
+ const ovRaw = fs.readFileSync(options.overrides, 'utf8')
1222
+ const ovObj = JSON.parse(ovRaw)
1223
+ const resolvedOverrides = resolveReferences(ovObj, ovObj)
1224
+
1225
+ mergeOverrides(mergedData, resolvedOverrides)
1226
+
1227
+ const attachmentsRoot = path.resolve(process.cwd(), 'modules/components/attachments')
1228
+ fs.mkdirSync(attachmentsRoot, { recursive: true })
1229
+
1230
+ const existingVersions = fs.readdirSync(attachmentsRoot)
1231
+ .filter(f => /^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/.test(f))
1232
+ .map(f => {
1233
+ const match = f.match(/^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/)
1234
+ return match ? match[1] : null
1235
+ })
1236
+ .filter(v => v && semver.valid(v))
1237
+
1238
+ const sortedVersions = semver.sort(existingVersions) // ascending order
1239
+
1240
+ for (const version of sortedVersions) {
1241
+ const oldFile = `connect-${version}.json`
1242
+ const oldFilePath = path.join(attachmentsRoot, oldFile)
1243
+ fs.unlinkSync(oldFilePath)
1244
+ console.log(`🧹 Deleted old version from attachments: ${oldFile}`)
1245
+ }
1246
+
1247
+ const destFile = path.join(attachmentsRoot, `connect-${newVersion}.json`)
1248
+ fs.writeFileSync(destFile, JSON.stringify(mergedData, null, 2), 'utf8')
1249
+ console.log(`✓ Published merged version to: ${path.relative(process.cwd(), destFile)}`)
1250
+ } catch (err) {
1251
+ console.error(`Error: Failed to publish merged version: ${err.message}`)
1252
+ }
1253
+ }
1254
+
1251
1255
  // Generate diff JSON
1252
1256
  let diffJson = null
1253
1257
  if (!oldVersion) {