npm - @redpanda-data/docs-extensions-and-macros - Versions diffs - 4.15.10 → 4.16.1 - Mend

@redpanda-data/docs-extensions-and-macros 4.15.10 → 4.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/extension-utils/llms-utils.js +18 -3
package/extensions/README.adoc +7 -0
package/extensions/add-llms-directive.js +82 -0
package/extensions/add-markdown-urls-to-sitemap.js +167 -0
package/extensions/convert-llms-to-txt.js +96 -28
package/extensions/resolve-xrefs-in-attachments.js +228 -0
package/package.json +4 -1
package/tools/bundle-openapi.js +53 -0
package/tools/redpanda-connect/helpers/bloblangExample.js +7 -6
package/tools/redpanda-connect/rpcn-connector-docs-handler.js +43 -39

package/extension-utils/llms-utils.js CHANGED Viewed

@@ -23,16 +23,31 @@ function formatLlmsDirective(componentName) {
   return `> ${LLMS_DIRECTIVE_BASE}`;
 }
+/**
+ * Helper to escape regex metacharacters in a string.
+ *
+ * Each of the characters . * + ? ^ $ { } ( ) | [ ] \ is prefixed with a
+ * backslash so the result matches literally when embedded in a RegExp source.
+ *
+ * @param {string} str - String to escape
+ * @returns {string} Escaped string safe for use in RegExp
+ */
+function escapeRegExp(str) {
+  // '$&' in the replacement stands for the matched character itself
+  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
 /**
  * Regex pattern to match and strip the llms directive from markdown content.
+ * Dynamically derived from LLMS_DIRECTIVE_BASE to stay in sync.
  * Matches the blockquote format with optional component-specific suffix.
  */
-const LLMS_DIRECTIVE_REGEX = /^> For the complete documentation index, see \[llms\.txt\].*$/gm;
+const LLMS_DIRECTIVE_REGEX = new RegExp(
+  `^> ${escapeRegExp(LLMS_DIRECTIVE_BASE)}.*$`,
+  'gm'
+);
 /**
- * Regex pattern to match and strip HTML source comments from markdown content.
+ * Regex pattern to match and strip only injected metadata HTML comments from markdown content.
+ * Only matches comments that start with known markers: "Source:" or "Note for AI:"
+ * This preserves any user-authored HTML comments in the markdown.
  */
-const SOURCE_COMMENT_REGEX = /^<!--[\s\S]*?-->\s*/gm;
+const SOURCE_COMMENT_REGEX = /^<!--\s*(?:Source:|Note for AI:)[\s\S]*?-->\s*/gm;
 /**
  * Strip metadata added by convert-to-markdown extension from page content.

package/extensions/README.adoc CHANGED Viewed

@@ -87,6 +87,13 @@ IMPORTANT: Extensions must be registered under the `antora.extensions` key in yo
 * **process-context-switcher** - Handle context-dependent content
 * **validate-attributes** - Validate AsciiDoc attributes
+=== AI-friendly documentation
+* **convert-to-markdown** - Export documentation pages as markdown with frontmatter
+* **convert-llms-to-txt** - Generate llms.txt and component-specific exports for AI agents
+* **add-llms-directive** - Inject llms.txt discovery directive into HTML pages
+* **add-markdown-urls-to-sitemap** - Add markdown URL entries to sitemap.xml
+* **convert-sitemap-to-markdown** - Generate markdown sitemap indexes
 See link:REFERENCE.adoc[Reference documentation] for complete details on each extension.
 == Common use cases

package/extensions/add-llms-directive.js ADDED Viewed

@@ -0,0 +1,82 @@
+'use strict'
+/**
+ * Adds llms.txt directive to HTML pages for agent-friendly documentation.
+ *
+ * This extension injects a blockquote directive pointing to /llms.txt immediately
+ * after the <body> tag of each documentation page. This helps AI agents discover
+ * the documentation index according to the Agent-Friendly Docs spec.
+ *
+ * The directive is styled to be visually hidden but remains in the HTML for agents
+ * to discover when they fetch pages.
+ *
+ * @see https://agentdocsspec.com/spec/#llms-txt-directive
+ */
+const { formatLlmsDirective } = require('../extension-utils/llms-utils')
+module.exports.register = function () {
+  const logger = this.getLogger('add-llms-directive-extension')
+  this.on('pagesComposed', ({ contentCatalog }) => {
+    const pages = contentCatalog.getPages()
+    let processedCount = 0
+    pages.forEach(page => {
+      if (!page.contents) return
+      try {
+        const html = page.contents.toString('utf8')
+        // Find the <body> tag and inject directive immediately after it
+        // This ensures the directive appears early in the HTML for better agent discovery
+        const bodyMatch = html.match(/(<body[^>]*>)/i)
+        if (!bodyMatch) {
+          logger.debug(`No <body> tag found in ${page.src?.path}`)
+          return
+        }
+        const bodyTag = bodyMatch[1]
+        // Get component name for component-specific link
+        const componentName = page.src?.component || ''
+        // Generate the directive in markdown blockquote format
+        const directiveMarkdown = formatLlmsDirective(componentName)
+        // Convert markdown blockquote to HTML blockquote
+        // Remove leading '> ' and convert markdown links to HTML
+        let directiveText = directiveMarkdown.replace(/^>\s*/, '')
+        // Convert markdown links [text](url) to HTML <a> tags
+        // Add tabindex="-1" and aria-hidden="true" to remove from tab order and hide from assistive tech
+        directiveText = directiveText.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2" tabindex="-1" aria-hidden="true">$1</a>')
+        // Add tabindex="-1" and aria-hidden="true" to blockquote to fully hide from assistive tech
+        const directiveHtml = `\n<blockquote class="llms-directive" tabindex="-1" aria-hidden="true">\n<p>${directiveText}</p>\n</blockquote>\n`
+        // Inject the directive immediately after the <body> tag
+        let newHtml = html.replace(bodyTag, bodyTag + directiveHtml)
+        // Add CSS to hide the directive visually (screen-reader-only pattern)
+        // This keeps it in HTML for agents but hidden from visual users
+        const cssTag = `<style>.llms-directive{position:absolute;left:-10000px;width:1px;height:1px;overflow:hidden}</style>`
+        // Inject CSS before </head> if not already present
+        if (!html.includes('.llms-directive{')) {
+          newHtml = newHtml.replace('</head>', `${cssTag}\n</head>`)
+        }
+        // Update page contents
+        page.contents = Buffer.from(newHtml, 'utf8')
+        processedCount++
+      } catch (err) {
+        logger.error(`Error adding llms directive to ${page.src?.path}: ${err.message}`)
+      }
+    })
+    logger.info(`Added llms.txt directive to ${processedCount} HTML pages`)
+  })
+}

package/extensions/add-markdown-urls-to-sitemap.js ADDED Viewed

@@ -0,0 +1,167 @@
+'use strict'
+/**
+ * Adds markdown URL entries to sitemap.xml files for AI-friendly documentation.
+ *
+ * This extension enhances Antora's generated sitemaps by adding <url> entries
+ * for markdown versions of pages alongside the HTML versions. This improves
+ * compatibility with agent-friendly documentation tools that expect markdown
+ * URLs to be discoverable in sitemaps.
+ *
+ * The extension:
+ * - Finds all sitemap XML files in the site catalog
+ * - For each HTML URL entry, adds a corresponding .md URL entry
+ * - Preserves lastmod dates from the HTML versions
+ * - Works with both main sitemaps and component-specific sitemaps
+ *
+ * @see https://agentdocsspec.com/spec/#llms-txt-freshness
+ */
+const { parseStringPromise } = require('xml2js')
+const { toMarkdownUrl } = require('../extension-utils/url-utils')
+/**
+ * Register the extension with Antora.
+ *
+ * On `beforePublish`, selects the site-catalog files whose output path ends
+ * with `.xml` and is either exactly `sitemap.xml` or starts with `sitemap-`,
+ * passes each one to addMarkdownUrlsToSitemap in sequence, and logs the total
+ * number of entries added. Any error thrown along the way is logged and not
+ * rethrown.
+ */
+module.exports.register = function () {
+  const logger = this.getLogger('add-markdown-urls-to-sitemap-extension')
+  this.on('beforePublish', async ({ siteCatalog }) => {
+    try {
+      // Find all sitemap XML files
+      // Includes both sitemap.xml (single component) and sitemap-*.xml (multiple components)
+      const sitemapFiles = siteCatalog.getFiles().filter(file => {
+        const path = file.out.path
+        // Include sitemap.xml OR sitemap-*.xml (but not the sitemap index which would be handled separately)
+        // NOTE(review): the prefix test is applied to the full output path, so only
+        // files at the site root can match — confirm that is where sitemaps live
+        return path.endsWith('.xml') && (path === 'sitemap.xml' || path.startsWith('sitemap-'))
+      })
+      if (sitemapFiles.length === 0) {
+        logger.info('No component sitemap files found')
+        return
+      }
+      logger.info(`Processing ${sitemapFiles.length} sitemap file(s)...`)
+      let totalAdded = 0
+      // Files are processed one after another; each call resolves to the
+      // number of entries it added (0 when that file could not be processed)
+      for (const sitemapFile of sitemapFiles) {
+        const added = await addMarkdownUrlsToSitemap(sitemapFile, logger)
+        totalAdded += added
+      }
+      logger.info(`Added ${totalAdded} markdown URL entries across ${sitemapFiles.length} sitemap(s)`)
+    } catch (error) {
+      logger.error(`Failed to add markdown URLs to sitemaps: ${error.message}`)
+      // Don't throw - sitemap enhancement is not critical
+    }
+  })
+}
+/**
+ * Add markdown URL entries to a single sitemap file
+ * @param {Object} sitemapFile - The sitemap file from site catalog
+ * @param {Object} logger - Logger instance
+ * @returns {number} Number of markdown URLs added
+ */
+async function addMarkdownUrlsToSitemap(sitemapFile, logger) {
+  try {
+    const xmlContent = sitemapFile.contents.toString('utf8')
+    const parsed = await parseStringPromise(xmlContent, {
+      explicitArray: true,
+      xmlns: false,  // Don't create namespace objects
+      tagNameProcessors: [],  // Keep tag names as-is
+    })
+    if (!parsed || !parsed.urlset || !parsed.urlset.url) {
+      logger.debug(`No URLs found in ${sitemapFile.out.path}`)
+      return 0
+    }
+    const urlEntries = parsed.urlset.url
+    const newEntries = []
+    // Collect all existing loc strings to prevent duplicates
+    const existingLocs = new Set()
+    for (const entry of urlEntries) {
+      if (entry.loc && entry.loc[0]) {
+        let url = entry.loc[0]
+        if (typeof url === 'object' && url._) {
+          url = url._
+        }
+        if (typeof url === 'string') {
+          existingLocs.add(url)
+        }
+      }
+    }
+    // For each HTML URL, create a markdown URL entry
+    for (const entry of urlEntries) {
+      if (!entry.loc || !entry.loc[0]) continue
+      // xml2js might parse loc as object or string, handle both
+      let htmlUrl = entry.loc[0]
+      if (typeof htmlUrl === 'object' && htmlUrl._) {
+        htmlUrl = htmlUrl._
+      }
+      if (typeof htmlUrl !== 'string') {
+        logger.debug(`Skipping non-string URL: ${JSON.stringify(htmlUrl)}`)
+        continue
+      }
+      // Skip if it's already a markdown URL or special file
+      if (htmlUrl.endsWith('.md') || htmlUrl.endsWith('.txt') || htmlUrl.endsWith('.xml')) {
+        continue
+      }
+      // Convert HTML URL to markdown URL
+      const urlObj = new URL(htmlUrl)
+      const mdPath = toMarkdownUrl(urlObj.pathname)
+      const mdUrl = `${urlObj.origin}${mdPath}`
+      // Skip if this markdown URL already exists in the sitemap or was already added
+      if (existingLocs.has(mdUrl)) {
+        logger.debug(`Skipping duplicate markdown URL: ${mdUrl}`)
+        continue
+      }
+      // Create new entry for markdown URL with same lastmod
+      const mdEntry = {
+        loc: [mdUrl],
+      }
+      if (entry.lastmod && entry.lastmod[0]) {
+        mdEntry.lastmod = entry.lastmod
+      }
+      newEntries.push(mdEntry)
+      existingLocs.add(mdUrl) // Track this URL to prevent duplicates within this run
+    }
+    if (newEntries.length === 0) {
+      logger.debug(`No markdown URLs to add for ${sitemapFile.out.path}`)
+      return 0
+    }
+    // Add markdown entries to the sitemap
+    parsed.urlset.url.push(...newEntries)
+    // Rebuild XML with xml2js builder
+    const builder = new (require('xml2js')).Builder({
+      xmldec: { version: '1.0', encoding: 'UTF-8' },
+      xmlns: true,
+      renderOpts: {
+        pretty: true,
+        indent: '  ',
+      },
+    })
+    const newXml = builder.buildObject(parsed)
+    // Update the file contents
+    sitemapFile.contents = Buffer.from(newXml, 'utf8')
+    logger.debug(`Added ${newEntries.length} markdown URLs to ${sitemapFile.out.path}`)
+    return newEntries.length
+  } catch (error) {
+    logger.error(`Error processing ${sitemapFile.out.path}: ${error.message}`)
+    return 0
+  }
+}

package/extensions/convert-llms-to-txt.js CHANGED Viewed

@@ -291,7 +291,7 @@ module.exports.register = function () {
       }
       // Generate navigation section with component sitemaps and key sections
-      const navSection = generateNavigationSection(siteUrl);
+      const navSection = generateNavigationSection(siteUrl, contentCatalog, components);
       // Calculate available space for navigation section
       const availableSpace = MAX_LLMS_TXT_CHARS - llmsTxtContent.length - 2; // -2 for \n\n separator
@@ -592,45 +592,113 @@ function truncateAtNewline(content, maxLength) {
  * Generate a comprehensive navigation section for llms.txt
  * This improves llms-txt-freshness score by providing pathways to all documentation
  *
- * NOTE: The section URLs below are hardcoded. If pages are renamed, moved, or removed,
- * these links will 404. When restructuring documentation, update these URLs accordingly.
- * Future improvement: Generate these from the content catalog at build time.
+ * Dynamically generates navigation from the content catalog - no hardcoded URLs.
  *
  * @param {string} siteUrl - Base site URL
+ * @param {Object} contentCatalog - Antora content catalog
+ * @param {Array} components - Array of component objects
  * @returns {string} Markdown navigation section
  */
-function generateNavigationSection(siteUrl) {
+function generateNavigationSection(siteUrl, contentCatalog, components) {
   let nav = `## Complete documentation index\n\n`;
   nav += `For comprehensive page listings, use the sitemaps:\n\n`;
   nav += `- [sitemap.md](${siteUrl}/sitemap.md) - Main sitemap index with all documentation\n`;
   nav += `- [sitemap-all.md](${siteUrl}/sitemap-all.md) - Combined listing of all documentation pages\n\n`;
+  // Generate component sitemaps dynamically
   nav += `### Component sitemaps\n\n`;
-  nav += `- [Redpanda Self-Managed](${siteUrl}/sitemap-ROOT.md)\n`;
-  nav += `- [Redpanda Cloud](${siteUrl}/sitemap-redpanda-cloud.md)\n`;
-  nav += `- [Redpanda Connect](${siteUrl}/sitemap-redpanda-connect.md)\n`;
-  nav += `- [Redpanda Labs](${siteUrl}/sitemap-redpanda-labs.md)\n`;
+  components.forEach(component => {
+    // Skip internal components like 'home'
+    // NOTE(review): only 'home' is skipped here, while the key-sections loop
+    // below also skips 'shared', 'search' and 'api' — confirm the difference is intended
+    if (component.name === 'home') return;
+    nav += `- [${component.title}](${siteUrl}/sitemap-${component.name}.md)\n`;
+  });
+  // Generate key sections dynamically from Antora's navigation structure
   nav += `\n### Key documentation sections\n\n`;
-  nav += `**Self-Managed:**\n`;
-  nav += `- [Deploy](${siteUrl}/current/deploy.md) - Installation and deployment guides\n`;
-  nav += `- [Manage](${siteUrl}/current/manage.md) - Cluster operations and administration\n`;
-  nav += `- [Develop](${siteUrl}/current/develop.md) - Application development guides\n`;
-  nav += `- [Reference](${siteUrl}/current/reference.md) - Configuration, CLI, and API references\n`;
-  nav += `- [Upgrade](${siteUrl}/current/upgrade.md) - Version upgrade procedures\n`;
-  nav += `- [Troubleshoot](${siteUrl}/current/troubleshoot.md) - Debugging and issue resolution\n`;
-  nav += `\n**Cloud:**\n`;
-  nav += `- [Get Started](${siteUrl}/redpanda-cloud/get-started.md) - Cloud quickstart and cluster types\n`;
-  nav += `- [Manage](${siteUrl}/redpanda-cloud/manage.md) - Cloud cluster management\n`;
-  nav += `- [Networking](${siteUrl}/redpanda-cloud/networking.md) - Network configuration\n`;
-  nav += `- [Security](${siteUrl}/redpanda-cloud/security.md) - Authentication and authorization\n`;
-  nav += `- [AI Agents](${siteUrl}/redpanda-cloud/ai-agents.md) - Agentic Data Plane documentation\n`;
-  nav += `\n**Connect:**\n`;
-  nav += `- [Components](${siteUrl}/redpanda-connect/components.md) - All connectors, processors, and more\n`;
-  nav += `- [Guides](${siteUrl}/redpanda-connect/guides.md) - Integration tutorials\n`;
-  nav += `- [Configuration](${siteUrl}/redpanda-connect/configuration.md) - YAML configuration reference\n`;
+  components.forEach(component => {
+    // Skip internal/utility components
+    const internalComponents = ['home', 'shared', 'search', 'api'];
+    if (internalComponents.includes(component.name)) return;
+    // NOTE(review): reading versions[0] throws if component.versions is
+    // undefined — presumably the catalog always populates it; confirm
+    const latest = component.latest || component.versions[0];
+    if (!latest) return;
+    // Get top-level navigation items from the component's navigation tree
+    // Falls back to pages for components without nav (like Labs)
+    const navItems = getTopLevelNavItems(contentCatalog, component, latest);
+    if (navItems.length === 0) return;
+    nav += `**${component.title}:**\n`;
+    navItems.forEach(item => {
+      if (item.url) {
+        // NOTE(review): toMarkdownUrl is not defined in the visible part of
+        // this file — confirm it is imported at the top of the module
+        const mdUrl = toMarkdownUrl(item.url);
+        nav += `- [${item.content}](${siteUrl}${mdUrl})\n`;
+      }
+    });
+    nav += `\n`;
+  });
   return nav;
 }
+/**
+ * Get top-level navigation items from a component version.
+ * First tries the navigation tree (from nav.adoc), then falls back to pages.
+ *
+ * From the navigation tree, an item with its own `url` is used directly; an
+ * item without a `url` but with children is represented by its own `content`
+ * and the `url` of its first child (and dropped if that child has no `url`).
+ * Both the navigation result and the page fallback are capped at 10 entries.
+ *
+ * @param {Object} contentCatalog - Antora content catalog
+ * @param {Object} component - Component object
+ * @param {Object} componentVersion - Component version object
+ * @returns {Array} Array of {content, url} objects (at most 10)
+ */
+function getTopLevelNavItems(contentCatalog, component, componentVersion) {
+  const navigation = componentVersion.navigation;
+  // If component has navigation, use it
+  if (navigation && Array.isArray(navigation) && navigation.length > 0) {
+    const topLevelItems = [];
+    navigation.forEach(navTree => {
+      if (!navTree.items || !Array.isArray(navTree.items)) return;
+      navTree.items.forEach(item => {
+        if (item.url) {
+          topLevelItems.push({
+            content: item.content || 'Untitled',
+            url: item.url,
+          });
+        } else if (item.content && item.items && item.items.length > 0) {
+          // Unlinked group heading: link it to its first child's page
+          const firstChild = item.items[0];
+          if (firstChild && firstChild.url) {
+            topLevelItems.push({
+              content: item.content,
+              url: firstChild.url,
+            });
+          }
+        }
+      });
+    });
+    // An empty result falls through to the page-based fallback below
+    if (topLevelItems.length > 0) {
+      return topLevelItems.slice(0, 10);
+    }
+  }
+  // Fallback: get pages directly from content catalog (for components like Labs)
+  const pages = contentCatalog.findBy({
+    component: component.name,
+    version: componentVersion.version,
+    family: 'page',
+  });
+  // Return up to 10 pages that have a URL, sorted by display title
+  // (navtitle, then doctitle, then the source file stem)
+  return pages
+    .filter(page => page.pub?.url)
+    .map(page => ({
+      content: page.asciidoc?.navtitle || page.asciidoc?.doctitle || page.src.stem,
+      url: page.pub.url,
+    }))
+    .sort((a, b) => a.content.localeCompare(b.content))
+    .slice(0, 10);
+}

package/extensions/resolve-xrefs-in-attachments.js ADDED Viewed

@@ -0,0 +1,228 @@
+'use strict'
+/**
+ * Resolves AsciiDoc xrefs in JSON attachment files to HTML links.
+ *
+ * This extension automatically processes ALL JSON attachments across all
+ * components and converts xref syntax to HTML anchor tags using Antora's
+ * content catalog for proper URL resolution.
+ *
+ * No configuration needed - just add to playbook:
+ *   antora:
+ *     extensions:
+ *       - require: ./extensions/resolve-xrefs-in-attachments.js
+ *
+ * Xref patterns handled:
+ * - xref:./relative-path.adoc#anchor[display text]
+ * - xref:module:path/to/file.adoc#anchor[display text]
+ * - xref:path.adoc[] (empty display uses page title or path)
+ */
+/**
+ * Register the extension with Antora.
+ *
+ * On `contentClassified`, collects every attachment whose source path (or,
+ * failing that, output path) ends with `.json`, runs processJsonAttachment on
+ * each, and logs a summary when at least one attachment was modified. A failure
+ * in one attachment is logged as a warning and does not stop the others.
+ */
+module.exports.register = function () {
+  const logger = this.getLogger('resolve-xrefs-in-attachments')
+  this.on('contentClassified', ({ contentCatalog }) => {
+    // Process all JSON attachments across all components
+    const allAttachments = contentCatalog.findBy({ family: 'attachment' })
+    const jsonAttachments = allAttachments.filter((att) => {
+      const path = att.src?.path || att.out?.path || ''
+      return path.endsWith('.json')
+    })
+    if (!jsonAttachments.length) {
+      logger.debug('No JSON attachments found')
+      return
+    }
+    // processedCount: attachments reported as modified
+    // xrefCount: sum of the per-attachment xref counts for those attachments
+    let processedCount = 0
+    let xrefCount = 0
+    jsonAttachments.forEach((attachment) => {
+      try {
+        const result = processJsonAttachment(attachment, contentCatalog, logger)
+        if (result.modified) {
+          processedCount++
+          xrefCount += result.xrefCount
+        }
+      } catch (err) {
+        logger.warn(`Error processing ${attachment.src?.path}: ${err.message}`)
+      }
+    })
+    if (processedCount > 0) {
+      logger.info(`Resolved ${xrefCount} xrefs in ${processedCount} JSON attachments`)
+    }
+  })
+}
+/**
+ * Process a JSON attachment, resolving xrefs in all string values.
+ * @returns {{ modified: boolean, xrefCount: number }}
+ */
+function processJsonAttachment (attachment, contentCatalog, logger) {
+  const contentStr = attachment.contents.toString('utf8')
+  // Quick check - skip if no xrefs present
+  if (!contentStr.includes('xref:')) {
+    return { modified: false, xrefCount: 0 }
+  }
+  let data
+  try {
+    data = JSON.parse(contentStr)
+  } catch (err) {
+    logger.debug(`Skipping invalid JSON: ${attachment.src?.path}`)
+    return { modified: false, xrefCount: 0 }
+  }
+  // Create a context for xref resolution using the attachment's location
+  const context = {
+    component: attachment.src.component,
+    version: attachment.src.version,
+    module: attachment.src.module || 'ROOT',
+    xrefCount: 0,
+  }
+  // Recursively process all string values in the JSON
+  const processed = processValue(data, contentCatalog, context, logger)
+  // Write back the modified JSON
+  attachment.contents = Buffer.from(JSON.stringify(processed, null, 2), 'utf8')
+  return { modified: true, xrefCount: context.xrefCount }
+}
+/**
+ * Recursively process a value, resolving xrefs in strings.
+ *
+ * Strings are passed through resolveXrefsInString. Arrays and objects are
+ * rebuilt as new arrays/objects with each element or property value processed
+ * in turn; object keys are copied unchanged. Any other value (numbers,
+ * booleans, null) is returned as is.
+ *
+ * @param {*} value - Parsed JSON value
+ * @param {Object} contentCatalog - Antora content catalog
+ * @param {Object} context - Resolution context, passed through to resolveXrefsInString
+ * @param {Object} logger - Logger instance
+ * @returns {*} The processed value
+ */
+function processValue (value, contentCatalog, context, logger) {
+  if (typeof value === 'string') {
+    return resolveXrefsInString(value, contentCatalog, context, logger)
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => processValue(item, contentCatalog, context, logger))
+  }
+  // The truthiness test excludes null, whose typeof is also 'object'
+  if (value && typeof value === 'object') {
+    const result = {}
+    for (const [key, val] of Object.entries(value)) {
+      result[key] = processValue(val, contentCatalog, context, logger)
+    }
+    return result
+  }
+  return value
+}
+/**
+ * Resolve xrefs in a string to HTML anchor tags.
+ *
+ * @param {string} text - Text containing xref macros
+ * @param {Object} contentCatalog - Antora content catalog
+ * @param {Object} context - Context with component, version, module, xrefCount
+ * @param {Object} logger - Logger instance
+ * @returns {string} Text with xrefs resolved to HTML links
+ */
+function resolveXrefsInString (text, contentCatalog, context, logger) {
+  if (!text || !text.includes('xref:')) return text
+  // Match xref:target[link text] pattern - link text may be empty
+  const xrefPattern = /xref:([^\[]+)\[([^\]]*)\]/g
+  return text.replace(xrefPattern, (match, target, linkText) => {
+    context.xrefCount++
+    try {
+      // Handle anchor separately
+      let anchor = ''
+      let targetPath = target
+      if (target.includes('#')) {
+        const parts = target.split('#')
+        targetPath = parts[0]
+        anchor = parts[1] || ''
+      }
+      // Normalize target path for resolution
+      // - ./file.adoc → file.adoc (relative)
+      // - file.adoc → file.adoc (same dir)
+      // - module:path.adoc → already qualified
+      let normalizedTarget = targetPath.replace(/^\.\//, '')
+      // For relative paths without module prefix, try to find a page that matches
+      let resource = null
+      if (!normalizedTarget.includes(':')) {
+        // Try direct resolution first
+        resource = contentCatalog.resolveResource(normalizedTarget, {
+          component: context.component,
+          version: context.version,
+          module: context.module,
+        }, 'page')
+        // If not found and looks like a properties file, try reference module
+        if (!resource && (normalizedTarget.includes('properties') || normalizedTarget.includes('cluster-') || normalizedTarget.includes('topic-'))) {
+          // Try with explicit reference module path
+          resource = contentCatalog.resolveResource(`reference:properties/${normalizedTarget}`, {
+            component: context.component,
+            version: context.version,
+            module: 'ROOT',
+          }, 'page')
+        }
+        // Try finding the page directly by searching
+        if (!resource) {
+          const basename = normalizedTarget.replace(/\.adoc$/, '')
+          const pages = contentCatalog.findBy({
+            component: context.component,
+            version: context.version,
+            family: 'page',
+          })
+          resource = pages.find(p => {
+            const pageStem = p.src.stem || p.src.basename?.replace(/\.adoc$/, '')
+            return pageStem === basename
+          })
+        }
+      } else {
+        // Already module-qualified
+        resource = contentCatalog.resolveResource(normalizedTarget, {
+          component: context.component,
+          version: context.version,
+          module: 'ROOT',
+        }, 'page')
+      }
+      if (resource && resource.pub && resource.pub.url) {
+        let url = resource.pub.url
+        if (anchor) {
+          // Convert anchor: underscores to hyphens for URL
+          url += '#' + anchor.replace(/_/g, '-')
+        }
+        // Use link text if provided, otherwise use page title or target path
+        const display = linkText || resource.asciidoc?.doctitle || targetPath.replace(/\.adoc$/, '').replace(/^.*\//, '')
+        return `<a href="${escapeHtml(url)}">${escapeHtml(display)}</a>`
+      } else {
+        // Resource not found - log and keep original or use link text
+        logger.debug(`Could not resolve xref: ${target} from ${context.component}:${context.module}`)
+        return linkText || target.replace(/\.adoc$/, '').replace(/^.*\//, '')
+      }
+    } catch (error) {
+      logger.debug(`Xref resolution error for ${target}: ${error.message}`)
+      return linkText || target.replace(/\.adoc$/, '').replace(/^.*\//, '')
+    }
+  })
+}
+/**
+ * Escape HTML special characters to prevent XSS.
+ *
+ * Replaces `&`, `<`, `>` and `"` with their entities. Single quotes are not
+ * escaped, so the result is only suitable for text content and for attribute
+ * values delimited by double quotes.
+ *
+ * @param {*} text - Value to escape; converted with String()
+ * @returns {string} Escaped text, or '' for any falsy input
+ */
+function escapeHtml (text) {
+  if (!text) return ''
+  // '&' is replaced first so the ampersands of the entities produced by the
+  // later replacements are not escaped a second time
+  return String(text)
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@redpanda-data/docs-extensions-and-macros",
-  "version": "4.15.10",
+  "version": "4.16.1",
   "description": "Antora extensions and macros developed for Redpanda documentation.",
   "keywords": [
     "antora",
@@ -62,6 +62,8 @@
     "./extensions/git-full-clone": "./extensions/git-full-clone.js",
     "./extensions/add-git-dates": "./extensions/add-git-dates.js",
     "./extensions/add-faq-structured-data": "./extensions/add-faq-structured-data.js",
+    "./extensions/add-llms-directive": "./extensions/add-llms-directive.js",
+    "./extensions/add-markdown-urls-to-sitemap": "./extensions/add-markdown-urls-to-sitemap.js",
     "./extensions/version-fetcher/set-latest-version": "./extensions/version-fetcher/set-latest-version.js",
     "./extensions/modify-connect-tag-playbook": "./extensions/modify-connect-tag-playbook.js",
     "./extensions/validate-attributes": "./extensions/validate-attributes.js",
@@ -73,6 +75,7 @@
     "./extensions/modify-redirects": "./extensions/produce-redirects.js",
     "./extensions/algolia-indexer/index": "./extensions/algolia-indexer/index.js",
     "./extensions/aggregate-terms": "./extensions/aggregate-terms.js",
+    "./extensions/resolve-xrefs-in-attachments": "./extensions/resolve-xrefs-in-attachments.js",
     "./macros/glossary": "./macros/glossary.js",
     "./macros/rp-connect-components": "./macros/rp-connect-components.js",
     "./macros/config-ref": "./macros/config-ref.js",

package/tools/bundle-openapi.js CHANGED Viewed

@@ -248,6 +248,55 @@ function createEntrypoint(tempDir, apiSurface) {
   return fragmentFiles;
 }
+/**
+ * Wrap $ref siblings into allOf to preserve field-level descriptions.
+ *
+ * In OpenAPI 3.0, sibling properties next to $ref are ignored per spec.
+ * Some renderers (e.g. Bump.sh) follow this behavior, displaying the generic
+ * description from the referenced schema instead of field-level overrides.
+ * This function transforms { $ref, description, ... } into
+ * { allOf: [{ $ref }], description, ... } so renderers pick up field-level
+ * descriptions correctly.
+ *
+ * The walk visits every array element and object value. The rewrite applies to
+ * any object whose `$ref` is a non-empty string and that has at least one other
+ * key, unless it already has a truthy `allOf`.
+ * NOTE(review): the check does not look at where in the document the object
+ * sits, so reference objects outside schemas are wrapped too — confirm that
+ * is intended.
+ *
+ * @param {*} node - Any value from the parsed OpenAPI spec.
+ * @returns {*} The transformed value (mutates in place for objects).
+ */
+function wrapRefSiblings(node) {
+  // Primitives and null have nothing to rewrite or recurse into
+  if (node === null || typeof node !== 'object') {
+    return node;
+  }
+  if (Array.isArray(node)) {
+    node.forEach((item, i) => {
+      node[i] = wrapRefSiblings(item);
+    });
+    return node;
+  }
+  // Check if this object has $ref with sibling properties that need wrapping
+  if (node['$ref'] && typeof node['$ref'] === 'string') {
+    const keys = Object.keys(node);
+    const hasSiblings = keys.length > 1;
+    if (hasSiblings) {
+      // Skip if allOf already exists — assumes a pre-existing structure
+      // from the source spec that should not be modified.
+      if (!node.allOf) {
+        const ref = node['$ref'];
+        delete node['$ref'];
+        node.allOf = [{ '$ref': ref }];
+      }
+    }
+  }
+  // Recurse into all object values
+  // (a freshly created allOf entry holds only $ref, so it is not wrapped again)
+  for (const key of Object.keys(node)) {
+    node[key] = wrapRefSiblings(node[key]);
+  }
+  return node;
+}
 /**
  * Bundle one or more OpenAPI fragment files into a single bundled YAML using a selected external bundler.
  *
@@ -529,6 +578,9 @@ function postProcessBundle(filePath, options, quiet = false) {
     bundle.info['x-generated-at'] = new Date().toISOString();
     bundle.info['x-generator'] = 'redpanda-docs-openapi-bundler';
+    // Wrap $ref siblings into allOf so renderers display field descriptions
+    bundle = wrapRefSiblings(bundle);
     // Sort keys for deterministic output
     const sortedBundle = sortObjectKeys(bundle);
@@ -776,6 +828,7 @@ module.exports = {
   normalizeTag,
   getMajorMinor,
   sortObjectKeys,
+  wrapRefSiblings,
   detectBundler,
   createEntrypoint,
   postProcessBundle

package/tools/redpanda-connect/helpers/bloblangExample.js CHANGED Viewed

@@ -9,12 +9,13 @@ function bloblangExample(example) {
       let summary = example.summary.trim();
       // Convert Markdown headings to AsciiDoc
-      // ##### Heading -> ==== Heading (H5 -> H4 in AsciiDoc)
-      summary = summary.replace(/^#####\s+(.+)$/gm, '==== $1');
-      // #### Heading -> === Heading (H4 -> H3 in AsciiDoc)
-      summary = summary.replace(/^####\s+(.+)$/gm, '=== $1');
-      // ### Heading -> == Heading (H3 -> H2 in AsciiDoc)
-      summary = summary.replace(/^###\s+(.+)$/gm, '== $1');
+      // ##### Heading -> === Heading (H5 -> H3 in AsciiDoc)
+      // H3 fits correctly under the H2 "Examples" section
+      summary = summary.replace(/^#####\s+(.+)$/gm, '=== $1');
+      // #### Heading -> == Heading (H4 -> H2 in AsciiDoc)
+      summary = summary.replace(/^####\s+(.+)$/gm, '== $1');
+      // ### Heading -> = Heading (H3 -> H1 in AsciiDoc)
+      summary = summary.replace(/^###\s+(.+)$/gm, '= $1');
       // Ensure lead-in ends with a colon (replace period/exclamation/question mark if present)
       if (summary.endsWith('.') || summary.endsWith('!') || summary.endsWith('?')) {

package/tools/redpanda-connect/rpcn-connector-docs-handler.js CHANGED Viewed

@@ -983,45 +983,6 @@ async function handleRpcnConnectorDocs (options) {
     console.log('  Skipping diff generation, but will run binary analysis.\n')
   }
-  // Publish merged version
-  if (options.overrides && fs.existsSync(options.overrides)) {
-    try {
-      const { mergeOverrides, resolveReferences } = require('./generate-rpcn-connector-docs.js')
-      const mergedData = JSON.parse(JSON.stringify(newIndex))
-      const ovRaw = fs.readFileSync(options.overrides, 'utf8')
-      const ovObj = JSON.parse(ovRaw)
-      const resolvedOverrides = resolveReferences(ovObj, ovObj)
-      mergeOverrides(mergedData, resolvedOverrides)
-      const attachmentsRoot = path.resolve(process.cwd(), 'modules/components/attachments')
-      fs.mkdirSync(attachmentsRoot, { recursive: true })
-      const existingVersions = fs.readdirSync(attachmentsRoot)
-        .filter(f => /^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/.test(f))
-        .map(f => {
-          const match = f.match(/^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/)
-          return match ? match[1] : null
-        })
-        .filter(v => v && semver.valid(v))
-      const sortedVersions = semver.sort(existingVersions) // ascending order
-      for (const version of sortedVersions) {
-        const oldFile = `connect-${version}.json`
-        const oldFilePath = path.join(attachmentsRoot, oldFile)
-        fs.unlinkSync(oldFilePath)
-        console.log(`🧹 Deleted old version: ${oldFile}`)
-      }
-      const destFile = path.join(attachmentsRoot, `connect-${newVersion}.json`)
-      fs.writeFileSync(destFile, JSON.stringify(mergedData, null, 2), 'utf8')
-      console.log(`Done: Published merged version to: ${path.relative(process.cwd(), destFile)}`)
-    } catch (err) {
-      console.error(`Error: Failed to publish merged version: ${err.message}`)
-    }
-  }
   // Binary analysis
   let oldBinaryAnalysis = null
@@ -1248,6 +1209,49 @@ async function handleRpcnConnectorDocs (options) {
     }
   }
+  // Publish merged version to attachments
+  // IMPORTANT: This must run AFTER binary analysis and augmentation to include CGO-only connectors
+  if (options.overrides && fs.existsSync(options.overrides)) {
+    try {
+      const { mergeOverrides, resolveReferences } = require('./generate-rpcn-connector-docs.js')
+      // Use the augmented newIndex which now includes CGO-only and cloud-only connectors
+      const mergedData = JSON.parse(JSON.stringify(newIndex))
+      const ovRaw = fs.readFileSync(options.overrides, 'utf8')
+      const ovObj = JSON.parse(ovRaw)
+      const resolvedOverrides = resolveReferences(ovObj, ovObj)
+      mergeOverrides(mergedData, resolvedOverrides)
+      const attachmentsRoot = path.resolve(process.cwd(), 'modules/components/attachments')
+      fs.mkdirSync(attachmentsRoot, { recursive: true })
+      const existingVersions = fs.readdirSync(attachmentsRoot)
+        .filter(f => /^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/.test(f))
+        .map(f => {
+          const match = f.match(/^connect-(\d+\.\d+\.\d+(?:-[0-9A-Za-z-.]+)?)\.json$/)
+          return match ? match[1] : null
+        })
+        .filter(v => v && semver.valid(v))
+      const sortedVersions = semver.sort(existingVersions) // ascending order
+      for (const version of sortedVersions) {
+        const oldFile = `connect-${version}.json`
+        const oldFilePath = path.join(attachmentsRoot, oldFile)
+        fs.unlinkSync(oldFilePath)
+        console.log(`🧹 Deleted old version from attachments: ${oldFile}`)
+      }
+      const destFile = path.join(attachmentsRoot, `connect-${newVersion}.json`)
+      fs.writeFileSync(destFile, JSON.stringify(mergedData, null, 2), 'utf8')
+      console.log(`✓ Published merged version to: ${path.relative(process.cwd(), destFile)}`)
+    } catch (err) {
+      console.error(`Error: Failed to publish merged version: ${err.message}`)
+    }
+  }
   // Generate diff JSON
   let diffJson = null
   if (!oldVersion) {