npm - @redpanda-data/docs-extensions-and-macros - Versions diffs - 4.15.9 → 4.16.0 - Mend

@redpanda-data/docs-extensions-and-macros 4.15.9 → 4.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/extension-utils/llms-utils.js +64 -0
package/extensions/convert-llms-to-txt.js +111 -6
package/extensions/convert-to-markdown.js +9 -7
package/package.json +1 -1
package/tools/bundle-openapi.js +53 -0
package/tools/property-extractor/property_extractor.py +2 -1
package/tools/property-extractor/transformers.py +4 -2

package/extension-utils/llms-utils.js ADDED Viewed

@@ -0,0 +1,64 @@
+'use strict';
+/**
+ * Shared utilities for llms.txt generation and markdown processing.
+ * Used by both convert-to-markdown.js and convert-llms-to-txt.js.
+ */
+/**
+ * The base directive text that appears in markdown files pointing to llms.txt.
+ * formatLlmsDirective() builds the rendered blockquote from this constant.
+ * NOTE: LLMS_DIRECTIVE_REGEX does not reference this constant; it repeats the
+ * leading phrase as a literal pattern, so the two must be kept in sync by hand
+ * whenever this wording changes.
+ */
+const LLMS_DIRECTIVE_BASE = 'For the complete documentation index, see [llms.txt](/llms.txt)';
+/**
+ * Format the llms directive blockquote for a page.
+ *
+ * The result is a single markdown blockquote line (prefixed with "> ") with no
+ * trailing newline. When a component name is given, a second sentence is
+ * appended that links to "/<componentName>-full.txt"; the name is interpolated
+ * verbatim into both the link text and the link path.
+ *
+ * @param {string} [componentName] - Optional component name for component-specific link.
+ *   Any falsy value (undefined, empty string) produces the base directive only.
+ * @returns {string} Formatted markdown blockquote directive
+ */
+function formatLlmsDirective(componentName) {
+  if (componentName) {
+    return `> ${LLMS_DIRECTIVE_BASE}. Component-specific: [${componentName}-full.txt](/${componentName}-full.txt)`;
+  }
+  return `> ${LLMS_DIRECTIVE_BASE}`;
+}
+/**
+ * Regex pattern to match and strip the llms directive from markdown content.
+ * Matches the blockquote format with optional component-specific suffix.
+ *
+ * With the "m" flag the pattern is anchored to the start and end of each line,
+ * and with the "g" flag every such line is matched. The match runs from the
+ * leading "> " through the end of that line; the line terminator itself is not
+ * part of the match, so replacing with '' leaves an empty line behind.
+ * Because the regex is global, .test()/.exec() on this shared instance are
+ * stateful through lastIndex; String.prototype.replace is not affected.
+ */
+const LLMS_DIRECTIVE_REGEX = /^> For the complete documentation index, see \[llms\.txt\].*$/gm;
+/**
+ * Regex pattern to match and strip HTML source comments from markdown content.
+ *
+ * The pattern is not specific to "Source:" comments: it matches any HTML
+ * comment that begins at the start of a line, lazily up to the first "-->"
+ * (the comment may span several lines), together with any whitespace that
+ * follows it, including line breaks.
+ */
+const SOURCE_COMMENT_REGEX = /^<!--[\s\S]*?-->\s*/gm;
+/**
+ * Strip metadata added by convert-to-markdown extension from page content.
+ * This removes:
+ * 1. HTML comments (source URLs)
+ * 2. llms.txt directive blockquotes (redundant in aggregated exports)
+ *
+ * Non-string input is converted with content.toString('utf8'), so a Buffer is
+ * decoded as UTF-8. The two patterns are applied in the order listed above and
+ * the final result has leading and trailing whitespace trimmed.
+ *
+ * NOTE(review): SOURCE_COMMENT_REGEX removes every HTML comment that starts a
+ * line, so a line-start HTML comment that belongs to the page body is removed
+ * as well - confirm this is acceptable for the aggregated exports.
+ *
+ * @param {string|Buffer} content - The markdown content to strip
+ * @returns {string} Cleaned markdown content
+ */
+function stripMarkdownMetadata(content) {
+  let text = typeof content === 'string' ? content : content.toString('utf8');
+  // Strip HTML comments (source URLs) along with the whitespace that follows them
+  text = text.replace(SOURCE_COMMENT_REGEX, '');
+  // Strip llms.txt directive blockquote lines (the line break after each one is kept)
+  text = text.replace(LLMS_DIRECTIVE_REGEX, '');
+  return text.trim();
+}
+module.exports = {
+  LLMS_DIRECTIVE_BASE,
+  LLMS_DIRECTIVE_REGEX,
+  SOURCE_COMMENT_REGEX,
+  formatLlmsDirective,
+  stripMarkdownMetadata,
+};

package/extensions/convert-llms-to-txt.js CHANGED Viewed

@@ -1,6 +1,7 @@
 'use strict';
 const { toMarkdownUrl } = require('../extension-utils/url-utils');
+const { stripMarkdownMetadata } = require('../extension-utils/llms-utils');
 /**
  * Extracts markdown from llms.adoc page and generates AI-friendly documentation exports.
@@ -32,6 +33,8 @@ module.exports.register = function () {
       siteUrl = playbook.site?.url || 'https://docs.redpanda.com';
       logger.info(`Using site URL: ${siteUrl}`);
     }
+    // Normalize: strip trailing slashes to avoid double slashes in URL concatenation
+    siteUrl = siteUrl.replace(/\/+$/, '');
   });
   this.on('contentClassified', ({ contentCatalog }) => {
@@ -71,10 +74,10 @@ module.exports.register = function () {
         let content = llmsPage.markdownContents.toString('utf8');
         logger.info(`Extracted ${content.length} bytes of markdown content`);
-        // Strip HTML comments added by convert-to-markdown extension
+        // Strip metadata added by convert-to-markdown extension using shared helper
         // These reference the unpublished /home/llms/ URL which doesn't make sense for llms.txt
-        content = content.replace(/^<!--[\s\S]*?-->\s*/gm, '').trim();
-        logger.debug(`Stripped HTML comments, now ${content.length} bytes`);
+        content = stripMarkdownMetadata(content);
+        logger.debug(`Stripped metadata, now ${content.length} bytes`);
         // Fix URLs: convert em dashes back to double hyphens and remove invisible characters
         // The markdown converter applies smart typography that turns -- into — (em dash)
@@ -187,7 +190,8 @@ module.exports.register = function () {
       fullContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
       fullContent += `**URL**: ${pageUrl}\n\n`;
       fullContent += `---\n\n`;
-      fullContent += page.markdownContents.toString('utf8');
+      // Strip metadata (directive, source comments) from page content
+      fullContent += stripMarkdownMetadata(page.markdownContents);
       fullContent += `\n\n---\n\n`;
     });
@@ -258,7 +262,8 @@ module.exports.register = function () {
         componentContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
         componentContent += `**URL**: ${pageUrl}\n\n`;
         componentContent += `---\n\n`;
-        componentContent += page.markdownContents.toString('utf8');
+        // Strip metadata (directive, source comments) from page content
+        componentContent += stripMarkdownMetadata(page.markdownContents);
         componentContent += `\n\n---\n\n`;
       });
@@ -274,8 +279,40 @@ module.exports.register = function () {
     if (llmsPage && llmsPage.llmsTxtContent) {
       logger.info('Adding llms.txt to site root');
+      // Target: Stay under 50K chars (agent-friendly docs spec limit)
+      const MAX_LLMS_TXT_CHARS = 45000; // Leave buffer below 50K
+      let llmsTxtContent = llmsPage.llmsTxtContent;
+      // Check if base content already exceeds limit
+      if (llmsTxtContent.length >= MAX_LLMS_TXT_CHARS) {
+        logger.warn(`Base llms.txt content (${llmsTxtContent.length} chars) exceeds ${MAX_LLMS_TXT_CHARS} char limit, truncating`);
+        // Truncate at last newline before limit to avoid cutting mid-line or mid-URL
+        llmsTxtContent = truncateAtNewline(llmsTxtContent, MAX_LLMS_TXT_CHARS - 100) + '\n\n[Content truncated due to size limits]';
+      }
+      // Generate navigation section with component sitemaps and key sections
+      const navSection = generateNavigationSection(siteUrl);
+      // Calculate available space for navigation section
+      const availableSpace = MAX_LLMS_TXT_CHARS - llmsTxtContent.length - 2; // -2 for \n\n separator
+      if (availableSpace >= navSection.length) {
+        // Full navigation section fits
+        llmsTxtContent = llmsTxtContent + '\n\n' + navSection;
+        logger.info(`Injected full navigation section (${navSection.length} chars)`);
+      } else if (availableSpace > 500) {
+        // Partial navigation section - truncate at last newline to avoid cutting mid-line or mid-URL
+        const truncatedNav = truncateAtNewline(navSection, availableSpace - 50) + '\n\n[Navigation truncated due to size limits]';
+        llmsTxtContent = llmsTxtContent + '\n\n' + truncatedNav;
+        logger.warn(`Truncated navigation section from ${navSection.length} to ${truncatedNav.length} chars`);
+      } else {
+        logger.warn(`Skipping navigation injection - only ${availableSpace} chars available`);
+      }
+      logger.info(`Final llms.txt size: ${llmsTxtContent.length} chars`);
       siteCatalog.addFile({
-        contents: Buffer.from(llmsPage.llmsTxtContent, 'utf8'),
+        contents: Buffer.from(llmsTxtContent, 'utf8'),
         out: { path: 'llms.txt' },
       });
       logger.info('Successfully added llms.txt');
@@ -529,3 +566,71 @@ function addLastmodToComponentSitemaps(contentCatalog, siteCatalog, sitemapIndex
   return sitemapIndexXml;
 }
+/**
+ * Truncate content at the last newline before the specified limit.
+ * This avoids cutting mid-line or mid-URL which would produce malformed output.
+ *
+ * Content whose length is already within maxLength is returned unchanged.
+ * Otherwise the first maxLength characters are taken and cut back to just
+ * before the last "\n" in that prefix (the newline itself is dropped). If the
+ * prefix has no newline, or its only newline is the very first character, the
+ * prefix is returned as a hard cut at maxLength.
+ *
+ * @param {string} content - Content to truncate
+ * @param {number} maxLength - Maximum length
+ * @returns {string} Truncated content, ending at a line boundary when one exists
+ */
+function truncateAtNewline(content, maxLength) {
+  if (content.length <= maxLength) {
+    return content;
+  }
+  const truncated = content.slice(0, maxLength);
+  const lastNewline = truncated.lastIndexOf('\n');
+  if (lastNewline > 0) {
+    return truncated.slice(0, lastNewline);
+  }
+  // Fallback: no newline found (or only at index 0), return the hard cut as-is
+  return truncated;
+}
+/**
+ * Generate a comprehensive navigation section for llms.txt
+ * This improves llms-txt-freshness score by providing pathways to all documentation
+ *
+ * NOTE: The section URLs below are hardcoded. If pages are renamed, moved, or removed,
+ * these links will 404. When restructuring documentation, update these URLs accordingly.
+ * Future improvement: Generate these from the content catalog at build time.
+ *
+ * Every link is built as siteUrl followed by "/<path>", so siteUrl is expected
+ * to have no trailing slash; otherwise the generated URLs contain "//".
+ * The returned text starts with a level-2 heading and ends with a newline
+ * after the last list entry.
+ *
+ * @param {string} siteUrl - Base site URL (without trailing slash)
+ * @returns {string} Markdown navigation section
+ */
+function generateNavigationSection(siteUrl) {
+  let nav = `## Complete documentation index\n\n`;
+  nav += `For comprehensive page listings, use the sitemaps:\n\n`;
+  nav += `- [sitemap.md](${siteUrl}/sitemap.md) - Main sitemap index with all documentation\n`;
+  nav += `- [sitemap-all.md](${siteUrl}/sitemap-all.md) - Combined listing of all documentation pages\n\n`;
+  nav += `### Component sitemaps\n\n`;
+  nav += `- [Redpanda Self-Managed](${siteUrl}/sitemap-ROOT.md)\n`;
+  nav += `- [Redpanda Cloud](${siteUrl}/sitemap-redpanda-cloud.md)\n`;
+  nav += `- [Redpanda Connect](${siteUrl}/sitemap-redpanda-connect.md)\n`;
+  nav += `- [Redpanda Labs](${siteUrl}/sitemap-redpanda-labs.md)\n`;
+  nav += `\n### Key documentation sections\n\n`;
+  nav += `**Self-Managed:**\n`;
+  nav += `- [Deploy](${siteUrl}/current/deploy.md) - Installation and deployment guides\n`;
+  nav += `- [Manage](${siteUrl}/current/manage.md) - Cluster operations and administration\n`;
+  nav += `- [Develop](${siteUrl}/current/develop.md) - Application development guides\n`;
+  nav += `- [Reference](${siteUrl}/current/reference.md) - Configuration, CLI, and API references\n`;
+  nav += `- [Upgrade](${siteUrl}/current/upgrade.md) - Version upgrade procedures\n`;
+  nav += `- [Troubleshoot](${siteUrl}/current/troubleshoot.md) - Debugging and issue resolution\n`;
+  nav += `\n**Cloud:**\n`;
+  nav += `- [Get Started](${siteUrl}/redpanda-cloud/get-started.md) - Cloud quickstart and cluster types\n`;
+  nav += `- [Manage](${siteUrl}/redpanda-cloud/manage.md) - Cloud cluster management\n`;
+  nav += `- [Networking](${siteUrl}/redpanda-cloud/networking.md) - Network configuration\n`;
+  nav += `- [Security](${siteUrl}/redpanda-cloud/security.md) - Authentication and authorization\n`;
+  nav += `- [AI Agents](${siteUrl}/redpanda-cloud/ai-agents.md) - Agentic Data Plane documentation\n`;
+  nav += `\n**Connect:**\n`;
+  nav += `- [Components](${siteUrl}/redpanda-connect/components.md) - All connectors, processors, and more\n`;
+  nav += `- [Guides](${siteUrl}/redpanda-connect/guides.md) - Integration tutorials\n`;
+  nav += `- [Configuration](${siteUrl}/redpanda-connect/configuration.md) - YAML configuration reference\n`;
+  return nav;
+}

package/extensions/convert-to-markdown.js CHANGED Viewed

@@ -2,6 +2,7 @@ const path = require('path')
 const os = require('os')
 const yaml = require('js-yaml')
 const { toMarkdownUrl } = require('../extension-utils/url-utils')
+const { formatLlmsDirective } = require('../extension-utils/llms-utils')
 const TurndownService = require('turndown')
 const turndownPluginGfm = require('turndown-plugin-gfm')
 const { gfm } = turndownPluginGfm
@@ -500,17 +501,18 @@ module.exports.register = function () {
             restOfMarkdown = markdown.substring(h1Match[0].length).trimStart()
           }
-          // Add frontmatter AFTER H1 heading, then source reference and AI-friendly note
+          // Structure: H1 → llms.txt directive (blockquote) → frontmatter → source → content
+          // The directive must appear near the top for agent-friendly docs spec compliance
           if (canonicalUrl) {
             const componentName = page.src?.component || '';
-            const urlHint = componentName
-              ? `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview), /${componentName}-full.txt (this component only), or /llms-full.txt (complete documentation). -->`
-              : `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview) or /llms-full.txt (complete documentation). -->`;
+            // Use markdown blockquote format for the directive (visible, can be hidden with CSS)
+            const llmsDirective = formatLlmsDirective(componentName);
-            markdown = `${h1Heading}\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n${urlHint}\n\n${restOfMarkdown}`
+            markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n\n${restOfMarkdown}`
           } else if (frontmatter) {
-            // If no canonical URL but we have frontmatter, still add it after H1
-            markdown = `${h1Heading}\n${frontmatter}${restOfMarkdown}`
+            // If no canonical URL but we have frontmatter, still add directive after H1
+            const llmsDirective = formatLlmsDirective();
+            markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}${restOfMarkdown}`
           }
           // Clean up unnecessary whitespace

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@redpanda-data/docs-extensions-and-macros",
-  "version": "4.15.9",
+  "version": "4.16.0",
   "description": "Antora extensions and macros developed for Redpanda documentation.",
   "keywords": [
     "antora",

package/tools/bundle-openapi.js CHANGED Viewed

@@ -248,6 +248,55 @@ function createEntrypoint(tempDir, apiSurface) {
   return fragmentFiles;
 }
+/**
+ * Wrap $ref siblings into allOf to preserve field-level descriptions.
+ *
+ * In OpenAPI 3.0, sibling properties next to $ref are ignored per spec.
+ * Some renderers (e.g. Bump.sh) follow this behavior, displaying the generic
+ * description from the referenced schema instead of field-level overrides.
+ * This function transforms { $ref, description, ... } into
+ * { allOf: [{ $ref }], description, ... } so renderers pick up field-level
+ * descriptions correctly.
+ *
+ * Traversal details:
+ * - null and non-object values are returned unchanged.
+ * - Arrays are walked element by element and updated in place.
+ * - An object is rewritten only when its $ref is a non-empty string and it has
+ *   at least one other key. If it already has a truthy allOf, the $ref is left
+ *   where it is and nothing is rewritten for that object.
+ * - After the check, every value of the object is visited recursively,
+ *   including a newly created allOf (whose single { $ref } entry has no
+ *   siblings and is therefore left alone).
+ *
+ * NOTE(review): the check runs on every object in the tree, not only on schema
+ * objects - confirm that references with siblings in non-schema positions
+ * cannot occur in the bundled spec, since allOf is a schema keyword.
+ *
+ * @param {*} node - Any value from the parsed OpenAPI spec.
+ * @returns {*} The transformed value (mutates in place for objects).
+ */
+function wrapRefSiblings(node) {
+  if (node === null || typeof node !== 'object') {
+    return node;
+  }
+  if (Array.isArray(node)) {
+    node.forEach((item, i) => {
+      node[i] = wrapRefSiblings(item);
+    });
+    return node;
+  }
+  // Check if this object has $ref with sibling properties that need wrapping
+  if (node['$ref'] && typeof node['$ref'] === 'string') {
+    const keys = Object.keys(node);
+    const hasSiblings = keys.length > 1;
+    if (hasSiblings) {
+      // Skip if allOf already exists — assumes a pre-existing structure
+      // from the source spec that should not be modified.
+      if (!node.allOf) {
+        const ref = node['$ref'];
+        delete node['$ref'];
+        node.allOf = [{ '$ref': ref }];
+      }
+    }
+  }
+  // Recurse into all object values
+  for (const key of Object.keys(node)) {
+    node[key] = wrapRefSiblings(node[key]);
+  }
+  return node;
+}
 /**
  * Bundle one or more OpenAPI fragment files into a single bundled YAML using a selected external bundler.
  *
@@ -529,6 +578,9 @@ function postProcessBundle(filePath, options, quiet = false) {
     bundle.info['x-generated-at'] = new Date().toISOString();
     bundle.info['x-generator'] = 'redpanda-docs-openapi-bundler';
+    // Wrap $ref siblings into allOf so renderers display field descriptions
+    bundle = wrapRefSiblings(bundle);
     // Sort keys for deterministic output
     const sortedBundle = sortObjectKeys(bundle);
@@ -776,6 +828,7 @@ module.exports = {
   normalizeTag,
   getMajorMinor,
   sortObjectKeys,
+  wrapRefSiblings,
   detectBundler,
   createEntrypoint,
   postProcessBundle

package/tools/property-extractor/property_extractor.py CHANGED Viewed

@@ -2143,7 +2143,8 @@ def resolve_type_and_default(properties, definitions):
                 if resolved_type == "enum" or "enum" in resolved:
                     # Enums are represented as strings with an enum constraint in JSON Schema
                     prop["type"] = "string"
-                    if "enum" in resolved:
+                    # Only set enum if not already set by an override (accepted_values)
+                    if "enum" in resolved and "enum" not in prop:
                         prop["enum"] = resolved["enum"]
                 elif resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
                     prop["type"] = resolved_type

package/tools/property-extractor/transformers.py CHANGED Viewed

@@ -2483,8 +2483,10 @@ class RuntimeValidationEnumExtractor:
         )
         if enum_results:
-            # Extract just the values for the enum field
-            property["enum"] = [result["value"] for result in enum_results]
+            # Skip if enum was already set by an override (accepted_values)
+            if "enum" not in property:
+                # Extract just the values for the enum field
+                property["enum"] = [result["value"] for result in enum_results]
             # Add metadata about which enum values are enterprise-only
             enum_metadata = {}