@redpanda-data/docs-extensions-and-macros 4.15.8 → 4.15.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
'use strict';

/**
 * Shared utilities for llms.txt generation and markdown processing.
 * Used by both convert-to-markdown.js and convert-llms-to-txt.js.
 */

/**
 * The base directive text that appears in markdown files pointing to llms.txt.
 * This is the canonical source of truth used for both rendering and stripping.
 */
const LLMS_DIRECTIVE_BASE = 'For the complete documentation index, see [llms.txt](/llms.txt)';

/**
 * Escape regex metacharacters so a literal string can be embedded in a RegExp.
 * @param {string} text - Literal text to escape
 * @returns {string} Regex-safe version of the text
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Format the llms directive blockquote for a page.
 * @param {string} [componentName] - Optional component name for component-specific link
 * @returns {string} Formatted markdown blockquote directive
 */
function formatLlmsDirective(componentName) {
  if (componentName) {
    return `> ${LLMS_DIRECTIVE_BASE}. Component-specific: [${componentName}-full.txt](/${componentName}-full.txt)`;
  }
  return `> ${LLMS_DIRECTIVE_BASE}`;
}

/**
 * Regex pattern to match and strip the llms directive from markdown content.
 * Matches the blockquote format with optional component-specific suffix.
 *
 * Built from LLMS_DIRECTIVE_BASE (instead of hand-duplicating the text) so the
 * rendered directive and the strip pattern cannot drift apart; the trailing
 * `.*` still swallows the optional component-specific suffix on the same line.
 */
const LLMS_DIRECTIVE_REGEX = new RegExp(`^> ${escapeRegExp(LLMS_DIRECTIVE_BASE)}.*$`, 'gm');

/**
 * Regex pattern to match and strip HTML source comments from markdown content.
 */
const SOURCE_COMMENT_REGEX = /^<!--[\s\S]*?-->\s*/gm;

/**
 * Strip metadata added by convert-to-markdown extension from page content.
 * This removes:
 * 1. HTML comments (source URLs)
 * 2. llms.txt directive blockquotes (redundant in aggregated exports)
 *
 * @param {string|Buffer} content - The markdown content to strip
 * @returns {string} Cleaned markdown content, trimmed of leading/trailing whitespace
 */
function stripMarkdownMetadata(content) {
  let text = typeof content === 'string' ? content : content.toString('utf8');

  // Strip HTML comments (source URLs)
  text = text.replace(SOURCE_COMMENT_REGEX, '');

  // Strip llms.txt directive blockquotes
  text = text.replace(LLMS_DIRECTIVE_REGEX, '');

  return text.trim();
}
57
+
58
// Public surface of llms-utils: the canonical directive text, the two strip
// regexes, and the render/strip helpers shared by both converter scripts.
module.exports = {
  LLMS_DIRECTIVE_BASE,
  LLMS_DIRECTIVE_REGEX,
  SOURCE_COMMENT_REGEX,
  formatLlmsDirective,
  stripMarkdownMetadata,
};
@@ -1,6 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  const { toMarkdownUrl } = require('../extension-utils/url-utils');
4
+ const { stripMarkdownMetadata } = require('../extension-utils/llms-utils');
4
5
 
5
6
  /**
6
7
  * Extracts markdown from llms.adoc page and generates AI-friendly documentation exports.
@@ -32,6 +33,8 @@ module.exports.register = function () {
32
33
  siteUrl = playbook.site?.url || 'https://docs.redpanda.com';
33
34
  logger.info(`Using site URL: ${siteUrl}`);
34
35
  }
36
+ // Normalize: strip trailing slashes to avoid double slashes in URL concatenation
37
+ siteUrl = siteUrl.replace(/\/+$/, '');
35
38
  });
36
39
 
37
40
  this.on('contentClassified', ({ contentCatalog }) => {
@@ -71,10 +74,10 @@ module.exports.register = function () {
71
74
  let content = llmsPage.markdownContents.toString('utf8');
72
75
  logger.info(`Extracted ${content.length} bytes of markdown content`);
73
76
 
74
- // Strip HTML comments added by convert-to-markdown extension
77
+ // Strip metadata added by convert-to-markdown extension using shared helper
75
78
  // These reference the unpublished /home/llms/ URL which doesn't make sense for llms.txt
76
- content = content.replace(/^<!--[\s\S]*?-->\s*/gm, '').trim();
77
- logger.debug(`Stripped HTML comments, now ${content.length} bytes`);
79
+ content = stripMarkdownMetadata(content);
80
+ logger.debug(`Stripped metadata, now ${content.length} bytes`);
78
81
 
79
82
  // Fix URLs: convert em dashes back to double hyphens and remove invisible characters
80
83
  // The markdown converter applies smart typography that turns -- into — (em dash)
@@ -187,7 +190,8 @@ module.exports.register = function () {
187
190
  fullContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
188
191
  fullContent += `**URL**: ${pageUrl}\n\n`;
189
192
  fullContent += `---\n\n`;
190
- fullContent += page.markdownContents.toString('utf8');
193
+ // Strip metadata (directive, source comments) from page content
194
+ fullContent += stripMarkdownMetadata(page.markdownContents);
191
195
  fullContent += `\n\n---\n\n`;
192
196
  });
193
197
 
@@ -258,7 +262,8 @@ module.exports.register = function () {
258
262
  componentContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
259
263
  componentContent += `**URL**: ${pageUrl}\n\n`;
260
264
  componentContent += `---\n\n`;
261
- componentContent += page.markdownContents.toString('utf8');
265
+ // Strip metadata (directive, source comments) from page content
266
+ componentContent += stripMarkdownMetadata(page.markdownContents);
262
267
  componentContent += `\n\n---\n\n`;
263
268
  });
264
269
 
@@ -274,8 +279,40 @@ module.exports.register = function () {
274
279
  if (llmsPage && llmsPage.llmsTxtContent) {
275
280
  logger.info('Adding llms.txt to site root');
276
281
 
282
+ // Target: Stay under 50K chars (agent-friendly docs spec limit)
283
+ const MAX_LLMS_TXT_CHARS = 45000; // Leave buffer below 50K
284
+ let llmsTxtContent = llmsPage.llmsTxtContent;
285
+
286
+ // Check if base content already exceeds limit
287
+ if (llmsTxtContent.length >= MAX_LLMS_TXT_CHARS) {
288
+ logger.warn(`Base llms.txt content (${llmsTxtContent.length} chars) exceeds ${MAX_LLMS_TXT_CHARS} char limit, truncating`);
289
+ // Truncate at last newline before limit to avoid cutting mid-line or mid-URL
290
+ llmsTxtContent = truncateAtNewline(llmsTxtContent, MAX_LLMS_TXT_CHARS - 100) + '\n\n[Content truncated due to size limits]';
291
+ }
292
+
293
+ // Generate navigation section with component sitemaps and key sections
294
+ const navSection = generateNavigationSection(siteUrl);
295
+
296
+ // Calculate available space for navigation section
297
+ const availableSpace = MAX_LLMS_TXT_CHARS - llmsTxtContent.length - 2; // -2 for \n\n separator
298
+
299
+ if (availableSpace >= navSection.length) {
300
+ // Full navigation section fits
301
+ llmsTxtContent = llmsTxtContent + '\n\n' + navSection;
302
+ logger.info(`Injected full navigation section (${navSection.length} chars)`);
303
+ } else if (availableSpace > 500) {
304
+ // Partial navigation section - truncate at last newline to avoid cutting mid-line or mid-URL
305
+ const truncatedNav = truncateAtNewline(navSection, availableSpace - 50) + '\n\n[Navigation truncated due to size limits]';
306
+ llmsTxtContent = llmsTxtContent + '\n\n' + truncatedNav;
307
+ logger.warn(`Truncated navigation section from ${navSection.length} to ${truncatedNav.length} chars`);
308
+ } else {
309
+ logger.warn(`Skipping navigation injection - only ${availableSpace} chars available`);
310
+ }
311
+
312
+ logger.info(`Final llms.txt size: ${llmsTxtContent.length} chars`);
313
+
277
314
  siteCatalog.addFile({
278
- contents: Buffer.from(llmsPage.llmsTxtContent, 'utf8'),
315
+ contents: Buffer.from(llmsTxtContent, 'utf8'),
279
316
  out: { path: 'llms.txt' },
280
317
  });
281
318
  logger.info('Successfully added llms.txt');
@@ -529,3 +566,71 @@ function addLastmodToComponentSitemaps(contentCatalog, siteCatalog, sitemapIndex
529
566
 
530
567
  return sitemapIndexXml;
531
568
  }
569
+
570
+ /**
571
+ * Truncate content at the last newline before the specified limit.
572
+ * This avoids cutting mid-line or mid-URL which would produce malformed output.
573
+ *
574
+ * @param {string} content - Content to truncate
575
+ * @param {number} maxLength - Maximum length
576
+ * @returns {string} Truncated content ending at a newline boundary
577
+ */
578
+ function truncateAtNewline(content, maxLength) {
579
+ if (content.length <= maxLength) {
580
+ return content;
581
+ }
582
+ const truncated = content.slice(0, maxLength);
583
+ const lastNewline = truncated.lastIndexOf('\n');
584
+ if (lastNewline > 0) {
585
+ return truncated.slice(0, lastNewline);
586
+ }
587
+ // Fallback: no newline found, return as-is
588
+ return truncated;
589
+ }
590
+
591
+ /**
592
+ * Generate a comprehensive navigation section for llms.txt
593
+ * This improves llms-txt-freshness score by providing pathways to all documentation
594
+ *
595
+ * NOTE: The section URLs below are hardcoded. If pages are renamed, moved, or removed,
596
+ * these links will 404. When restructuring documentation, update these URLs accordingly.
597
+ * Future improvement: Generate these from the content catalog at build time.
598
+ *
599
+ * @param {string} siteUrl - Base site URL
600
+ * @returns {string} Markdown navigation section
601
+ */
602
+ function generateNavigationSection(siteUrl) {
603
+ let nav = `## Complete documentation index\n\n`;
604
+ nav += `For comprehensive page listings, use the sitemaps:\n\n`;
605
+ nav += `- [sitemap.md](${siteUrl}/sitemap.md) - Main sitemap index with all documentation\n`;
606
+ nav += `- [sitemap-all.md](${siteUrl}/sitemap-all.md) - Combined listing of all documentation pages\n\n`;
607
+
608
+ nav += `### Component sitemaps\n\n`;
609
+ nav += `- [Redpanda Self-Managed](${siteUrl}/sitemap-ROOT.md)\n`;
610
+ nav += `- [Redpanda Cloud](${siteUrl}/sitemap-redpanda-cloud.md)\n`;
611
+ nav += `- [Redpanda Connect](${siteUrl}/sitemap-redpanda-connect.md)\n`;
612
+ nav += `- [Redpanda Labs](${siteUrl}/sitemap-redpanda-labs.md)\n`;
613
+
614
+ nav += `\n### Key documentation sections\n\n`;
615
+ nav += `**Self-Managed:**\n`;
616
+ nav += `- [Deploy](${siteUrl}/current/deploy.md) - Installation and deployment guides\n`;
617
+ nav += `- [Manage](${siteUrl}/current/manage.md) - Cluster operations and administration\n`;
618
+ nav += `- [Develop](${siteUrl}/current/develop.md) - Application development guides\n`;
619
+ nav += `- [Reference](${siteUrl}/current/reference.md) - Configuration, CLI, and API references\n`;
620
+ nav += `- [Upgrade](${siteUrl}/current/upgrade.md) - Version upgrade procedures\n`;
621
+ nav += `- [Troubleshoot](${siteUrl}/current/troubleshoot.md) - Debugging and issue resolution\n`;
622
+
623
+ nav += `\n**Cloud:**\n`;
624
+ nav += `- [Get Started](${siteUrl}/redpanda-cloud/get-started.md) - Cloud quickstart and cluster types\n`;
625
+ nav += `- [Manage](${siteUrl}/redpanda-cloud/manage.md) - Cloud cluster management\n`;
626
+ nav += `- [Networking](${siteUrl}/redpanda-cloud/networking.md) - Network configuration\n`;
627
+ nav += `- [Security](${siteUrl}/redpanda-cloud/security.md) - Authentication and authorization\n`;
628
+ nav += `- [AI Agents](${siteUrl}/redpanda-cloud/ai-agents.md) - Agentic Data Plane documentation\n`;
629
+
630
+ nav += `\n**Connect:**\n`;
631
+ nav += `- [Components](${siteUrl}/redpanda-connect/components.md) - All connectors, processors, and more\n`;
632
+ nav += `- [Guides](${siteUrl}/redpanda-connect/guides.md) - Integration tutorials\n`;
633
+ nav += `- [Configuration](${siteUrl}/redpanda-connect/configuration.md) - YAML configuration reference\n`;
634
+
635
+ return nav;
636
+ }
@@ -2,6 +2,7 @@ const path = require('path')
2
2
  const os = require('os')
3
3
  const yaml = require('js-yaml')
4
4
  const { toMarkdownUrl } = require('../extension-utils/url-utils')
5
+ const { formatLlmsDirective } = require('../extension-utils/llms-utils')
5
6
  const TurndownService = require('turndown')
6
7
  const turndownPluginGfm = require('turndown-plugin-gfm')
7
8
  const { gfm } = turndownPluginGfm
@@ -500,17 +501,18 @@ module.exports.register = function () {
500
501
  restOfMarkdown = markdown.substring(h1Match[0].length).trimStart()
501
502
  }
502
503
 
503
- // Add frontmatter AFTER H1 heading, then source reference and AI-friendly note
504
+ // Structure: H1 llms.txt directive (blockquote) frontmatter source → content
505
+ // The directive must appear near the top for agent-friendly docs spec compliance
504
506
  if (canonicalUrl) {
505
507
  const componentName = page.src?.component || '';
506
- const urlHint = componentName
507
- ? `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview), /${componentName}-full.txt (this component only), or /llms-full.txt (complete documentation). -->`
508
- : `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview) or /llms-full.txt (complete documentation). -->`;
508
+ // Use markdown blockquote format for the directive (visible, can be hidden with CSS)
509
+ const llmsDirective = formatLlmsDirective(componentName);
509
510
 
510
- markdown = `${h1Heading}\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n${urlHint}\n\n${restOfMarkdown}`
511
+ markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n\n${restOfMarkdown}`
511
512
  } else if (frontmatter) {
512
- // If no canonical URL but we have frontmatter, still add it after H1
513
- markdown = `${h1Heading}\n${frontmatter}${restOfMarkdown}`
513
+ // If no canonical URL but we have frontmatter, still add directive after H1
514
+ const llmsDirective = formatLlmsDirective();
515
+ markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}${restOfMarkdown}`
514
516
  }
515
517
 
516
518
  // Clean up unnecessary whitespace
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redpanda-data/docs-extensions-and-macros",
3
- "version": "4.15.8",
3
+ "version": "4.15.10",
4
4
  "description": "Antora extensions and macros developed for Redpanda documentation.",
5
5
  "keywords": [
6
6
  "antora",
@@ -991,6 +991,14 @@ def _apply_override_to_existing_property(property_dict, override, overrides_file
991
991
  if "category" in override:
992
992
  property_dict["category"] = override["category"]
993
993
 
994
+ # Apply accepted_values override - replaces enum field to filter displayed values
995
+ # Use case: Exclude internal-only enum values (e.g., greedy mode for leader_balancer_mode)
996
+ if "accepted_values" in override:
997
+ if isinstance(override["accepted_values"], list):
998
+ property_dict["enum"] = override["accepted_values"]
999
+ else:
1000
+ logger.warning(f"accepted_values for property must be an array")
1001
+
994
1002
 
995
1003
  def _create_property_from_override(prop_name, override, overrides_file_path):
996
1004
  """Create a new property from override specification."""
@@ -1025,9 +1033,10 @@ def _create_property_from_override(prop_name, override, overrides_file_path):
1025
1033
 
1026
1034
  # Add any other custom fields from override
1027
1035
  for key, value in override.items():
1028
- if key not in ["description", "type", "default", "config_scope", "version",
1029
- "example", "example_file", "example_yaml", "related_topics",
1030
- "is_deprecated", "visibility"]:
1036
+ if key not in ["description", "type", "default", "config_scope", "version",
1037
+ "example", "example_file", "example_yaml", "related_topics",
1038
+ "is_deprecated", "visibility", "exclude_from_docs", "category",
1039
+ "accepted_values", "_comment"]:
1031
1040
  new_property[key] = value
1032
1041
 
1033
1042
  # Add exclude_from_docs if specified
@@ -1037,7 +1046,14 @@ def _create_property_from_override(prop_name, override, overrides_file_path):
1037
1046
  # Add category if specified
1038
1047
  if "category" in override:
1039
1048
  new_property["category"] = override["category"]
1040
-
1049
+
1050
+ # Add accepted_values as enum field if specified
1051
+ if "accepted_values" in override:
1052
+ if isinstance(override["accepted_values"], list):
1053
+ new_property["enum"] = override["accepted_values"]
1054
+ else:
1055
+ logger.warning(f"accepted_values for property '{prop_name}' must be an array")
1056
+
1041
1057
  return new_property
1042
1058
 
1043
1059
 
@@ -2127,7 +2143,8 @@ def resolve_type_and_default(properties, definitions):
2127
2143
  if resolved_type == "enum" or "enum" in resolved:
2128
2144
  # Enums are represented as strings with an enum constraint in JSON Schema
2129
2145
  prop["type"] = "string"
2130
- if "enum" in resolved:
2146
+ # Only set enum if not already set by an override (accepted_values)
2147
+ if "enum" in resolved and "enum" not in prop:
2131
2148
  prop["enum"] = resolved["enum"]
2132
2149
  elif resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
2133
2150
  prop["type"] = resolved_type
@@ -2483,8 +2483,10 @@ class RuntimeValidationEnumExtractor:
2483
2483
  )
2484
2484
 
2485
2485
  if enum_results:
2486
- # Extract just the values for the enum field
2487
- property["enum"] = [result["value"] for result in enum_results]
2486
+ # Skip if enum was already set by an override (accepted_values)
2487
+ if "enum" not in property:
2488
+ # Extract just the values for the enum field
2489
+ property["enum"] = [result["value"] for result in enum_results]
2488
2490
 
2489
2491
  # Add metadata about which enum values are enterprise-only
2490
2492
  enum_metadata = {}
@@ -247,6 +247,7 @@ function getConnectorList(binaryPath) {
247
247
 
248
248
  let result;
249
249
  if (needsDocker) {
250
+ console.log(` Using Docker to run Linux binary on ${os.platform()}`);
250
251
  // Use Docker to run Linux binaries on macOS/Windows
251
252
  const binaryDir = path.dirname(binaryPath);
252
253
  const binaryFile = path.basename(binaryPath);
@@ -268,6 +269,7 @@ function getConnectorList(binaryPath) {
268
269
  maxBuffer: 10 * 1024 * 1024 // 10MB buffer
269
270
  });
270
271
  } else {
272
+ console.log(` Running natively on ${os.platform()}`);
271
273
  // Run natively
272
274
  result = spawnSync(binaryPath, ['list', '--format', 'json-full'], {
273
275
  stdio: ['ignore', 'pipe', 'ignore'],
@@ -2,6 +2,7 @@ const { execSync } = require('child_process');
2
2
 
3
3
  /**
4
4
  * Generate a JSON diff report between two connector index objects.
5
+ * Includes platform metadata (CGO, cloud-only) to detect transitions.
5
6
  * @param {object} oldIndex - Previous version connector index
6
7
  * @param {object} newIndex - Current version connector index
7
8
  * @param {object} opts - { oldVersion, newVersion, timestamp, binaryAnalysis, oldBinaryAnalysis }
@@ -11,31 +12,39 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
11
12
  const oldMap = buildComponentMap(oldIndex);
12
13
  const newMap = buildComponentMap(newIndex);
13
14
 
14
- // New components
15
+ // New components (include platform metadata)
15
16
  const newComponentKeys = Object.keys(newMap).filter(k => !(k in oldMap));
16
17
  const newComponents = newComponentKeys.map(key => {
17
18
  const [type, name] = key.split(':');
18
19
  const raw = newMap[key].raw;
20
+ const metadata = newMap[key].metadata || {};
19
21
  return {
20
22
  name,
21
23
  type,
22
24
  status: raw.status || raw.type || '',
23
25
  version: raw.version || raw.introducedInVersion || '',
24
- description: raw.description || ''
26
+ description: raw.description || '',
27
+ requiresCgo: metadata.requiresCgo || false,
28
+ cloudOnly: metadata.cloudOnly || false,
29
+ cloudSupported: metadata.cloudSupported || false
25
30
  };
26
31
  });
27
32
 
28
- // Removed components
33
+ // Removed components (include platform metadata to understand why removed)
29
34
  const removedComponentKeys = Object.keys(oldMap).filter(k => !(k in newMap));
30
35
  const removedComponents = removedComponentKeys.map(key => {
31
36
  const [type, name] = key.split(':');
32
37
  const raw = oldMap[key].raw;
38
+ const metadata = oldMap[key].metadata || {};
33
39
  return {
34
40
  name,
35
41
  type,
36
42
  status: raw.status || raw.type || '',
37
43
  version: raw.version || raw.introducedInVersion || '',
38
- description: raw.description || ''
44
+ description: raw.description || '',
45
+ requiresCgo: metadata.requiresCgo || false,
46
+ cloudOnly: metadata.cloudOnly || false,
47
+ cloudSupported: metadata.cloudSupported || false
39
48
  };
40
49
  });
41
50
 
@@ -99,6 +108,57 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
99
108
  }
100
109
  });
101
110
 
111
+ // Platform transitions (CGO, cloud support changes)
112
+ const platformTransitions = [];
113
+ Object.keys(newMap).forEach(cKey => {
114
+ if (!(cKey in oldMap)) return;
115
+
116
+ const oldMeta = oldMap[cKey].metadata || {};
117
+ const newMeta = newMap[cKey].metadata || {};
118
+ const [type, name] = cKey.split(':');
119
+
120
+ const transitions = [];
121
+
122
+ // CGO requirement changes
123
+ if (!oldMeta.requiresCgo && newMeta.requiresCgo) {
124
+ transitions.push('became_cgo_only');
125
+ } else if (oldMeta.requiresCgo && !newMeta.requiresCgo) {
126
+ transitions.push('no_longer_cgo_only');
127
+ }
128
+
129
+ // Cloud support changes
130
+ if (!oldMeta.cloudSupported && newMeta.cloudSupported) {
131
+ transitions.push('added_cloud_support');
132
+ } else if (oldMeta.cloudSupported && !newMeta.cloudSupported) {
133
+ transitions.push('removed_cloud_support');
134
+ }
135
+
136
+ // Cloud-only status changes
137
+ if (!oldMeta.cloudOnly && newMeta.cloudOnly) {
138
+ transitions.push('became_cloud_only');
139
+ } else if (oldMeta.cloudOnly && !newMeta.cloudOnly) {
140
+ transitions.push('no_longer_cloud_only');
141
+ }
142
+
143
+ if (transitions.length > 0) {
144
+ platformTransitions.push({
145
+ name,
146
+ type,
147
+ transitions,
148
+ oldPlatform: {
149
+ requiresCgo: oldMeta.requiresCgo || false,
150
+ cloudSupported: oldMeta.cloudSupported || false,
151
+ cloudOnly: oldMeta.cloudOnly || false
152
+ },
153
+ newPlatform: {
154
+ requiresCgo: newMeta.requiresCgo || false,
155
+ cloudSupported: newMeta.cloudSupported || false,
156
+ cloudOnly: newMeta.cloudOnly || false
157
+ }
158
+ });
159
+ }
160
+ });
161
+
102
162
  // Newly deprecated fields (exist in both versions but became deprecated)
103
163
  const deprecatedFields = [];
104
164
  // Changed default values
@@ -221,6 +281,7 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
221
281
  deprecatedComponents: deprecatedComponents.length,
222
282
  deprecatedFields: deprecatedFields.length,
223
283
  changedDefaults: changedDefaults.length,
284
+ platformTransitions: platformTransitions.length,
224
285
  newBloblangMethods: newBloblangMethods.length,
225
286
  removedBloblangMethods: removedBloblangMethods.length,
226
287
  newBloblangFunctions: newBloblangFunctions.length,
@@ -236,6 +297,7 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
236
297
  deprecatedComponents,
237
298
  deprecatedFields,
238
299
  changedDefaults,
300
+ platformTransitions,
239
301
  newBloblangMethods,
240
302
  removedBloblangMethods,
241
303
  newBloblangFunctions,
@@ -341,7 +403,19 @@ function buildComponentMap(indexObj) {
341
403
  }
342
404
 
343
405
  const fieldNames = childArray.map(f => f.name);
344
- map[lookupKey] = { raw: component, fields: fieldNames };
406
+
407
+ // Preserve platform metadata for accurate diff comparison
408
+ const metadata = {
409
+ requiresCgo: component.requiresCgo || false,
410
+ cloudSupported: component.cloudSupported || false,
411
+ cloudOnly: component.cloudOnly || false
412
+ };
413
+
414
+ map[lookupKey] = {
415
+ raw: component,
416
+ fields: fieldNames,
417
+ metadata: metadata
418
+ };
345
419
  });
346
420
  });
347
421
 
@@ -92,6 +92,19 @@ function capToTwoSentences (description) {
92
92
  return result.trim()
93
93
  }
94
94
 
95
/**
 * Remove platform metadata fields (cloudSupported, requiresCgo, cloudOnly)
 * from every connector object, mutating the array elements in place.
 * @param {Array} connectors - Array of connector objects; non-arrays are ignored
 */
function stripPlatformMetadata (connectors) {
  if (!Array.isArray(connectors)) return
  const metadataKeys = ['cloudSupported', 'requiresCgo', 'cloudOnly']
  for (const connector of connectors) {
    for (const key of metadataKeys) {
      delete connector[key]
    }
  }
}
107
+
95
108
  /**
96
109
  * Update whats-new.adoc with new release information
97
110
  * @param {Object} params - Parameters
@@ -487,37 +500,6 @@ function logCollapsed (label, filesArray, maxToShow = 10) {
487
500
  console.log('')
488
501
  }
489
502
 
490
- /**
491
- * Strip augmentation fields from connector data to ensure clean comparisons
492
- * Removes cloudSupported, requiresCgo, cloudOnly fields and filters out cloud-only connectors
493
- * @param {object} data - Connector index data
494
- * @returns {object} Clean connector data without augmentation
495
- */
496
- function stripAugmentationFields(data) {
497
- const cleanData = JSON.parse(JSON.stringify(data));
498
- const connectorTypes = ['inputs', 'outputs', 'processors', 'caches', 'rate_limits',
499
- 'buffers', 'metrics', 'scanners', 'tracers', 'config', 'bloblang-methods'];
500
-
501
- for (const type of connectorTypes) {
502
- if (Array.isArray(cleanData[type])) {
503
- // Remove connectors that were added by augmentation (cloudOnly or requiresCgo without OSS data)
504
- cleanData[type] = cleanData[type].filter(c => {
505
- // Keep if it's not marked as cloudOnly or requiresCgo
506
- // OR if it has a config/fields (meaning it came from OSS, not just binary analysis)
507
- return (!(c.cloudOnly || c.requiresCgo) || c.config || c.fields);
508
- });
509
-
510
- // Remove augmentation fields
511
- cleanData[type].forEach(c => {
512
- delete c.cloudSupported;
513
- delete c.requiresCgo;
514
- delete c.cloudOnly;
515
- });
516
- }
517
- }
518
-
519
- return cleanData;
520
- }
521
503
 
522
504
  /**
523
505
  * Load or fetch connector data for a specific version
@@ -529,15 +511,11 @@ function stripAugmentationFields(data) {
529
511
  async function loadConnectorDataForVersion(version, dataDir, options = {}) {
530
512
  const dataFile = path.join(dataDir, `connect-${version}.json`);
531
513
 
532
- // If file exists, load it
514
+ // If file exists, load it (with platform metadata intact)
533
515
  if (fs.existsSync(dataFile)) {
534
516
  console.log(`✓ Using existing data file: connect-${version}.json`);
535
517
  const data = JSON.parse(fs.readFileSync(dataFile, 'utf8'));
536
-
537
- // Strip augmentation fields to ensure clean comparisons
538
- // Augmentation adds CGO/cloud-only components that shouldn't affect version diffs
539
- const cleanData = stripAugmentationFields(data);
540
- return cleanData;
518
+ return data;
541
519
  }
542
520
 
543
521
  // If not, fetch it
@@ -940,8 +918,8 @@ async function handleRpcnConnectorDocs (options) {
940
918
  let oldIndex = {}
941
919
  let oldVersion = null
942
920
  if (options.oldData && fs.existsSync(options.oldData)) {
943
- // Strip augmentation fields to ensure clean comparisons
944
- oldIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(options.oldData, 'utf8')))
921
+ // Load with platform metadata intact for accurate diff
922
+ oldIndex = JSON.parse(fs.readFileSync(options.oldData, 'utf8'))
945
923
  const m = options.oldData.match(/connect-([\d.]+)\.json$/)
946
924
  if (m) oldVersion = m[1]
947
925
  } else {
@@ -959,30 +937,43 @@ async function handleRpcnConnectorDocs (options) {
959
937
  oldVersion = sortedVersions[0]
960
938
  const oldFile = `connect-${oldVersion}.json`
961
939
  const oldPath = path.join(dataDir, oldFile)
962
- // Strip augmentation fields to ensure clean comparisons
963
- oldIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(oldPath, 'utf8')))
940
+ // Load with platform metadata intact for accurate diff
941
+ oldIndex = JSON.parse(fs.readFileSync(oldPath, 'utf8'))
964
942
  console.log(`📋 Using old version data: ${oldFile}`)
965
943
  } else {
966
944
  oldVersion = getAntoraValue('asciidoc.attributes.latest-connect-version')
967
945
  if (oldVersion) {
968
946
  const oldPath = path.join(dataDir, `connect-${oldVersion}.json`)
969
947
  if (fs.existsSync(oldPath)) {
970
- // Strip augmentation fields to ensure clean comparisons
971
- oldIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(oldPath, 'utf8')))
948
+ // Load with platform metadata intact for accurate diff
949
+ oldIndex = JSON.parse(fs.readFileSync(oldPath, 'utf8'))
972
950
  }
973
951
  }
974
952
  }
975
953
  }
976
954
 
977
- // Load and strip augmentation fields for clean comparisons
978
- let newIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(dataFile, 'utf8')))
955
+ // Load with platform metadata intact for accurate diff
956
+ let newIndex = JSON.parse(fs.readFileSync(dataFile, 'utf8'))
979
957
 
980
- // Save a clean copy of OSS data for binary analysis (before augmentation)
981
- // This ensures the binary analyzer compares actual binaries, not augmented data
958
+ // Save a clean copy of OSS data for binary analysis
959
+ // Binary analyzer needs pure OSS data without augmented CGO/cloud connectors
982
960
  const cleanOssDataPath = path.join(dataDir, `._connect-${newVersion}-clean.json`)
983
961
 
984
- // Use the already-stripped newIndex for clean data
962
+ // Create clean version by removing augmented connectors
985
963
  const cleanData = JSON.parse(JSON.stringify(newIndex))
964
+ const connectorTypes = ['inputs', 'outputs', 'processors', 'caches', 'rate_limits',
965
+ 'buffers', 'metrics', 'scanners', 'tracers']
966
+
967
+ for (const type of connectorTypes) {
968
+ if (Array.isArray(cleanData[type])) {
969
+ // Keep only connectors from OSS rpk (have config/fields)
970
+ // Remove augmentation-only connectors (added by previous binary analysis)
971
+ cleanData[type] = cleanData[type].filter(c => c.config || c.fields)
972
+
973
+ // Remove platform metadata from remaining connectors
974
+ stripPlatformMetadata(cleanData[type])
975
+ }
976
+ }
986
977
 
987
978
  fs.writeFileSync(cleanOssDataPath, JSON.stringify(cleanData, null, 2), 'utf8')
988
979
 
@@ -1031,8 +1022,6 @@ async function handleRpcnConnectorDocs (options) {
1031
1022
  }
1032
1023
  }
1033
1024
 
1034
- printDeltaReport(oldIndex, newIndex)
1035
-
1036
1025
  // Binary analysis
1037
1026
  let oldBinaryAnalysis = null
1038
1027
 
@@ -1250,9 +1239,10 @@ async function handleRpcnConnectorDocs (options) {
1250
1239
  }
1251
1240
  }
1252
1241
 
1253
- // NOTE: We do NOT reload newIndex after augmentation
1254
- // Diff generation should use clean OSS data to avoid false positives from CGO/cloud-only components
1255
- // The augmented data is saved to disk but not used for version comparisons
1242
+ // IMPORTANT: Reload newIndex with augmented data for unified diff
1243
+ // The unified diff approach compares platform metadata to detect transitions
1244
+ newIndex = connectorData
1245
+ console.log(`✓ Reloaded newIndex with augmented data for diff comparison`)
1256
1246
  } catch (err) {
1257
1247
  console.error(`Warning: Failed to augment data file: ${err.message}`)
1258
1248
  }
@@ -1265,9 +1255,51 @@ async function handleRpcnConnectorDocs (options) {
1265
1255
  } else if (versionsMatch) {
1266
1256
  console.log(`⏭️ Skipping diff generation: versions match (${oldVersion} === ${newVersion})`)
1267
1257
  } else {
1258
+ // FALLBACK: If binary analysis failed, strip CGO/cloud augmentation from old data
1259
+ // to prevent false "removed" reports when comparing augmented old vs non-augmented new
1260
+ let oldIndexForDiff = oldIndex
1261
+ // Check if CGO analysis specifically failed (cgoIndex will be undefined if CGO binary couldn't be analyzed)
1262
+ const cgoAnalysisFailed = !binaryAnalysis || !binaryAnalysis.ossVersion || !binaryAnalysis.cgoIndex
1263
+ if (cgoAnalysisFailed) {
1264
+ console.log('⚠️ Binary analysis unavailable - stripping CGO/cloud metadata from old data for clean comparison')
1265
+
1266
+ // Strip CGO/cloud-only connectors and metadata from old data
1267
+ oldIndexForDiff = JSON.parse(JSON.stringify(oldIndex))
1268
+ const connectorTypes = ['inputs', 'outputs', 'processors', 'caches', 'rate_limits',
1269
+ 'buffers', 'metrics', 'scanners', 'tracers']
1270
+
1271
+ let totalStripped = 0
1272
+ for (const type of connectorTypes) {
1273
+ if (Array.isArray(oldIndexForDiff[type])) {
1274
+ const originalCount = oldIndexForDiff[type].length
1275
+
1276
+ // Remove connectors marked as CGO-only or cloud-only
1277
+ // These shouldn't appear as "removed" when binary analysis is unavailable
1278
+ oldIndexForDiff[type] = oldIndexForDiff[type].filter(c => {
1279
+ return !(c.requiresCgo || c.cloudOnly)
1280
+ })
1281
+
1282
+ const removed = originalCount - oldIndexForDiff[type].length
1283
+ if (removed > 0) {
1284
+ console.log(` • Stripped ${removed} CGO/cloud connectors from ${type}`)
1285
+ totalStripped += removed
1286
+ }
1287
+
1288
+ // Remove platform metadata from remaining connectors
1289
+ stripPlatformMetadata(oldIndexForDiff[type])
1290
+ }
1291
+ }
1292
+
1293
+ if (totalStripped > 0) {
1294
+ console.log(` ✓ Total stripped: ${totalStripped} CGO/cloud connectors`)
1295
+ }
1296
+ }
1297
+
1298
+ printDeltaReport(oldIndexForDiff, newIndex)
1299
+
1268
1300
  const { generateConnectorDiffJson } = require('./report-delta.js')
1269
1301
  diffJson = generateConnectorDiffJson(
1270
- oldIndex,
1302
+ oldIndexForDiff,
1271
1303
  newIndex,
1272
1304
  {
1273
1305
  oldVersion: oldVersion,