@redpanda-data/docs-extensions-and-macros 4.15.8 → 4.15.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
'use strict';

/**
 * Shared utilities for llms.txt generation and markdown processing.
 * Used by both convert-to-markdown.js and convert-llms-to-txt.js.
 */

/**
 * The base directive text that appears in markdown files pointing to llms.txt.
 * This is the canonical source of truth used for both rendering and stripping.
 */
const LLMS_DIRECTIVE_BASE = 'For the complete documentation index, see [llms.txt](/llms.txt)';

/**
 * Escape regex metacharacters so a literal string can be embedded in a RegExp.
 * @param {string} text - Literal text to escape
 * @returns {string} Regex-safe version of the text
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Format the llms directive blockquote for a page.
 * @param {string} [componentName] - Optional component name for component-specific link
 * @returns {string} Formatted markdown blockquote directive
 */
function formatLlmsDirective(componentName) {
  if (componentName) {
    return `> ${LLMS_DIRECTIVE_BASE}. Component-specific: [${componentName}-full.txt](/${componentName}-full.txt)`;
  }
  return `> ${LLMS_DIRECTIVE_BASE}`;
}

/**
 * Regex pattern to match and strip the llms directive from markdown content.
 * Matches the blockquote format with optional component-specific suffix.
 *
 * Built from LLMS_DIRECTIVE_BASE (instead of hand-duplicating the text) so the
 * rendered directive and the strip pattern cannot drift apart; the trailing
 * `.*` still swallows the optional component-specific suffix on the same line.
 */
const LLMS_DIRECTIVE_REGEX = new RegExp(`^> ${escapeRegExp(LLMS_DIRECTIVE_BASE)}.*$`, 'gm');

/**
 * Regex pattern to match and strip HTML source comments from markdown content.
 */
const SOURCE_COMMENT_REGEX = /^<!--[\s\S]*?-->\s*/gm;

/**
 * Strip metadata added by convert-to-markdown extension from page content.
 * This removes:
 * 1. HTML comments (source URLs)
 * 2. llms.txt directive blockquotes (redundant in aggregated exports)
 *
 * @param {string|Buffer} content - The markdown content to strip
 * @returns {string} Cleaned markdown content, trimmed of leading/trailing whitespace
 */
function stripMarkdownMetadata(content) {
  let text = typeof content === 'string' ? content : content.toString('utf8');

  // Strip HTML comments (source URLs)
  text = text.replace(SOURCE_COMMENT_REGEX, '');

  // Strip llms.txt directive blockquotes
  text = text.replace(LLMS_DIRECTIVE_REGEX, '');

  return text.trim();
}
57
+
58
// Public surface of llms-utils: the canonical directive text, the two strip
// regexes, and the render/strip helpers shared by both converter scripts.
module.exports = {
  LLMS_DIRECTIVE_BASE,
  LLMS_DIRECTIVE_REGEX,
  SOURCE_COMMENT_REGEX,
  formatLlmsDirective,
  stripMarkdownMetadata,
};
@@ -1,6 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  const { toMarkdownUrl } = require('../extension-utils/url-utils');
4
+ const { stripMarkdownMetadata } = require('../extension-utils/llms-utils');
4
5
 
5
6
  /**
6
7
  * Extracts markdown from llms.adoc page and generates AI-friendly documentation exports.
@@ -32,6 +33,8 @@ module.exports.register = function () {
32
33
  siteUrl = playbook.site?.url || 'https://docs.redpanda.com';
33
34
  logger.info(`Using site URL: ${siteUrl}`);
34
35
  }
36
+ // Normalize: strip trailing slashes to avoid double slashes in URL concatenation
37
+ siteUrl = siteUrl.replace(/\/+$/, '');
35
38
  });
36
39
 
37
40
  this.on('contentClassified', ({ contentCatalog }) => {
@@ -71,10 +74,10 @@ module.exports.register = function () {
71
74
  let content = llmsPage.markdownContents.toString('utf8');
72
75
  logger.info(`Extracted ${content.length} bytes of markdown content`);
73
76
 
74
- // Strip HTML comments added by convert-to-markdown extension
77
+ // Strip metadata added by convert-to-markdown extension using shared helper
75
78
  // These reference the unpublished /home/llms/ URL which doesn't make sense for llms.txt
76
- content = content.replace(/^<!--[\s\S]*?-->\s*/gm, '').trim();
77
- logger.debug(`Stripped HTML comments, now ${content.length} bytes`);
79
+ content = stripMarkdownMetadata(content);
80
+ logger.debug(`Stripped metadata, now ${content.length} bytes`);
78
81
 
79
82
  // Fix URLs: convert em dashes back to double hyphens and remove invisible characters
80
83
  // The markdown converter applies smart typography that turns -- into — (em dash)
@@ -187,7 +190,8 @@ module.exports.register = function () {
187
190
  fullContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
188
191
  fullContent += `**URL**: ${pageUrl}\n\n`;
189
192
  fullContent += `---\n\n`;
190
- fullContent += page.markdownContents.toString('utf8');
193
+ // Strip metadata (directive, source comments) from page content
194
+ fullContent += stripMarkdownMetadata(page.markdownContents);
191
195
  fullContent += `\n\n---\n\n`;
192
196
  });
193
197
 
@@ -258,7 +262,8 @@ module.exports.register = function () {
258
262
  componentContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
259
263
  componentContent += `**URL**: ${pageUrl}\n\n`;
260
264
  componentContent += `---\n\n`;
261
- componentContent += page.markdownContents.toString('utf8');
265
+ // Strip metadata (directive, source comments) from page content
266
+ componentContent += stripMarkdownMetadata(page.markdownContents);
262
267
  componentContent += `\n\n---\n\n`;
263
268
  });
264
269
 
@@ -274,8 +279,40 @@ module.exports.register = function () {
274
279
  if (llmsPage && llmsPage.llmsTxtContent) {
275
280
  logger.info('Adding llms.txt to site root');
276
281
 
282
+ // Target: Stay under 50K chars (agent-friendly docs spec limit)
283
+ const MAX_LLMS_TXT_CHARS = 45000; // Leave buffer below 50K
284
+ let llmsTxtContent = llmsPage.llmsTxtContent;
285
+
286
+ // Check if base content already exceeds limit
287
+ if (llmsTxtContent.length >= MAX_LLMS_TXT_CHARS) {
288
+ logger.warn(`Base llms.txt content (${llmsTxtContent.length} chars) exceeds ${MAX_LLMS_TXT_CHARS} char limit, truncating`);
289
+ // Truncate at last newline before limit to avoid cutting mid-line or mid-URL
290
+ llmsTxtContent = truncateAtNewline(llmsTxtContent, MAX_LLMS_TXT_CHARS - 100) + '\n\n[Content truncated due to size limits]';
291
+ }
292
+
293
+ // Generate navigation section with component sitemaps and key sections
294
+ const navSection = generateNavigationSection(siteUrl);
295
+
296
+ // Calculate available space for navigation section
297
+ const availableSpace = MAX_LLMS_TXT_CHARS - llmsTxtContent.length - 2; // -2 for \n\n separator
298
+
299
+ if (availableSpace >= navSection.length) {
300
+ // Full navigation section fits
301
+ llmsTxtContent = llmsTxtContent + '\n\n' + navSection;
302
+ logger.info(`Injected full navigation section (${navSection.length} chars)`);
303
+ } else if (availableSpace > 500) {
304
+ // Partial navigation section - truncate at last newline to avoid cutting mid-line or mid-URL
305
+ const truncatedNav = truncateAtNewline(navSection, availableSpace - 50) + '\n\n[Navigation truncated due to size limits]';
306
+ llmsTxtContent = llmsTxtContent + '\n\n' + truncatedNav;
307
+ logger.warn(`Truncated navigation section from ${navSection.length} to ${truncatedNav.length} chars`);
308
+ } else {
309
+ logger.warn(`Skipping navigation injection - only ${availableSpace} chars available`);
310
+ }
311
+
312
+ logger.info(`Final llms.txt size: ${llmsTxtContent.length} chars`);
313
+
277
314
  siteCatalog.addFile({
278
- contents: Buffer.from(llmsPage.llmsTxtContent, 'utf8'),
315
+ contents: Buffer.from(llmsTxtContent, 'utf8'),
279
316
  out: { path: 'llms.txt' },
280
317
  });
281
318
  logger.info('Successfully added llms.txt');
@@ -529,3 +566,71 @@ function addLastmodToComponentSitemaps(contentCatalog, siteCatalog, sitemapIndex
529
566
 
530
567
  return sitemapIndexXml;
531
568
  }
569
+
570
+ /**
571
+ * Truncate content at the last newline before the specified limit.
572
+ * This avoids cutting mid-line or mid-URL which would produce malformed output.
573
+ *
574
+ * @param {string} content - Content to truncate
575
+ * @param {number} maxLength - Maximum length
576
+ * @returns {string} Truncated content ending at a newline boundary
577
+ */
578
+ function truncateAtNewline(content, maxLength) {
579
+ if (content.length <= maxLength) {
580
+ return content;
581
+ }
582
+ const truncated = content.slice(0, maxLength);
583
+ const lastNewline = truncated.lastIndexOf('\n');
584
+ if (lastNewline > 0) {
585
+ return truncated.slice(0, lastNewline);
586
+ }
587
+ // Fallback: no newline found, return as-is
588
+ return truncated;
589
+ }
590
+
591
+ /**
592
+ * Generate a comprehensive navigation section for llms.txt
593
+ * This improves llms-txt-freshness score by providing pathways to all documentation
594
+ *
595
+ * NOTE: The section URLs below are hardcoded. If pages are renamed, moved, or removed,
596
+ * these links will 404. When restructuring documentation, update these URLs accordingly.
597
+ * Future improvement: Generate these from the content catalog at build time.
598
+ *
599
+ * @param {string} siteUrl - Base site URL
600
+ * @returns {string} Markdown navigation section
601
+ */
602
+ function generateNavigationSection(siteUrl) {
603
+ let nav = `## Complete documentation index\n\n`;
604
+ nav += `For comprehensive page listings, use the sitemaps:\n\n`;
605
+ nav += `- [sitemap.md](${siteUrl}/sitemap.md) - Main sitemap index with all documentation\n`;
606
+ nav += `- [sitemap-all.md](${siteUrl}/sitemap-all.md) - Combined listing of all documentation pages\n\n`;
607
+
608
+ nav += `### Component sitemaps\n\n`;
609
+ nav += `- [Redpanda Self-Managed](${siteUrl}/sitemap-ROOT.md)\n`;
610
+ nav += `- [Redpanda Cloud](${siteUrl}/sitemap-redpanda-cloud.md)\n`;
611
+ nav += `- [Redpanda Connect](${siteUrl}/sitemap-redpanda-connect.md)\n`;
612
+ nav += `- [Redpanda Labs](${siteUrl}/sitemap-redpanda-labs.md)\n`;
613
+
614
+ nav += `\n### Key documentation sections\n\n`;
615
+ nav += `**Self-Managed:**\n`;
616
+ nav += `- [Deploy](${siteUrl}/current/deploy.md) - Installation and deployment guides\n`;
617
+ nav += `- [Manage](${siteUrl}/current/manage.md) - Cluster operations and administration\n`;
618
+ nav += `- [Develop](${siteUrl}/current/develop.md) - Application development guides\n`;
619
+ nav += `- [Reference](${siteUrl}/current/reference.md) - Configuration, CLI, and API references\n`;
620
+ nav += `- [Upgrade](${siteUrl}/current/upgrade.md) - Version upgrade procedures\n`;
621
+ nav += `- [Troubleshoot](${siteUrl}/current/troubleshoot.md) - Debugging and issue resolution\n`;
622
+
623
+ nav += `\n**Cloud:**\n`;
624
+ nav += `- [Get Started](${siteUrl}/redpanda-cloud/get-started.md) - Cloud quickstart and cluster types\n`;
625
+ nav += `- [Manage](${siteUrl}/redpanda-cloud/manage.md) - Cloud cluster management\n`;
626
+ nav += `- [Networking](${siteUrl}/redpanda-cloud/networking.md) - Network configuration\n`;
627
+ nav += `- [Security](${siteUrl}/redpanda-cloud/security.md) - Authentication and authorization\n`;
628
+ nav += `- [AI Agents](${siteUrl}/redpanda-cloud/ai-agents.md) - Agentic Data Plane documentation\n`;
629
+
630
+ nav += `\n**Connect:**\n`;
631
+ nav += `- [Components](${siteUrl}/redpanda-connect/components.md) - All connectors, processors, and more\n`;
632
+ nav += `- [Guides](${siteUrl}/redpanda-connect/guides.md) - Integration tutorials\n`;
633
+ nav += `- [Configuration](${siteUrl}/redpanda-connect/configuration.md) - YAML configuration reference\n`;
634
+
635
+ return nav;
636
+ }
@@ -2,6 +2,7 @@ const path = require('path')
2
2
  const os = require('os')
3
3
  const yaml = require('js-yaml')
4
4
  const { toMarkdownUrl } = require('../extension-utils/url-utils')
5
+ const { formatLlmsDirective } = require('../extension-utils/llms-utils')
5
6
  const TurndownService = require('turndown')
6
7
  const turndownPluginGfm = require('turndown-plugin-gfm')
7
8
  const { gfm } = turndownPluginGfm
@@ -500,17 +501,18 @@ module.exports.register = function () {
500
501
  restOfMarkdown = markdown.substring(h1Match[0].length).trimStart()
501
502
  }
502
503
 
503
- // Add frontmatter AFTER H1 heading, then source reference and AI-friendly note
504
+ // Structure: H1 llms.txt directive (blockquote) frontmatter source → content
505
+ // The directive must appear near the top for agent-friendly docs spec compliance
504
506
  if (canonicalUrl) {
505
507
  const componentName = page.src?.component || '';
506
- const urlHint = componentName
507
- ? `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview), /${componentName}-full.txt (this component only), or /llms-full.txt (complete documentation). -->`
508
- : `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview) or /llms-full.txt (complete documentation). -->`;
508
+ // Use markdown blockquote format for the directive (visible, can be hidden with CSS)
509
+ const llmsDirective = formatLlmsDirective(componentName);
509
510
 
510
- markdown = `${h1Heading}\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n${urlHint}\n\n${restOfMarkdown}`
511
+ markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n\n${restOfMarkdown}`
511
512
  } else if (frontmatter) {
512
- // If no canonical URL but we have frontmatter, still add it after H1
513
- markdown = `${h1Heading}\n${frontmatter}${restOfMarkdown}`
513
+ // If no canonical URL but we have frontmatter, still add directive after H1
514
+ const llmsDirective = formatLlmsDirective();
515
+ markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}${restOfMarkdown}`
514
516
  }
515
517
 
516
518
  // Clean up unnecessary whitespace
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redpanda-data/docs-extensions-and-macros",
3
- "version": "4.15.8",
3
+ "version": "4.15.10",
4
4
  "description": "Antora extensions and macros developed for Redpanda documentation.",
5
5
  "keywords": [
6
6
  "antora",
@@ -991,6 +991,14 @@ def _apply_override_to_existing_property(property_dict, override, overrides_file
991
991
  if "category" in override:
992
992
  property_dict["category"] = override["category"]
993
993
 
994
+ # Apply accepted_values override - replaces enum field to filter displayed values
995
+ # Use case: Exclude internal-only enum values (e.g., greedy mode for leader_balancer_mode)
996
+ if "accepted_values" in override:
997
+ if isinstance(override["accepted_values"], list):
998
+ property_dict["enum"] = override["accepted_values"]
999
+ else:
1000
+ logger.warning(f"accepted_values for property must be an array")
1001
+
994
1002
 
995
1003
  def _create_property_from_override(prop_name, override, overrides_file_path):
996
1004
  """Create a new property from override specification."""
@@ -1025,9 +1033,10 @@ def _create_property_from_override(prop_name, override, overrides_file_path):
1025
1033
 
1026
1034
  # Add any other custom fields from override
1027
1035
  for key, value in override.items():
1028
- if key not in ["description", "type", "default", "config_scope", "version",
1029
- "example", "example_file", "example_yaml", "related_topics",
1030
- "is_deprecated", "visibility"]:
1036
+ if key not in ["description", "type", "default", "config_scope", "version",
1037
+ "example", "example_file", "example_yaml", "related_topics",
1038
+ "is_deprecated", "visibility", "exclude_from_docs", "category",
1039
+ "accepted_values", "_comment"]:
1031
1040
  new_property[key] = value
1032
1041
 
1033
1042
  # Add exclude_from_docs if specified
@@ -1037,7 +1046,14 @@ def _create_property_from_override(prop_name, override, overrides_file_path):
1037
1046
  # Add category if specified
1038
1047
  if "category" in override:
1039
1048
  new_property["category"] = override["category"]
1040
-
1049
+
1050
+ # Add accepted_values as enum field if specified
1051
+ if "accepted_values" in override:
1052
+ if isinstance(override["accepted_values"], list):
1053
+ new_property["enum"] = override["accepted_values"]
1054
+ else:
1055
+ logger.warning(f"accepted_values for property '{prop_name}' must be an array")
1056
+
1041
1057
  return new_property
1042
1058
 
1043
1059
 
@@ -2127,7 +2143,8 @@ def resolve_type_and_default(properties, definitions):
2127
2143
  if resolved_type == "enum" or "enum" in resolved:
2128
2144
  # Enums are represented as strings with an enum constraint in JSON Schema
2129
2145
  prop["type"] = "string"
2130
- if "enum" in resolved:
2146
+ # Only set enum if not already set by an override (accepted_values)
2147
+ if "enum" in resolved and "enum" not in prop:
2131
2148
  prop["enum"] = resolved["enum"]
2132
2149
  elif resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
2133
2150
  prop["type"] = resolved_type
@@ -2483,8 +2483,10 @@ class RuntimeValidationEnumExtractor:
2483
2483
  )
2484
2484
 
2485
2485
  if enum_results:
2486
- # Extract just the values for the enum field
2487
- property["enum"] = [result["value"] for result in enum_results]
2486
+ # Skip if enum was already set by an override (accepted_values)
2487
+ if "enum" not in property:
2488
+ # Extract just the values for the enum field
2489
+ property["enum"] = [result["value"] for result in enum_results]
2488
2490
 
2489
2491
  # Add metadata about which enum values are enterprise-only
2490
2492
  enum_metadata = {}
@@ -247,6 +247,7 @@ function getConnectorList(binaryPath) {
247
247
 
248
248
  let result;
249
249
  if (needsDocker) {
250
+ console.log(` Using Docker to run Linux binary on ${os.platform()}`);
250
251
  // Use Docker to run Linux binaries on macOS/Windows
251
252
  const binaryDir = path.dirname(binaryPath);
252
253
  const binaryFile = path.basename(binaryPath);
@@ -268,6 +269,7 @@ function getConnectorList(binaryPath) {
268
269
  maxBuffer: 10 * 1024 * 1024 // 10MB buffer
269
270
  });
270
271
  } else {
272
+ console.log(` Running natively on ${os.platform()}`);
271
273
  // Run natively
272
274
  result = spawnSync(binaryPath, ['list', '--format', 'json-full'], {
273
275
  stdio: ['ignore', 'pipe', 'ignore'],
@@ -2,6 +2,7 @@ const { execSync } = require('child_process');
2
2
 
3
3
  /**
4
4
  * Generate a JSON diff report between two connector index objects.
5
+ * Includes platform metadata (CGO, cloud-only) to detect transitions.
5
6
  * @param {object} oldIndex - Previous version connector index
6
7
  * @param {object} newIndex - Current version connector index
7
8
  * @param {object} opts - { oldVersion, newVersion, timestamp, binaryAnalysis, oldBinaryAnalysis }
@@ -11,31 +12,39 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
11
12
  const oldMap = buildComponentMap(oldIndex);
12
13
  const newMap = buildComponentMap(newIndex);
13
14
 
14
- // New components
15
+ // New components (include platform metadata)
15
16
  const newComponentKeys = Object.keys(newMap).filter(k => !(k in oldMap));
16
17
  const newComponents = newComponentKeys.map(key => {
17
18
  const [type, name] = key.split(':');
18
19
  const raw = newMap[key].raw;
20
+ const metadata = newMap[key].metadata || {};
19
21
  return {
20
22
  name,
21
23
  type,
22
24
  status: raw.status || raw.type || '',
23
25
  version: raw.version || raw.introducedInVersion || '',
24
- description: raw.description || ''
26
+ description: raw.description || '',
27
+ requiresCgo: metadata.requiresCgo || false,
28
+ cloudOnly: metadata.cloudOnly || false,
29
+ cloudSupported: metadata.cloudSupported || false
25
30
  };
26
31
  });
27
32
 
28
- // Removed components
33
+ // Removed components (include platform metadata to understand why removed)
29
34
  const removedComponentKeys = Object.keys(oldMap).filter(k => !(k in newMap));
30
35
  const removedComponents = removedComponentKeys.map(key => {
31
36
  const [type, name] = key.split(':');
32
37
  const raw = oldMap[key].raw;
38
+ const metadata = oldMap[key].metadata || {};
33
39
  return {
34
40
  name,
35
41
  type,
36
42
  status: raw.status || raw.type || '',
37
43
  version: raw.version || raw.introducedInVersion || '',
38
- description: raw.description || ''
44
+ description: raw.description || '',
45
+ requiresCgo: metadata.requiresCgo || false,
46
+ cloudOnly: metadata.cloudOnly || false,
47
+ cloudSupported: metadata.cloudSupported || false
39
48
  };
40
49
  });
41
50
 
@@ -99,6 +108,57 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
99
108
  }
100
109
  });
101
110
 
111
+ // Platform transitions (CGO, cloud support changes)
112
+ const platformTransitions = [];
113
+ Object.keys(newMap).forEach(cKey => {
114
+ if (!(cKey in oldMap)) return;
115
+
116
+ const oldMeta = oldMap[cKey].metadata || {};
117
+ const newMeta = newMap[cKey].metadata || {};
118
+ const [type, name] = cKey.split(':');
119
+
120
+ const transitions = [];
121
+
122
+ // CGO requirement changes
123
+ if (!oldMeta.requiresCgo && newMeta.requiresCgo) {
124
+ transitions.push('became_cgo_only');
125
+ } else if (oldMeta.requiresCgo && !newMeta.requiresCgo) {
126
+ transitions.push('no_longer_cgo_only');
127
+ }
128
+
129
+ // Cloud support changes
130
+ if (!oldMeta.cloudSupported && newMeta.cloudSupported) {
131
+ transitions.push('added_cloud_support');
132
+ } else if (oldMeta.cloudSupported && !newMeta.cloudSupported) {
133
+ transitions.push('removed_cloud_support');
134
+ }
135
+
136
+ // Cloud-only status changes
137
+ if (!oldMeta.cloudOnly && newMeta.cloudOnly) {
138
+ transitions.push('became_cloud_only');
139
+ } else if (oldMeta.cloudOnly && !newMeta.cloudOnly) {
140
+ transitions.push('no_longer_cloud_only');
141
+ }
142
+
143
+ if (transitions.length > 0) {
144
+ platformTransitions.push({
145
+ name,
146
+ type,
147
+ transitions,
148
+ oldPlatform: {
149
+ requiresCgo: oldMeta.requiresCgo || false,
150
+ cloudSupported: oldMeta.cloudSupported || false,
151
+ cloudOnly: oldMeta.cloudOnly || false
152
+ },
153
+ newPlatform: {
154
+ requiresCgo: newMeta.requiresCgo || false,
155
+ cloudSupported: newMeta.cloudSupported || false,
156
+ cloudOnly: newMeta.cloudOnly || false
157
+ }
158
+ });
159
+ }
160
+ });
161
+
102
162
  // Newly deprecated fields (exist in both versions but became deprecated)
103
163
  const deprecatedFields = [];
104
164
  // Changed default values
@@ -221,6 +281,7 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
221
281
  deprecatedComponents: deprecatedComponents.length,
222
282
  deprecatedFields: deprecatedFields.length,
223
283
  changedDefaults: changedDefaults.length,
284
+ platformTransitions: platformTransitions.length,
224
285
  newBloblangMethods: newBloblangMethods.length,
225
286
  removedBloblangMethods: removedBloblangMethods.length,
226
287
  newBloblangFunctions: newBloblangFunctions.length,
@@ -236,6 +297,7 @@ function generateConnectorDiffJson(oldIndex, newIndex, opts = {}) {
236
297
  deprecatedComponents,
237
298
  deprecatedFields,
238
299
  changedDefaults,
300
+ platformTransitions,
239
301
  newBloblangMethods,
240
302
  removedBloblangMethods,
241
303
  newBloblangFunctions,
@@ -341,7 +403,19 @@ function buildComponentMap(indexObj) {
341
403
  }
342
404
 
343
405
  const fieldNames = childArray.map(f => f.name);
344
- map[lookupKey] = { raw: component, fields: fieldNames };
406
+
407
+ // Preserve platform metadata for accurate diff comparison
408
+ const metadata = {
409
+ requiresCgo: component.requiresCgo || false,
410
+ cloudSupported: component.cloudSupported || false,
411
+ cloudOnly: component.cloudOnly || false
412
+ };
413
+
414
+ map[lookupKey] = {
415
+ raw: component,
416
+ fields: fieldNames,
417
+ metadata: metadata
418
+ };
345
419
  });
346
420
  });
347
421
 
@@ -92,6 +92,19 @@ function capToTwoSentences (description) {
92
92
  return result.trim()
93
93
  }
94
94
 
95
/**
 * Remove platform metadata fields (cloudSupported, requiresCgo, cloudOnly)
 * from every connector object, mutating the array elements in place.
 * @param {Array} connectors - Array of connector objects; non-arrays are ignored
 */
function stripPlatformMetadata (connectors) {
  if (!Array.isArray(connectors)) return
  const metadataKeys = ['cloudSupported', 'requiresCgo', 'cloudOnly']
  for (const connector of connectors) {
    for (const key of metadataKeys) {
      delete connector[key]
    }
  }
}
107
+
95
108
  /**
96
109
  * Update whats-new.adoc with new release information
97
110
  * @param {Object} params - Parameters
@@ -487,37 +500,6 @@ function logCollapsed (label, filesArray, maxToShow = 10) {
487
500
  console.log('')
488
501
  }
489
502
 
490
- /**
491
- * Strip augmentation fields from connector data to ensure clean comparisons
492
- * Removes cloudSupported, requiresCgo, cloudOnly fields and filters out cloud-only connectors
493
- * @param {object} data - Connector index data
494
- * @returns {object} Clean connector data without augmentation
495
- */
496
- function stripAugmentationFields(data) {
497
- const cleanData = JSON.parse(JSON.stringify(data));
498
- const connectorTypes = ['inputs', 'outputs', 'processors', 'caches', 'rate_limits',
499
- 'buffers', 'metrics', 'scanners', 'tracers', 'config', 'bloblang-methods'];
500
-
501
- for (const type of connectorTypes) {
502
- if (Array.isArray(cleanData[type])) {
503
- // Remove connectors that were added by augmentation (cloudOnly or requiresCgo without OSS data)
504
- cleanData[type] = cleanData[type].filter(c => {
505
- // Keep if it's not marked as cloudOnly or requiresCgo
506
- // OR if it has a config/fields (meaning it came from OSS, not just binary analysis)
507
- return (!(c.cloudOnly || c.requiresCgo) || c.config || c.fields);
508
- });
509
-
510
- // Remove augmentation fields
511
- cleanData[type].forEach(c => {
512
- delete c.cloudSupported;
513
- delete c.requiresCgo;
514
- delete c.cloudOnly;
515
- });
516
- }
517
- }
518
-
519
- return cleanData;
520
- }
521
503
 
522
504
  /**
523
505
  * Load or fetch connector data for a specific version
@@ -529,15 +511,11 @@ function stripAugmentationFields(data) {
529
511
  async function loadConnectorDataForVersion(version, dataDir, options = {}) {
530
512
  const dataFile = path.join(dataDir, `connect-${version}.json`);
531
513
 
532
- // If file exists, load it
514
+ // If file exists, load it (with platform metadata intact)
533
515
  if (fs.existsSync(dataFile)) {
534
516
  console.log(`✓ Using existing data file: connect-${version}.json`);
535
517
  const data = JSON.parse(fs.readFileSync(dataFile, 'utf8'));
536
-
537
- // Strip augmentation fields to ensure clean comparisons
538
- // Augmentation adds CGO/cloud-only components that shouldn't affect version diffs
539
- const cleanData = stripAugmentationFields(data);
540
- return cleanData;
518
+ return data;
541
519
  }
542
520
 
543
521
  // If not, fetch it
@@ -940,8 +918,8 @@ async function handleRpcnConnectorDocs (options) {
940
918
  let oldIndex = {}
941
919
  let oldVersion = null
942
920
  if (options.oldData && fs.existsSync(options.oldData)) {
943
- // Strip augmentation fields to ensure clean comparisons
944
- oldIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(options.oldData, 'utf8')))
921
+ // Load with platform metadata intact for accurate diff
922
+ oldIndex = JSON.parse(fs.readFileSync(options.oldData, 'utf8'))
945
923
  const m = options.oldData.match(/connect-([\d.]+)\.json$/)
946
924
  if (m) oldVersion = m[1]
947
925
  } else {
@@ -959,30 +937,43 @@ async function handleRpcnConnectorDocs (options) {
959
937
  oldVersion = sortedVersions[0]
960
938
  const oldFile = `connect-${oldVersion}.json`
961
939
  const oldPath = path.join(dataDir, oldFile)
962
- // Strip augmentation fields to ensure clean comparisons
963
- oldIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(oldPath, 'utf8')))
940
+ // Load with platform metadata intact for accurate diff
941
+ oldIndex = JSON.parse(fs.readFileSync(oldPath, 'utf8'))
964
942
  console.log(`📋 Using old version data: ${oldFile}`)
965
943
  } else {
966
944
  oldVersion = getAntoraValue('asciidoc.attributes.latest-connect-version')
967
945
  if (oldVersion) {
968
946
  const oldPath = path.join(dataDir, `connect-${oldVersion}.json`)
969
947
  if (fs.existsSync(oldPath)) {
970
- // Strip augmentation fields to ensure clean comparisons
971
- oldIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(oldPath, 'utf8')))
948
+ // Load with platform metadata intact for accurate diff
949
+ oldIndex = JSON.parse(fs.readFileSync(oldPath, 'utf8'))
972
950
  }
973
951
  }
974
952
  }
975
953
  }
976
954
 
977
- // Load and strip augmentation fields for clean comparisons
978
- let newIndex = stripAugmentationFields(JSON.parse(fs.readFileSync(dataFile, 'utf8')))
955
+ // Load with platform metadata intact for accurate diff
956
+ let newIndex = JSON.parse(fs.readFileSync(dataFile, 'utf8'))
979
957
 
980
- // Save a clean copy of OSS data for binary analysis (before augmentation)
981
- // This ensures the binary analyzer compares actual binaries, not augmented data
958
+ // Save a clean copy of OSS data for binary analysis
959
+ // Binary analyzer needs pure OSS data without augmented CGO/cloud connectors
982
960
  const cleanOssDataPath = path.join(dataDir, `._connect-${newVersion}-clean.json`)
983
961
 
984
- // Use the already-stripped newIndex for clean data
962
+ // Create clean version by removing augmented connectors
985
963
  const cleanData = JSON.parse(JSON.stringify(newIndex))
964
+ const connectorTypes = ['inputs', 'outputs', 'processors', 'caches', 'rate_limits',
965
+ 'buffers', 'metrics', 'scanners', 'tracers']
966
+
967
+ for (const type of connectorTypes) {
968
+ if (Array.isArray(cleanData[type])) {
969
+ // Keep only connectors from OSS rpk (have config/fields)
970
+ // Remove augmentation-only connectors (added by previous binary analysis)
971
+ cleanData[type] = cleanData[type].filter(c => c.config || c.fields)
972
+
973
+ // Remove platform metadata from remaining connectors
974
+ stripPlatformMetadata(cleanData[type])
975
+ }
976
+ }
986
977
 
987
978
  fs.writeFileSync(cleanOssDataPath, JSON.stringify(cleanData, null, 2), 'utf8')
988
979
 
@@ -1031,8 +1022,6 @@ async function handleRpcnConnectorDocs (options) {
1031
1022
  }
1032
1023
  }
1033
1024
 
1034
- printDeltaReport(oldIndex, newIndex)
1035
-
1036
1025
  // Binary analysis
1037
1026
  let oldBinaryAnalysis = null
1038
1027
 
@@ -1250,9 +1239,10 @@ async function handleRpcnConnectorDocs (options) {
1250
1239
  }
1251
1240
  }
1252
1241
 
1253
- // NOTE: We do NOT reload newIndex after augmentation
1254
- // Diff generation should use clean OSS data to avoid false positives from CGO/cloud-only components
1255
- // The augmented data is saved to disk but not used for version comparisons
1242
+ // IMPORTANT: Reload newIndex with augmented data for unified diff
1243
+ // The unified diff approach compares platform metadata to detect transitions
1244
+ newIndex = connectorData
1245
+ console.log(`✓ Reloaded newIndex with augmented data for diff comparison`)
1256
1246
  } catch (err) {
1257
1247
  console.error(`Warning: Failed to augment data file: ${err.message}`)
1258
1248
  }
@@ -1265,9 +1255,51 @@ async function handleRpcnConnectorDocs (options) {
1265
1255
  } else if (versionsMatch) {
1266
1256
  console.log(`⏭️ Skipping diff generation: versions match (${oldVersion} === ${newVersion})`)
1267
1257
  } else {
1258
+ // FALLBACK: If binary analysis failed, strip CGO/cloud augmentation from old data
1259
+ // to prevent false "removed" reports when comparing augmented old vs non-augmented new
1260
+ let oldIndexForDiff = oldIndex
1261
+ // Check if CGO analysis specifically failed (cgoIndex will be undefined if CGO binary couldn't be analyzed)
1262
+ const cgoAnalysisFailed = !binaryAnalysis || !binaryAnalysis.ossVersion || !binaryAnalysis.cgoIndex
1263
+ if (cgoAnalysisFailed) {
1264
+ console.log('⚠️ Binary analysis unavailable - stripping CGO/cloud metadata from old data for clean comparison')
1265
+
1266
+ // Strip CGO/cloud-only connectors and metadata from old data
1267
+ oldIndexForDiff = JSON.parse(JSON.stringify(oldIndex))
1268
+ const connectorTypes = ['inputs', 'outputs', 'processors', 'caches', 'rate_limits',
1269
+ 'buffers', 'metrics', 'scanners', 'tracers']
1270
+
1271
+ let totalStripped = 0
1272
+ for (const type of connectorTypes) {
1273
+ if (Array.isArray(oldIndexForDiff[type])) {
1274
+ const originalCount = oldIndexForDiff[type].length
1275
+
1276
+ // Remove connectors marked as CGO-only or cloud-only
1277
+ // These shouldn't appear as "removed" when binary analysis is unavailable
1278
+ oldIndexForDiff[type] = oldIndexForDiff[type].filter(c => {
1279
+ return !(c.requiresCgo || c.cloudOnly)
1280
+ })
1281
+
1282
+ const removed = originalCount - oldIndexForDiff[type].length
1283
+ if (removed > 0) {
1284
+ console.log(` • Stripped ${removed} CGO/cloud connectors from ${type}`)
1285
+ totalStripped += removed
1286
+ }
1287
+
1288
+ // Remove platform metadata from remaining connectors
1289
+ stripPlatformMetadata(oldIndexForDiff[type])
1290
+ }
1291
+ }
1292
+
1293
+ if (totalStripped > 0) {
1294
+ console.log(` ✓ Total stripped: ${totalStripped} CGO/cloud connectors`)
1295
+ }
1296
+ }
1297
+
1298
+ printDeltaReport(oldIndexForDiff, newIndex)
1299
+
1268
1300
  const { generateConnectorDiffJson } = require('./report-delta.js')
1269
1301
  diffJson = generateConnectorDiffJson(
1270
- oldIndex,
1302
+ oldIndexForDiff,
1271
1303
  newIndex,
1272
1304
  {
1273
1305
  oldVersion: oldVersion,