@redpanda-data/docs-extensions-and-macros 4.15.9 → 4.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Shared utilities for llms.txt generation and markdown processing.
5
+ * Used by both convert-to-markdown.js and convert-llms-to-txt.js.
6
+ */
7
+
8
+ /**
9
+ * The base directive text that appears in markdown files pointing to llms.txt.
10
+ * This is the canonical source of truth used for both rendering and stripping.
11
+ */
12
+ const LLMS_DIRECTIVE_BASE = 'For the complete documentation index, see [llms.txt](/llms.txt)';
13
+
14
/**
 * Build the markdown blockquote line that points readers at llms.txt.
 *
 * Without a component name the result is just the base directive inside a
 * blockquote. With one, a sentence linking to `/<componentName>-full.txt`
 * is appended after the base directive.
 *
 * @param {string} componentName - Optional component name for component-specific link
 * @returns {string} Formatted markdown blockquote directive
 */
function formatLlmsDirective(componentName) {
  const directive = `> ${LLMS_DIRECTIVE_BASE}`;

  // Any falsy value (undefined, empty string, ...) means "no component link".
  if (!componentName) {
    return directive;
  }

  const fullExport = `${componentName}-full.txt`;
  return `${directive}. Component-specific: [${fullExport}](/${fullExport})`;
}
25
+
26
+ /**
27
+ * Regex pattern to match and strip the llms directive from markdown content.
28
+ * Matches the blockquote format with optional component-specific suffix.
29
+ */
30
+ const LLMS_DIRECTIVE_REGEX = /^> For the complete documentation index, see \[llms\.txt\].*$/gm;
31
+
32
+ /**
33
+ * Regex pattern to match and strip HTML source comments from markdown content.
34
+ */
35
+ const SOURCE_COMMENT_REGEX = /^<!--[\s\S]*?-->\s*/gm;
36
+
37
/**
 * Remove the metadata that the convert-to-markdown extension adds to a page.
 *
 * Two things are removed, in this order:
 *   1. Everything matched by SOURCE_COMMENT_REGEX (HTML comments that start
 *      at the beginning of a line, e.g. the source URL comment).
 *   2. Everything matched by LLMS_DIRECTIVE_REGEX (the llms.txt directive
 *      blockquote, which is redundant in aggregated exports).
 * The result is then trimmed of leading and trailing whitespace.
 *
 * @param {string|Buffer} content - The markdown content to strip
 * @returns {string} Cleaned markdown content
 */
function stripMarkdownMetadata(content) {
  // Non-string input (such as a Buffer) is decoded as UTF-8 first.
  let markdown;
  if (typeof content === 'string') {
    markdown = content;
  } else {
    markdown = content.toString('utf8');
  }

  return markdown
    .replace(SOURCE_COMMENT_REGEX, '')
    .replace(LLMS_DIRECTIVE_REGEX, '')
    .trim();
}
57
+
58
+ module.exports = {
59
+ LLMS_DIRECTIVE_BASE,
60
+ LLMS_DIRECTIVE_REGEX,
61
+ SOURCE_COMMENT_REGEX,
62
+ formatLlmsDirective,
63
+ stripMarkdownMetadata,
64
+ };
@@ -1,6 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  const { toMarkdownUrl } = require('../extension-utils/url-utils');
4
+ const { stripMarkdownMetadata } = require('../extension-utils/llms-utils');
4
5
 
5
6
  /**
6
7
  * Extracts markdown from llms.adoc page and generates AI-friendly documentation exports.
@@ -32,6 +33,8 @@ module.exports.register = function () {
32
33
  siteUrl = playbook.site?.url || 'https://docs.redpanda.com';
33
34
  logger.info(`Using site URL: ${siteUrl}`);
34
35
  }
36
+ // Normalize: strip trailing slashes to avoid double slashes in URL concatenation
37
+ siteUrl = siteUrl.replace(/\/+$/, '');
35
38
  });
36
39
 
37
40
  this.on('contentClassified', ({ contentCatalog }) => {
@@ -71,10 +74,10 @@ module.exports.register = function () {
71
74
  let content = llmsPage.markdownContents.toString('utf8');
72
75
  logger.info(`Extracted ${content.length} bytes of markdown content`);
73
76
 
74
- // Strip HTML comments added by convert-to-markdown extension
77
+ // Strip metadata added by convert-to-markdown extension using shared helper
75
78
  // These reference the unpublished /home/llms/ URL which doesn't make sense for llms.txt
76
- content = content.replace(/^<!--[\s\S]*?-->\s*/gm, '').trim();
77
- logger.debug(`Stripped HTML comments, now ${content.length} bytes`);
79
+ content = stripMarkdownMetadata(content);
80
+ logger.debug(`Stripped metadata, now ${content.length} bytes`);
78
81
 
79
82
  // Fix URLs: convert em dashes back to double hyphens and remove invisible characters
80
83
  // The markdown converter applies smart typography that turns -- into — (em dash)
@@ -187,7 +190,8 @@ module.exports.register = function () {
187
190
  fullContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
188
191
  fullContent += `**URL**: ${pageUrl}\n\n`;
189
192
  fullContent += `---\n\n`;
190
- fullContent += page.markdownContents.toString('utf8');
193
+ // Strip metadata (directive, source comments) from page content
194
+ fullContent += stripMarkdownMetadata(page.markdownContents);
191
195
  fullContent += `\n\n---\n\n`;
192
196
  });
193
197
 
@@ -258,7 +262,8 @@ module.exports.register = function () {
258
262
  componentContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
259
263
  componentContent += `**URL**: ${pageUrl}\n\n`;
260
264
  componentContent += `---\n\n`;
261
- componentContent += page.markdownContents.toString('utf8');
265
+ // Strip metadata (directive, source comments) from page content
266
+ componentContent += stripMarkdownMetadata(page.markdownContents);
262
267
  componentContent += `\n\n---\n\n`;
263
268
  });
264
269
 
@@ -274,8 +279,40 @@ module.exports.register = function () {
274
279
  if (llmsPage && llmsPage.llmsTxtContent) {
275
280
  logger.info('Adding llms.txt to site root');
276
281
 
282
+ // Target: Stay under 50K chars (agent-friendly docs spec limit)
283
+ const MAX_LLMS_TXT_CHARS = 45000; // Leave buffer below 50K
284
+ let llmsTxtContent = llmsPage.llmsTxtContent;
285
+
286
+ // Check if base content already exceeds limit
287
+ if (llmsTxtContent.length >= MAX_LLMS_TXT_CHARS) {
288
+ logger.warn(`Base llms.txt content (${llmsTxtContent.length} chars) exceeds ${MAX_LLMS_TXT_CHARS} char limit, truncating`);
289
+ // Truncate at last newline before limit to avoid cutting mid-line or mid-URL
290
+ llmsTxtContent = truncateAtNewline(llmsTxtContent, MAX_LLMS_TXT_CHARS - 100) + '\n\n[Content truncated due to size limits]';
291
+ }
292
+
293
+ // Generate navigation section with component sitemaps and key sections
294
+ const navSection = generateNavigationSection(siteUrl);
295
+
296
+ // Calculate available space for navigation section
297
+ const availableSpace = MAX_LLMS_TXT_CHARS - llmsTxtContent.length - 2; // -2 for \n\n separator
298
+
299
+ if (availableSpace >= navSection.length) {
300
+ // Full navigation section fits
301
+ llmsTxtContent = llmsTxtContent + '\n\n' + navSection;
302
+ logger.info(`Injected full navigation section (${navSection.length} chars)`);
303
+ } else if (availableSpace > 500) {
304
+ // Partial navigation section - truncate at last newline to avoid cutting mid-line or mid-URL
305
+ const truncatedNav = truncateAtNewline(navSection, availableSpace - 50) + '\n\n[Navigation truncated due to size limits]';
306
+ llmsTxtContent = llmsTxtContent + '\n\n' + truncatedNav;
307
+ logger.warn(`Truncated navigation section from ${navSection.length} to ${truncatedNav.length} chars`);
308
+ } else {
309
+ logger.warn(`Skipping navigation injection - only ${availableSpace} chars available`);
310
+ }
311
+
312
+ logger.info(`Final llms.txt size: ${llmsTxtContent.length} chars`);
313
+
277
314
  siteCatalog.addFile({
278
- contents: Buffer.from(llmsPage.llmsTxtContent, 'utf8'),
315
+ contents: Buffer.from(llmsTxtContent, 'utf8'),
279
316
  out: { path: 'llms.txt' },
280
317
  });
281
318
  logger.info('Successfully added llms.txt');
@@ -529,3 +566,71 @@ function addLastmodToComponentSitemaps(contentCatalog, siteCatalog, sitemapIndex
529
566
 
530
567
  return sitemapIndexXml;
531
568
  }
569
+
570
/**
 * Truncate content at the last newline before the specified limit.
 * This avoids cutting mid-line or mid-URL which would produce malformed output.
 *
 * Behaviour at the edges:
 *  - Content that already fits within `maxLength` is returned unchanged.
 *  - A negative `maxLength` is treated as 0 and yields an empty string.
 *  - If the truncated text has no newline after its first character, the
 *    text is cut at the limit instead, without splitting a UTF-16 surrogate
 *    pair.
 *
 * @param {string} content - Content to truncate
 * @param {number} maxLength - Maximum length in UTF-16 code units
 * @returns {string} Truncated content, ending at a newline boundary when one is available
 */
function truncateAtNewline(content, maxLength) {
  // slice(0, negative) counts from the END of the string and would return
  // almost all of the content, so a negative budget is clamped to zero.
  const limit = maxLength < 0 ? 0 : maxLength;

  if (content.length <= limit) {
    return content;
  }

  let head = content.slice(0, limit);
  const lastNewline = head.lastIndexOf('\n');

  // A newline at index 0 is not used as a cut point: cutting there would
  // return an empty string even though there is room for content.
  if (lastNewline > 0) {
    return head.slice(0, lastNewline);
  }

  // Fallback: no usable newline, so cut at the limit. If that cut lands
  // between the two halves of a surrogate pair, drop the dangling high
  // surrogate so the result stays valid when encoded as UTF-8.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return head;
}
590
+
591
/**
 * Generate a comprehensive navigation section for llms.txt
 * This improves llms-txt-freshness score by providing pathways to all documentation
 *
 * NOTE: The section URLs below are hardcoded. If pages are renamed, moved, or removed,
 * these links will 404. When restructuring documentation, update these URLs accordingly.
 * Future improvement: Generate these from the content catalog at build time.
 *
 * The base URL is normalized here as well as by the caller: trailing slashes
 * are removed so links never contain `//`, and a missing base URL produces
 * root-relative links rather than the literal text "undefined".
 *
 * @param {string} siteUrl - Base site URL
 * @returns {string} Markdown navigation section (ends with a single newline)
 */
function generateNavigationSection(siteUrl) {
  const base = String(siteUrl ?? '').replace(/\/+$/, '');

  // One markdown list item; the " - note" suffix is only added when a note is given.
  const bullet = (label, path, note) =>
    `- [${label}](${base}/${path})${note ? ` - ${note}` : ''}`;

  const lines = [
    '## Complete documentation index',
    '',
    'For comprehensive page listings, use the sitemaps:',
    '',
    bullet('sitemap.md', 'sitemap.md', 'Main sitemap index with all documentation'),
    bullet('sitemap-all.md', 'sitemap-all.md', 'Combined listing of all documentation pages'),
    '',
    '### Component sitemaps',
    '',
    bullet('Redpanda Self-Managed', 'sitemap-ROOT.md'),
    bullet('Redpanda Cloud', 'sitemap-redpanda-cloud.md'),
    bullet('Redpanda Connect', 'sitemap-redpanda-connect.md'),
    bullet('Redpanda Labs', 'sitemap-redpanda-labs.md'),
    '',
    '### Key documentation sections',
    '',
    '**Self-Managed:**',
    bullet('Deploy', 'current/deploy.md', 'Installation and deployment guides'),
    bullet('Manage', 'current/manage.md', 'Cluster operations and administration'),
    bullet('Develop', 'current/develop.md', 'Application development guides'),
    bullet('Reference', 'current/reference.md', 'Configuration, CLI, and API references'),
    bullet('Upgrade', 'current/upgrade.md', 'Version upgrade procedures'),
    bullet('Troubleshoot', 'current/troubleshoot.md', 'Debugging and issue resolution'),
    '',
    '**Cloud:**',
    bullet('Get Started', 'redpanda-cloud/get-started.md', 'Cloud quickstart and cluster types'),
    bullet('Manage', 'redpanda-cloud/manage.md', 'Cloud cluster management'),
    bullet('Networking', 'redpanda-cloud/networking.md', 'Network configuration'),
    bullet('Security', 'redpanda-cloud/security.md', 'Authentication and authorization'),
    bullet('AI Agents', 'redpanda-cloud/ai-agents.md', 'Agentic Data Plane documentation'),
    '',
    '**Connect:**',
    bullet('Components', 'redpanda-connect/components.md', 'All connectors, processors, and more'),
    bullet('Guides', 'redpanda-connect/guides.md', 'Integration tutorials'),
    bullet('Configuration', 'redpanda-connect/configuration.md', 'YAML configuration reference'),
  ];

  return `${lines.join('\n')}\n`;
}
@@ -2,6 +2,7 @@ const path = require('path')
2
2
  const os = require('os')
3
3
  const yaml = require('js-yaml')
4
4
  const { toMarkdownUrl } = require('../extension-utils/url-utils')
5
+ const { formatLlmsDirective } = require('../extension-utils/llms-utils')
5
6
  const TurndownService = require('turndown')
6
7
  const turndownPluginGfm = require('turndown-plugin-gfm')
7
8
  const { gfm } = turndownPluginGfm
@@ -500,17 +501,18 @@ module.exports.register = function () {
500
501
  restOfMarkdown = markdown.substring(h1Match[0].length).trimStart()
501
502
  }
502
503
 
503
- // Add frontmatter AFTER H1 heading, then source reference and AI-friendly note
504
+ // Structure: H1 → llms.txt directive (blockquote) → frontmatter → source → content
505
+ // The directive must appear near the top for agent-friendly docs spec compliance
504
506
  if (canonicalUrl) {
505
507
  const componentName = page.src?.component || '';
506
- const urlHint = componentName
507
- ? `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview), /${componentName}-full.txt (this component only), or /llms-full.txt (complete documentation). -->`
508
- : `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview) or /llms-full.txt (complete documentation). -->`;
508
+ // Use markdown blockquote format for the directive (visible, can be hidden with CSS)
509
+ const llmsDirective = formatLlmsDirective(componentName);
509
510
 
510
- markdown = `${h1Heading}\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n${urlHint}\n\n${restOfMarkdown}`
511
+ markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n\n${restOfMarkdown}`
511
512
  } else if (frontmatter) {
512
- // If no canonical URL but we have frontmatter, still add it after H1
513
- markdown = `${h1Heading}\n${frontmatter}${restOfMarkdown}`
513
+ // If no canonical URL but we have frontmatter, still add directive after H1
514
+ const llmsDirective = formatLlmsDirective();
515
+ markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}${restOfMarkdown}`
514
516
  }
515
517
 
516
518
  // Clean up unnecessary whitespace
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redpanda-data/docs-extensions-and-macros",
3
- "version": "4.15.9",
3
+ "version": "4.16.0",
4
4
  "description": "Antora extensions and macros developed for Redpanda documentation.",
5
5
  "keywords": [
6
6
  "antora",
@@ -248,6 +248,55 @@ function createEntrypoint(tempDir, apiSurface) {
248
248
  return fragmentFiles;
249
249
  }
250
250
 
251
/**
 * Move a `$ref` that has sibling keys into an `allOf` wrapper, recursively.
 *
 * OpenAPI 3.0 says properties placed next to `$ref` are ignored, and some
 * renderers (e.g. Bump.sh) honour that, showing the referenced schema's
 * generic description instead of the field-level one. Rewriting
 * `{ $ref, description, ... }` as `{ allOf: [{ $ref }], description, ... }`
 * lets those renderers pick up the field-level description.
 *
 * Objects and arrays are modified in place and the same reference is
 * returned; primitives and null are returned unchanged. An object that
 * already has a truthy `allOf` is left as it is, on the assumption that the
 * structure comes from the source spec and should not be modified.
 *
 * @param {*} node - Any value from the parsed OpenAPI spec.
 * @returns {*} The transformed value (mutates in place for objects).
 */
function wrapRefSiblings(node) {
  if (typeof node !== 'object' || node === null) {
    return node;
  }

  if (Array.isArray(node)) {
    for (let idx = 0; idx < node.length; idx += 1) {
      // Leave holes in sparse arrays untouched.
      if (idx in node) {
        node[idx] = wrapRefSiblings(node[idx]);
      }
    }
    return node;
  }

  const ref = node.$ref;
  const isNonEmptyRef = typeof ref === 'string' && ref !== '';

  // Only wrap when $ref has at least one sibling and no allOf is present yet.
  if (isNonEmptyRef && Object.keys(node).length > 1 && !node.allOf) {
    delete node.$ref;
    node.allOf = [{ $ref: ref }];
  }

  // Descend into every value, including a freshly created allOf array.
  Object.keys(node).forEach((key) => {
    node[key] = wrapRefSiblings(node[key]);
  });

  return node;
}
299
+
251
300
  /**
252
301
  * Bundle one or more OpenAPI fragment files into a single bundled YAML using a selected external bundler.
253
302
  *
@@ -529,6 +578,9 @@ function postProcessBundle(filePath, options, quiet = false) {
529
578
  bundle.info['x-generated-at'] = new Date().toISOString();
530
579
  bundle.info['x-generator'] = 'redpanda-docs-openapi-bundler';
531
580
 
581
+ // Wrap $ref siblings into allOf so renderers display field descriptions
582
+ bundle = wrapRefSiblings(bundle);
583
+
532
584
  // Sort keys for deterministic output
533
585
  const sortedBundle = sortObjectKeys(bundle);
534
586
 
@@ -776,6 +828,7 @@ module.exports = {
776
828
  normalizeTag,
777
829
  getMajorMinor,
778
830
  sortObjectKeys,
831
+ wrapRefSiblings,
779
832
  detectBundler,
780
833
  createEntrypoint,
781
834
  postProcessBundle
@@ -2143,7 +2143,8 @@ def resolve_type_and_default(properties, definitions):
2143
2143
  if resolved_type == "enum" or "enum" in resolved:
2144
2144
  # Enums are represented as strings with an enum constraint in JSON Schema
2145
2145
  prop["type"] = "string"
2146
- if "enum" in resolved:
2146
+ # Only set enum if not already set by an override (accepted_values)
2147
+ if "enum" in resolved and "enum" not in prop:
2147
2148
  prop["enum"] = resolved["enum"]
2148
2149
  elif resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
2149
2150
  prop["type"] = resolved_type
@@ -2483,8 +2483,10 @@ class RuntimeValidationEnumExtractor:
2483
2483
  )
2484
2484
 
2485
2485
  if enum_results:
2486
- # Extract just the values for the enum field
2487
- property["enum"] = [result["value"] for result in enum_results]
2486
+ # Skip if enum was already set by an override (accepted_values)
2487
+ if "enum" not in property:
2488
+ # Extract just the values for the enum field
2489
+ property["enum"] = [result["value"] for result in enum_results]
2488
2490
 
2489
2491
  # Add metadata about which enum values are enterprise-only
2490
2492
  enum_metadata = {}