@redpanda-data/docs-extensions-and-macros 4.15.9 → 4.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extension-utils/llms-utils.js +64 -0
- package/extensions/convert-llms-to-txt.js +111 -6
- package/extensions/convert-to-markdown.js +9 -7
- package/package.json +1 -1
- package/tools/bundle-openapi.js +53 -0
- package/tools/property-extractor/property_extractor.py +2 -1
- package/tools/property-extractor/transformers.py +4 -2
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Shared utilities for llms.txt generation and markdown processing.
|
|
5
|
+
* Used by both convert-to-markdown.js and convert-llms-to-txt.js.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Leading portion of the llms.txt directive, up to and including the link label.
 * Both the rendered directive text and the stripping regex are derived from
 * this single string so the two cannot drift apart.
 */
const LLMS_DIRECTIVE_PREFIX = 'For the complete documentation index, see [llms.txt]';

/**
 * The base directive text that appears in markdown files pointing to llms.txt.
 * This is the canonical source of truth used for both rendering and stripping.
 */
const LLMS_DIRECTIVE_BASE = `${LLMS_DIRECTIVE_PREFIX}(/llms.txt)`;

/**
 * Escape every regular-expression metacharacter in a literal string.
 * @param {string} text - Literal text to embed in a RegExp source
 * @returns {string} Text that matches itself when used inside a RegExp
 */
function escapeForRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Format the llms directive blockquote for a page.
 * @param {string} [componentName] - Optional component name. When truthy, a
 *   component-specific `<componentName>-full.txt` link is appended.
 * @returns {string} Formatted markdown blockquote directive
 */
function formatLlmsDirective(componentName) {
  const directive = `> ${LLMS_DIRECTIVE_BASE}`;
  if (!componentName) {
    return directive;
  }
  return `${directive}. Component-specific: [${componentName}-full.txt](/${componentName}-full.txt)`;
}

/**
 * Regex pattern to match and strip the llms directive from markdown content.
 * Matches the whole blockquote line, including any component-specific suffix.
 * Only the text up to the `[llms.txt]` label is required, so the line is still
 * recognized whatever follows the label.
 *
 * The pattern carries the `g` flag: String.prototype.replace resets `lastIndex`
 * itself, but callers using `.test()` / `.exec()` must reset it between calls.
 */
const LLMS_DIRECTIVE_REGEX = new RegExp(`^> ${escapeForRegExp(LLMS_DIRECTIVE_PREFIX)}.*$`, 'gm');

/**
 * Regex pattern to match and strip HTML source comments from markdown content.
 * Matches any HTML comment that begins at the start of a line, together with
 * the whitespace that follows it.
 */
const SOURCE_COMMENT_REGEX = /^<!--[\s\S]*?-->\s*/gm;

/**
 * Strip metadata added by convert-to-markdown extension from page content.
 * This removes:
 * 1. HTML comments (source URLs)
 * 2. llms.txt directive blockquotes (redundant in aggregated exports)
 *
 * @param {string|Buffer|null|undefined} content - The markdown content to strip.
 *   Non-string values are decoded with `toString('utf8')`; `null` and
 *   `undefined` are treated as empty content instead of throwing.
 * @returns {string} Cleaned markdown content with surrounding whitespace trimmed
 */
function stripMarkdownMetadata(content) {
  // A page without markdown contents contributes nothing rather than crashing the build.
  if (content == null) {
    return '';
  }

  const text = typeof content === 'string' ? content : content.toString('utf8');

  return text
    .replace(SOURCE_COMMENT_REGEX, '') // HTML comments (source URLs)
    .replace(LLMS_DIRECTIVE_REGEX, '') // llms.txt directive blockquotes
    .trim();
}
|
|
57
|
+
|
|
58
|
+
module.exports = {
|
|
59
|
+
LLMS_DIRECTIVE_BASE,
|
|
60
|
+
LLMS_DIRECTIVE_REGEX,
|
|
61
|
+
SOURCE_COMMENT_REGEX,
|
|
62
|
+
formatLlmsDirective,
|
|
63
|
+
stripMarkdownMetadata,
|
|
64
|
+
};
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const { toMarkdownUrl } = require('../extension-utils/url-utils');
|
|
4
|
+
const { stripMarkdownMetadata } = require('../extension-utils/llms-utils');
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* Extracts markdown from llms.adoc page and generates AI-friendly documentation exports.
|
|
@@ -32,6 +33,8 @@ module.exports.register = function () {
|
|
|
32
33
|
siteUrl = playbook.site?.url || 'https://docs.redpanda.com';
|
|
33
34
|
logger.info(`Using site URL: ${siteUrl}`);
|
|
34
35
|
}
|
|
36
|
+
// Normalize: strip trailing slashes to avoid double slashes in URL concatenation
|
|
37
|
+
siteUrl = siteUrl.replace(/\/+$/, '');
|
|
35
38
|
});
|
|
36
39
|
|
|
37
40
|
this.on('contentClassified', ({ contentCatalog }) => {
|
|
@@ -71,10 +74,10 @@ module.exports.register = function () {
|
|
|
71
74
|
let content = llmsPage.markdownContents.toString('utf8');
|
|
72
75
|
logger.info(`Extracted ${content.length} bytes of markdown content`);
|
|
73
76
|
|
|
74
|
-
// Strip
|
|
77
|
+
// Strip metadata added by convert-to-markdown extension using shared helper
|
|
75
78
|
// These reference the unpublished /home/llms/ URL which doesn't make sense for llms.txt
|
|
76
|
-
content = content
|
|
77
|
-
logger.debug(`Stripped
|
|
79
|
+
content = stripMarkdownMetadata(content);
|
|
80
|
+
logger.debug(`Stripped metadata, now ${content.length} bytes`);
|
|
78
81
|
|
|
79
82
|
// Fix URLs: convert em dashes back to double hyphens and remove invisible characters
|
|
80
83
|
// The markdown converter applies smart typography that turns -- into — (em dash)
|
|
@@ -187,7 +190,8 @@ module.exports.register = function () {
|
|
|
187
190
|
fullContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
|
|
188
191
|
fullContent += `**URL**: ${pageUrl}\n\n`;
|
|
189
192
|
fullContent += `---\n\n`;
|
|
190
|
-
|
|
193
|
+
// Strip metadata (directive, source comments) from page content
|
|
194
|
+
fullContent += stripMarkdownMetadata(page.markdownContents);
|
|
191
195
|
fullContent += `\n\n---\n\n`;
|
|
192
196
|
});
|
|
193
197
|
|
|
@@ -258,7 +262,8 @@ module.exports.register = function () {
|
|
|
258
262
|
componentContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
|
|
259
263
|
componentContent += `**URL**: ${pageUrl}\n\n`;
|
|
260
264
|
componentContent += `---\n\n`;
|
|
261
|
-
|
|
265
|
+
// Strip metadata (directive, source comments) from page content
|
|
266
|
+
componentContent += stripMarkdownMetadata(page.markdownContents);
|
|
262
267
|
componentContent += `\n\n---\n\n`;
|
|
263
268
|
});
|
|
264
269
|
|
|
@@ -274,8 +279,40 @@ module.exports.register = function () {
|
|
|
274
279
|
if (llmsPage && llmsPage.llmsTxtContent) {
|
|
275
280
|
logger.info('Adding llms.txt to site root');
|
|
276
281
|
|
|
282
|
+
// Target: Stay under 50K chars (agent-friendly docs spec limit)
|
|
283
|
+
const MAX_LLMS_TXT_CHARS = 45000; // Leave buffer below 50K
|
|
284
|
+
let llmsTxtContent = llmsPage.llmsTxtContent;
|
|
285
|
+
|
|
286
|
+
// Check if base content already exceeds limit
|
|
287
|
+
if (llmsTxtContent.length >= MAX_LLMS_TXT_CHARS) {
|
|
288
|
+
logger.warn(`Base llms.txt content (${llmsTxtContent.length} chars) exceeds ${MAX_LLMS_TXT_CHARS} char limit, truncating`);
|
|
289
|
+
// Truncate at last newline before limit to avoid cutting mid-line or mid-URL
|
|
290
|
+
llmsTxtContent = truncateAtNewline(llmsTxtContent, MAX_LLMS_TXT_CHARS - 100) + '\n\n[Content truncated due to size limits]';
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
// Generate navigation section with component sitemaps and key sections
|
|
294
|
+
const navSection = generateNavigationSection(siteUrl);
|
|
295
|
+
|
|
296
|
+
// Calculate available space for navigation section
|
|
297
|
+
const availableSpace = MAX_LLMS_TXT_CHARS - llmsTxtContent.length - 2; // -2 for \n\n separator
|
|
298
|
+
|
|
299
|
+
if (availableSpace >= navSection.length) {
|
|
300
|
+
// Full navigation section fits
|
|
301
|
+
llmsTxtContent = llmsTxtContent + '\n\n' + navSection;
|
|
302
|
+
logger.info(`Injected full navigation section (${navSection.length} chars)`);
|
|
303
|
+
} else if (availableSpace > 500) {
|
|
304
|
+
// Partial navigation section - truncate at last newline to avoid cutting mid-line or mid-URL
|
|
305
|
+
const truncatedNav = truncateAtNewline(navSection, availableSpace - 50) + '\n\n[Navigation truncated due to size limits]';
|
|
306
|
+
llmsTxtContent = llmsTxtContent + '\n\n' + truncatedNav;
|
|
307
|
+
logger.warn(`Truncated navigation section from ${navSection.length} to ${truncatedNav.length} chars`);
|
|
308
|
+
} else {
|
|
309
|
+
logger.warn(`Skipping navigation injection - only ${availableSpace} chars available`);
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
logger.info(`Final llms.txt size: ${llmsTxtContent.length} chars`);
|
|
313
|
+
|
|
277
314
|
siteCatalog.addFile({
|
|
278
|
-
contents: Buffer.from(
|
|
315
|
+
contents: Buffer.from(llmsTxtContent, 'utf8'),
|
|
279
316
|
out: { path: 'llms.txt' },
|
|
280
317
|
});
|
|
281
318
|
logger.info('Successfully added llms.txt');
|
|
@@ -529,3 +566,71 @@ function addLastmodToComponentSitemaps(contentCatalog, siteCatalog, sitemapIndex
|
|
|
529
566
|
|
|
530
567
|
return sitemapIndexXml;
|
|
531
568
|
}
|
|
569
|
+
|
|
570
|
+
/**
 * Truncate content at the last newline before the specified limit.
 * This avoids cutting mid-line or mid-URL which would produce malformed output.
 *
 * Behavior:
 * - Content no longer than `maxLength` is returned unchanged.
 * - A `maxLength` that is zero, negative, or NaN yields an empty string
 *   (a negative value must not be passed to `slice`, which would count from
 *   the end of the string and keep almost everything).
 * - Otherwise the content is cut at `maxLength` and then shortened to the last
 *   newline inside that window; the newline itself is not included.
 * - If the window has no newline past index 0, the hard cut is returned, minus
 *   a dangling high surrogate so a character is never split in half.
 *
 * @param {string} content - Content to truncate
 * @param {number} maxLength - Maximum length in UTF-16 code units
 * @returns {string} Truncated content ending at a newline boundary when possible
 */
function truncateAtNewline(content, maxLength) {
  if (content.length <= maxLength) {
    return content;
  }
  if (!(maxLength > 0)) {
    return '';
  }

  const truncated = content.slice(0, maxLength);
  const lastNewline = truncated.lastIndexOf('\n');
  if (lastNewline > 0) {
    return truncated.slice(0, lastNewline);
  }

  // Fallback: no usable newline found. Drop a trailing high surrogate so the
  // cut does not leave half of an astral character (which would be encoded as
  // U+FFFD when the text is later written out as UTF-8).
  const lastCodeUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastCodeUnit >= 0xd800 && lastCodeUnit <= 0xdbff) {
    return truncated.slice(0, -1);
  }
  return truncated;
}
|
|
590
|
+
|
|
591
|
+
/**
 * Generate a comprehensive navigation section for llms.txt
 * This improves llms-txt-freshness score by providing pathways to all documentation
 *
 * NOTE: The section URLs below are hardcoded. If pages are renamed, moved, or removed,
 * these links will 404. When restructuring documentation, update these URLs accordingly.
 * Future improvement: Generate these from the content catalog at build time.
 *
 * @param {string} siteUrl - Base site URL. Trailing slashes are stripped here so
 *   links never contain a double slash, even if the caller did not normalize
 *   the value. A nullish value produces root-relative links.
 * @returns {string} Markdown navigation section, terminated by a newline
 */
function generateNavigationSection(siteUrl) {
  const base = String(siteUrl ?? '').replace(/\/+$/, '');

  const lines = [
    '## Complete documentation index',
    '',
    'For comprehensive page listings, use the sitemaps:',
    '',
    `- [sitemap.md](${base}/sitemap.md) - Main sitemap index with all documentation`,
    `- [sitemap-all.md](${base}/sitemap-all.md) - Combined listing of all documentation pages`,
    '',
    '### Component sitemaps',
    '',
    `- [Redpanda Self-Managed](${base}/sitemap-ROOT.md)`,
    `- [Redpanda Cloud](${base}/sitemap-redpanda-cloud.md)`,
    `- [Redpanda Connect](${base}/sitemap-redpanda-connect.md)`,
    `- [Redpanda Labs](${base}/sitemap-redpanda-labs.md)`,
    '',
    '### Key documentation sections',
    '',
    '**Self-Managed:**',
    `- [Deploy](${base}/current/deploy.md) - Installation and deployment guides`,
    `- [Manage](${base}/current/manage.md) - Cluster operations and administration`,
    `- [Develop](${base}/current/develop.md) - Application development guides`,
    `- [Reference](${base}/current/reference.md) - Configuration, CLI, and API references`,
    `- [Upgrade](${base}/current/upgrade.md) - Version upgrade procedures`,
    `- [Troubleshoot](${base}/current/troubleshoot.md) - Debugging and issue resolution`,
    '',
    '**Cloud:**',
    `- [Get Started](${base}/redpanda-cloud/get-started.md) - Cloud quickstart and cluster types`,
    `- [Manage](${base}/redpanda-cloud/manage.md) - Cloud cluster management`,
    `- [Networking](${base}/redpanda-cloud/networking.md) - Network configuration`,
    `- [Security](${base}/redpanda-cloud/security.md) - Authentication and authorization`,
    `- [AI Agents](${base}/redpanda-cloud/ai-agents.md) - Agentic Data Plane documentation`,
    '',
    '**Connect:**',
    `- [Components](${base}/redpanda-connect/components.md) - All connectors, processors, and more`,
    `- [Guides](${base}/redpanda-connect/guides.md) - Integration tutorials`,
    `- [Configuration](${base}/redpanda-connect/configuration.md) - YAML configuration reference`,
    '', // keeps the trailing newline after the last list item
  ];

  return lines.join('\n');
}
|
|
@@ -2,6 +2,7 @@ const path = require('path')
|
|
|
2
2
|
const os = require('os')
|
|
3
3
|
const yaml = require('js-yaml')
|
|
4
4
|
const { toMarkdownUrl } = require('../extension-utils/url-utils')
|
|
5
|
+
const { formatLlmsDirective } = require('../extension-utils/llms-utils')
|
|
5
6
|
const TurndownService = require('turndown')
|
|
6
7
|
const turndownPluginGfm = require('turndown-plugin-gfm')
|
|
7
8
|
const { gfm } = turndownPluginGfm
|
|
@@ -500,17 +501,18 @@ module.exports.register = function () {
|
|
|
500
501
|
restOfMarkdown = markdown.substring(h1Match[0].length).trimStart()
|
|
501
502
|
}
|
|
502
503
|
|
|
503
|
-
//
|
|
504
|
+
// Structure: H1 → llms.txt directive (blockquote) → frontmatter → source → content
|
|
505
|
+
// The directive must appear near the top for agent-friendly docs spec compliance
|
|
504
506
|
if (canonicalUrl) {
|
|
505
507
|
const componentName = page.src?.component || '';
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
: `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview) or /llms-full.txt (complete documentation). -->`;
|
|
508
|
+
// Use markdown blockquote format for the directive (visible, can be hidden with CSS)
|
|
509
|
+
const llmsDirective = formatLlmsDirective(componentName);
|
|
509
510
|
|
|
510
|
-
markdown = `${h1Heading}\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n
|
|
511
|
+
markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}<!-- Source: ${canonicalUrl} -->\n\n${restOfMarkdown}`
|
|
511
512
|
} else if (frontmatter) {
|
|
512
|
-
// If no canonical URL but we have frontmatter, still add
|
|
513
|
-
|
|
513
|
+
// If no canonical URL but we have frontmatter, still add directive after H1
|
|
514
|
+
const llmsDirective = formatLlmsDirective();
|
|
515
|
+
markdown = `${h1Heading}\n${llmsDirective}\n\n${frontmatter}${restOfMarkdown}`
|
|
514
516
|
}
|
|
515
517
|
|
|
516
518
|
// Clean up unnecessary whitespace
|
package/package.json
CHANGED
package/tools/bundle-openapi.js
CHANGED
|
@@ -248,6 +248,55 @@ function createEntrypoint(tempDir, apiSurface) {
|
|
|
248
248
|
return fragmentFiles;
|
|
249
249
|
}
|
|
250
250
|
|
|
251
|
+
/**
 * Wrap $ref siblings into allOf to preserve field-level descriptions.
 *
 * In OpenAPI 3.0, sibling properties next to $ref are ignored per spec.
 * Some renderers (e.g. Bump.sh) follow this behavior, displaying the generic
 * description from the referenced schema instead of field-level overrides.
 * This function transforms { $ref, description, ... } into
 * { allOf: [{ $ref }], description, ... } so renderers pick up field-level
 * descriptions correctly.
 *
 * Details:
 * - Only a non-empty string `$ref` is treated as a reference, so a schema
 *   property that happens to be named `$ref` (an object value) is left alone.
 * - An object that already has a truthy `allOf` is not rewritten.
 * - Each object is visited at most once, so shared sub-objects are not
 *   re-walked and cyclic structures cannot cause unbounded recursion.
 *
 * @param {*} node - Any value from the parsed OpenAPI spec.
 * @returns {*} The transformed value (mutates in place for objects).
 */
function wrapRefSiblings(node) {
  const visited = new WeakSet();

  const visit = (value) => {
    if (value === null || typeof value !== 'object' || visited.has(value)) {
      return value;
    }
    visited.add(value);

    if (Array.isArray(value)) {
      value.forEach((item, index) => {
        value[index] = visit(item);
      });
      return value;
    }

    const ref = value.$ref;
    const isReference = typeof ref === 'string' && ref !== '';
    const hasSiblings = Object.keys(value).length > 1;

    // Skip if allOf already exists — assumes a pre-existing structure
    // from the source spec that should not be modified.
    if (isReference && hasSiblings && !value.allOf) {
      delete value.$ref;
      value.allOf = [{ $ref: ref }];
    }

    // Recurse into all object values (including a freshly created allOf,
    // whose single entry has no siblings and is therefore left untouched).
    for (const key of Object.keys(value)) {
      value[key] = visit(value[key]);
    }

    return value;
  };

  return visit(node);
}
|
|
299
|
+
|
|
251
300
|
/**
|
|
252
301
|
* Bundle one or more OpenAPI fragment files into a single bundled YAML using a selected external bundler.
|
|
253
302
|
*
|
|
@@ -529,6 +578,9 @@ function postProcessBundle(filePath, options, quiet = false) {
|
|
|
529
578
|
bundle.info['x-generated-at'] = new Date().toISOString();
|
|
530
579
|
bundle.info['x-generator'] = 'redpanda-docs-openapi-bundler';
|
|
531
580
|
|
|
581
|
+
// Wrap $ref siblings into allOf so renderers display field descriptions
|
|
582
|
+
bundle = wrapRefSiblings(bundle);
|
|
583
|
+
|
|
532
584
|
// Sort keys for deterministic output
|
|
533
585
|
const sortedBundle = sortObjectKeys(bundle);
|
|
534
586
|
|
|
@@ -776,6 +828,7 @@ module.exports = {
|
|
|
776
828
|
normalizeTag,
|
|
777
829
|
getMajorMinor,
|
|
778
830
|
sortObjectKeys,
|
|
831
|
+
wrapRefSiblings,
|
|
779
832
|
detectBundler,
|
|
780
833
|
createEntrypoint,
|
|
781
834
|
postProcessBundle
|
|
@@ -2143,7 +2143,8 @@ def resolve_type_and_default(properties, definitions):
|
|
|
2143
2143
|
if resolved_type == "enum" or "enum" in resolved:
|
|
2144
2144
|
# Enums are represented as strings with an enum constraint in JSON Schema
|
|
2145
2145
|
prop["type"] = "string"
|
|
2146
|
-
|
|
2146
|
+
# Only set enum if not already set by an override (accepted_values)
|
|
2147
|
+
if "enum" in resolved and "enum" not in prop:
|
|
2147
2148
|
prop["enum"] = resolved["enum"]
|
|
2148
2149
|
elif resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
|
|
2149
2150
|
prop["type"] = resolved_type
|
|
@@ -2483,8 +2483,10 @@ class RuntimeValidationEnumExtractor:
|
|
|
2483
2483
|
)
|
|
2484
2484
|
|
|
2485
2485
|
if enum_results:
|
|
2486
|
-
#
|
|
2487
|
-
|
|
2486
|
+
# Skip if enum was already set by an override (accepted_values)
|
|
2487
|
+
if "enum" not in property:
|
|
2488
|
+
# Extract just the values for the enum field
|
|
2489
|
+
property["enum"] = [result["value"] for result in enum_results]
|
|
2488
2490
|
|
|
2489
2491
|
# Add metadata about which enum values are enterprise-only
|
|
2490
2492
|
enum_metadata = {}
|