@redpanda-data/docs-extensions-and-macros 4.15.1 → 4.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extension-utils/url-utils.js +39 -0
- package/extensions/README.adoc +4 -0
- package/extensions/REFERENCE.adoc +424 -0
- package/extensions/add-faq-structured-data.js +153 -0
- package/extensions/add-git-dates.js +287 -0
- package/extensions/convert-llms-to-txt.js +341 -7
- package/extensions/convert-sitemap-to-markdown.js +274 -0
- package/extensions/convert-to-markdown.js +187 -20
- package/extensions/git-full-clone.js +114 -0
- package/package.json +5 -1
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Antora extension that generates markdown versions of sitemap.xml files.
|
|
3
|
+
*
|
|
4
|
+
* For each sitemap.xml in the published site, creates a corresponding sitemap.md
|
|
5
|
+
* with a human-readable and AI-friendly markdown format.
|
|
6
|
+
*
|
|
7
|
+
* Usage in playbook:
|
|
8
|
+
* ```yaml
|
|
9
|
+
* antora:
|
|
10
|
+
* extensions:
|
|
11
|
+
* - require: '@redpanda-data/docs-extensions-and-macros/extensions/convert-sitemap-to-markdown'
|
|
12
|
+
* ```
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const path = require('path')
|
|
16
|
+
const { parseStringPromise } = require('xml2js')
|
|
17
|
+
|
|
18
|
+
/**
 * Register the sitemap-to-markdown converter.
 *
 * On `beforePublish`, finds every `sitemap*.xml` in the site catalog,
 * writes a markdown companion for each, and (when more than one sitemap
 * exists) a combined `sitemap-all.md`.
 *
 * @param {Object} context - Antora extension context ({ config }).
 */
module.exports.register = function ({ config }) {
  const logger = this.getLogger('convert-sitemap-to-markdown')

  this
    .on('beforePublish', async ({ playbook, siteCatalog }) => {
      const startTime = Date.now()

      logger.info('Sitemap to markdown converter starting...')

      try {
        // Find all sitemap files in the site catalog.
        // Guard with optional chaining: catalog entries without an `out`
        // destination would otherwise throw on `file.out.path`.
        const sitemapFiles = siteCatalog.getFiles().filter(file =>
          file.out?.path && /^sitemap(-[^/]+)?\.xml$/.test(path.basename(file.out.path))
        )

        if (sitemapFiles.length === 0) {
          logger.info('No sitemap files found in site catalog')
          return
        }

        logger.info(`Found ${sitemapFiles.length} sitemap file(s)`)

        // Convert each sitemap and collect all URLs for the master sitemap.
        const allUrls = []
        for (const sitemapFile of sitemapFiles) {
          const urls = await convertSitemapToMarkdown(sitemapFile, siteCatalog, logger)
          if (urls) {
            allUrls.push(...urls)
          }
        }

        // Create combined master sitemap if we have multiple sitemaps.
        if (sitemapFiles.length > 1 && allUrls.length > 0) {
          await createMasterSitemap(sitemapFiles, allUrls, siteCatalog, logger)
        }

        const duration = ((Date.now() - startTime) / 1000).toFixed(2)
        logger.info(`Generated ${sitemapFiles.length} sitemap markdown file(s) in ${duration}s`)
      } catch (error) {
        // Fail the build: a missing/broken sitemap.md would silently ship otherwise.
        logger.error(`Failed to generate sitemap markdown: ${error.message}`)
        throw error
      }
    })
}
|
|
62
|
+
|
|
63
|
+
/**
 * Build a combined "master" sitemap markdown page aggregating the URLs from
 * every individual sitemap, and register it with the site catalog as
 * `sitemap-all.md`.
 *
 * @param {Array} sitemapFiles - The sitemap XML files found in the catalog
 * @param {Array} allUrls - All <url> entries collected from those sitemaps
 * @param {Object} siteCatalog - Antora site catalog to add the file to
 * @param {Object} logger - Extension logger
 */
async function createMasterSitemap (sitemapFiles, allUrls, siteCatalog, logger) {
  const sitemapCount = sitemapFiles.length
  const sitemapWord = sitemapCount === 1 ? 'sitemap' : 'sitemaps'

  const pieces = [
    '# Complete documentation sitemap\n\n',
    `> Combined view of all ${allUrls.length.toLocaleString()} documentation pages from ${sitemapCount} ${sitemapWord}\n\n`,
    '## Source sitemaps\n\n',
  ]

  // Link each source sitemap to its markdown counterpart (same basename, .md).
  for (const sitemapFile of sitemapFiles) {
    const basename = path.basename(sitemapFile.out.path)
    const mdName = basename.replace(/\.xml$/, '.md')
    pieces.push(`- [${basename}](${mdName})\n`)
  }
  pieces.push('\n')

  let markdown = pieces.join('')

  // Append the aggregated URL listing, grouped by component.
  if (allUrls.length > 0) {
    markdown += convertUrlsetToMarkdown(allUrls)
  }

  // Register the generated page with the site catalog.
  siteCatalog.addFile({
    contents: Buffer.from(markdown, 'utf8'),
    out: { path: 'sitemap-all.md' },
  })

  logger.info(`Generated master sitemap: sitemap-all.md (${allUrls.length} pages)`)
}
|
|
95
|
+
|
|
96
|
+
/**
 * Convert a single sitemap XML file to a markdown companion file and add it
 * to the site catalog.
 *
 * Handles both a standard <urlset> sitemap and a <sitemapindex>. Returns the
 * <url> entries found (for aggregation into the master sitemap); returns an
 * empty array when parsing fails or the file is a sitemap index.
 *
 * @param {Object} sitemapFile - Catalog file whose contents are sitemap XML
 * @param {Object} siteCatalog - Antora site catalog to add the .md file to
 * @param {Object} logger - Extension logger
 * @returns {Promise<Array>} The parsed <url> entries (possibly empty)
 */
async function convertSitemapToMarkdown (sitemapFile, siteCatalog, logger) {
  const xmlContent = sitemapFile.contents.toString('utf8')
  const basename = path.basename(sitemapFile.out.path)
  const outputPath = basename.replace(/\.xml$/, '.md')

  try {
    const parsed = await parseStringPromise(xmlContent)

    let markdown = `# Sitemap\n\n> Documentation sitemap generated from ${basename}\n\n`
    let urls = []

    // A standard sitemap carries a <urlset> of <url> entries.
    if (parsed.urlset?.url) {
      urls = parsed.urlset.url
      markdown += convertUrlsetToMarkdown(urls)
    }

    // A sitemap index instead lists sub-sitemaps.
    if (parsed.sitemapindex?.sitemap) {
      markdown += convertSitemapIndexToMarkdown(parsed.sitemapindex.sitemap)
    }

    siteCatalog.addFile({
      contents: Buffer.from(markdown, 'utf8'),
      out: { path: outputPath },
    })

    logger.debug(`Generated ${outputPath}`)

    return urls
  } catch (error) {
    // Best-effort: a malformed sitemap should not fail the whole publish.
    logger.warn(`Failed to parse ${basename}: ${error.message}`)
    return []
  }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Render an array of sitemap <url> entries as markdown, grouped by the first
 * path segment of each URL. Each entry becomes a list item linking to the
 * page, annotated with any lastmod/changefreq/priority metadata.
 *
 * @param {Array} urls - Parsed <url> entries (xml2js shape: arrays of strings)
 * @returns {string} Markdown listing of all pages
 */
function convertUrlsetToMarkdown (urls) {
  const pieces = [
    '## Pages\n\n',
    `Total pages: ${urls.length.toLocaleString()}\n\n`,
  ]

  // Group URLs by component/path for readability.
  const groups = groupUrlsByPath(urls)

  for (const [groupName, groupUrls] of Object.entries(groups)) {
    pieces.push(`### ${groupName}\n\n`)

    for (const entry of groupUrls) {
      const loc = entry.loc?.[0] ?? ''
      if (!loc) continue

      // Collect optional sitemap metadata for a trailing "(...)" annotation.
      const metadata = []
      const lastmod = entry.lastmod?.[0] ?? ''
      const changefreq = entry.changefreq?.[0] ?? ''
      const priority = entry.priority?.[0] ?? ''
      if (lastmod) metadata.push(`modified: ${lastmod}`)
      if (changefreq) metadata.push(`frequency: ${changefreq}`)
      if (priority) metadata.push(`priority: ${priority}`)

      let line = `- [${extractPageTitle(loc)}](${loc})`
      if (metadata.length > 0) {
        line += ` (${metadata.join(', ')})`
      }
      pieces.push(`${line}\n`)
    }

    pieces.push('\n')
  }

  return pieces.join('')
}
|
|
179
|
+
|
|
180
|
+
/**
 * Render sitemap index entries (<sitemapindex>) as a markdown list that
 * links to the markdown counterpart of each sub-sitemap.
 *
 * @param {Array} sitemaps - Parsed <sitemap> entries (xml2js shape)
 * @returns {string} Markdown listing of sub-sitemaps
 */
function convertSitemapIndexToMarkdown (sitemaps) {
  const count = sitemaps.length
  const sitemapWord = count === 1 ? 'sub-sitemap' : 'sub-sitemaps'

  const parts = [
    '## Sitemap index\n\n',
    `This sitemap index contains ${count} ${sitemapWord}:\n\n`,
  ]

  for (const entry of sitemaps) {
    const loc = entry.loc?.[0] ?? ''
    if (!loc) continue

    const lastmod = entry.lastmod?.[0] ?? ''
    // Link to the .md rendition of the sub-sitemap rather than the raw XML.
    const mdUrl = loc.replace(/\.xml$/, '.md')
    let line = `- [${path.basename(loc)}](${mdUrl})`
    if (lastmod) {
      line += ` (modified: ${lastmod})`
    }
    parts.push(`${line}\n`)
  }

  parts.push('\n')
  return parts.join('')
}
|
|
210
|
+
|
|
211
|
+
/**
 * Group sitemap <url> entries by the first segment of their URL path,
 * title-casing the segment (dashes become spaces, each word capitalized).
 * URLs with no path segments land in the "Root" group. The returned object's
 * keys are in alphabetical order.
 *
 * @param {Array} urls - Parsed <url> entries (xml2js shape)
 * @returns {Object} Map of group name -> array of entries
 */
function groupUrlsByPath (urls) {
  const groups = {}

  for (const entry of urls) {
    const loc = entry.loc?.[0] ?? ''
    if (!loc) continue

    // Split the URL path into non-empty segments.
    const segments = new URL(loc).pathname.split('/').filter(Boolean)

    // Derive a display name from the first path segment, e.g.
    // "current-docs" -> "Current Docs"; bare domains group under "Root".
    const groupName = segments.length === 0
      ? 'Root'
      : segments[0]
          .split('-')
          .map(word => word.charAt(0).toUpperCase() + word.slice(1))
          .join(' ')

    if (!groups[groupName]) {
      groups[groupName] = []
    }
    groups[groupName].push(entry)
  }

  // Rebuild the map with alphabetically sorted keys so iteration order is stable.
  return Object.fromEntries(
    Object.keys(groups).sort().map(key => [key, groups[key]])
  )
}
|
|
251
|
+
|
|
252
|
+
/**
 * Derive a human-readable title from a page URL: the last path segment with
 * any `.html` extension stripped and dash-separated words title-cased.
 * Returns "Home" for the site root (no path segments).
 *
 * @param {string} url - Absolute page URL
 * @returns {string} Display title
 */
function extractPageTitle (url) {
  const segments = new URL(url).pathname.split('/').filter(Boolean)

  if (segments.length === 0) return 'Home'

  // Last meaningful segment, minus the .html extension.
  const slug = segments[segments.length - 1].replace(/\.html$/, '')

  // "getting-started" -> "Getting Started"
  return slug
    .split('-')
    .map(word => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ')
}
|
|
@@ -1,9 +1,174 @@
|
|
|
1
1
|
const path = require('path')
|
|
2
2
|
const os = require('os')
|
|
3
|
+
const yaml = require('js-yaml')
|
|
4
|
+
const { toMarkdownUrl } = require('../extension-utils/url-utils')
|
|
3
5
|
const TurndownService = require('turndown')
|
|
4
6
|
const turndownPluginGfm = require('turndown-plugin-gfm')
|
|
5
7
|
const { gfm } = turndownPluginGfm
|
|
6
8
|
|
|
9
|
+
/**
 * Decode a small set of common HTML entities in a string.
 *
 * Handles numeric (`&#65;`), hex (`&#x1F600;`) and the named entities
 * produced by AsciiDoc/HTML escaping (&quot; &lt; &gt; &apos; &amp;).
 * `&amp;` is decoded last so that a double-escaped sequence such as
 * `&amp;lt;` decodes to `&lt;` rather than all the way to `<`.
 *
 * NOTE(review): the published copy of this function appears to have been
 * mangled (the entity patterns were themselves decoded, leaving identity
 * replacements); this reconstructs the intended entity names.
 *
 * @param {string} str - String with potential HTML entities
 * @returns {string} Decoded string (non-string input is returned unchanged)
 */
function decodeHtmlEntities (str) {
  if (!str || typeof str !== 'string') return str
  return str
    // fromCodePoint (not fromCharCode) so astral code points (> 0xFFFF) decode correctly
    .replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number(dec)))
    .replace(/&#x([0-9a-fA-F]+);/g, (_, hex) => String.fromCodePoint(parseInt(hex, 16)))
    .replace(/&quot;/g, '"')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, '&')
}
|
|
25
|
+
|
|
26
|
+
/**
 * Converts AsciiDoc page attributes to YAML frontmatter.
 *
 * Only attributes on an explicit allowlist (plus any `learning-objective-*`
 * attribute) are exported. Empty-valued AsciiDoc boolean flags are converted
 * to `true` (or, for version attributes, to the page's source version).
 * EOL and beta flags are additionally summarized into the friendlier
 * `support-status` / `release-status` fields.
 *
 * @param {Object} page - The page object with asciidoc attributes
 * @returns {string} YAML frontmatter string (with leading/trailing `---`
 *   delimiters) or empty string if no attributes
 */
function generateFrontmatter(page) {
  const frontmatter = {}

  // Add title (decode HTML entities from AsciiDoc processing)
  if (page.asciidoc?.doctitle) {
    frontmatter.title = decodeHtmlEntities(page.asciidoc.doctitle)
  }

  // Add navigation title if different from doctitle
  if (page.asciidoc?.navtitle && page.asciidoc.navtitle !== page.asciidoc?.doctitle) {
    frontmatter.navtitle = decodeHtmlEntities(page.asciidoc.navtitle)
  }

  // Get all page attributes
  const attrs = page.asciidoc?.attributes || {}

  // Allowlist of attributes to include in frontmatter
  // Explicitly opt-in to attributes that are useful for AI consumption
  const allowedAttributes = [
    'title',
    'navtitle',
    'description',
    'categories',
    'page-component-name',
    'page-component-title',
    'page-component-version',
    'page-version',
    'page-relative-src-path',
    'page-edit-url',
    'page-topic-type',
    'personas',
    'docname',
    'page-beta',
    'page-beta-text',
    'page-is-nearing-eol',
    'page-is-past-eol',
    'page-eol-date',
    'page-git-created-date',
    'page-git-modified-date',
    // Component-specific version attributes (from antora.yml)
    'latest-redpanda-tag', // Redpanda docker tag (e.g., v25.3.5)
    'latest-console-tag',
    'latest-operator-version',
    'latest-connect-version',
  ]

  // Add allowed page attributes to frontmatter
  Object.keys(attrs).forEach(key => {
    const value = attrs[key]

    // Allow all learning-objective-* attributes (learning-objective-1, -2, -3, etc.)
    const isLearningObjective = key.startsWith('learning-objective-')

    // Only include attributes in our allowlist or learning objectives
    if (!allowedAttributes.includes(key) && !isLearningObjective) return

    // Only include page-beta-text if page-beta is true
    if (key === 'page-beta-text' && !attrs['page-beta']) {
      return
    }

    // Skip empty attributes (AsciiDoc boolean flags)
    // In AsciiDoc, a set-but-valueless attribute arrives as '' rather than true.
    if (value === '') {
      // Special handling for version fields - use actual version from page source
      if (key === 'page-version') {
        frontmatter[key] = page.src?.version || 'master'
        return
      }
      if (key === 'page-component-version') {
        frontmatter[key] = page.src?.version || 'master'
        return
      }
      // Preserve important boolean flags
      if (key.startsWith('page-')) {
        frontmatter[key] = true
      }
      return
    }

    // Include the attribute
    frontmatter[key] = value
  })

  // Transform EOL fields to be more user-friendly
  if (frontmatter['page-is-nearing-eol'] || frontmatter['page-is-past-eol']) {
    let eolStatus = 'supported'
    // Attribute values may be the string 'true' (from AsciiDoc) or boolean true
    // (set above for empty flags), so check both forms.
    if (frontmatter['page-is-past-eol'] === 'true' || frontmatter['page-is-past-eol'] === true) {
      eolStatus = 'past end-of-life'
    } else if (frontmatter['page-is-nearing-eol'] === 'true' || frontmatter['page-is-nearing-eol'] === true) {
      eolStatus = 'nearing end-of-life'
    }
    frontmatter['support-status'] = eolStatus
    // Keep original fields for compatibility
  }

  // Transform beta fields to be more user-friendly
  if (frontmatter['page-beta'] === 'true' || frontmatter['page-beta'] === true) {
    let betaStatus = 'beta'
    if (frontmatter['page-beta-text']) {
      betaStatus = `beta - ${frontmatter['page-beta-text']}`
    }
    frontmatter['release-status'] = betaStatus
  }

  // Return empty string if no frontmatter
  if (Object.keys(frontmatter).length === 0) return ''

  // Convert to YAML format using js-yaml library for proper escaping
  let yamlContent = yaml.dump(frontmatter, {
    lineWidth: -1, // Disable line wrapping
    noRefs: true, // Disable anchors/aliases
    quotingType: '"', // Use double quotes
    forceQuotes: false, // Only quote when necessary
  })

  // Add helpful comments for EOL (end-of-life) fields
  // Find the first EOL-related field and add comment before it
  // (guarded by an includes() check so the comment is only inserted once).
  if (frontmatter['page-is-nearing-eol'] || frontmatter['page-is-past-eol'] || frontmatter['support-status']) {
    const eolFieldRegex = /^(page-is-nearing-eol:|page-is-past-eol:|support-status:)/m
    if (!yamlContent.includes('# EOL =')) {
      yamlContent = yamlContent.replace(
        eolFieldRegex,
        '# EOL = End-of-Life (support lifecycle status)\n$1'
      )
    }
  }

  // Add helpful comments for beta fields
  if (frontmatter['page-beta'] || frontmatter['release-status']) {
    const betaFieldRegex = /^(page-beta:|release-status:)/m
    if (!yamlContent.includes('# Beta release')) {
      yamlContent = yamlContent.replace(
        betaFieldRegex,
        '# Beta release status\n$1'
      )
    }
  }

  return `---\n${yamlContent}---\n\n`
}
|
|
171
|
+
|
|
7
172
|
module.exports.register = function () {
|
|
8
173
|
const logger = this.getLogger('convert-to-markdown-extension')
|
|
9
174
|
let playbook
|
|
@@ -308,22 +473,9 @@ module.exports.register = function () {
|
|
|
308
473
|
let canonicalUrl = ''
|
|
309
474
|
try {
|
|
310
475
|
if (siteUrl && page.pub?.url) {
|
|
311
|
-
const htmlStyle = playbook?.urls?.htmlExtensionStyle
|
|
312
|
-
const isIndexify = htmlStyle === 'indexify'
|
|
313
476
|
const baseUrl = new URL(page.pub.url, siteUrl)
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
if (isIndexify) {
|
|
317
|
-
const looksLikeDir =
|
|
318
|
-
pathname.endsWith('/') ||
|
|
319
|
-
!path.basename(pathname).includes('.')
|
|
320
|
-
baseUrl.pathname = looksLikeDir
|
|
321
|
-
? pathname.replace(/\/?$/, '/index.md')
|
|
322
|
-
: pathname.replace(/\.html$/, '.md')
|
|
323
|
-
} else {
|
|
324
|
-
baseUrl.pathname = pathname.replace(/\.html$/, '.md')
|
|
325
|
-
}
|
|
326
|
-
|
|
477
|
+
// Convert HTML URL to markdown URL using shared utility
|
|
478
|
+
baseUrl.pathname = toMarkdownUrl(baseUrl.pathname)
|
|
327
479
|
canonicalUrl = baseUrl.toString()
|
|
328
480
|
}
|
|
329
481
|
} catch (e) {
|
|
@@ -332,11 +484,23 @@ module.exports.register = function () {
|
|
|
332
484
|
)
|
|
333
485
|
}
|
|
334
486
|
|
|
335
|
-
//
|
|
487
|
+
// Generate YAML frontmatter from AsciiDoc attributes
|
|
488
|
+
const frontmatter = generateFrontmatter(page)
|
|
489
|
+
if (frontmatter) {
|
|
490
|
+
logger.debug(`Generated frontmatter for ${page.src?.path}`)
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
// Prepend frontmatter first, then source reference and AI-friendly note
|
|
336
494
|
if (canonicalUrl) {
|
|
337
|
-
const
|
|
338
|
-
|
|
339
|
-
|
|
495
|
+
const componentName = page.src?.component || '';
|
|
496
|
+
const urlHint = componentName
|
|
497
|
+
? `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview), /${componentName}-full.txt (this component only), or /llms-full.txt (complete documentation). -->`
|
|
498
|
+
: `<!-- Note for AI: This is a Markdown export. For aggregated content, see /llms.txt (curated overview) or /llms-full.txt (complete documentation). -->`;
|
|
499
|
+
|
|
500
|
+
markdown = `${frontmatter}<!-- Source: ${canonicalUrl} -->\n${urlHint}\n\n${markdown}`
|
|
501
|
+
} else if (frontmatter) {
|
|
502
|
+
// If no canonical URL but we have frontmatter, still add it
|
|
503
|
+
markdown = `${frontmatter}${markdown}`
|
|
340
504
|
}
|
|
341
505
|
|
|
342
506
|
// Clean up unnecessary whitespace
|
|
@@ -378,7 +542,10 @@ module.exports.register = function () {
|
|
|
378
542
|
for (const page of pages) {
|
|
379
543
|
const htmlOut = page.out?.path
|
|
380
544
|
if (!htmlOut) continue
|
|
381
|
-
|
|
545
|
+
|
|
546
|
+
// Convert HTML path to markdown path using shared utility
|
|
547
|
+
const mdOutPath = toMarkdownUrl(htmlOut)
|
|
548
|
+
|
|
382
549
|
siteCatalog.addFile({
|
|
383
550
|
contents: page.markdownContents,
|
|
384
551
|
out: { path: mdOutPath },
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
const { execSync } = require('child_process')
|
|
4
|
+
const fs = require('fs')
|
|
5
|
+
const path = require('path')
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Configure Antora to use full git clones instead of shallow clones.
|
|
9
|
+
* This is needed for accurate git dates from the full commit history.
|
|
10
|
+
*
|
|
11
|
+
* Two-phase approach:
|
|
12
|
+
* 1. Set depth=0 in playbook to request full clones (doesn't always work due to Antora internals)
|
|
13
|
+
* 2. After content aggregation, unshallow any repos that are still shallow
|
|
14
|
+
*
|
|
15
|
+
* Configuration options:
|
|
16
|
+
* - skipUnshallow: Set to true to skip the unshallow phase (for air-gapped environments)
|
|
17
|
+
* - unshallowTimeout: Timeout in milliseconds per repo (default: 60000)
|
|
18
|
+
*
|
|
19
|
+
* Example:
|
|
20
|
+
* antora:
|
|
21
|
+
* extensions:
|
|
22
|
+
* - require: '@redpanda-data/docs-extensions-and-macros/extensions/git-full-clone'
|
|
23
|
+
* skipUnshallow: false
|
|
24
|
+
* unshallowTimeout: 120000 # 2 minutes for very large repos
|
|
25
|
+
*
|
|
26
|
+
* Production considerations:
|
|
27
|
+
* - First build will unshallow repos (~1-5 seconds per repo)
|
|
28
|
+
* - Subsequent builds with warm cache skip unshallow (already full clones)
|
|
29
|
+
* - Large repos (10k+ commits) may take longer - increase timeout if needed
|
|
30
|
+
* - Requires network access during build (for git fetch --unshallow)
|
|
31
|
+
* - For CI/CD, consider pre-populating Antora cache with full clones
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
module.exports.register = function ({ config, playbook }) {
  const logger = this.getLogger('git-full-clone-extension')

  logger.info('✓ git-full-clone extension loaded')

  // Phase 1: Try modifying playbook during registration.
  // Setting git depth to 0 requests full clones, but (per the file header)
  // this doesn't always take effect, hence the Phase 2 fallback below.
  if (playbook?.content?.sources) {
    logger.info('Phase 1: Modifying playbook during registration')
    let remoteCount = 0
    playbook.content.sources.forEach(source => {
      // Only remote http(s) sources are touched; local paths are left alone.
      if (source.url && source.url.startsWith('http')) {
        const oldDepth = source.git?.depth
        if (!source.git) source.git = {}
        source.git.depth = 0
        remoteCount++
        logger.info(` → ${source.url}: depth ${oldDepth || 'default'} → 0 (full clone)`)
      }
    })
    logger.info(`✓ Configured ${remoteCount} remote content sources for full clones`)
  }

  // Phase 2: After content is aggregated, unshallow any repos that are still shallow
  this.on('contentAggregated', ({ contentAggregate }) => {
    // Allow disabling unshallow phase via config (for air-gapped environments or if git dates not needed)
    const skipUnshallow = config?.skipUnshallow || false
    if (skipUnshallow) {
      logger.info('Phase 2: Skipping unshallow (skipUnshallow: true)')
      return
    }

    logger.info('Phase 2: Checking for shallow clones to unshallow')
    // Multiple components can share one git repo; dedupe by gitdir so each
    // repository is fetched at most once.
    const processedRepos = new Set()
    let unshallowedCount = 0
    const unshallowTimeout = config?.unshallowTimeout || 60000 // Default 60 seconds per repo

    for (const aggregate of contentAggregate) {
      for (const origin of aggregate.origins || []) {
        const gitdir = origin.gitdir
        if (!gitdir || processedRepos.has(gitdir)) continue
        processedRepos.add(gitdir)

        // Check if this is a shallow clone: git marks shallow repos with a
        // `shallow` file inside the git directory.
        const shallowFile = path.join(gitdir, 'shallow')
        if (fs.existsSync(shallowFile)) {
          const startTime = Date.now()
          try {
            logger.info(` → Unshallowing ${path.basename(gitdir)}...`)

            // Use git fetch --unshallow to convert to full clone.
            // GIT_TERMINAL_PROMPT=0 keeps git from hanging on a credential
            // prompt in non-interactive builds.
            execSync('git fetch --unshallow', {
              cwd: gitdir,
              stdio: 'pipe',
              timeout: unshallowTimeout,
              env: { ...process.env, GIT_TERMINAL_PROMPT: '0' }
            })

            const duration = Date.now() - startTime
            unshallowedCount++
            logger.info(` ✓ Successfully unshallowed ${path.basename(gitdir)} (${duration}ms)`)
          } catch (err) {
            const duration = Date.now() - startTime
            // execSync sets err.killed when the child was terminated by the
            // timeout option — distinguish that from other fetch failures.
            if (err.killed) {
              logger.warn(` ✗ Unshallow timeout after ${duration}ms for ${path.basename(gitdir)} (increase unshallowTimeout if needed)`)
            } else {
              logger.warn(` ✗ Failed to unshallow ${path.basename(gitdir)}: ${err.message}`)
            }
            // Deliberately non-fatal: the build continues with a shallow repo.
            logger.warn(` ⚠️ Git dates may be inaccurate for this repo - consider using a pre-cloned cache`)
          }
        } else {
          logger.debug(` → ${path.basename(gitdir)} is already a full clone`)
        }
      }
    }

    if (unshallowedCount > 0) {
      logger.info(`✓ Unshallowed ${unshallowedCount} repositories`)
    } else {
      logger.info('✓ All repositories are already full clones')
    }
  })
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@redpanda-data/docs-extensions-and-macros",
|
|
3
|
-
"version": "4.15.
|
|
3
|
+
"version": "4.15.3",
|
|
4
4
|
"description": "Antora extensions and macros developed for Redpanda documentation.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"antora",
|
|
@@ -59,6 +59,8 @@
|
|
|
59
59
|
"./extensions/generate-index-data": "./extensions/generate-index-data.js",
|
|
60
60
|
"./extensions/generate-rp-connect-info": "./extensions/generate-rp-connect-info.js",
|
|
61
61
|
"./extensions/add-global-attributes": "./extensions/add-global-attributes.js",
|
|
62
|
+
"./extensions/add-git-dates": "./extensions/add-git-dates.js",
|
|
63
|
+
"./extensions/add-faq-structured-data": "./extensions/add-faq-structured-data.js",
|
|
62
64
|
"./extensions/version-fetcher/set-latest-version": "./extensions/version-fetcher/set-latest-version.js",
|
|
63
65
|
"./extensions/modify-connect-tag-playbook": "./extensions/modify-connect-tag-playbook.js",
|
|
64
66
|
"./extensions/validate-attributes": "./extensions/validate-attributes.js",
|
|
@@ -66,6 +68,7 @@
|
|
|
66
68
|
"./extensions/unpublish-pages": "./extensions/unpublish-pages.js",
|
|
67
69
|
"./extensions/find-related-labs": "./extensions/find-related-labs.js",
|
|
68
70
|
"./extensions/convert-llms-to-txt": "./extensions/convert-llms-to-txt.js",
|
|
71
|
+
"./extensions/convert-sitemap-to-markdown": "./extensions/convert-sitemap-to-markdown.js",
|
|
69
72
|
"./extensions/modify-redirects": "./extensions/produce-redirects.js",
|
|
70
73
|
"./extensions/algolia-indexer/index": "./extensions/algolia-indexer/index.js",
|
|
71
74
|
"./extensions/aggregate-terms": "./extensions/aggregate-terms.js",
|
|
@@ -123,6 +126,7 @@
|
|
|
123
126
|
"tree-sitter": "^0.22.4",
|
|
124
127
|
"turndown": "^7.2.2",
|
|
125
128
|
"turndown-plugin-gfm": "^1.0.2",
|
|
129
|
+
"xml2js": "^0.6.2",
|
|
126
130
|
"yaml": "^2.8.2",
|
|
127
131
|
"yargs": "^17.7.2"
|
|
128
132
|
},
|