@redpanda-data/docs-extensions-and-macros 4.15.2 → 4.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extension-utils/url-utils.js +39 -0
- package/extensions/README.adoc +4 -0
- package/extensions/REFERENCE.adoc +424 -0
- package/extensions/add-faq-structured-data.js +153 -0
- package/extensions/add-git-dates.js +287 -0
- package/extensions/convert-llms-to-txt.js +334 -3
- package/extensions/convert-sitemap-to-markdown.js +274 -0
- package/extensions/convert-to-markdown.js +187 -20
- package/extensions/git-full-clone.js +114 -0
- package/package.json +5 -1
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
'use strict'
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Adds Git commit dates to pages as attributes.
|
|
5
|
+
*
|
|
6
|
+
* This extension:
|
|
7
|
+
* 1. Gets the first commit date (when file was created) -> page-git-created-date
|
|
8
|
+
* 2. Gets the last commit date (when file was modified) -> page-git-modified-date
|
|
9
|
+
* 3. Adds these to page.asciidoc.attributes with page- prefix for UI template access
|
|
10
|
+
*
|
|
11
|
+
* Supports both local repos (with worktree) and remote repos (bare clones with gitdir).
|
|
12
|
+
* Antora caches remote repos as bare Git repos in ~/.cache/antora/content/
|
|
13
|
+
*
|
|
14
|
+
* Performance optimization: Uses isomorphic-git to walk the entire git log ONCE per
|
|
15
|
+
* repository, building a filepath→dates map. This is O(commits) instead of O(files * commits).
|
|
16
|
+
* For a repo with 1000 files and 5000 commits, this reduces operations from 5M to 5K.
|
|
17
|
+
*
|
|
18
|
+
* Attribute naming: Uses page- prefix so attributes appear in page.attributes
|
|
19
|
+
* in Handlebars templates (Antora strips the prefix when exposing to UI model).
|
|
20
|
+
*
|
|
21
|
+
* Only runs on pages that have origin info (skips virtual/generated pages).
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
const path = require('path')
|
|
25
|
+
const fs = require('fs')
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Resolve isomorphic-git from Antora's dependencies
|
|
29
|
+
* @param {Object} context - Extension context with module info
|
|
30
|
+
* @returns {Object} isomorphic-git module
|
|
31
|
+
*/
|
|
32
|
+
/**
 * Resolve isomorphic-git from Antora's dependency tree.
 *
 * Resolves @antora/content-aggregator first (relative to the calling
 * module's paths), then resolves isomorphic-git from next to it, so the
 * exact copy Antora itself uses is loaded.
 *
 * @param {Object} context - Extension context with module info
 * @returns {Object} isomorphic-git module
 */
function requireGit (context) {
  const aggregatorEntry = require.resolve('@antora/content-aggregator', { paths: context.module.paths })
  const gitEntry = require.resolve('isomorphic-git', { paths: [aggregatorEntry + '/..'] })
  return require(gitEntry)
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Format timestamp to ISO date string (YYYY-MM-DD)
|
|
42
|
+
* @param {number} timestamp - Unix timestamp in seconds
|
|
43
|
+
* @returns {string} ISO date string
|
|
44
|
+
*/
|
|
45
|
+
/**
 * Format a Unix timestamp as an ISO date string (YYYY-MM-DD).
 *
 * @param {number} timestamp - Unix timestamp in seconds
 * @returns {string} ISO date string (date portion only)
 */
function formatDate (timestamp) {
  const millis = timestamp * 1000
  return new Date(millis).toISOString().slice(0, 10)
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Build a map of filepath -> {created, modified} dates from git log
|
|
51
|
+
* Walks the entire log once, tracking first and last commit for each MODIFIED file
|
|
52
|
+
*
|
|
53
|
+
* This compares each commit's tree with its parent to find which files actually changed,
|
|
54
|
+
* rather than just looking at all files in the tree (which would give incorrect dates).
|
|
55
|
+
*
|
|
56
|
+
* @param {Object} git - isomorphic-git module
|
|
57
|
+
* @param {string} gitdir - Path to .git directory
|
|
58
|
+
* @param {string} ref - Git ref (branch/tag/commit)
|
|
59
|
+
* @param {Object} logger - Logger instance
|
|
60
|
+
* @returns {Promise<Map<string, {created: string, modified: string}>>}
|
|
61
|
+
*/
|
|
62
|
+
/**
 * Build a map of filepath -> {created, modified} dates from git log.
 *
 * Walks the full history once, newest commit first. A file counts as
 * "modified" in a commit when its blob OID differs from (or is absent in)
 * the first parent's tree. The first commit where a file is seen modified
 * supplies its modified date; every older modification pushes the created
 * date further back, so after the walk `created` holds the oldest change
 * and `modified` the newest.
 *
 * @param {Object} git - isomorphic-git module
 * @param {string} gitdir - Path to .git directory
 * @param {string} ref - Git ref (branch/tag/commit)
 * @param {Object} logger - Logger instance
 * @returns {Promise<Map<string, {created: string, modified: string}>>}
 */
async function buildFileDateMap (git, gitdir, ref, logger) {
  const dateByFile = new Map()
  const objectCache = {}

  try {
    // One log call fetches every reachable commit (newest -> oldest).
    const history = await git.log({ fs, gitdir, ref, cache: objectCache })

    logger.info(`Walking ${history.length} commits for ${path.basename(gitdir)} (ref: ${ref})`)

    // Memoize expanded trees so identical subtrees are read only once.
    const treeCache = new Map()

    for (const entry of history) {
      const date = formatDate(entry.commit.committer.timestamp)

      try {
        // Files present in this commit, keyed by path with blob OIDs.
        const ownFiles = await getTreeFiles(git, gitdir, entry.commit.tree, '', objectCache, treeCache)

        // Files in the first parent (empty for root commits). NOTE: merge
        // commits are diffed against the first parent only.
        const parents = entry.commit.parent || []
        let baseFiles = new Map()
        if (parents.length > 0) {
          const parent = await git.readCommit({ fs, gitdir, oid: parents[0], cache: objectCache })
          baseFiles = await getTreeFiles(git, gitdir, parent.commit.tree, '', objectCache, treeCache)
        }

        // A differing (or missing) parent OID means the file changed here.
        for (const [filepath, blobOid] of ownFiles) {
          if (baseFiles.get(filepath) === blobOid) continue

          const seen = dateByFile.get(filepath)
          if (seen) {
            // Older commit touching the same file: push created back.
            seen.created = date
          } else {
            // Newest commit touching this file: both dates start here.
            dateByFile.set(filepath, { created: date, modified: date })
          }
        }
      } catch (err) {
        // Unreadable commits are skipped rather than aborting the walk.
        logger.debug(`Skipping commit ${entry.oid.substring(0, 7)}: ${err.message}`)
      }
    }
  } catch (err) {
    logger.warn(`Failed to read git log for ${gitdir}: ${err.message}`)
  }

  return dateByFile
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Recursively walk a git tree to get all file paths with their OIDs
|
|
132
|
+
* Returns a Map of filepath → OID for comparison between commits
|
|
133
|
+
*
|
|
134
|
+
* @param {Object} git - isomorphic-git module
|
|
135
|
+
* @param {string} gitdir - Path to .git directory
|
|
136
|
+
* @param {string} oid - Tree object ID
|
|
137
|
+
* @param {string} prefix - Current path prefix
|
|
138
|
+
* @param {Object} cache - Git object cache
|
|
139
|
+
* @param {Map} treeCache - Cache of tree OID → files map
|
|
140
|
+
* @returns {Promise<Map<string, string>>} Map of filepath → blob OID
|
|
141
|
+
*/
|
|
142
|
+
/**
 * Recursively expand a git tree into a flat Map of filepath -> blob OID.
 *
 * Results are memoized per tree OID in treeCache so repeated trees across
 * commits are expanded only once. Unreadable trees yield an empty map
 * (best-effort; errors are swallowed deliberately).
 *
 * @param {Object} git - isomorphic-git module
 * @param {string} gitdir - Path to .git directory
 * @param {string} oid - Tree object ID
 * @param {string} prefix - Current path prefix ('' at the root)
 * @param {Object} cache - Git object cache
 * @param {Map} treeCache - Cache of tree OID -> files map
 * @returns {Promise<Map<string, string>>} Map of filepath -> blob OID
 */
async function getTreeFiles (git, gitdir, oid, prefix, cache, treeCache) {
  // Serve repeated trees straight from the memo.
  const memoized = treeCache.get(oid)
  if (memoized) return memoized

  const files = new Map()

  try {
    const { tree } = await git.readTree({ fs, gitdir, oid, cache })

    for (const node of tree) {
      const fullPath = prefix ? `${prefix}/${node.path}` : node.path

      if (node.type === 'blob') {
        files.set(fullPath, node.oid)
      } else if (node.type === 'tree') {
        // Descend into the subdirectory and merge its entries.
        const nested = await getTreeFiles(git, gitdir, node.oid, fullPath, cache, treeCache)
        nested.forEach((blobOid, blobPath) => files.set(blobPath, blobOid))
      }
    }

    // Only fully expanded trees are memoized.
    treeCache.set(oid, files)
  } catch (err) {
    // Best-effort: an unreadable tree contributes no files.
  }

  return files
}
|
|
180
|
+
|
|
181
|
+
/**
 * Antora extension entry point: attaches a `documentsConverted` listener
 * that stamps page-git-created-date / page-git-modified-date attributes
 * onto every page that originates from a real git repository.
 */
module.exports.register = function () {
  const logger = this.getLogger('add-git-dates-extension')
  // Captured so requireGit can resolve isomorphic-git relative to this module.
  const context = this

  // Run on documentsConverted after Antora builds page.asciidoc.attributes
  this.on('documentsConverted', async ({ contentCatalog }) => {
    const startTime = Date.now()
    let processedCount = 0
    let skippedCount = 0

    // Load isomorphic-git
    let git
    try {
      git = requireGit(context)
    } catch (err) {
      // Without isomorphic-git nothing can be dated; bail out gracefully.
      logger.error(`Failed to load isomorphic-git: ${err.message}`)
      return
    }

    // Group pages by BOTH gitdir AND ref (since same repo can have multiple branches/versions)
    const pagesByRepoAndRef = new Map()
    // Tracks repo URLs already warned about, so each is logged only once.
    const skipLoggedRepos = new Set()

    contentCatalog.getPages().forEach((page) => {
      const origin = page.src?.origin
      // Pages without origin info are virtual/generated; they have no git history.
      if (!origin?.url) {
        skippedCount++
        return
      }

      // Need gitdir for isomorphic-git (works for both local and bare repos)
      const gitdir = origin.gitdir || (origin.worktree ? path.join(origin.worktree, '.git') : null)
      if (!gitdir) {
        // Debug: Log which repos don't have gitdir
        if (!skipLoggedRepos.has(origin.url)) {
          logger.info(`⚠️ Skipping repo without gitdir: ${origin.url} (has gitdir: ${!!origin.gitdir}, has worktree: ${!!origin.worktree})`)
          skipLoggedRepos.add(origin.url)
        }
        skippedCount++
        return
      }

      // Ensure asciidoc.attributes exists
      if (!page.asciidoc) page.asciidoc = {}
      if (!page.asciidoc.attributes) page.asciidoc.attributes = {}

      // Git log paths are repo-relative; prepend startPath when the docs
      // component lives in a subdirectory of the repo.
      const startPath = origin.startPath || ''
      const relativeFilePath = startPath ? path.join(startPath, page.src.path) : page.src.path
      const ref = origin.refhash || origin.refname || 'HEAD'

      // Create composite key: gitdir + ref to handle multiple branches per repo
      const repoRefKey = `${gitdir}::${ref}`

      // Group by repo AND ref
      if (!pagesByRepoAndRef.has(repoRefKey)) {
        pagesByRepoAndRef.set(repoRefKey, {
          gitdir,
          ref,
          pages: []
        })
      }
      pagesByRepoAndRef.get(repoRefKey).pages.push({ page, relativeFilePath })
    })

    const totalPages = Array.from(pagesByRepoAndRef.values()).reduce((sum, r) => sum + r.pages.length, 0)
    // Distinct gitdirs, since the map is keyed by gitdir::ref (one repo may have many refs).
    const repoCount = new Set(Array.from(pagesByRepoAndRef.values()).map(r => r.gitdir)).size
    logger.info(`Processing ${totalPages} pages across ${repoCount} repos (${pagesByRepoAndRef.size} branches) for git dates (skipped ${skippedCount} virtual/generated)`)

    // Log which repos are being processed
    const reposBeingProcessed = new Set()
    pagesByRepoAndRef.forEach(({ gitdir }) => {
      if (!reposBeingProcessed.has(gitdir)) {
        logger.info(`✓ Will process git dates for: ${gitdir}`)
        reposBeingProcessed.add(gitdir)
      }
    })

    // Process each repository + ref combination
    for (const [repoRefKey, { gitdir, ref, pages }] of pagesByRepoAndRef) {
      const repoStartTime = Date.now()

      try {
        // Build the filepath -> dates map for this repo + ref
        // (one full-history walk per repo/ref — see buildFileDateMap).
        const fileDateMap = await buildFileDateMap(git, gitdir, ref, logger)

        // Apply dates to pages
        for (const { page, relativeFilePath } of pages) {
          const dates = fileDateMap.get(relativeFilePath)
          // Pages absent from the map (e.g. never committed) keep no date attributes.
          if (dates) {
            page.asciidoc.attributes['page-git-created-date'] = dates.created
            page.asciidoc.attributes['page-git-modified-date'] = dates.modified
            processedCount++
          }
        }

        const repoTime = Date.now() - repoStartTime
        logger.debug(`Processed ${pages.length} pages from ${path.basename(gitdir)}@${ref.substring(0,8)} in ${repoTime}ms (map size: ${fileDateMap.size})`)
      } catch (err) {
        // A failing repo must not block the rest of the build.
        logger.warn(`Failed to process repo ${gitdir}@${ref}: ${err.message}`)
      }
    }

    const duration = Date.now() - startTime
    const perPage = totalPages > 0 ? (duration / totalPages).toFixed(1) : 0
    logger.info(`Git dates added: processed=${processedCount}, skipped=${skippedCount}, duration=${duration}ms (${perPage}ms/page)`)
  })
}
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const { toMarkdownUrl } = require('../extension-utils/url-utils');
|
|
4
|
+
|
|
3
5
|
/**
|
|
4
|
-
* Extracts markdown from llms.adoc page and generates
|
|
6
|
+
* Extracts markdown from llms.adoc page and generates AI-friendly documentation exports.
|
|
5
7
|
*
|
|
6
8
|
* This extension:
|
|
7
9
|
* 1. Adds site-url attribute to home component:
|
|
@@ -11,7 +13,8 @@
|
|
|
11
13
|
* 3. Gets the markdown content from page.markdownContents (set by convert-to-markdown extension)
|
|
12
14
|
* 4. Unpublishes the HTML page
|
|
13
15
|
* 5. Places llms.txt (markdown) at site root
|
|
14
|
-
* 6. Generates llms-full.txt with markdown from latest versions
|
|
16
|
+
* 6. Generates llms-full.txt with markdown from latest versions of all components
|
|
17
|
+
* 7. Generates component-specific full.txt files (e.g., redpanda-full.txt, cloud-full.txt)
|
|
15
18
|
*
|
|
16
19
|
* Must run after convert-to-markdown extension to access page.markdownContents.
|
|
17
20
|
*/
|
|
@@ -146,6 +149,15 @@ module.exports.register = function () {
|
|
|
146
149
|
}
|
|
147
150
|
});
|
|
148
151
|
fullContent += `\n`;
|
|
152
|
+
fullContent += `### AI-Friendly Documentation Formats\n\n`;
|
|
153
|
+
fullContent += `We provide multiple formats optimized for AI consumption:\n\n`;
|
|
154
|
+
fullContent += `- **${siteUrl}/llms.txt**: Curated overview following the llms.txt standard - start here for a quick introduction\n`;
|
|
155
|
+
fullContent += `- **${siteUrl}/llms-full.txt**: Complete documentation export (this file) - comprehensive reference with all pages\n`;
|
|
156
|
+
fullContent += `- **Component-specific exports**: Focused documentation for individual products:\n`;
|
|
157
|
+
components.forEach(component => {
|
|
158
|
+
fullContent += ` - \`${siteUrl}/${component.name}-full.txt\`: ${component.title}\n`;
|
|
159
|
+
});
|
|
160
|
+
fullContent += `- **Individual markdown pages**: Each HTML page has a corresponding .md file (e.g., \`/docs/page.html\` → \`/docs/page.md\`)\n\n`;
|
|
149
161
|
fullContent += `### Accessing Versioned Content\n\n`;
|
|
150
162
|
fullContent += `For components with versioned documentation (like Redpanda Self-Managed), older versions can be accessed by replacing the version segment in the URL:\n`;
|
|
151
163
|
fullContent += `- Latest: \`${siteUrl}/current/page-path\`\n`;
|
|
@@ -161,7 +173,8 @@ module.exports.register = function () {
|
|
|
161
173
|
});
|
|
162
174
|
|
|
163
175
|
pages.forEach((page, index) => {
|
|
164
|
-
const
|
|
176
|
+
const mdUrl = page.pub?.url ? toMarkdownUrl(page.pub.url) : '';
|
|
177
|
+
const pageUrl = mdUrl ? `${siteUrl}${mdUrl}` : 'unknown';
|
|
165
178
|
const pageTitle = page.asciidoc?.doctitle || page.src?.stem || 'Untitled';
|
|
166
179
|
|
|
167
180
|
fullContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
|
|
@@ -178,16 +191,334 @@ module.exports.register = function () {
|
|
|
178
191
|
});
|
|
179
192
|
logger.info(`Generated llms-full.txt with ${pages.length} pages`);
|
|
180
193
|
|
|
194
|
+
// Generate component-specific full.txt files
|
|
195
|
+
logger.info('Generating component-specific full.txt files...');
|
|
196
|
+
const componentGroups = new Map();
|
|
197
|
+
|
|
198
|
+
// Group pages by component
|
|
199
|
+
pages.forEach(page => {
|
|
200
|
+
const componentName = page.src.component;
|
|
201
|
+
if (!componentGroups.has(componentName)) {
|
|
202
|
+
componentGroups.set(componentName, []);
|
|
203
|
+
}
|
|
204
|
+
componentGroups.get(componentName).push(page);
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
// Generate a full.txt file for each component
|
|
208
|
+
componentGroups.forEach((componentPages, componentName) => {
|
|
209
|
+
const component = components.find(c => c.name === componentName);
|
|
210
|
+
if (!component) return;
|
|
211
|
+
|
|
212
|
+
const latest = component.latest || component.versions[0];
|
|
213
|
+
if (!latest) return;
|
|
214
|
+
|
|
215
|
+
// Sort pages by URL for consistent ordering
|
|
216
|
+
componentPages.sort((a, b) => {
|
|
217
|
+
const urlA = a.pub?.url || '';
|
|
218
|
+
const urlB = b.pub?.url || '';
|
|
219
|
+
return urlA.localeCompare(urlB);
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
let componentContent = `# ${component.title} - Full Markdown Export\n\n`;
|
|
223
|
+
componentContent += `> This file contains all ${component.title} documentation pages in markdown format for AI agent consumption.\n`;
|
|
224
|
+
componentContent += `> Generated from ${componentPages.length} pages on ${new Date().toISOString()}\n`;
|
|
225
|
+
componentContent += `> Component: ${component.name} | Version: ${latest.version}\n`;
|
|
226
|
+
componentContent += `> Site: ${siteUrl}\n\n`;
|
|
227
|
+
componentContent += `## About This Export\n\n`;
|
|
228
|
+
componentContent += `This export includes the **latest version** (${latest.version}) of the ${component.title} documentation.\n\n`;
|
|
229
|
+
componentContent += `### AI-Friendly Documentation Formats\n\n`;
|
|
230
|
+
componentContent += `We provide multiple formats optimized for AI consumption:\n\n`;
|
|
231
|
+
componentContent += `- **${siteUrl}/llms.txt**: Curated overview of all Redpanda documentation\n`;
|
|
232
|
+
componentContent += `- **${siteUrl}/llms-full.txt**: Complete documentation export with all components\n`;
|
|
233
|
+
componentContent += `- **${siteUrl}/${componentName}-full.txt**: This file - ${component.title} documentation only\n`;
|
|
234
|
+
componentContent += `- **Individual markdown pages**: Each HTML page has a corresponding .md file\n\n`;
|
|
235
|
+
|
|
236
|
+
if (component.versions.length > 1) {
|
|
237
|
+
componentContent += `### Accessing Older Versions\n\n`;
|
|
238
|
+
componentContent += `This component has versioned documentation. Older versions can be accessed by replacing the version segment in the URL:\n`;
|
|
239
|
+
componentContent += `- Latest: \`${siteUrl}/current/page-path\`\n`;
|
|
240
|
+
componentContent += `- Specific version: \`${siteUrl}/24.3/page-path\`, \`${siteUrl}/25.1/page-path\`, etc.\n\n`;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
componentContent += `---\n\n`;
|
|
244
|
+
|
|
245
|
+
// Add all pages
|
|
246
|
+
componentPages.forEach((page, index) => {
|
|
247
|
+
const mdUrl = page.pub?.url ? toMarkdownUrl(page.pub.url) : '';
|
|
248
|
+
const pageUrl = mdUrl ? `${siteUrl}${mdUrl}` : 'unknown';
|
|
249
|
+
const pageTitle = page.asciidoc?.doctitle || page.src?.stem || 'Untitled';
|
|
250
|
+
|
|
251
|
+
componentContent += `# Page ${index + 1}: ${pageTitle}\n\n`;
|
|
252
|
+
componentContent += `**URL**: ${pageUrl}\n\n`;
|
|
253
|
+
componentContent += `---\n\n`;
|
|
254
|
+
componentContent += page.markdownContents.toString('utf8');
|
|
255
|
+
componentContent += `\n\n---\n\n`;
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
// Add component-specific full.txt file to site root
|
|
259
|
+
siteCatalog.addFile({
|
|
260
|
+
contents: Buffer.from(componentContent, 'utf8'),
|
|
261
|
+
out: { path: `${componentName}-full.txt` },
|
|
262
|
+
});
|
|
263
|
+
logger.info(`Generated ${componentName}-full.txt with ${componentPages.length} pages`);
|
|
264
|
+
});
|
|
265
|
+
|
|
181
266
|
// Add llms.txt to site root (using content extracted earlier)
|
|
182
267
|
if (llmsPage && llmsPage.llmsTxtContent) {
|
|
183
268
|
logger.info('Adding llms.txt to site root');
|
|
269
|
+
|
|
184
270
|
siteCatalog.addFile({
|
|
185
271
|
contents: Buffer.from(llmsPage.llmsTxtContent, 'utf8'),
|
|
186
272
|
out: { path: 'llms.txt' },
|
|
187
273
|
});
|
|
188
274
|
logger.info('Successfully added llms.txt');
|
|
275
|
+
|
|
276
|
+
// Add llms.txt to sitemap with git dates
|
|
277
|
+
try {
|
|
278
|
+
// Build a map of filename -> most recent git modified date
|
|
279
|
+
const gitDates = new Map();
|
|
280
|
+
|
|
281
|
+
// llms.txt uses the llms page's git modified date
|
|
282
|
+
if (llmsPage.asciidoc?.attributes?.['page-git-modified-date']) {
|
|
283
|
+
gitDates.set('llms.txt', llmsPage.asciidoc.attributes['page-git-modified-date']);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
// llms-full.txt uses the most recent modified date from all pages
|
|
287
|
+
if (pages.length > 0) {
|
|
288
|
+
const mostRecent = getMostRecentGitDate(pages);
|
|
289
|
+
if (mostRecent) {
|
|
290
|
+
gitDates.set('llms-full.txt', mostRecent);
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
// Component-specific files use most recent from that component
|
|
295
|
+
componentGroups.forEach((componentPages, componentName) => {
|
|
296
|
+
const mostRecent = getMostRecentGitDate(componentPages);
|
|
297
|
+
if (mostRecent) {
|
|
298
|
+
gitDates.set(`${componentName}-full.txt`, mostRecent);
|
|
299
|
+
}
|
|
300
|
+
});
|
|
301
|
+
|
|
302
|
+
addToSitemap(contentCatalog, siteCatalog, siteUrl, gitDates, logger);
|
|
303
|
+
} catch (err) {
|
|
304
|
+
logger.warn(`Failed to add llms.txt to sitemap: ${err.message}`);
|
|
305
|
+
}
|
|
189
306
|
} else {
|
|
190
307
|
logger.warn('llms.txt not generated - page not found or no content extracted');
|
|
191
308
|
}
|
|
192
309
|
});
|
|
193
310
|
};
|
|
311
|
+
|
|
312
|
+
/**
|
|
313
|
+
* Get the most recent git modified date from a collection of pages
|
|
314
|
+
*/
|
|
315
|
+
/**
 * Get the most recent git modified date from a collection of pages.
 *
 * Reads the page-git-modified-date attribute (set by the add-git-dates
 * extension) from each page and returns the latest one.
 *
 * @param {Array<Object>} pages - Pages whose asciidoc.attributes may carry a git date
 * @returns {string|null} ISO date string of the newest date, or null if none found
 */
function getMostRecentGitDate(pages) {
  let mostRecent = null;

  for (const page of pages) {
    const gitDate = page.asciidoc?.attributes?.['page-git-modified-date'];
    if (!gitDate) continue;

    const date = new Date(gitDate);
    // Guard against unparsable attribute values: previously an Invalid Date
    // could be stored as mostRecent and make toISOString() throw below.
    if (Number.isNaN(date.getTime())) continue;

    if (!mostRecent || date > mostRecent) {
      mostRecent = date;
    }
  }

  return mostRecent ? mostRecent.toISOString() : null;
}
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* Add llms.txt and all -full.txt files to sitemap by creating a separate sitemap-llms.xml
|
|
333
|
+
* and adding it to the main sitemap index
|
|
334
|
+
*
|
|
335
|
+
* @param {Object} contentCatalog - Antora content catalog (source pages)
|
|
336
|
+
* @param {Object} siteCatalog - Antora site catalog (output files)
|
|
337
|
+
* @param {string} siteUrl - Base site URL
|
|
338
|
+
* @param {Map<string, string>} gitDates - Map of filename -> ISO date string from git history
|
|
339
|
+
* @param {Object} logger - Logger instance
|
|
340
|
+
*/
|
|
341
|
+
/**
 * Add llms.txt and all -full.txt files to sitemap by creating a separate
 * sitemap-llms.xml and adding it to the main sitemap index.
 *
 * @param {Object} contentCatalog - Antora content catalog (source pages)
 * @param {Object} siteCatalog - Antora site catalog (output files)
 * @param {string} siteUrl - Base site URL
 * @param {Map<string, string>} gitDates - Map of filename -> ISO date string from git history
 * @param {Object} logger - Logger instance
 */
function addToSitemap(contentCatalog, siteCatalog, siteUrl, gitDates, logger) {
  const now = new Date().toISOString();

  // Find all llms .txt files in the site catalog
  const llmsFiles = siteCatalog.getFiles()
    .filter(file => {
      const filename = file.out.path;
      return filename === 'llms.txt' ||
        filename === 'llms-full.txt' ||
        filename.endsWith('-full.txt');
    })
    .map(file => file.out.path)
    .sort(); // Sort for consistent ordering

  logger.info(`Found ${llmsFiles.length} llms files to add to sitemap: ${llmsFiles.join(', ')}`);

  // Create sitemap-llms.xml with all llms files
  const urlEntries = llmsFiles.map(filename => {
    // Use git date if available, otherwise fall back to build time
    const lastmod = gitDates.get(filename) || now;
    // Fix: the <loc> must interpolate the filename (was a garbled placeholder).
    return `  <url>
    <loc>${siteUrl}/${filename}</loc>
    <lastmod>${lastmod}</lastmod>
    <changefreq>daily</changefreq>
    <priority>1.0</priority>
  </url>`;
  }).join('\n');

  const llmsSitemapXml = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
${urlEntries}
</urlset>`;

  siteCatalog.addFile({
    contents: Buffer.from(llmsSitemapXml, 'utf8'),
    out: { path: 'sitemap-llms.xml' },
  });
  logger.info(`Created sitemap-llms.xml with ${llmsFiles.length} entries`);

  // Find and update the main sitemap index
  const sitemapIndex = siteCatalog.getFiles().find(file =>
    file.out.path === 'sitemap.xml'
  );

  if (!sitemapIndex) {
    logger.warn('Main sitemap.xml not found, cannot add llms sitemap to index');
    return;
  }

  // Parse and update the sitemap index
  let sitemapIndexXml = sitemapIndex.contents.toString('utf8');

  // Add lastmod to all existing component sitemaps for consistency
  sitemapIndexXml = addLastmodToComponentSitemaps(contentCatalog, siteCatalog, sitemapIndexXml, siteUrl, logger);

  // Check if sitemap-llms.xml is already in the index
  if (sitemapIndexXml.includes('sitemap-llms.xml')) {
    logger.debug('sitemap-llms.xml already in sitemap index');
    // Update the index with modified component sitemaps even if llms sitemap exists
    sitemapIndex.contents = Buffer.from(sitemapIndexXml, 'utf8');
    return;
  }

  // Find the most recent date from all llms files for the sitemap-llms.xml lastmod.
  // Invalid date strings are filtered out so Math.max cannot yield NaN
  // (which would make toISOString() throw).
  let sitemapLastmod = now;
  if (gitDates.size > 0) {
    const dates = Array.from(gitDates.values())
      .map(d => new Date(d))
      .filter(d => !Number.isNaN(d.getTime()));
    if (dates.length > 0) {
      sitemapLastmod = new Date(Math.max(...dates)).toISOString();
    }
  }

  // Add sitemap-llms.xml entry before the closing </sitemapindex> tag
  const llmsSitemapEntry = `  <sitemap>
    <loc>${siteUrl}/sitemap-llms.xml</loc>
    <lastmod>${sitemapLastmod}</lastmod>
  </sitemap>
</sitemapindex>`;

  sitemapIndexXml = sitemapIndexXml.replace('</sitemapindex>', llmsSitemapEntry);

  // Update the sitemap index in the catalog
  sitemapIndex.contents = Buffer.from(sitemapIndexXml, 'utf8');
  logger.info('Added sitemap-llms.xml to main sitemap index');
}
|
|
425
|
+
|
|
426
|
+
/**
|
|
427
|
+
* Add lastmod to all component sitemaps in the sitemap index for consistency
|
|
428
|
+
* Also updates the component sitemaps themselves to use git dates instead of build time
|
|
429
|
+
*
|
|
430
|
+
* @param {Object} contentCatalog - Antora content catalog (source pages with git dates)
|
|
431
|
+
* @param {Object} siteCatalog - Antora site catalog (output files including sitemaps)
|
|
432
|
+
* @param {string} sitemapIndexXml - The sitemap index XML content
|
|
433
|
+
* @param {string} siteUrl - Base site URL for matching URLs
|
|
434
|
+
* @param {Object} logger - Logger instance
|
|
435
|
+
*/
|
|
436
|
+
/**
 * Add lastmod to all component sitemaps in the sitemap index for consistency.
 * Also updates the component sitemaps themselves to use git dates instead of build time.
 *
 * @param {Object} contentCatalog - Antora content catalog (source pages with git dates)
 * @param {Object} siteCatalog - Antora site catalog (output files including sitemaps)
 * @param {string} sitemapIndexXml - The sitemap index XML content
 * @param {string} siteUrl - Base site URL for matching URLs
 * @param {Object} logger - Logger instance
 * @returns {string} The updated sitemap index XML
 */
function addLastmodToComponentSitemaps(contentCatalog, siteCatalog, sitemapIndexXml, siteUrl, logger) {
  // Build a map of URL -> git date from all pages
  const urlToGitDate = new Map();
  const allPages = contentCatalog.getPages();

  allPages.forEach(page => {
    if (page.pub?.url && page.asciidoc?.attributes?.['page-git-modified-date']) {
      const gitDate = page.asciidoc.attributes['page-git-modified-date'];
      const url = `${siteUrl}${page.pub.url}`;
      urlToGitDate.set(url, gitDate);
    }
  });

  logger.info(`Built URL -> git date map with ${urlToGitDate.size} entries`);
  if (urlToGitDate.size > 0 && urlToGitDate.size < 20) {
    urlToGitDate.forEach((date, url) => {
      logger.debug(`  ${url} -> ${date}`);
    });
  }

  // Find all component sitemap XML files
  const componentSitemaps = siteCatalog.getFiles()
    .filter(file => {
      const path = file.out.path;
      return path.startsWith('sitemap-') &&
        path.endsWith('.xml') &&
        path !== 'sitemap-llms.xml';
    });

  logger.debug(`Found ${componentSitemaps.length} component sitemaps to update with git dates`);

  // For each component sitemap, update URLs with git dates and find the most recent
  componentSitemaps.forEach(sitemapFile => {
    const filename = sitemapFile.out.path;
    let xml = sitemapFile.contents.toString('utf8');
    const dates = [];
    let updatedCount = 0;

    // Update each URL in the sitemap with its git date if available.
    // Only valid (parsable) dates are collected: the previous version pushed
    // Invalid Date objects (new Date() never throws, so the try/catch was
    // dead code), which made Math.max produce NaN and toISOString() throw.
    xml = xml.replace(
      /<url>\s*<loc>([^<]+)<\/loc>\s*<lastmod>([^<]+)<\/lastmod>\s*<\/url>/g,
      (match, url, oldDate) => {
        const gitDate = urlToGitDate.get(url);
        const gitParsed = gitDate ? new Date(gitDate) : null;
        if (gitParsed && !Number.isNaN(gitParsed.getTime())) {
          updatedCount++;
          dates.push(gitParsed);
          return `<url>\n<loc>${url}</loc>\n<lastmod>${gitDate}</lastmod>\n</url>`;
        }
        // Keep original entry; collect its date only when parsable.
        const parsed = new Date(oldDate);
        if (!Number.isNaN(parsed.getTime())) {
          dates.push(parsed);
        }
        return match;
      }
    );

    // Update the sitemap file in the catalog
    sitemapFile.contents = Buffer.from(xml, 'utf8');
    // Fix: log messages previously rendered a garbled placeholder instead of the filename.
    logger.info(`Updated sitemap ${filename}: ${updatedCount} URLs with git dates, ${dates.length} total dates`);

    if (dates.length === 0) {
      logger.debug(`No dates found in ${filename}`);
      return;
    }

    // Find the most recent date
    const mostRecent = new Date(Math.max(...dates));
    const lastmod = mostRecent.toISOString();

    // Update the sitemap index entry to include/update lastmod.
    // Match various patterns since Antora might have different formatting.
    const locPattern = new RegExp(
      `(<loc>[^<]*/${filename.replace(/\./g, '\\.')}</loc>)(?:\\s*<lastmod>[^<]*</lastmod>)?\\s*</sitemap>`,
      'g'
    );

    sitemapIndexXml = sitemapIndexXml.replace(
      locPattern,
      `$1\n    <lastmod>${lastmod}</lastmod>\n  </sitemap>`
    );

    logger.debug(`Updated lastmod to ${lastmod} for ${filename} in index`);
  });

  return sitemapIndexXml;
}
|