docusaurus-plugin-llms 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -16
- package/lib/generator-current.d.ts +44 -0
- package/lib/generator-current.js +398 -0
- package/lib/generator.d.ts +4 -2
- package/lib/generator.js +163 -71
- package/lib/index.js +174 -10
- package/lib/null-handling-guide.d.ts +47 -0
- package/lib/null-handling-guide.js +290 -0
- package/lib/processor.d.ts +0 -10
- package/lib/processor.js +217 -80
- package/lib/types.d.ts +10 -0
- package/lib/utils.d.ts +141 -7
- package/lib/utils.js +429 -34
- package/package.json +2 -2
- package/src/generator.ts +206 -86
- package/src/index.ts +202 -14
- package/src/null-handling-guide.ts +321 -0
- package/src/processor.ts +303 -126
- package/src/types.ts +15 -0
- package/src/utils.ts +530 -59
package/lib/generator.js
CHANGED
|
@@ -51,10 +51,11 @@ const processor_1 = require("./processor");
|
|
|
51
51
|
* @returns Cleaned description suitable for TOC
|
|
52
52
|
*/
|
|
53
53
|
function cleanDescriptionForToc(description) {
|
|
54
|
-
if (!description)
|
|
54
|
+
if (!(0, utils_1.isNonEmptyString)(description))
|
|
55
55
|
return '';
|
|
56
56
|
// Get just the first line for TOC display
|
|
57
|
-
const
|
|
57
|
+
const lines = description.split('\n');
|
|
58
|
+
const firstLine = lines.length > 0 ? lines[0] : '';
|
|
58
59
|
// Remove heading markers only at the beginning of the line
|
|
59
60
|
// Be careful to only remove actual heading markers (# followed by space at beginning)
|
|
60
61
|
// and not hashtag symbols that are part of the content (inline hashtags)
|
|
@@ -71,60 +72,74 @@ function cleanDescriptionForToc(description) {
|
|
|
71
72
|
* @param includeFullContent - Whether to include full content or just links
|
|
72
73
|
* @param version - Version of the file
|
|
73
74
|
* @param customRootContent - Optional custom content to include at the root level
|
|
75
|
+
* @param batchSize - Batch size for processing documents (default: 100)
|
|
74
76
|
*/
|
|
75
|
-
async function generateLLMFile(docs, outputPath, fileTitle, fileDescription, includeFullContent, version, customRootContent) {
|
|
76
|
-
|
|
77
|
+
async function generateLLMFile(docs, outputPath, fileTitle, fileDescription, includeFullContent, version, customRootContent, batchSize = 100) {
|
|
78
|
+
// Validate path length before proceeding
|
|
79
|
+
if (!(0, utils_1.validatePathLength)(outputPath)) {
|
|
80
|
+
throw new Error(`Output path exceeds maximum length: ${outputPath}`);
|
|
81
|
+
}
|
|
82
|
+
utils_1.logger.verbose(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
|
|
77
83
|
const versionInfo = version ? `\n\nVersion: ${version}` : '';
|
|
78
84
|
if (includeFullContent) {
|
|
79
85
|
// Generate full content file with header deduplication
|
|
86
|
+
// Process documents in batches to prevent memory issues on large sites
|
|
80
87
|
const usedHeaders = new Set();
|
|
81
|
-
const fullContentSections =
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
88
|
+
const fullContentSections = [];
|
|
89
|
+
// Process documents in batches
|
|
90
|
+
for (let i = 0; i < docs.length; i += batchSize) {
|
|
91
|
+
const batch = docs.slice(i, i + batchSize);
|
|
92
|
+
const batchNumber = Math.floor(i / batchSize) + 1;
|
|
93
|
+
const totalBatches = Math.ceil(docs.length / batchSize);
|
|
94
|
+
if (totalBatches > 1) {
|
|
95
|
+
utils_1.logger.verbose(`Processing batch ${batchNumber}/${totalBatches} (${batch.length} documents)`);
|
|
96
|
+
}
|
|
97
|
+
const batchSections = batch.map(doc => {
|
|
98
|
+
// Check if content already starts with the same heading to avoid duplication
|
|
99
|
+
const trimmedContent = doc.content.trim();
|
|
100
|
+
const contentLines = trimmedContent.split('\n');
|
|
101
|
+
const firstLine = contentLines.length > 0 ? contentLines[0] : '';
|
|
102
|
+
// Check if the first line is a heading that matches our title
|
|
103
|
+
const headingMatch = firstLine.match(/^#+\s+(.+)$/);
|
|
104
|
+
const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
|
|
105
|
+
// Generate unique header using the utility function
|
|
106
|
+
const uniqueHeader = (0, utils_1.ensureUniqueIdentifier)(doc.title, usedHeaders, (counter, base) => {
|
|
107
|
+
// Try to make it more descriptive by adding the file path info if available
|
|
108
|
+
if ((0, utils_1.isNonEmptyString)(doc.path) && counter === 2) {
|
|
109
|
+
const pathParts = doc.path.split('/');
|
|
110
|
+
const folderName = pathParts.length >= 2 ? pathParts[pathParts.length - 2] : '';
|
|
111
|
+
if ((0, utils_1.isNonEmptyString)(folderName)) {
|
|
112
|
+
return `(${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
|
|
113
|
+
}
|
|
96
114
|
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
// Content already has the same heading, replace it with our unique header if needed
|
|
102
|
-
if (uniqueHeader !== doc.title) {
|
|
115
|
+
return `(${counter})`;
|
|
116
|
+
});
|
|
117
|
+
if (firstHeadingText === doc.title) {
|
|
118
|
+
// Content already has the same heading, replace it with our unique header
|
|
103
119
|
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
104
120
|
return `## ${uniqueHeader}
|
|
105
121
|
|
|
106
122
|
${restOfContent}`;
|
|
107
123
|
}
|
|
108
124
|
else {
|
|
109
|
-
//
|
|
110
|
-
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
125
|
+
// Content doesn't have the same heading, add our unique H2 header
|
|
111
126
|
return `## ${uniqueHeader}
|
|
112
127
|
|
|
113
|
-
${restOfContent}`;
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
else {
|
|
117
|
-
// Content doesn't have the same heading, add our unique H2 header
|
|
118
|
-
return `## ${uniqueHeader}
|
|
119
|
-
|
|
120
128
|
${doc.content}`;
|
|
121
|
-
|
|
122
|
-
|
|
129
|
+
}
|
|
130
|
+
});
|
|
131
|
+
fullContentSections.push(...batchSections);
|
|
132
|
+
}
|
|
123
133
|
// Use custom root content or default message
|
|
124
134
|
const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
|
|
125
135
|
const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n${fullContentSections.join('\n\n---\n\n')}`, true // include metadata (description)
|
|
126
136
|
);
|
|
127
|
-
|
|
137
|
+
try {
|
|
138
|
+
await (0, utils_1.writeFile)(outputPath, llmFileContent);
|
|
139
|
+
}
|
|
140
|
+
catch (error) {
|
|
141
|
+
throw new Error(`Failed to write file ${outputPath}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
142
|
+
}
|
|
128
143
|
}
|
|
129
144
|
else {
|
|
130
145
|
// Generate links-only file
|
|
@@ -137,9 +152,14 @@ ${doc.content}`;
|
|
|
137
152
|
const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
|
|
138
153
|
const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n## Table of Contents\n\n${tocItems.join('\n')}`, true // include metadata (description)
|
|
139
154
|
);
|
|
140
|
-
|
|
155
|
+
try {
|
|
156
|
+
await (0, utils_1.writeFile)(outputPath, llmFileContent);
|
|
157
|
+
}
|
|
158
|
+
catch (error) {
|
|
159
|
+
throw new Error(`Failed to write file ${outputPath}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
160
|
+
}
|
|
141
161
|
}
|
|
142
|
-
|
|
162
|
+
utils_1.logger.info(`Generated: ${outputPath}`);
|
|
143
163
|
}
|
|
144
164
|
/**
|
|
145
165
|
* Generate individual markdown files for each document
|
|
@@ -148,42 +168,100 @@ ${doc.content}`;
|
|
|
148
168
|
* @param siteUrl - Base site URL
|
|
149
169
|
* @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
|
|
150
170
|
* @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
|
|
171
|
+
* @param preserveDirectoryStructure - Whether to preserve the full directory structure (default: true)
|
|
151
172
|
* @returns Updated docs with new URLs pointing to generated markdown files
|
|
152
173
|
*/
|
|
153
|
-
async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl, docsDir = 'docs', keepFrontMatter = []) {
|
|
174
|
+
async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl, docsDir = 'docs', keepFrontMatter = [], preserveDirectoryStructure = true) {
|
|
154
175
|
const updatedDocs = [];
|
|
155
176
|
const usedPaths = new Set();
|
|
156
177
|
for (const doc of docs) {
|
|
157
|
-
// Use the original path structure
|
|
178
|
+
// Use the original path structure as default filename.
|
|
158
179
|
let relativePath = doc.path
|
|
159
180
|
.replace(/^\/+/, '') // Remove leading slashes
|
|
160
181
|
.replace(/\.mdx?$/, '.md'); // Ensure .md extension
|
|
161
|
-
|
|
162
|
-
|
|
182
|
+
// Strip the docsDir prefix only if preserveDirectoryStructure is false
|
|
183
|
+
if (!preserveDirectoryStructure) {
|
|
184
|
+
relativePath = relativePath
|
|
185
|
+
.replace(new RegExp(`^${docsDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/`), ''); // Remove configured docs dir prefix
|
|
186
|
+
}
|
|
187
|
+
// If frontmatter has slug, use that.
|
|
188
|
+
if ((0, utils_1.isNonEmptyString)(doc.frontMatter?.slug)) {
|
|
189
|
+
const slug = doc.frontMatter.slug.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
190
|
+
if ((0, utils_1.isNonEmptyString)(slug)) { // Only process if slug is not empty after trimming
|
|
191
|
+
if (slug.includes('/')) {
|
|
192
|
+
// Nested slug: create directory structure
|
|
193
|
+
relativePath = slug + '.md';
|
|
194
|
+
}
|
|
195
|
+
else {
|
|
196
|
+
// Simple slug: replace just the filename
|
|
197
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
198
|
+
pathParts[pathParts.length - 1] = slug;
|
|
199
|
+
relativePath = pathParts.join('/') + '.md';
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
// Otherwise, if frontmatter has id, use that.
|
|
204
|
+
else if ((0, utils_1.isNonEmptyString)(doc.frontMatter?.id)) {
|
|
205
|
+
const id = doc.frontMatter.id.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
206
|
+
if ((0, utils_1.isNonEmptyString)(id)) { // Only process if id is not empty after trimming
|
|
207
|
+
if (id.includes('/')) {
|
|
208
|
+
// Nested id: create directory structure
|
|
209
|
+
relativePath = id + '.md';
|
|
210
|
+
}
|
|
211
|
+
else {
|
|
212
|
+
// Simple id: replace just the filename
|
|
213
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
214
|
+
pathParts[pathParts.length - 1] = id;
|
|
215
|
+
relativePath = pathParts.join('/') + '.md';
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
// Trim any leading/trailing whitespace from the path
|
|
220
|
+
relativePath = relativePath.trim();
|
|
163
221
|
// If path is empty or invalid, create a fallback path
|
|
164
|
-
if (!relativePath || relativePath === '.md') {
|
|
222
|
+
if (!(0, utils_1.isNonEmptyString)(relativePath) || relativePath === '.md') {
|
|
165
223
|
const sanitizedTitle = (0, utils_1.sanitizeForFilename)(doc.title, 'untitled');
|
|
166
224
|
relativePath = `${sanitizedTitle}.md`;
|
|
167
225
|
}
|
|
168
226
|
// Ensure path uniqueness
|
|
169
227
|
let uniquePath = relativePath;
|
|
170
228
|
let counter = 1;
|
|
229
|
+
const MAX_PATH_ITERATIONS = 10000;
|
|
230
|
+
let pathIterations = 0;
|
|
171
231
|
while (usedPaths.has(uniquePath.toLowerCase())) {
|
|
172
232
|
counter++;
|
|
173
233
|
const pathParts = relativePath.split('.');
|
|
174
234
|
const extension = pathParts.pop() || 'md';
|
|
175
235
|
const basePath = pathParts.join('.');
|
|
176
236
|
uniquePath = `${basePath}-${counter}.${extension}`;
|
|
237
|
+
pathIterations++;
|
|
238
|
+
if (pathIterations >= MAX_PATH_ITERATIONS) {
|
|
239
|
+
// Fallback to timestamp
|
|
240
|
+
const timestamp = Date.now();
|
|
241
|
+
uniquePath = `${basePath}-${timestamp}.${extension}`;
|
|
242
|
+
utils_1.logger.warn(`Maximum iterations reached for unique path. Using timestamp: ${uniquePath}`);
|
|
243
|
+
break;
|
|
244
|
+
}
|
|
177
245
|
}
|
|
178
246
|
usedPaths.add(uniquePath.toLowerCase());
|
|
179
|
-
// Create the full file path and
|
|
180
|
-
|
|
247
|
+
// Create the full file path and validate/shorten if needed
|
|
248
|
+
let fullPath = path.join(outputDir, uniquePath);
|
|
249
|
+
fullPath = (0, utils_1.shortenPathIfNeeded)(fullPath, outputDir, uniquePath);
|
|
250
|
+
// Update uniquePath to reflect the shortened path if it was changed
|
|
251
|
+
if (fullPath !== path.join(outputDir, uniquePath)) {
|
|
252
|
+
uniquePath = path.relative(outputDir, fullPath);
|
|
253
|
+
}
|
|
181
254
|
const directory = path.dirname(fullPath);
|
|
182
255
|
// Create directory structure if it doesn't exist
|
|
183
|
-
|
|
256
|
+
try {
|
|
257
|
+
await fs.mkdir(directory, { recursive: true });
|
|
258
|
+
}
|
|
259
|
+
catch (error) {
|
|
260
|
+
throw new Error(`Failed to create directory ${directory}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
261
|
+
}
|
|
184
262
|
// Extract preserved frontmatter if specified
|
|
185
263
|
let preservedFrontMatter = {};
|
|
186
|
-
if (keepFrontMatter
|
|
264
|
+
if ((0, utils_1.isNonEmptyArray)(keepFrontMatter) && (0, utils_1.isDefined)(doc.frontMatter)) {
|
|
187
265
|
for (const key of keepFrontMatter) {
|
|
188
266
|
if (key in doc.frontMatter) {
|
|
189
267
|
preservedFrontMatter[key] = doc.frontMatter[key];
|
|
@@ -194,17 +272,22 @@ async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl, docsDir
|
|
|
194
272
|
const markdownContent = (0, utils_1.createMarkdownContent)(doc.title, doc.description, doc.content, true, // includeMetadata
|
|
195
273
|
Object.keys(preservedFrontMatter).length > 0 ? preservedFrontMatter : undefined);
|
|
196
274
|
// Write the markdown file
|
|
197
|
-
|
|
275
|
+
try {
|
|
276
|
+
await (0, utils_1.writeFile)(fullPath, markdownContent);
|
|
277
|
+
}
|
|
278
|
+
catch (error) {
|
|
279
|
+
throw new Error(`Failed to write file ${fullPath}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
280
|
+
}
|
|
198
281
|
// Create updated DocInfo with new URL pointing to the generated markdown file
|
|
199
282
|
// Convert file path to URL path (use forward slashes)
|
|
200
|
-
const urlPath =
|
|
283
|
+
const urlPath = (0, utils_1.normalizePath)(uniquePath);
|
|
201
284
|
const newUrl = `${siteUrl}/${urlPath}`;
|
|
202
285
|
updatedDocs.push({
|
|
203
286
|
...doc,
|
|
204
287
|
url: newUrl,
|
|
205
288
|
path: `/${urlPath}` // Update path to the new markdown file
|
|
206
289
|
});
|
|
207
|
-
|
|
290
|
+
utils_1.logger.verbose(`Generated markdown file: ${uniquePath}`);
|
|
208
291
|
}
|
|
209
292
|
return updatedDocs;
|
|
210
293
|
}
|
|
@@ -215,31 +298,38 @@ async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl, docsDir
|
|
|
215
298
|
*/
|
|
216
299
|
async function generateStandardLLMFiles(context, allDocFiles) {
|
|
217
300
|
const { outDir, siteUrl, docTitle, docDescription, options } = context;
|
|
218
|
-
const { generateLLMsTxt, generateLLMsFullTxt, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeOrder = [], includeUnmatchedLast = true, version, generateMarkdownFiles = false, rootContent, fullRootContent } = options;
|
|
301
|
+
const { generateLLMsTxt, generateLLMsFullTxt, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeOrder = [], includeUnmatchedLast = true, version, generateMarkdownFiles = false, rootContent, fullRootContent, processingBatchSize = 100 } = options;
|
|
219
302
|
if (!generateLLMsTxt && !generateLLMsFullTxt) {
|
|
303
|
+
utils_1.logger.warn('No standard LLM files configured for generation. Skipping.');
|
|
220
304
|
return;
|
|
221
305
|
}
|
|
222
306
|
// Process files for the standard outputs
|
|
223
307
|
let processedDocs = await (0, processor_1.processFilesWithPatterns)(context, allDocFiles, [], // No specific include patterns - include all
|
|
224
308
|
[], // No additional ignore patterns beyond global ignoreFiles
|
|
225
309
|
includeOrder, includeUnmatchedLast);
|
|
226
|
-
|
|
310
|
+
utils_1.logger.verbose(`Processed ${processedDocs.length} documentation files for standard LLM files`);
|
|
311
|
+
// Check if we have documents to process
|
|
312
|
+
if (!(0, utils_1.isNonEmptyArray)(processedDocs)) {
|
|
313
|
+
utils_1.logger.warn('No documents found matching patterns for standard LLM files. Skipping.');
|
|
314
|
+
return;
|
|
315
|
+
}
|
|
227
316
|
// Generate individual markdown files if requested
|
|
228
|
-
if (generateMarkdownFiles
|
|
229
|
-
|
|
230
|
-
processedDocs = await generateIndividualMarkdownFiles(processedDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || []
|
|
317
|
+
if (generateMarkdownFiles) {
|
|
318
|
+
utils_1.logger.info('Generating individual markdown files...');
|
|
319
|
+
processedDocs = await generateIndividualMarkdownFiles(processedDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || [], context.options.preserveDirectoryStructure !== false // Default to true
|
|
320
|
+
);
|
|
231
321
|
}
|
|
232
322
|
// Generate llms.txt
|
|
233
323
|
if (generateLLMsTxt) {
|
|
234
324
|
const llmsTxtPath = path.join(outDir, llmsTxtFilename);
|
|
235
325
|
await generateLLMFile(processedDocs, llmsTxtPath, docTitle, docDescription, false, // links only
|
|
236
|
-
version, rootContent);
|
|
326
|
+
version, rootContent, processingBatchSize);
|
|
237
327
|
}
|
|
238
328
|
// Generate llms-full.txt
|
|
239
329
|
if (generateLLMsFullTxt) {
|
|
240
330
|
const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
|
|
241
331
|
await generateLLMFile(processedDocs, llmsFullTxtPath, docTitle, docDescription, true, // full content
|
|
242
|
-
version, fullRootContent);
|
|
332
|
+
version, fullRootContent, processingBatchSize);
|
|
243
333
|
}
|
|
244
334
|
}
|
|
245
335
|
/**
|
|
@@ -249,13 +339,14 @@ async function generateStandardLLMFiles(context, allDocFiles) {
|
|
|
249
339
|
*/
|
|
250
340
|
async function generateCustomLLMFiles(context, allDocFiles) {
|
|
251
341
|
const { outDir, siteUrl, docTitle, docDescription, options } = context;
|
|
252
|
-
const { customLLMFiles = [], ignoreFiles = [], generateMarkdownFiles = false } = options;
|
|
342
|
+
const { customLLMFiles = [], ignoreFiles = [], generateMarkdownFiles = false, processingBatchSize = 100 } = options;
|
|
253
343
|
if (customLLMFiles.length === 0) {
|
|
344
|
+
utils_1.logger.warn('No custom LLM files configured. Skipping.');
|
|
254
345
|
return;
|
|
255
346
|
}
|
|
256
|
-
|
|
347
|
+
utils_1.logger.info(`Generating ${customLLMFiles.length} custom LLM files...`);
|
|
257
348
|
for (const customFile of customLLMFiles) {
|
|
258
|
-
|
|
349
|
+
utils_1.logger.verbose(`Processing custom file: ${customFile.filename}, version: ${customFile.version || 'undefined'}`);
|
|
259
350
|
// Combine global ignores with custom ignores
|
|
260
351
|
const combinedIgnores = [...ignoreFiles];
|
|
261
352
|
if (customFile.ignorePatterns) {
|
|
@@ -266,19 +357,20 @@ async function generateCustomLLMFiles(context, allDocFiles) {
|
|
|
266
357
|
if (customDocs.length > 0) {
|
|
267
358
|
// Generate individual markdown files if requested
|
|
268
359
|
if (generateMarkdownFiles) {
|
|
269
|
-
|
|
270
|
-
customDocs = await generateIndividualMarkdownFiles(customDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || []
|
|
360
|
+
utils_1.logger.info(`Generating individual markdown files for custom file: ${customFile.filename}...`);
|
|
361
|
+
customDocs = await generateIndividualMarkdownFiles(customDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || [], context.options.preserveDirectoryStructure !== false // Default to true
|
|
362
|
+
);
|
|
271
363
|
}
|
|
272
364
|
// Use custom title/description or fall back to defaults
|
|
273
365
|
const customTitle = customFile.title || docTitle;
|
|
274
366
|
const customDescription = customFile.description || docDescription;
|
|
275
367
|
// Generate the custom LLM file
|
|
276
368
|
const customFilePath = path.join(outDir, customFile.filename);
|
|
277
|
-
await generateLLMFile(customDocs, customFilePath, customTitle, customDescription, customFile.fullContent, customFile.version, customFile.rootContent);
|
|
278
|
-
|
|
369
|
+
await generateLLMFile(customDocs, customFilePath, customTitle, customDescription, customFile.fullContent, customFile.version, customFile.rootContent, processingBatchSize);
|
|
370
|
+
utils_1.logger.info(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);
|
|
279
371
|
}
|
|
280
372
|
else {
|
|
281
|
-
|
|
373
|
+
utils_1.logger.warn(`No matching documents found for custom LLM file: ${customFile.filename}`);
|
|
282
374
|
}
|
|
283
375
|
}
|
|
284
376
|
}
|
|
@@ -289,18 +381,18 @@ async function generateCustomLLMFiles(context, allDocFiles) {
|
|
|
289
381
|
*/
|
|
290
382
|
async function collectDocFiles(context) {
|
|
291
383
|
const { siteDir, docsDir, options } = context;
|
|
292
|
-
const { ignoreFiles = [], includeBlog = false } = options;
|
|
384
|
+
const { ignoreFiles = [], includeBlog = false, warnOnIgnoredFiles = false } = options;
|
|
293
385
|
const allDocFiles = [];
|
|
294
386
|
// Process docs directory
|
|
295
387
|
const fullDocsDir = path.join(siteDir, docsDir);
|
|
296
388
|
try {
|
|
297
389
|
await fs.access(fullDocsDir);
|
|
298
390
|
// Collect all markdown files from docs directory
|
|
299
|
-
const docFiles = await (0, utils_1.readMarkdownFiles)(fullDocsDir, siteDir, ignoreFiles);
|
|
391
|
+
const docFiles = await (0, utils_1.readMarkdownFiles)(fullDocsDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
300
392
|
allDocFiles.push(...docFiles);
|
|
301
393
|
}
|
|
302
394
|
catch (err) {
|
|
303
|
-
|
|
395
|
+
utils_1.logger.warn(`Docs directory not found: ${fullDocsDir}`);
|
|
304
396
|
}
|
|
305
397
|
// Process blog if enabled
|
|
306
398
|
if (includeBlog) {
|
|
@@ -308,11 +400,11 @@ async function collectDocFiles(context) {
|
|
|
308
400
|
try {
|
|
309
401
|
await fs.access(blogDir);
|
|
310
402
|
// Collect all markdown files from blog directory
|
|
311
|
-
const blogFiles = await (0, utils_1.readMarkdownFiles)(blogDir, siteDir, ignoreFiles);
|
|
403
|
+
const blogFiles = await (0, utils_1.readMarkdownFiles)(blogDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
312
404
|
allDocFiles.push(...blogFiles);
|
|
313
405
|
}
|
|
314
406
|
catch (err) {
|
|
315
|
-
|
|
407
|
+
utils_1.logger.warn(`Blog directory not found: ${blogDir}`);
|
|
316
408
|
}
|
|
317
409
|
}
|
|
318
410
|
return allDocFiles;
|
package/lib/index.js
CHANGED
|
@@ -11,6 +11,159 @@
|
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.default = docusaurusPluginLLMs;
|
|
13
13
|
const generator_1 = require("./generator");
|
|
14
|
+
const utils_1 = require("./utils");
|
|
15
|
+
/**
|
|
16
|
+
* Validates plugin options to ensure they conform to expected types and constraints
|
|
17
|
+
* @param options - Plugin options to validate
|
|
18
|
+
* @throws Error if any option is invalid
|
|
19
|
+
*/
|
|
20
|
+
function validatePluginOptions(options) {
|
|
21
|
+
// Validate includeOrder
|
|
22
|
+
if (options.includeOrder !== undefined) {
|
|
23
|
+
if (!Array.isArray(options.includeOrder)) {
|
|
24
|
+
throw new Error('includeOrder must be an array');
|
|
25
|
+
}
|
|
26
|
+
if (!options.includeOrder.every(item => typeof item === 'string')) {
|
|
27
|
+
throw new Error('includeOrder must contain only strings');
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
// Validate ignoreFiles
|
|
31
|
+
if (options.ignoreFiles !== undefined) {
|
|
32
|
+
if (!Array.isArray(options.ignoreFiles)) {
|
|
33
|
+
throw new Error('ignoreFiles must be an array');
|
|
34
|
+
}
|
|
35
|
+
if (!options.ignoreFiles.every(item => typeof item === 'string')) {
|
|
36
|
+
throw new Error('ignoreFiles must contain only strings');
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
// Validate pathTransformation
|
|
40
|
+
if ((0, utils_1.isDefined)(options.pathTransformation)) {
|
|
41
|
+
if (typeof options.pathTransformation !== 'object') {
|
|
42
|
+
throw new Error('pathTransformation must be an object');
|
|
43
|
+
}
|
|
44
|
+
const { ignorePaths, addPaths } = options.pathTransformation;
|
|
45
|
+
if (ignorePaths !== undefined) {
|
|
46
|
+
if (!Array.isArray(ignorePaths)) {
|
|
47
|
+
throw new Error('pathTransformation.ignorePaths must be an array');
|
|
48
|
+
}
|
|
49
|
+
if (!ignorePaths.every(item => typeof item === 'string')) {
|
|
50
|
+
throw new Error('pathTransformation.ignorePaths must contain only strings');
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
if (addPaths !== undefined) {
|
|
54
|
+
if (!Array.isArray(addPaths)) {
|
|
55
|
+
throw new Error('pathTransformation.addPaths must be an array');
|
|
56
|
+
}
|
|
57
|
+
if (!addPaths.every(item => typeof item === 'string')) {
|
|
58
|
+
throw new Error('pathTransformation.addPaths must contain only strings');
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
// Validate boolean options
|
|
63
|
+
const booleanOptions = [
|
|
64
|
+
'generateLLMsTxt',
|
|
65
|
+
'generateLLMsFullTxt',
|
|
66
|
+
'includeBlog',
|
|
67
|
+
'includeUnmatchedLast',
|
|
68
|
+
'excludeImports',
|
|
69
|
+
'removeDuplicateHeadings',
|
|
70
|
+
'generateMarkdownFiles',
|
|
71
|
+
'preserveDirectoryStructure'
|
|
72
|
+
];
|
|
73
|
+
for (const option of booleanOptions) {
|
|
74
|
+
if (options[option] !== undefined && typeof options[option] !== 'boolean') {
|
|
75
|
+
throw new Error(`${option} must be a boolean`);
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
// Validate string options
|
|
79
|
+
const stringOptions = [
|
|
80
|
+
'docsDir',
|
|
81
|
+
'title',
|
|
82
|
+
'description',
|
|
83
|
+
'llmsTxtFilename',
|
|
84
|
+
'llmsFullTxtFilename',
|
|
85
|
+
'version',
|
|
86
|
+
'rootContent',
|
|
87
|
+
'fullRootContent'
|
|
88
|
+
];
|
|
89
|
+
for (const option of stringOptions) {
|
|
90
|
+
if (options[option] !== undefined && typeof options[option] !== 'string') {
|
|
91
|
+
throw new Error(`${option} must be a string`);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
// Validate keepFrontMatter
|
|
95
|
+
if (options.keepFrontMatter !== undefined) {
|
|
96
|
+
if (!Array.isArray(options.keepFrontMatter)) {
|
|
97
|
+
throw new Error('keepFrontMatter must be an array');
|
|
98
|
+
}
|
|
99
|
+
if (!options.keepFrontMatter.every(item => typeof item === 'string')) {
|
|
100
|
+
throw new Error('keepFrontMatter must contain only strings');
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
// Validate logLevel
|
|
104
|
+
if (options.logLevel !== undefined) {
|
|
105
|
+
const validLogLevels = ['quiet', 'normal', 'verbose'];
|
|
106
|
+
if (!validLogLevels.includes(options.logLevel)) {
|
|
107
|
+
throw new Error(`logLevel must be one of: ${validLogLevels.join(', ')}`);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
// Validate customLLMFiles
|
|
111
|
+
if (options.customLLMFiles !== undefined) {
|
|
112
|
+
if (!Array.isArray(options.customLLMFiles)) {
|
|
113
|
+
throw new Error('customLLMFiles must be an array');
|
|
114
|
+
}
|
|
115
|
+
options.customLLMFiles.forEach((file, index) => {
|
|
116
|
+
if (!(0, utils_1.isDefined)(file) || typeof file !== 'object') {
|
|
117
|
+
throw new Error(`customLLMFiles[${index}] must be an object`);
|
|
118
|
+
}
|
|
119
|
+
// Required fields
|
|
120
|
+
if (!(0, utils_1.isNonEmptyString)(file.filename)) {
|
|
121
|
+
throw new Error(`customLLMFiles[${index}].filename must be a non-empty string`);
|
|
122
|
+
}
|
|
123
|
+
if (!(0, utils_1.isNonEmptyArray)(file.includePatterns)) {
|
|
124
|
+
throw new Error(`customLLMFiles[${index}].includePatterns must be a non-empty array`);
|
|
125
|
+
}
|
|
126
|
+
if (!file.includePatterns.every(item => typeof item === 'string')) {
|
|
127
|
+
throw new Error(`customLLMFiles[${index}].includePatterns must contain only strings`);
|
|
128
|
+
}
|
|
129
|
+
if (typeof file.fullContent !== 'boolean') {
|
|
130
|
+
throw new Error(`customLLMFiles[${index}].fullContent must be a boolean`);
|
|
131
|
+
}
|
|
132
|
+
// Optional fields
|
|
133
|
+
if ((0, utils_1.isDefined)(file.title) && !(0, utils_1.isNonEmptyString)(file.title)) {
|
|
134
|
+
throw new Error(`customLLMFiles[${index}].title must be a non-empty string`);
|
|
135
|
+
}
|
|
136
|
+
if ((0, utils_1.isDefined)(file.description) && !(0, utils_1.isNonEmptyString)(file.description)) {
|
|
137
|
+
throw new Error(`customLLMFiles[${index}].description must be a non-empty string`);
|
|
138
|
+
}
|
|
139
|
+
if (file.ignorePatterns !== undefined) {
|
|
140
|
+
if (!Array.isArray(file.ignorePatterns)) {
|
|
141
|
+
throw new Error(`customLLMFiles[${index}].ignorePatterns must be an array`);
|
|
142
|
+
}
|
|
143
|
+
if (!file.ignorePatterns.every(item => typeof item === 'string')) {
|
|
144
|
+
throw new Error(`customLLMFiles[${index}].ignorePatterns must contain only strings`);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
if (file.orderPatterns !== undefined) {
|
|
148
|
+
if (!Array.isArray(file.orderPatterns)) {
|
|
149
|
+
throw new Error(`customLLMFiles[${index}].orderPatterns must be an array`);
|
|
150
|
+
}
|
|
151
|
+
if (!file.orderPatterns.every(item => typeof item === 'string')) {
|
|
152
|
+
throw new Error(`customLLMFiles[${index}].orderPatterns must contain only strings`);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
if (file.includeUnmatchedLast !== undefined && typeof file.includeUnmatchedLast !== 'boolean') {
|
|
156
|
+
throw new Error(`customLLMFiles[${index}].includeUnmatchedLast must be a boolean`);
|
|
157
|
+
}
|
|
158
|
+
if ((0, utils_1.isDefined)(file.version) && !(0, utils_1.isNonEmptyString)(file.version)) {
|
|
159
|
+
throw new Error(`customLLMFiles[${index}].version must be a non-empty string`);
|
|
160
|
+
}
|
|
161
|
+
if ((0, utils_1.isDefined)(file.rootContent) && !(0, utils_1.isNonEmptyString)(file.rootContent)) {
|
|
162
|
+
throw new Error(`customLLMFiles[${index}].rootContent must be a non-empty string`);
|
|
163
|
+
}
|
|
164
|
+
});
|
|
165
|
+
}
|
|
166
|
+
}
|
|
14
167
|
/**
|
|
15
168
|
* A Docusaurus plugin to generate LLM-friendly documentation following
|
|
16
169
|
* the llmstxt.org standard
|
|
@@ -20,13 +173,24 @@ const generator_1 = require("./generator");
|
|
|
20
173
|
* @returns Plugin object
|
|
21
174
|
*/
|
|
22
175
|
function docusaurusPluginLLMs(context, options = {}) {
|
|
176
|
+
// Validate options before processing
|
|
177
|
+
validatePluginOptions(options);
|
|
23
178
|
// Set default options
|
|
24
|
-
const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, includeOrder = [], includeUnmatchedLast = true, customLLMFiles = [], excludeImports = false, removeDuplicateHeadings = false, generateMarkdownFiles = false, keepFrontMatter = [], rootContent, fullRootContent, } = options;
|
|
179
|
+
const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, includeOrder = [], includeUnmatchedLast = true, customLLMFiles = [], excludeImports = false, removeDuplicateHeadings = false, generateMarkdownFiles = false, keepFrontMatter = [], rootContent, fullRootContent, logLevel = 'normal', } = options;
|
|
180
|
+
// Initialize logging level
|
|
181
|
+
const logLevelMap = {
|
|
182
|
+
quiet: utils_1.LogLevel.QUIET,
|
|
183
|
+
normal: utils_1.LogLevel.NORMAL,
|
|
184
|
+
verbose: utils_1.LogLevel.VERBOSE,
|
|
185
|
+
};
|
|
186
|
+
(0, utils_1.setLogLevel)(logLevelMap[logLevel] || utils_1.LogLevel.NORMAL);
|
|
25
187
|
const { siteDir, siteConfig, outDir, } = context;
|
|
26
|
-
//
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
188
|
+
// Normalize baseUrl: remove trailing slash unless it's root '/'
|
|
189
|
+
let normalizedBaseUrl = siteConfig.baseUrl || '/';
|
|
190
|
+
if (normalizedBaseUrl !== '/' && normalizedBaseUrl.endsWith('/')) {
|
|
191
|
+
normalizedBaseUrl = normalizedBaseUrl.slice(0, -1);
|
|
192
|
+
}
|
|
193
|
+
const siteUrl = siteConfig.url + normalizedBaseUrl;
|
|
30
194
|
// Create a plugin context object with processed options
|
|
31
195
|
const pluginContext = {
|
|
32
196
|
siteDir,
|
|
@@ -63,7 +227,7 @@ function docusaurusPluginLLMs(context, options = {}) {
|
|
|
63
227
|
* Generates LLM-friendly documentation files after the build is complete
|
|
64
228
|
*/
|
|
65
229
|
async postBuild(props) {
|
|
66
|
-
|
|
230
|
+
utils_1.logger.info('Generating LLM-friendly documentation...');
|
|
67
231
|
try {
|
|
68
232
|
let enhancedContext = pluginContext;
|
|
69
233
|
// If props are provided (Docusaurus 3.x+), use the resolved routes
|
|
@@ -96,8 +260,8 @@ function docusaurusPluginLLMs(context, options = {}) {
|
|
|
96
260
|
// Collect all document files
|
|
97
261
|
const allDocFiles = await (0, generator_1.collectDocFiles)(enhancedContext);
|
|
98
262
|
// Skip further processing if no documents were found
|
|
99
|
-
if (
|
|
100
|
-
|
|
263
|
+
if (!(0, utils_1.isNonEmptyArray)(allDocFiles)) {
|
|
264
|
+
utils_1.logger.warn('No documents found to process. Skipping.');
|
|
101
265
|
return;
|
|
102
266
|
}
|
|
103
267
|
// Process standard LLM files (llms.txt and llms-full.txt)
|
|
@@ -105,10 +269,10 @@ function docusaurusPluginLLMs(context, options = {}) {
|
|
|
105
269
|
// Process custom LLM files
|
|
106
270
|
await (0, generator_1.generateCustomLLMFiles)(enhancedContext, allDocFiles);
|
|
107
271
|
// Output overall statistics
|
|
108
|
-
|
|
272
|
+
utils_1.logger.info(`Stats: ${allDocFiles.length} total available documents processed`);
|
|
109
273
|
}
|
|
110
274
|
catch (err) {
|
|
111
|
-
|
|
275
|
+
utils_1.logger.error(`Error generating LLM documentation: ${(0, utils_1.getErrorMessage)(err)}`);
|
|
112
276
|
}
|
|
113
277
|
},
|
|
114
278
|
};
|