docusaurus-plugin-llms 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -16
- package/lib/generator-current.d.ts +44 -0
- package/lib/generator-current.js +398 -0
- package/lib/generator.d.ts +4 -2
- package/lib/generator.js +163 -71
- package/lib/index.js +174 -10
- package/lib/null-handling-guide.d.ts +47 -0
- package/lib/null-handling-guide.js +290 -0
- package/lib/processor.d.ts +0 -10
- package/lib/processor.js +217 -80
- package/lib/types.d.ts +10 -0
- package/lib/utils.d.ts +141 -7
- package/lib/utils.js +429 -34
- package/package.json +2 -2
- package/src/generator.ts +206 -86
- package/src/index.ts +202 -14
- package/src/null-handling-guide.ts +321 -0
- package/src/processor.ts +303 -126
- package/src/types.ts +15 -0
- package/src/utils.ts +530 -59
package/src/generator.ts
CHANGED
|
@@ -5,12 +5,20 @@
|
|
|
5
5
|
import * as path from 'path';
|
|
6
6
|
import * as fs from 'fs/promises';
|
|
7
7
|
import { DocInfo, PluginContext, CustomLLMFile } from './types';
|
|
8
|
-
import {
|
|
9
|
-
writeFile,
|
|
10
|
-
readMarkdownFiles,
|
|
11
|
-
sanitizeForFilename,
|
|
12
|
-
ensureUniqueIdentifier,
|
|
13
|
-
createMarkdownContent
|
|
8
|
+
import {
|
|
9
|
+
writeFile,
|
|
10
|
+
readMarkdownFiles,
|
|
11
|
+
sanitizeForFilename,
|
|
12
|
+
ensureUniqueIdentifier,
|
|
13
|
+
createMarkdownContent,
|
|
14
|
+
normalizePath,
|
|
15
|
+
validatePathLength,
|
|
16
|
+
shortenPathIfNeeded,
|
|
17
|
+
logger,
|
|
18
|
+
getErrorMessage,
|
|
19
|
+
isNonEmptyString,
|
|
20
|
+
isNonEmptyArray,
|
|
21
|
+
isDefined
|
|
14
22
|
} from './utils';
|
|
15
23
|
import { processFilesWithPatterns } from './processor';
|
|
16
24
|
|
|
@@ -20,11 +28,12 @@ import { processFilesWithPatterns } from './processor';
|
|
|
20
28
|
* @returns Cleaned description suitable for TOC
|
|
21
29
|
*/
|
|
22
30
|
function cleanDescriptionForToc(description: string): string {
|
|
23
|
-
if (!description) return '';
|
|
24
|
-
|
|
31
|
+
if (!isNonEmptyString(description)) return '';
|
|
32
|
+
|
|
25
33
|
// Get just the first line for TOC display
|
|
26
|
-
const
|
|
27
|
-
|
|
34
|
+
const lines = description.split('\n');
|
|
35
|
+
const firstLine = lines.length > 0 ? lines[0] : '';
|
|
36
|
+
|
|
28
37
|
// Remove heading markers only at the beginning of the line
|
|
29
38
|
// Be careful to only remove actual heading markers (# followed by space at beginning)
|
|
30
39
|
// and not hashtag symbols that are part of the content (inline hashtags)
|
|
@@ -43,6 +52,7 @@ function cleanDescriptionForToc(description: string): string {
|
|
|
43
52
|
* @param includeFullContent - Whether to include full content or just links
|
|
44
53
|
* @param version - Version of the file
|
|
45
54
|
* @param customRootContent - Optional custom content to include at the root level
|
|
55
|
+
* @param batchSize - Batch size for processing documents (default: 100)
|
|
46
56
|
*/
|
|
47
57
|
export async function generateLLMFile(
|
|
48
58
|
docs: DocInfo[],
|
|
@@ -51,19 +61,39 @@ export async function generateLLMFile(
|
|
|
51
61
|
fileDescription: string,
|
|
52
62
|
includeFullContent: boolean,
|
|
53
63
|
version?: string,
|
|
54
|
-
customRootContent?: string
|
|
64
|
+
customRootContent?: string,
|
|
65
|
+
batchSize: number = 100
|
|
55
66
|
): Promise<void> {
|
|
56
|
-
|
|
67
|
+
// Validate path length before proceeding
|
|
68
|
+
if (!validatePathLength(outputPath)) {
|
|
69
|
+
throw new Error(`Output path exceeds maximum length: ${outputPath}`);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
logger.verbose(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
|
|
57
73
|
const versionInfo = version ? `\n\nVersion: ${version}` : '';
|
|
58
74
|
|
|
59
75
|
if (includeFullContent) {
|
|
60
76
|
// Generate full content file with header deduplication
|
|
77
|
+
// Process documents in batches to prevent memory issues on large sites
|
|
61
78
|
const usedHeaders = new Set<string>();
|
|
62
|
-
const fullContentSections =
|
|
79
|
+
const fullContentSections: string[] = [];
|
|
80
|
+
|
|
81
|
+
// Process documents in batches
|
|
82
|
+
for (let i = 0; i < docs.length; i += batchSize) {
|
|
83
|
+
const batch = docs.slice(i, i + batchSize);
|
|
84
|
+
const batchNumber = Math.floor(i / batchSize) + 1;
|
|
85
|
+
const totalBatches = Math.ceil(docs.length / batchSize);
|
|
86
|
+
|
|
87
|
+
if (totalBatches > 1) {
|
|
88
|
+
logger.verbose(`Processing batch ${batchNumber}/${totalBatches} (${batch.length} documents)`);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const batchSections = batch.map(doc => {
|
|
63
92
|
// Check if content already starts with the same heading to avoid duplication
|
|
64
93
|
const trimmedContent = doc.content.trim();
|
|
65
|
-
const
|
|
66
|
-
|
|
94
|
+
const contentLines = trimmedContent.split('\n');
|
|
95
|
+
const firstLine = contentLines.length > 0 ? contentLines[0] : '';
|
|
96
|
+
|
|
67
97
|
// Check if the first line is a heading that matches our title
|
|
68
98
|
const headingMatch = firstLine.match(/^#+\s+(.+)$/);
|
|
69
99
|
const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
|
|
@@ -74,10 +104,10 @@ export async function generateLLMFile(
|
|
|
74
104
|
usedHeaders,
|
|
75
105
|
(counter, base) => {
|
|
76
106
|
// Try to make it more descriptive by adding the file path info if available
|
|
77
|
-
if (doc.path && counter === 2) {
|
|
107
|
+
if (isNonEmptyString(doc.path) && counter === 2) {
|
|
78
108
|
const pathParts = doc.path.split('/');
|
|
79
|
-
const folderName = pathParts.length
|
|
80
|
-
if (folderName) {
|
|
109
|
+
const folderName = pathParts.length >= 2 ? pathParts[pathParts.length - 2] : '';
|
|
110
|
+
if (isNonEmptyString(folderName)) {
|
|
81
111
|
return `(${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
|
|
82
112
|
}
|
|
83
113
|
}
|
|
@@ -86,19 +116,11 @@ export async function generateLLMFile(
|
|
|
86
116
|
);
|
|
87
117
|
|
|
88
118
|
if (firstHeadingText === doc.title) {
|
|
89
|
-
// Content already has the same heading, replace it with our unique header
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
return `## ${uniqueHeader}
|
|
93
|
-
|
|
94
|
-
${restOfContent}`;
|
|
95
|
-
} else {
|
|
96
|
-
// Replace the existing H1 with H2 to comply with llmstxt.org standard
|
|
97
|
-
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
98
|
-
return `## ${uniqueHeader}
|
|
119
|
+
// Content already has the same heading, replace it with our unique header
|
|
120
|
+
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
121
|
+
return `## ${uniqueHeader}
|
|
99
122
|
|
|
100
123
|
${restOfContent}`;
|
|
101
|
-
}
|
|
102
124
|
} else {
|
|
103
125
|
// Content doesn't have the same heading, add our unique H2 header
|
|
104
126
|
return `## ${uniqueHeader}
|
|
@@ -107,6 +129,9 @@ ${doc.content}`;
|
|
|
107
129
|
}
|
|
108
130
|
});
|
|
109
131
|
|
|
132
|
+
fullContentSections.push(...batchSections);
|
|
133
|
+
}
|
|
134
|
+
|
|
110
135
|
// Use custom root content or default message
|
|
111
136
|
const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
|
|
112
137
|
|
|
@@ -117,7 +142,11 @@ ${doc.content}`;
|
|
|
117
142
|
true // include metadata (description)
|
|
118
143
|
);
|
|
119
144
|
|
|
120
|
-
|
|
145
|
+
try {
|
|
146
|
+
await writeFile(outputPath, llmFileContent);
|
|
147
|
+
} catch (error: unknown) {
|
|
148
|
+
throw new Error(`Failed to write file ${outputPath}: ${getErrorMessage(error)}`);
|
|
149
|
+
}
|
|
121
150
|
} else {
|
|
122
151
|
// Generate links-only file
|
|
123
152
|
const tocItems = docs.map(doc => {
|
|
@@ -137,19 +166,24 @@ ${doc.content}`;
|
|
|
137
166
|
true // include metadata (description)
|
|
138
167
|
);
|
|
139
168
|
|
|
140
|
-
|
|
169
|
+
try {
|
|
170
|
+
await writeFile(outputPath, llmFileContent);
|
|
171
|
+
} catch (error: unknown) {
|
|
172
|
+
throw new Error(`Failed to write file ${outputPath}: ${getErrorMessage(error)}`);
|
|
173
|
+
}
|
|
141
174
|
}
|
|
142
|
-
|
|
143
|
-
|
|
175
|
+
|
|
176
|
+
logger.info(`Generated: ${outputPath}`);
|
|
144
177
|
}
|
|
145
178
|
|
|
146
179
|
/**
|
|
147
180
|
* Generate individual markdown files for each document
|
|
148
|
-
* @param docs - Processed document information
|
|
181
|
+
* @param docs - Processed document information
|
|
149
182
|
* @param outputDir - Directory to write the markdown files
|
|
150
183
|
* @param siteUrl - Base site URL
|
|
151
184
|
* @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
|
|
152
185
|
* @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
|
|
186
|
+
* @param preserveDirectoryStructure - Whether to preserve the full directory structure (default: true)
|
|
153
187
|
* @returns Updated docs with new URLs pointing to generated markdown files
|
|
154
188
|
*/
|
|
155
189
|
export async function generateIndividualMarkdownFiles(
|
|
@@ -157,24 +191,64 @@ export async function generateIndividualMarkdownFiles(
|
|
|
157
191
|
outputDir: string,
|
|
158
192
|
siteUrl: string,
|
|
159
193
|
docsDir: string = 'docs',
|
|
160
|
-
keepFrontMatter: string[] = []
|
|
194
|
+
keepFrontMatter: string[] = [],
|
|
195
|
+
preserveDirectoryStructure: boolean = true
|
|
161
196
|
): Promise<DocInfo[]> {
|
|
162
197
|
const updatedDocs: DocInfo[] = [];
|
|
163
198
|
const usedPaths = new Set<string>();
|
|
164
|
-
|
|
165
|
-
|
|
199
|
+
|
|
200
|
+
|
|
166
201
|
for (const doc of docs) {
|
|
167
|
-
// Use the original path structure
|
|
202
|
+
// Use the original path structure as default filename.
|
|
168
203
|
let relativePath = doc.path
|
|
169
204
|
.replace(/^\/+/, '') // Remove leading slashes
|
|
170
205
|
.replace(/\.mdx?$/, '.md'); // Ensure .md extension
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
// Strip the docsDir prefix only if preserveDirectoryStructure is false
|
|
209
|
+
if (!preserveDirectoryStructure) {
|
|
210
|
+
relativePath = relativePath
|
|
211
|
+
.replace(new RegExp(`^${docsDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/`), '');// Remove configured docs dir prefix
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// If frontmatter has slug, use that.
|
|
215
|
+
if (isNonEmptyString(doc.frontMatter?.slug)) {
|
|
216
|
+
const slug = doc.frontMatter.slug.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
217
|
+
|
|
218
|
+
if (isNonEmptyString(slug)) { // Only process if slug is not empty after trimming
|
|
219
|
+
if (slug.includes('/')) {
|
|
220
|
+
// Nested slug: create directory structure
|
|
221
|
+
relativePath = slug + '.md';
|
|
222
|
+
} else {
|
|
223
|
+
// Simple slug: replace just the filename
|
|
224
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
225
|
+
pathParts[pathParts.length - 1] = slug;
|
|
226
|
+
relativePath = pathParts.join('/') + '.md';
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
// Otherwise, if frontmatter has id, use that.
|
|
231
|
+
else if (isNonEmptyString(doc.frontMatter?.id)) {
|
|
232
|
+
const id = doc.frontMatter.id.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
233
|
+
|
|
234
|
+
if (isNonEmptyString(id)) { // Only process if id is not empty after trimming
|
|
235
|
+
if (id.includes('/')) {
|
|
236
|
+
// Nested id: create directory structure
|
|
237
|
+
relativePath = id + '.md';
|
|
238
|
+
} else {
|
|
239
|
+
// Simple id: replace just the filename
|
|
240
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
241
|
+
pathParts[pathParts.length - 1] = id;
|
|
242
|
+
relativePath = pathParts.join('/') + '.md';
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// Trim any leading/trailing whitespace from the path
|
|
248
|
+
relativePath = relativePath.trim();
|
|
249
|
+
|
|
176
250
|
// If path is empty or invalid, create a fallback path
|
|
177
|
-
if (!relativePath || relativePath === '.md') {
|
|
251
|
+
if (!isNonEmptyString(relativePath) || relativePath === '.md') {
|
|
178
252
|
const sanitizedTitle = sanitizeForFilename(doc.title, 'untitled');
|
|
179
253
|
relativePath = `${sanitizedTitle}.md`;
|
|
180
254
|
}
|
|
@@ -182,25 +256,48 @@ export async function generateIndividualMarkdownFiles(
|
|
|
182
256
|
// Ensure path uniqueness
|
|
183
257
|
let uniquePath = relativePath;
|
|
184
258
|
let counter = 1;
|
|
259
|
+
const MAX_PATH_ITERATIONS = 10000;
|
|
260
|
+
let pathIterations = 0;
|
|
261
|
+
|
|
185
262
|
while (usedPaths.has(uniquePath.toLowerCase())) {
|
|
186
263
|
counter++;
|
|
187
264
|
const pathParts = relativePath.split('.');
|
|
188
265
|
const extension = pathParts.pop() || 'md';
|
|
189
266
|
const basePath = pathParts.join('.');
|
|
190
267
|
uniquePath = `${basePath}-${counter}.${extension}`;
|
|
268
|
+
|
|
269
|
+
pathIterations++;
|
|
270
|
+
if (pathIterations >= MAX_PATH_ITERATIONS) {
|
|
271
|
+
// Fallback to timestamp
|
|
272
|
+
const timestamp = Date.now();
|
|
273
|
+
uniquePath = `${basePath}-${timestamp}.${extension}`;
|
|
274
|
+
logger.warn(`Maximum iterations reached for unique path. Using timestamp: ${uniquePath}`);
|
|
275
|
+
break;
|
|
276
|
+
}
|
|
191
277
|
}
|
|
192
278
|
usedPaths.add(uniquePath.toLowerCase());
|
|
193
|
-
|
|
194
|
-
// Create the full file path and
|
|
195
|
-
|
|
279
|
+
|
|
280
|
+
// Create the full file path and validate/shorten if needed
|
|
281
|
+
let fullPath = path.join(outputDir, uniquePath);
|
|
282
|
+
fullPath = shortenPathIfNeeded(fullPath, outputDir, uniquePath);
|
|
283
|
+
|
|
284
|
+
// Update uniquePath to reflect the shortened path if it was changed
|
|
285
|
+
if (fullPath !== path.join(outputDir, uniquePath)) {
|
|
286
|
+
uniquePath = path.relative(outputDir, fullPath);
|
|
287
|
+
}
|
|
288
|
+
|
|
196
289
|
const directory = path.dirname(fullPath);
|
|
197
|
-
|
|
290
|
+
|
|
198
291
|
// Create directory structure if it doesn't exist
|
|
199
|
-
|
|
292
|
+
try {
|
|
293
|
+
await fs.mkdir(directory, { recursive: true });
|
|
294
|
+
} catch (error: unknown) {
|
|
295
|
+
throw new Error(`Failed to create directory ${directory}: ${getErrorMessage(error)}`);
|
|
296
|
+
}
|
|
200
297
|
|
|
201
298
|
// Extract preserved frontmatter if specified
|
|
202
299
|
let preservedFrontMatter: Record<string, any> = {};
|
|
203
|
-
if (keepFrontMatter
|
|
300
|
+
if (isNonEmptyArray(keepFrontMatter) && isDefined(doc.frontMatter)) {
|
|
204
301
|
for (const key of keepFrontMatter) {
|
|
205
302
|
if (key in doc.frontMatter) {
|
|
206
303
|
preservedFrontMatter[key] = doc.frontMatter[key];
|
|
@@ -210,19 +307,23 @@ export async function generateIndividualMarkdownFiles(
|
|
|
210
307
|
|
|
211
308
|
// Create markdown content using the utility function
|
|
212
309
|
const markdownContent = createMarkdownContent(
|
|
213
|
-
doc.title,
|
|
214
|
-
doc.description,
|
|
215
|
-
doc.content,
|
|
310
|
+
doc.title,
|
|
311
|
+
doc.description,
|
|
312
|
+
doc.content,
|
|
216
313
|
true, // includeMetadata
|
|
217
314
|
Object.keys(preservedFrontMatter).length > 0 ? preservedFrontMatter : undefined
|
|
218
315
|
);
|
|
219
|
-
|
|
316
|
+
|
|
220
317
|
// Write the markdown file
|
|
221
|
-
|
|
318
|
+
try {
|
|
319
|
+
await writeFile(fullPath, markdownContent);
|
|
320
|
+
} catch (error: unknown) {
|
|
321
|
+
throw new Error(`Failed to write file ${fullPath}: ${getErrorMessage(error)}`);
|
|
322
|
+
}
|
|
222
323
|
|
|
223
324
|
// Create updated DocInfo with new URL pointing to the generated markdown file
|
|
224
325
|
// Convert file path to URL path (use forward slashes)
|
|
225
|
-
const urlPath = uniquePath
|
|
326
|
+
const urlPath = normalizePath(uniquePath);
|
|
226
327
|
const newUrl = `${siteUrl}/${urlPath}`;
|
|
227
328
|
|
|
228
329
|
updatedDocs.push({
|
|
@@ -231,7 +332,7 @@ export async function generateIndividualMarkdownFiles(
|
|
|
231
332
|
path: `/${urlPath}` // Update path to the new markdown file
|
|
232
333
|
});
|
|
233
334
|
|
|
234
|
-
|
|
335
|
+
logger.verbose(`Generated markdown file: ${uniquePath}`);
|
|
235
336
|
}
|
|
236
337
|
|
|
237
338
|
return updatedDocs;
|
|
@@ -254,8 +355,8 @@ export async function generateStandardLLMFiles(
|
|
|
254
355
|
options
|
|
255
356
|
} = context;
|
|
256
357
|
|
|
257
|
-
const {
|
|
258
|
-
generateLLMsTxt,
|
|
358
|
+
const {
|
|
359
|
+
generateLLMsTxt,
|
|
259
360
|
generateLLMsFullTxt,
|
|
260
361
|
llmsTxtFilename = 'llms.txt',
|
|
261
362
|
llmsFullTxtFilename = 'llms-full.txt',
|
|
@@ -264,10 +365,12 @@ export async function generateStandardLLMFiles(
|
|
|
264
365
|
version,
|
|
265
366
|
generateMarkdownFiles = false,
|
|
266
367
|
rootContent,
|
|
267
|
-
fullRootContent
|
|
368
|
+
fullRootContent,
|
|
369
|
+
processingBatchSize = 100
|
|
268
370
|
} = options;
|
|
269
371
|
|
|
270
372
|
if (!generateLLMsTxt && !generateLLMsFullTxt) {
|
|
373
|
+
logger.warn('No standard LLM files configured for generation. Skipping.');
|
|
271
374
|
return;
|
|
272
375
|
}
|
|
273
376
|
|
|
@@ -281,17 +384,24 @@ export async function generateStandardLLMFiles(
|
|
|
281
384
|
includeUnmatchedLast
|
|
282
385
|
);
|
|
283
386
|
|
|
284
|
-
|
|
285
|
-
|
|
387
|
+
logger.verbose(`Processed ${processedDocs.length} documentation files for standard LLM files`);
|
|
388
|
+
|
|
389
|
+
// Check if we have documents to process
|
|
390
|
+
if (!isNonEmptyArray(processedDocs)) {
|
|
391
|
+
logger.warn('No documents found matching patterns for standard LLM files. Skipping.');
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
|
|
286
395
|
// Generate individual markdown files if requested
|
|
287
|
-
if (generateMarkdownFiles
|
|
288
|
-
|
|
396
|
+
if (generateMarkdownFiles) {
|
|
397
|
+
logger.info('Generating individual markdown files...');
|
|
289
398
|
processedDocs = await generateIndividualMarkdownFiles(
|
|
290
399
|
processedDocs,
|
|
291
400
|
outDir,
|
|
292
401
|
siteUrl,
|
|
293
402
|
context.docsDir,
|
|
294
|
-
context.options.keepFrontMatter || []
|
|
403
|
+
context.options.keepFrontMatter || [],
|
|
404
|
+
context.options.preserveDirectoryStructure !== false // Default to true
|
|
295
405
|
);
|
|
296
406
|
}
|
|
297
407
|
|
|
@@ -305,7 +415,8 @@ export async function generateStandardLLMFiles(
|
|
|
305
415
|
docDescription,
|
|
306
416
|
false, // links only
|
|
307
417
|
version,
|
|
308
|
-
rootContent
|
|
418
|
+
rootContent,
|
|
419
|
+
processingBatchSize
|
|
309
420
|
);
|
|
310
421
|
}
|
|
311
422
|
|
|
@@ -319,7 +430,8 @@ export async function generateStandardLLMFiles(
|
|
|
319
430
|
docDescription,
|
|
320
431
|
true, // full content
|
|
321
432
|
version,
|
|
322
|
-
fullRootContent
|
|
433
|
+
fullRootContent,
|
|
434
|
+
processingBatchSize
|
|
323
435
|
);
|
|
324
436
|
}
|
|
325
437
|
}
|
|
@@ -334,16 +446,22 @@ export async function generateCustomLLMFiles(
|
|
|
334
446
|
allDocFiles: string[]
|
|
335
447
|
): Promise<void> {
|
|
336
448
|
const { outDir, siteUrl, docTitle, docDescription, options } = context;
|
|
337
|
-
const {
|
|
449
|
+
const {
|
|
450
|
+
customLLMFiles = [],
|
|
451
|
+
ignoreFiles = [],
|
|
452
|
+
generateMarkdownFiles = false,
|
|
453
|
+
processingBatchSize = 100
|
|
454
|
+
} = options;
|
|
338
455
|
|
|
339
456
|
if (customLLMFiles.length === 0) {
|
|
457
|
+
logger.warn('No custom LLM files configured. Skipping.');
|
|
340
458
|
return;
|
|
341
459
|
}
|
|
342
460
|
|
|
343
|
-
|
|
461
|
+
logger.info(`Generating ${customLLMFiles.length} custom LLM files...`);
|
|
344
462
|
|
|
345
463
|
for (const customFile of customLLMFiles) {
|
|
346
|
-
|
|
464
|
+
logger.verbose(`Processing custom file: ${customFile.filename}, version: ${customFile.version || 'undefined'}`);
|
|
347
465
|
|
|
348
466
|
// Combine global ignores with custom ignores
|
|
349
467
|
const combinedIgnores = [...ignoreFiles];
|
|
@@ -364,13 +482,14 @@ export async function generateCustomLLMFiles(
|
|
|
364
482
|
if (customDocs.length > 0) {
|
|
365
483
|
// Generate individual markdown files if requested
|
|
366
484
|
if (generateMarkdownFiles) {
|
|
367
|
-
|
|
485
|
+
logger.info(`Generating individual markdown files for custom file: ${customFile.filename}...`);
|
|
368
486
|
customDocs = await generateIndividualMarkdownFiles(
|
|
369
487
|
customDocs,
|
|
370
488
|
outDir,
|
|
371
489
|
siteUrl,
|
|
372
490
|
context.docsDir,
|
|
373
|
-
context.options.keepFrontMatter || []
|
|
491
|
+
context.options.keepFrontMatter || [],
|
|
492
|
+
context.options.preserveDirectoryStructure !== false // Default to true
|
|
374
493
|
);
|
|
375
494
|
}
|
|
376
495
|
|
|
@@ -387,12 +506,13 @@ export async function generateCustomLLMFiles(
|
|
|
387
506
|
customDescription,
|
|
388
507
|
customFile.fullContent,
|
|
389
508
|
customFile.version,
|
|
390
|
-
customFile.rootContent
|
|
509
|
+
customFile.rootContent,
|
|
510
|
+
processingBatchSize
|
|
391
511
|
);
|
|
392
512
|
|
|
393
|
-
|
|
513
|
+
logger.info(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);
|
|
394
514
|
} else {
|
|
395
|
-
|
|
515
|
+
logger.warn(`No matching documents found for custom LLM file: ${customFile.filename}`);
|
|
396
516
|
}
|
|
397
517
|
}
|
|
398
518
|
}
|
|
@@ -404,7 +524,7 @@ export async function generateCustomLLMFiles(
|
|
|
404
524
|
*/
|
|
405
525
|
export async function collectDocFiles(context: PluginContext): Promise<string[]> {
|
|
406
526
|
const { siteDir, docsDir, options } = context;
|
|
407
|
-
const { ignoreFiles = [], includeBlog = false } = options;
|
|
527
|
+
const { ignoreFiles = [], includeBlog = false, warnOnIgnoredFiles = false } = options;
|
|
408
528
|
|
|
409
529
|
const allDocFiles: string[] = [];
|
|
410
530
|
|
|
@@ -413,13 +533,13 @@ export async function collectDocFiles(context: PluginContext): Promise<string[]>
|
|
|
413
533
|
|
|
414
534
|
try {
|
|
415
535
|
await fs.access(fullDocsDir);
|
|
416
|
-
|
|
536
|
+
|
|
417
537
|
// Collect all markdown files from docs directory
|
|
418
|
-
const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);
|
|
538
|
+
const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
419
539
|
allDocFiles.push(...docFiles);
|
|
420
|
-
|
|
421
|
-
} catch (err) {
|
|
422
|
-
|
|
540
|
+
|
|
541
|
+
} catch (err: unknown) {
|
|
542
|
+
logger.warn(`Docs directory not found: ${fullDocsDir}`);
|
|
423
543
|
}
|
|
424
544
|
|
|
425
545
|
// Process blog if enabled
|
|
@@ -428,13 +548,13 @@ export async function collectDocFiles(context: PluginContext): Promise<string[]>
|
|
|
428
548
|
|
|
429
549
|
try {
|
|
430
550
|
await fs.access(blogDir);
|
|
431
|
-
|
|
551
|
+
|
|
432
552
|
// Collect all markdown files from blog directory
|
|
433
|
-
const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);
|
|
553
|
+
const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
434
554
|
allDocFiles.push(...blogFiles);
|
|
435
|
-
|
|
436
|
-
} catch (err) {
|
|
437
|
-
|
|
555
|
+
|
|
556
|
+
} catch (err: unknown) {
|
|
557
|
+
logger.warn(`Blog directory not found: ${blogDir}`);
|
|
438
558
|
}
|
|
439
559
|
}
|
|
440
560
|
|