docusaurus-plugin-llms 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -15
- package/lib/generator-current.d.ts +44 -0
- package/lib/generator-current.js +398 -0
- package/lib/generator.d.ts +6 -2
- package/lib/generator.js +200 -120
- package/lib/index.js +175 -10
- package/lib/null-handling-guide.d.ts +47 -0
- package/lib/null-handling-guide.js +290 -0
- package/lib/processor.d.ts +0 -10
- package/lib/processor.js +230 -83
- package/lib/types.d.ts +13 -0
- package/lib/utils.d.ts +165 -6
- package/lib/utils.js +481 -28
- package/package.json +5 -3
- package/src/generator.ts +270 -128
- package/src/index.ts +204 -14
- package/src/null-handling-guide.ts +321 -0
- package/src/processor.ts +314 -127
- package/src/types.ts +20 -1
- package/src/utils.ts +594 -48
package/src/generator.ts
CHANGED
|
@@ -5,7 +5,21 @@
|
|
|
5
5
|
import * as path from 'path';
|
|
6
6
|
import * as fs from 'fs/promises';
|
|
7
7
|
import { DocInfo, PluginContext, CustomLLMFile } from './types';
|
|
8
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
writeFile,
|
|
10
|
+
readMarkdownFiles,
|
|
11
|
+
sanitizeForFilename,
|
|
12
|
+
ensureUniqueIdentifier,
|
|
13
|
+
createMarkdownContent,
|
|
14
|
+
normalizePath,
|
|
15
|
+
validatePathLength,
|
|
16
|
+
shortenPathIfNeeded,
|
|
17
|
+
logger,
|
|
18
|
+
getErrorMessage,
|
|
19
|
+
isNonEmptyString,
|
|
20
|
+
isNonEmptyArray,
|
|
21
|
+
isDefined
|
|
22
|
+
} from './utils';
|
|
9
23
|
import { processFilesWithPatterns } from './processor';
|
|
10
24
|
|
|
11
25
|
/**
|
|
@@ -14,11 +28,12 @@ import { processFilesWithPatterns } from './processor';
|
|
|
14
28
|
* @returns Cleaned description suitable for TOC
|
|
15
29
|
*/
|
|
16
30
|
function cleanDescriptionForToc(description: string): string {
|
|
17
|
-
if (!description) return '';
|
|
18
|
-
|
|
31
|
+
if (!isNonEmptyString(description)) return '';
|
|
32
|
+
|
|
19
33
|
// Get just the first line for TOC display
|
|
20
|
-
const
|
|
21
|
-
|
|
34
|
+
const lines = description.split('\n');
|
|
35
|
+
const firstLine = lines.length > 0 ? lines[0] : '';
|
|
36
|
+
|
|
22
37
|
// Remove heading markers only at the beginning of the line
|
|
23
38
|
// Be careful to only remove actual heading markers (# followed by space at beginning)
|
|
24
39
|
// and not hashtag symbols that are part of the content (inline hashtags)
|
|
@@ -37,6 +52,7 @@ function cleanDescriptionForToc(description: string): string {
|
|
|
37
52
|
* @param includeFullContent - Whether to include full content or just links
|
|
38
53
|
* @param version - Version of the file
|
|
39
54
|
* @param customRootContent - Optional custom content to include at the root level
|
|
55
|
+
* @param batchSize - Batch size for processing documents (default: 100)
|
|
40
56
|
*/
|
|
41
57
|
export async function generateLLMFile(
|
|
42
58
|
docs: DocInfo[],
|
|
@@ -45,61 +61,66 @@ export async function generateLLMFile(
|
|
|
45
61
|
fileDescription: string,
|
|
46
62
|
includeFullContent: boolean,
|
|
47
63
|
version?: string,
|
|
48
|
-
customRootContent?: string
|
|
64
|
+
customRootContent?: string,
|
|
65
|
+
batchSize: number = 100
|
|
49
66
|
): Promise<void> {
|
|
50
|
-
|
|
67
|
+
// Validate path length before proceeding
|
|
68
|
+
if (!validatePathLength(outputPath)) {
|
|
69
|
+
throw new Error(`Output path exceeds maximum length: ${outputPath}`);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
logger.verbose(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
|
|
51
73
|
const versionInfo = version ? `\n\nVersion: ${version}` : '';
|
|
52
74
|
|
|
53
75
|
if (includeFullContent) {
|
|
54
76
|
// Generate full content file with header deduplication
|
|
77
|
+
// Process documents in batches to prevent memory issues on large sites
|
|
55
78
|
const usedHeaders = new Set<string>();
|
|
56
|
-
const fullContentSections =
|
|
79
|
+
const fullContentSections: string[] = [];
|
|
80
|
+
|
|
81
|
+
// Process documents in batches
|
|
82
|
+
for (let i = 0; i < docs.length; i += batchSize) {
|
|
83
|
+
const batch = docs.slice(i, i + batchSize);
|
|
84
|
+
const batchNumber = Math.floor(i / batchSize) + 1;
|
|
85
|
+
const totalBatches = Math.ceil(docs.length / batchSize);
|
|
86
|
+
|
|
87
|
+
if (totalBatches > 1) {
|
|
88
|
+
logger.verbose(`Processing batch ${batchNumber}/${totalBatches} (${batch.length} documents)`);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const batchSections = batch.map(doc => {
|
|
57
92
|
// Check if content already starts with the same heading to avoid duplication
|
|
58
93
|
const trimmedContent = doc.content.trim();
|
|
59
|
-
const
|
|
60
|
-
|
|
94
|
+
const contentLines = trimmedContent.split('\n');
|
|
95
|
+
const firstLine = contentLines.length > 0 ? contentLines[0] : '';
|
|
96
|
+
|
|
61
97
|
// Check if the first line is a heading that matches our title
|
|
62
98
|
const headingMatch = firstLine.match(/^#+\s+(.+)$/);
|
|
63
99
|
const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
|
|
64
100
|
|
|
65
|
-
//
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
if (folderName) {
|
|
78
|
-
uniqueHeader = `${headerText} (${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
|
|
79
|
-
} else {
|
|
80
|
-
uniqueHeader = `${headerText} (${counter})`;
|
|
101
|
+
// Generate unique header using the utility function
|
|
102
|
+
const uniqueHeader = ensureUniqueIdentifier(
|
|
103
|
+
doc.title,
|
|
104
|
+
usedHeaders,
|
|
105
|
+
(counter, base) => {
|
|
106
|
+
// Try to make it more descriptive by adding the file path info if available
|
|
107
|
+
if (isNonEmptyString(doc.path) && counter === 2) {
|
|
108
|
+
const pathParts = doc.path.split('/');
|
|
109
|
+
const folderName = pathParts.length >= 2 ? pathParts[pathParts.length - 2] : '';
|
|
110
|
+
if (isNonEmptyString(folderName)) {
|
|
111
|
+
return `(${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
|
|
112
|
+
}
|
|
81
113
|
}
|
|
82
|
-
|
|
83
|
-
uniqueHeader = `${headerText} (${counter})`;
|
|
114
|
+
return `(${counter})`;
|
|
84
115
|
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
usedHeaders.add(uniqueHeader.toLowerCase());
|
|
116
|
+
);
|
|
88
117
|
|
|
89
118
|
if (firstHeadingText === doc.title) {
|
|
90
|
-
// Content already has the same heading, replace it with our unique header
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
return `## ${uniqueHeader}
|
|
94
|
-
|
|
95
|
-
${restOfContent}`;
|
|
96
|
-
} else {
|
|
97
|
-
// Replace the existing H1 with H2 to comply with llmstxt.org standard
|
|
98
|
-
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
99
|
-
return `## ${uniqueHeader}
|
|
119
|
+
// Content already has the same heading, replace it with our unique header
|
|
120
|
+
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
121
|
+
return `## ${uniqueHeader}
|
|
100
122
|
|
|
101
123
|
${restOfContent}`;
|
|
102
|
-
}
|
|
103
124
|
} else {
|
|
104
125
|
// Content doesn't have the same heading, add our unique H2 header
|
|
105
126
|
return `## ${uniqueHeader}
|
|
@@ -108,19 +129,24 @@ ${doc.content}`;
|
|
|
108
129
|
}
|
|
109
130
|
});
|
|
110
131
|
|
|
132
|
+
fullContentSections.push(...batchSections);
|
|
133
|
+
}
|
|
134
|
+
|
|
111
135
|
// Use custom root content or default message
|
|
112
136
|
const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
|
|
113
137
|
|
|
114
|
-
const llmFileContent =
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
${fullContentSections.join('\n\n---\n\n')}
|
|
121
|
-
`;
|
|
138
|
+
const llmFileContent = createMarkdownContent(
|
|
139
|
+
fileTitle,
|
|
140
|
+
`${fileDescription}${versionInfo}`,
|
|
141
|
+
`${rootContent}\n\n${fullContentSections.join('\n\n---\n\n')}`,
|
|
142
|
+
true // include metadata (description)
|
|
143
|
+
);
|
|
122
144
|
|
|
123
|
-
|
|
145
|
+
try {
|
|
146
|
+
await writeFile(outputPath, llmFileContent);
|
|
147
|
+
} catch (error: unknown) {
|
|
148
|
+
throw new Error(`Failed to write file ${outputPath}: ${getErrorMessage(error)}`);
|
|
149
|
+
}
|
|
124
150
|
} else {
|
|
125
151
|
// Generate links-only file
|
|
126
152
|
const tocItems = docs.map(doc => {
|
|
@@ -133,87 +159,180 @@ ${fullContentSections.join('\n\n---\n\n')}
|
|
|
133
159
|
// Use custom root content or default message
|
|
134
160
|
const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
|
|
135
161
|
|
|
136
|
-
const llmFileContent =
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
## Table of Contents
|
|
143
|
-
|
|
144
|
-
${tocItems.join('\n')}
|
|
145
|
-
`;
|
|
162
|
+
const llmFileContent = createMarkdownContent(
|
|
163
|
+
fileTitle,
|
|
164
|
+
`${fileDescription}${versionInfo}`,
|
|
165
|
+
`${rootContent}\n\n## Table of Contents\n\n${tocItems.join('\n')}`,
|
|
166
|
+
true // include metadata (description)
|
|
167
|
+
);
|
|
146
168
|
|
|
147
|
-
|
|
169
|
+
try {
|
|
170
|
+
await writeFile(outputPath, llmFileContent);
|
|
171
|
+
} catch (error: unknown) {
|
|
172
|
+
throw new Error(`Failed to write file ${outputPath}: ${getErrorMessage(error)}`);
|
|
173
|
+
}
|
|
148
174
|
}
|
|
149
|
-
|
|
150
|
-
|
|
175
|
+
|
|
176
|
+
logger.info(`Generated: ${outputPath}`);
|
|
151
177
|
}
|
|
152
178
|
|
|
153
179
|
/**
|
|
154
180
|
* Generate individual markdown files for each document
|
|
155
|
-
* @param docs - Processed document information
|
|
181
|
+
* @param docs - Processed document information
|
|
156
182
|
* @param outputDir - Directory to write the markdown files
|
|
157
183
|
* @param siteUrl - Base site URL
|
|
184
|
+
* @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
|
|
185
|
+
* @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
|
|
186
|
+
* @param preserveDirectoryStructure - Whether to preserve the full directory structure (default: true)
|
|
158
187
|
* @returns Updated docs with new URLs pointing to generated markdown files
|
|
159
188
|
*/
|
|
160
189
|
export async function generateIndividualMarkdownFiles(
|
|
161
190
|
docs: DocInfo[],
|
|
162
191
|
outputDir: string,
|
|
163
|
-
siteUrl: string
|
|
192
|
+
siteUrl: string,
|
|
193
|
+
docsDir: string = 'docs',
|
|
194
|
+
keepFrontMatter: string[] = [],
|
|
195
|
+
preserveDirectoryStructure: boolean = true
|
|
164
196
|
): Promise<DocInfo[]> {
|
|
165
197
|
const updatedDocs: DocInfo[] = [];
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
198
|
+
const usedPaths = new Set<string>();
|
|
199
|
+
|
|
200
|
+
|
|
170
201
|
for (const doc of docs) {
|
|
171
|
-
//
|
|
172
|
-
let
|
|
173
|
-
.
|
|
174
|
-
.replace(
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
//
|
|
178
|
-
if (!
|
|
179
|
-
|
|
180
|
-
.replace(
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
202
|
+
// Use the original path structure as default filename.
|
|
203
|
+
let relativePath = doc.path
|
|
204
|
+
.replace(/^\/+/, '') // Remove leading slashes
|
|
205
|
+
.replace(/\.mdx?$/, '.md'); // Ensure .md extension
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
// Strip the docsDir prefix only if preserveDirectoryStructure is false
|
|
209
|
+
if (!preserveDirectoryStructure) {
|
|
210
|
+
relativePath = relativePath
|
|
211
|
+
.replace(new RegExp(`^${docsDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/`), '');// Remove configured docs dir prefix
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// If frontmatter has slug, use that.
|
|
215
|
+
if (isNonEmptyString(doc.frontMatter?.slug)) {
|
|
216
|
+
const slug = doc.frontMatter.slug.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
217
|
+
|
|
218
|
+
if (isNonEmptyString(slug)) { // Only process if slug is not empty after trimming
|
|
219
|
+
if (slug.includes('/')) {
|
|
220
|
+
// Nested slug: create directory structure
|
|
221
|
+
relativePath = slug + '.md';
|
|
222
|
+
} else {
|
|
223
|
+
// Simple slug: replace just the filename
|
|
224
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
225
|
+
pathParts[pathParts.length - 1] = slug;
|
|
226
|
+
relativePath = pathParts.join('/') + '.md';
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
// Otherwise, if frontmatter has id, use that.
|
|
231
|
+
else if (isNonEmptyString(doc.frontMatter?.id)) {
|
|
232
|
+
const id = doc.frontMatter.id.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
233
|
+
|
|
234
|
+
if (isNonEmptyString(id)) { // Only process if id is not empty after trimming
|
|
235
|
+
if (id.includes('/')) {
|
|
236
|
+
// Nested id: create directory structure
|
|
237
|
+
relativePath = id + '.md';
|
|
238
|
+
} else {
|
|
239
|
+
// Simple id: replace just the filename
|
|
240
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
241
|
+
pathParts[pathParts.length - 1] = id;
|
|
242
|
+
relativePath = pathParts.join('/') + '.md';
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// Trim any leading/trailing whitespace from the path
|
|
248
|
+
relativePath = relativePath.trim();
|
|
249
|
+
|
|
250
|
+
// If path is empty or invalid, create a fallback path
|
|
251
|
+
if (!isNonEmptyString(relativePath) || relativePath === '.md') {
|
|
252
|
+
const sanitizedTitle = sanitizeForFilename(doc.title, 'untitled');
|
|
253
|
+
relativePath = `${sanitizedTitle}.md`;
|
|
184
254
|
}
|
|
185
255
|
|
|
186
|
-
// Ensure
|
|
187
|
-
let
|
|
256
|
+
// Ensure path uniqueness
|
|
257
|
+
let uniquePath = relativePath;
|
|
188
258
|
let counter = 1;
|
|
189
|
-
|
|
190
|
-
|
|
259
|
+
const MAX_PATH_ITERATIONS = 10000;
|
|
260
|
+
let pathIterations = 0;
|
|
261
|
+
|
|
262
|
+
while (usedPaths.has(uniquePath.toLowerCase())) {
|
|
191
263
|
counter++;
|
|
264
|
+
const pathParts = relativePath.split('.');
|
|
265
|
+
const extension = pathParts.pop() || 'md';
|
|
266
|
+
const basePath = pathParts.join('.');
|
|
267
|
+
uniquePath = `${basePath}-${counter}.${extension}`;
|
|
268
|
+
|
|
269
|
+
pathIterations++;
|
|
270
|
+
if (pathIterations >= MAX_PATH_ITERATIONS) {
|
|
271
|
+
// Fallback to timestamp
|
|
272
|
+
const timestamp = Date.now();
|
|
273
|
+
uniquePath = `${basePath}-${timestamp}.${extension}`;
|
|
274
|
+
logger.warn(`Maximum iterations reached for unique path. Using timestamp: ${uniquePath}`);
|
|
275
|
+
break;
|
|
276
|
+
}
|
|
192
277
|
}
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
// Create markdown content following llmstxt.org standard
|
|
196
|
-
const markdownContent = `# ${doc.title}
|
|
278
|
+
usedPaths.add(uniquePath.toLowerCase());
|
|
197
279
|
|
|
198
|
-
|
|
280
|
+
// Create the full file path and validate/shorten if needed
|
|
281
|
+
let fullPath = path.join(outputDir, uniquePath);
|
|
282
|
+
fullPath = shortenPathIfNeeded(fullPath, outputDir, uniquePath);
|
|
199
283
|
|
|
200
|
-
|
|
201
|
-
|
|
284
|
+
// Update uniquePath to reflect the shortened path if it was changed
|
|
285
|
+
if (fullPath !== path.join(outputDir, uniquePath)) {
|
|
286
|
+
uniquePath = path.relative(outputDir, fullPath);
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
const directory = path.dirname(fullPath);
|
|
290
|
+
|
|
291
|
+
// Create directory structure if it doesn't exist
|
|
292
|
+
try {
|
|
293
|
+
await fs.mkdir(directory, { recursive: true });
|
|
294
|
+
} catch (error: unknown) {
|
|
295
|
+
throw new Error(`Failed to create directory ${directory}: ${getErrorMessage(error)}`);
|
|
296
|
+
}
|
|
202
297
|
|
|
298
|
+
// Extract preserved frontmatter if specified
|
|
299
|
+
let preservedFrontMatter: Record<string, any> = {};
|
|
300
|
+
if (isNonEmptyArray(keepFrontMatter) && isDefined(doc.frontMatter)) {
|
|
301
|
+
for (const key of keepFrontMatter) {
|
|
302
|
+
if (key in doc.frontMatter) {
|
|
303
|
+
preservedFrontMatter[key] = doc.frontMatter[key];
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
// Create markdown content using the utility function
|
|
309
|
+
const markdownContent = createMarkdownContent(
|
|
310
|
+
doc.title,
|
|
311
|
+
doc.description,
|
|
312
|
+
doc.content,
|
|
313
|
+
true, // includeMetadata
|
|
314
|
+
Object.keys(preservedFrontMatter).length > 0 ? preservedFrontMatter : undefined
|
|
315
|
+
);
|
|
316
|
+
|
|
203
317
|
// Write the markdown file
|
|
204
|
-
|
|
205
|
-
|
|
318
|
+
try {
|
|
319
|
+
await writeFile(fullPath, markdownContent);
|
|
320
|
+
} catch (error: unknown) {
|
|
321
|
+
throw new Error(`Failed to write file ${fullPath}: ${getErrorMessage(error)}`);
|
|
322
|
+
}
|
|
206
323
|
|
|
207
324
|
// Create updated DocInfo with new URL pointing to the generated markdown file
|
|
208
|
-
|
|
325
|
+
// Convert file path to URL path (use forward slashes)
|
|
326
|
+
const urlPath = normalizePath(uniquePath);
|
|
327
|
+
const newUrl = `${siteUrl}/${urlPath}`;
|
|
209
328
|
|
|
210
329
|
updatedDocs.push({
|
|
211
330
|
...doc,
|
|
212
331
|
url: newUrl,
|
|
213
|
-
path: `/${
|
|
332
|
+
path: `/${urlPath}` // Update path to the new markdown file
|
|
214
333
|
});
|
|
215
334
|
|
|
216
|
-
|
|
335
|
+
logger.verbose(`Generated markdown file: ${uniquePath}`);
|
|
217
336
|
}
|
|
218
337
|
|
|
219
338
|
return updatedDocs;
|
|
@@ -236,8 +355,8 @@ export async function generateStandardLLMFiles(
|
|
|
236
355
|
options
|
|
237
356
|
} = context;
|
|
238
357
|
|
|
239
|
-
const {
|
|
240
|
-
generateLLMsTxt,
|
|
358
|
+
const {
|
|
359
|
+
generateLLMsTxt,
|
|
241
360
|
generateLLMsFullTxt,
|
|
242
361
|
llmsTxtFilename = 'llms.txt',
|
|
243
362
|
llmsFullTxtFilename = 'llms-full.txt',
|
|
@@ -246,10 +365,12 @@ export async function generateStandardLLMFiles(
|
|
|
246
365
|
version,
|
|
247
366
|
generateMarkdownFiles = false,
|
|
248
367
|
rootContent,
|
|
249
|
-
fullRootContent
|
|
368
|
+
fullRootContent,
|
|
369
|
+
processingBatchSize = 100
|
|
250
370
|
} = options;
|
|
251
371
|
|
|
252
372
|
if (!generateLLMsTxt && !generateLLMsFullTxt) {
|
|
373
|
+
logger.warn('No standard LLM files configured for generation. Skipping.');
|
|
253
374
|
return;
|
|
254
375
|
}
|
|
255
376
|
|
|
@@ -263,15 +384,24 @@ export async function generateStandardLLMFiles(
|
|
|
263
384
|
includeUnmatchedLast
|
|
264
385
|
);
|
|
265
386
|
|
|
266
|
-
|
|
267
|
-
|
|
387
|
+
logger.verbose(`Processed ${processedDocs.length} documentation files for standard LLM files`);
|
|
388
|
+
|
|
389
|
+
// Check if we have documents to process
|
|
390
|
+
if (!isNonEmptyArray(processedDocs)) {
|
|
391
|
+
logger.warn('No documents found matching patterns for standard LLM files. Skipping.');
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
|
|
268
395
|
// Generate individual markdown files if requested
|
|
269
|
-
if (generateMarkdownFiles
|
|
270
|
-
|
|
396
|
+
if (generateMarkdownFiles) {
|
|
397
|
+
logger.info('Generating individual markdown files...');
|
|
271
398
|
processedDocs = await generateIndividualMarkdownFiles(
|
|
272
399
|
processedDocs,
|
|
273
400
|
outDir,
|
|
274
|
-
siteUrl
|
|
401
|
+
siteUrl,
|
|
402
|
+
context.docsDir,
|
|
403
|
+
context.options.keepFrontMatter || [],
|
|
404
|
+
context.options.preserveDirectoryStructure !== false // Default to true
|
|
275
405
|
);
|
|
276
406
|
}
|
|
277
407
|
|
|
@@ -285,7 +415,8 @@ export async function generateStandardLLMFiles(
|
|
|
285
415
|
docDescription,
|
|
286
416
|
false, // links only
|
|
287
417
|
version,
|
|
288
|
-
rootContent
|
|
418
|
+
rootContent,
|
|
419
|
+
processingBatchSize
|
|
289
420
|
);
|
|
290
421
|
}
|
|
291
422
|
|
|
@@ -299,7 +430,8 @@ export async function generateStandardLLMFiles(
|
|
|
299
430
|
docDescription,
|
|
300
431
|
true, // full content
|
|
301
432
|
version,
|
|
302
|
-
fullRootContent
|
|
433
|
+
fullRootContent,
|
|
434
|
+
processingBatchSize
|
|
303
435
|
);
|
|
304
436
|
}
|
|
305
437
|
}
|
|
@@ -314,16 +446,22 @@ export async function generateCustomLLMFiles(
|
|
|
314
446
|
allDocFiles: string[]
|
|
315
447
|
): Promise<void> {
|
|
316
448
|
const { outDir, siteUrl, docTitle, docDescription, options } = context;
|
|
317
|
-
const {
|
|
449
|
+
const {
|
|
450
|
+
customLLMFiles = [],
|
|
451
|
+
ignoreFiles = [],
|
|
452
|
+
generateMarkdownFiles = false,
|
|
453
|
+
processingBatchSize = 100
|
|
454
|
+
} = options;
|
|
318
455
|
|
|
319
456
|
if (customLLMFiles.length === 0) {
|
|
457
|
+
logger.warn('No custom LLM files configured. Skipping.');
|
|
320
458
|
return;
|
|
321
459
|
}
|
|
322
460
|
|
|
323
|
-
|
|
461
|
+
logger.info(`Generating ${customLLMFiles.length} custom LLM files...`);
|
|
324
462
|
|
|
325
463
|
for (const customFile of customLLMFiles) {
|
|
326
|
-
|
|
464
|
+
logger.verbose(`Processing custom file: ${customFile.filename}, version: ${customFile.version || 'undefined'}`);
|
|
327
465
|
|
|
328
466
|
// Combine global ignores with custom ignores
|
|
329
467
|
const combinedIgnores = [...ignoreFiles];
|
|
@@ -344,11 +482,14 @@ export async function generateCustomLLMFiles(
|
|
|
344
482
|
if (customDocs.length > 0) {
|
|
345
483
|
// Generate individual markdown files if requested
|
|
346
484
|
if (generateMarkdownFiles) {
|
|
347
|
-
|
|
485
|
+
logger.info(`Generating individual markdown files for custom file: ${customFile.filename}...`);
|
|
348
486
|
customDocs = await generateIndividualMarkdownFiles(
|
|
349
487
|
customDocs,
|
|
350
488
|
outDir,
|
|
351
|
-
siteUrl
|
|
489
|
+
siteUrl,
|
|
490
|
+
context.docsDir,
|
|
491
|
+
context.options.keepFrontMatter || [],
|
|
492
|
+
context.options.preserveDirectoryStructure !== false // Default to true
|
|
352
493
|
);
|
|
353
494
|
}
|
|
354
495
|
|
|
@@ -365,12 +506,13 @@ export async function generateCustomLLMFiles(
|
|
|
365
506
|
customDescription,
|
|
366
507
|
customFile.fullContent,
|
|
367
508
|
customFile.version,
|
|
368
|
-
customFile.rootContent
|
|
509
|
+
customFile.rootContent,
|
|
510
|
+
processingBatchSize
|
|
369
511
|
);
|
|
370
512
|
|
|
371
|
-
|
|
513
|
+
logger.info(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);
|
|
372
514
|
} else {
|
|
373
|
-
|
|
515
|
+
logger.warn(`No matching documents found for custom LLM file: ${customFile.filename}`);
|
|
374
516
|
}
|
|
375
517
|
}
|
|
376
518
|
}
|
|
@@ -382,7 +524,7 @@ export async function generateCustomLLMFiles(
|
|
|
382
524
|
*/
|
|
383
525
|
export async function collectDocFiles(context: PluginContext): Promise<string[]> {
|
|
384
526
|
const { siteDir, docsDir, options } = context;
|
|
385
|
-
const { ignoreFiles = [], includeBlog = false } = options;
|
|
527
|
+
const { ignoreFiles = [], includeBlog = false, warnOnIgnoredFiles = false } = options;
|
|
386
528
|
|
|
387
529
|
const allDocFiles: string[] = [];
|
|
388
530
|
|
|
@@ -391,13 +533,13 @@ export async function collectDocFiles(context: PluginContext): Promise<string[]>
|
|
|
391
533
|
|
|
392
534
|
try {
|
|
393
535
|
await fs.access(fullDocsDir);
|
|
394
|
-
|
|
536
|
+
|
|
395
537
|
// Collect all markdown files from docs directory
|
|
396
|
-
const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);
|
|
538
|
+
const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
397
539
|
allDocFiles.push(...docFiles);
|
|
398
|
-
|
|
399
|
-
} catch (err) {
|
|
400
|
-
|
|
540
|
+
|
|
541
|
+
} catch (err: unknown) {
|
|
542
|
+
logger.warn(`Docs directory not found: ${fullDocsDir}`);
|
|
401
543
|
}
|
|
402
544
|
|
|
403
545
|
// Process blog if enabled
|
|
@@ -406,13 +548,13 @@ export async function collectDocFiles(context: PluginContext): Promise<string[]>
|
|
|
406
548
|
|
|
407
549
|
try {
|
|
408
550
|
await fs.access(blogDir);
|
|
409
|
-
|
|
551
|
+
|
|
410
552
|
// Collect all markdown files from blog directory
|
|
411
|
-
const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);
|
|
553
|
+
const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
412
554
|
allDocFiles.push(...blogFiles);
|
|
413
|
-
|
|
414
|
-
} catch (err) {
|
|
415
|
-
|
|
555
|
+
|
|
556
|
+
} catch (err: unknown) {
|
|
557
|
+
logger.warn(`Blog directory not found: ${blogDir}`);
|
|
416
558
|
}
|
|
417
559
|
}
|
|
418
560
|
|