docusaurus-plugin-llms 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -15
- package/lib/generator-current.d.ts +44 -0
- package/lib/generator-current.js +398 -0
- package/lib/generator.d.ts +6 -2
- package/lib/generator.js +200 -120
- package/lib/index.js +175 -10
- package/lib/null-handling-guide.d.ts +47 -0
- package/lib/null-handling-guide.js +290 -0
- package/lib/processor.d.ts +0 -10
- package/lib/processor.js +230 -83
- package/lib/types.d.ts +13 -0
- package/lib/utils.d.ts +165 -6
- package/lib/utils.js +481 -28
- package/package.json +5 -3
- package/src/generator.ts +270 -128
- package/src/index.ts +204 -14
- package/src/null-handling-guide.ts +321 -0
- package/src/processor.ts +314 -127
- package/src/types.ts +20 -1
- package/src/utils.ts +594 -48
package/lib/generator.js
CHANGED
|
@@ -51,10 +51,11 @@ const processor_1 = require("./processor");
|
|
|
51
51
|
* @returns Cleaned description suitable for TOC
|
|
52
52
|
*/
|
|
53
53
|
function cleanDescriptionForToc(description) {
|
|
54
|
-
if (!description)
|
|
54
|
+
if (!(0, utils_1.isNonEmptyString)(description))
|
|
55
55
|
return '';
|
|
56
56
|
// Get just the first line for TOC display
|
|
57
|
-
const
|
|
57
|
+
const lines = description.split('\n');
|
|
58
|
+
const firstLine = lines.length > 0 ? lines[0] : '';
|
|
58
59
|
// Remove heading markers only at the beginning of the line
|
|
59
60
|
// Be careful to only remove actual heading markers (# followed by space at beginning)
|
|
60
61
|
// and not hashtag symbols that are part of the content (inline hashtags)
|
|
@@ -71,77 +72,74 @@ function cleanDescriptionForToc(description) {
|
|
|
71
72
|
* @param includeFullContent - Whether to include full content or just links
|
|
72
73
|
* @param version - Version of the file
|
|
73
74
|
* @param customRootContent - Optional custom content to include at the root level
|
|
75
|
+
* @param batchSize - Batch size for processing documents (default: 100)
|
|
74
76
|
*/
|
|
75
|
-
async function generateLLMFile(docs, outputPath, fileTitle, fileDescription, includeFullContent, version, customRootContent) {
|
|
76
|
-
|
|
77
|
+
async function generateLLMFile(docs, outputPath, fileTitle, fileDescription, includeFullContent, version, customRootContent, batchSize = 100) {
|
|
78
|
+
// Validate path length before proceeding
|
|
79
|
+
if (!(0, utils_1.validatePathLength)(outputPath)) {
|
|
80
|
+
throw new Error(`Output path exceeds maximum length: ${outputPath}`);
|
|
81
|
+
}
|
|
82
|
+
utils_1.logger.verbose(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
|
|
77
83
|
const versionInfo = version ? `\n\nVersion: ${version}` : '';
|
|
78
84
|
if (includeFullContent) {
|
|
79
85
|
// Generate full content file with header deduplication
|
|
86
|
+
// Process documents in batches to prevent memory issues on large sites
|
|
80
87
|
const usedHeaders = new Set();
|
|
81
|
-
const fullContentSections =
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
let headerText = doc.title;
|
|
90
|
-
let uniqueHeader = headerText;
|
|
91
|
-
let counter = 1;
|
|
92
|
-
// If this header has been used before, make it unique by adding a suffix
|
|
93
|
-
while (usedHeaders.has(uniqueHeader.toLowerCase())) {
|
|
94
|
-
counter++;
|
|
95
|
-
// Try to make it more descriptive by adding the file path info if available
|
|
96
|
-
if (doc.path && counter === 2) {
|
|
97
|
-
const pathParts = doc.path.split('/');
|
|
98
|
-
const folderName = pathParts.length > 1 ? pathParts[pathParts.length - 2] : '';
|
|
99
|
-
if (folderName) {
|
|
100
|
-
uniqueHeader = `${headerText} (${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
|
|
101
|
-
}
|
|
102
|
-
else {
|
|
103
|
-
uniqueHeader = `${headerText} (${counter})`;
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
else {
|
|
107
|
-
uniqueHeader = `${headerText} (${counter})`;
|
|
108
|
-
}
|
|
88
|
+
const fullContentSections = [];
|
|
89
|
+
// Process documents in batches
|
|
90
|
+
for (let i = 0; i < docs.length; i += batchSize) {
|
|
91
|
+
const batch = docs.slice(i, i + batchSize);
|
|
92
|
+
const batchNumber = Math.floor(i / batchSize) + 1;
|
|
93
|
+
const totalBatches = Math.ceil(docs.length / batchSize);
|
|
94
|
+
if (totalBatches > 1) {
|
|
95
|
+
utils_1.logger.verbose(`Processing batch ${batchNumber}/${totalBatches} (${batch.length} documents)`);
|
|
109
96
|
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
97
|
+
const batchSections = batch.map(doc => {
|
|
98
|
+
// Check if content already starts with the same heading to avoid duplication
|
|
99
|
+
const trimmedContent = doc.content.trim();
|
|
100
|
+
const contentLines = trimmedContent.split('\n');
|
|
101
|
+
const firstLine = contentLines.length > 0 ? contentLines[0] : '';
|
|
102
|
+
// Check if the first line is a heading that matches our title
|
|
103
|
+
const headingMatch = firstLine.match(/^#+\s+(.+)$/);
|
|
104
|
+
const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
|
|
105
|
+
// Generate unique header using the utility function
|
|
106
|
+
const uniqueHeader = (0, utils_1.ensureUniqueIdentifier)(doc.title, usedHeaders, (counter, base) => {
|
|
107
|
+
// Try to make it more descriptive by adding the file path info if available
|
|
108
|
+
if ((0, utils_1.isNonEmptyString)(doc.path) && counter === 2) {
|
|
109
|
+
const pathParts = doc.path.split('/');
|
|
110
|
+
const folderName = pathParts.length >= 2 ? pathParts[pathParts.length - 2] : '';
|
|
111
|
+
if ((0, utils_1.isNonEmptyString)(folderName)) {
|
|
112
|
+
return `(${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
return `(${counter})`;
|
|
116
|
+
});
|
|
117
|
+
if (firstHeadingText === doc.title) {
|
|
118
|
+
// Content already has the same heading, replace it with our unique header
|
|
114
119
|
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
115
120
|
return `## ${uniqueHeader}
|
|
116
121
|
|
|
117
122
|
${restOfContent}`;
|
|
118
123
|
}
|
|
119
124
|
else {
|
|
120
|
-
//
|
|
121
|
-
const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
|
|
125
|
+
// Content doesn't have the same heading, add our unique H2 header
|
|
122
126
|
return `## ${uniqueHeader}
|
|
123
127
|
|
|
124
|
-
${restOfContent}`;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
else {
|
|
128
|
-
// Content doesn't have the same heading, add our unique H2 header
|
|
129
|
-
return `## ${uniqueHeader}
|
|
130
|
-
|
|
131
128
|
${doc.content}`;
|
|
132
|
-
|
|
133
|
-
|
|
129
|
+
}
|
|
130
|
+
});
|
|
131
|
+
fullContentSections.push(...batchSections);
|
|
132
|
+
}
|
|
134
133
|
// Use custom root content or default message
|
|
135
134
|
const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
|
|
136
|
-
const llmFileContent =
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
${
|
|
143
|
-
|
|
144
|
-
await (0, utils_1.writeFile)(outputPath, llmFileContent);
|
|
135
|
+
const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n${fullContentSections.join('\n\n---\n\n')}`, true // include metadata (description)
|
|
136
|
+
);
|
|
137
|
+
try {
|
|
138
|
+
await (0, utils_1.writeFile)(outputPath, llmFileContent);
|
|
139
|
+
}
|
|
140
|
+
catch (error) {
|
|
141
|
+
throw new Error(`Failed to write file ${outputPath}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
142
|
+
}
|
|
145
143
|
}
|
|
146
144
|
else {
|
|
147
145
|
// Generate links-only file
|
|
@@ -152,71 +150,144 @@ ${fullContentSections.join('\n\n---\n\n')}
|
|
|
152
150
|
});
|
|
153
151
|
// Use custom root content or default message
|
|
154
152
|
const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
|
|
155
|
-
const llmFileContent =
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
${tocItems.join('\n')}
|
|
164
|
-
`;
|
|
165
|
-
await (0, utils_1.writeFile)(outputPath, llmFileContent);
|
|
153
|
+
const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n## Table of Contents\n\n${tocItems.join('\n')}`, true // include metadata (description)
|
|
154
|
+
);
|
|
155
|
+
try {
|
|
156
|
+
await (0, utils_1.writeFile)(outputPath, llmFileContent);
|
|
157
|
+
}
|
|
158
|
+
catch (error) {
|
|
159
|
+
throw new Error(`Failed to write file ${outputPath}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
160
|
+
}
|
|
166
161
|
}
|
|
167
|
-
|
|
162
|
+
utils_1.logger.info(`Generated: ${outputPath}`);
|
|
168
163
|
}
|
|
169
164
|
/**
|
|
170
165
|
* Generate individual markdown files for each document
|
|
171
166
|
* @param docs - Processed document information
|
|
172
167
|
* @param outputDir - Directory to write the markdown files
|
|
173
168
|
* @param siteUrl - Base site URL
|
|
169
|
+
* @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
|
|
170
|
+
* @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
|
|
171
|
+
* @param preserveDirectoryStructure - Whether to preserve the full directory structure (default: true)
|
|
174
172
|
* @returns Updated docs with new URLs pointing to generated markdown files
|
|
175
173
|
*/
|
|
176
|
-
async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl) {
|
|
174
|
+
async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl, docsDir = 'docs', keepFrontMatter = [], preserveDirectoryStructure = true) {
|
|
177
175
|
const updatedDocs = [];
|
|
178
|
-
|
|
179
|
-
const usedFilenames = new Set();
|
|
176
|
+
const usedPaths = new Set();
|
|
180
177
|
for (const doc of docs) {
|
|
181
|
-
//
|
|
182
|
-
let
|
|
183
|
-
.
|
|
184
|
-
.replace(
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
178
|
+
// Use the original path structure as default filename.
|
|
179
|
+
let relativePath = doc.path
|
|
180
|
+
.replace(/^\/+/, '') // Remove leading slashes
|
|
181
|
+
.replace(/\.mdx?$/, '.md'); // Ensure .md extension
|
|
182
|
+
// Strip the docsDir prefix only if preserveDirectoryStructure is false
|
|
183
|
+
if (!preserveDirectoryStructure) {
|
|
184
|
+
relativePath = relativePath
|
|
185
|
+
.replace(new RegExp(`^${docsDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/`), ''); // Remove configured docs dir prefix
|
|
186
|
+
}
|
|
187
|
+
// If frontmatter has slug, use that.
|
|
188
|
+
if ((0, utils_1.isNonEmptyString)(doc.frontMatter?.slug)) {
|
|
189
|
+
const slug = doc.frontMatter.slug.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
190
|
+
if ((0, utils_1.isNonEmptyString)(slug)) { // Only process if slug is not empty after trimming
|
|
191
|
+
if (slug.includes('/')) {
|
|
192
|
+
// Nested slug: create directory structure
|
|
193
|
+
relativePath = slug + '.md';
|
|
194
|
+
}
|
|
195
|
+
else {
|
|
196
|
+
// Simple slug: replace just the filename
|
|
197
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
198
|
+
pathParts[pathParts.length - 1] = slug;
|
|
199
|
+
relativePath = pathParts.join('/') + '.md';
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
// Otherwise, if frontmatter has id, use that.
|
|
204
|
+
else if ((0, utils_1.isNonEmptyString)(doc.frontMatter?.id)) {
|
|
205
|
+
const id = doc.frontMatter.id.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
|
|
206
|
+
if ((0, utils_1.isNonEmptyString)(id)) { // Only process if id is not empty after trimming
|
|
207
|
+
if (id.includes('/')) {
|
|
208
|
+
// Nested id: create directory structure
|
|
209
|
+
relativePath = id + '.md';
|
|
210
|
+
}
|
|
211
|
+
else {
|
|
212
|
+
// Simple id: replace just the filename
|
|
213
|
+
const pathParts = relativePath.replace(/\.md$/, '').split('/');
|
|
214
|
+
pathParts[pathParts.length - 1] = id;
|
|
215
|
+
relativePath = pathParts.join('/') + '.md';
|
|
216
|
+
}
|
|
217
|
+
}
|
|
193
218
|
}
|
|
194
|
-
//
|
|
195
|
-
|
|
219
|
+
// Trim any leading/trailing whitespace from the path
|
|
220
|
+
relativePath = relativePath.trim();
|
|
221
|
+
// If path is empty or invalid, create a fallback path
|
|
222
|
+
if (!(0, utils_1.isNonEmptyString)(relativePath) || relativePath === '.md') {
|
|
223
|
+
const sanitizedTitle = (0, utils_1.sanitizeForFilename)(doc.title, 'untitled');
|
|
224
|
+
relativePath = `${sanitizedTitle}.md`;
|
|
225
|
+
}
|
|
226
|
+
// Ensure path uniqueness
|
|
227
|
+
let uniquePath = relativePath;
|
|
196
228
|
let counter = 1;
|
|
197
|
-
|
|
198
|
-
|
|
229
|
+
const MAX_PATH_ITERATIONS = 10000;
|
|
230
|
+
let pathIterations = 0;
|
|
231
|
+
while (usedPaths.has(uniquePath.toLowerCase())) {
|
|
199
232
|
counter++;
|
|
233
|
+
const pathParts = relativePath.split('.');
|
|
234
|
+
const extension = pathParts.pop() || 'md';
|
|
235
|
+
const basePath = pathParts.join('.');
|
|
236
|
+
uniquePath = `${basePath}-${counter}.${extension}`;
|
|
237
|
+
pathIterations++;
|
|
238
|
+
if (pathIterations >= MAX_PATH_ITERATIONS) {
|
|
239
|
+
// Fallback to timestamp
|
|
240
|
+
const timestamp = Date.now();
|
|
241
|
+
uniquePath = `${basePath}-${timestamp}.${extension}`;
|
|
242
|
+
utils_1.logger.warn(`Maximum iterations reached for unique path. Using timestamp: ${uniquePath}`);
|
|
243
|
+
break;
|
|
244
|
+
}
|
|
200
245
|
}
|
|
201
|
-
|
|
202
|
-
// Create
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
246
|
+
usedPaths.add(uniquePath.toLowerCase());
|
|
247
|
+
// Create the full file path and validate/shorten if needed
|
|
248
|
+
let fullPath = path.join(outputDir, uniquePath);
|
|
249
|
+
fullPath = (0, utils_1.shortenPathIfNeeded)(fullPath, outputDir, uniquePath);
|
|
250
|
+
// Update uniquePath to reflect the shortened path if it was changed
|
|
251
|
+
if (fullPath !== path.join(outputDir, uniquePath)) {
|
|
252
|
+
uniquePath = path.relative(outputDir, fullPath);
|
|
253
|
+
}
|
|
254
|
+
const directory = path.dirname(fullPath);
|
|
255
|
+
// Create directory structure if it doesn't exist
|
|
256
|
+
try {
|
|
257
|
+
await fs.mkdir(directory, { recursive: true });
|
|
258
|
+
}
|
|
259
|
+
catch (error) {
|
|
260
|
+
throw new Error(`Failed to create directory ${directory}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
261
|
+
}
|
|
262
|
+
// Extract preserved frontmatter if specified
|
|
263
|
+
let preservedFrontMatter = {};
|
|
264
|
+
if ((0, utils_1.isNonEmptyArray)(keepFrontMatter) && (0, utils_1.isDefined)(doc.frontMatter)) {
|
|
265
|
+
for (const key of keepFrontMatter) {
|
|
266
|
+
if (key in doc.frontMatter) {
|
|
267
|
+
preservedFrontMatter[key] = doc.frontMatter[key];
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
// Create markdown content using the utility function
|
|
272
|
+
const markdownContent = (0, utils_1.createMarkdownContent)(doc.title, doc.description, doc.content, true, // includeMetadata
|
|
273
|
+
Object.keys(preservedFrontMatter).length > 0 ? preservedFrontMatter : undefined);
|
|
209
274
|
// Write the markdown file
|
|
210
|
-
|
|
211
|
-
|
|
275
|
+
try {
|
|
276
|
+
await (0, utils_1.writeFile)(fullPath, markdownContent);
|
|
277
|
+
}
|
|
278
|
+
catch (error) {
|
|
279
|
+
throw new Error(`Failed to write file ${fullPath}: ${(0, utils_1.getErrorMessage)(error)}`);
|
|
280
|
+
}
|
|
212
281
|
// Create updated DocInfo with new URL pointing to the generated markdown file
|
|
213
|
-
|
|
282
|
+
// Convert file path to URL path (use forward slashes)
|
|
283
|
+
const urlPath = (0, utils_1.normalizePath)(uniquePath);
|
|
284
|
+
const newUrl = `${siteUrl}/${urlPath}`;
|
|
214
285
|
updatedDocs.push({
|
|
215
286
|
...doc,
|
|
216
287
|
url: newUrl,
|
|
217
|
-
path: `/${
|
|
288
|
+
path: `/${urlPath}` // Update path to the new markdown file
|
|
218
289
|
});
|
|
219
|
-
|
|
290
|
+
utils_1.logger.verbose(`Generated markdown file: ${uniquePath}`);
|
|
220
291
|
}
|
|
221
292
|
return updatedDocs;
|
|
222
293
|
}
|
|
@@ -227,31 +298,38 @@ ${doc.content}
|
|
|
227
298
|
*/
|
|
228
299
|
async function generateStandardLLMFiles(context, allDocFiles) {
|
|
229
300
|
const { outDir, siteUrl, docTitle, docDescription, options } = context;
|
|
230
|
-
const { generateLLMsTxt, generateLLMsFullTxt, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeOrder = [], includeUnmatchedLast = true, version, generateMarkdownFiles = false, rootContent, fullRootContent } = options;
|
|
301
|
+
const { generateLLMsTxt, generateLLMsFullTxt, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeOrder = [], includeUnmatchedLast = true, version, generateMarkdownFiles = false, rootContent, fullRootContent, processingBatchSize = 100 } = options;
|
|
231
302
|
if (!generateLLMsTxt && !generateLLMsFullTxt) {
|
|
303
|
+
utils_1.logger.warn('No standard LLM files configured for generation. Skipping.');
|
|
232
304
|
return;
|
|
233
305
|
}
|
|
234
306
|
// Process files for the standard outputs
|
|
235
307
|
let processedDocs = await (0, processor_1.processFilesWithPatterns)(context, allDocFiles, [], // No specific include patterns - include all
|
|
236
308
|
[], // No additional ignore patterns beyond global ignoreFiles
|
|
237
309
|
includeOrder, includeUnmatchedLast);
|
|
238
|
-
|
|
310
|
+
utils_1.logger.verbose(`Processed ${processedDocs.length} documentation files for standard LLM files`);
|
|
311
|
+
// Check if we have documents to process
|
|
312
|
+
if (!(0, utils_1.isNonEmptyArray)(processedDocs)) {
|
|
313
|
+
utils_1.logger.warn('No documents found matching patterns for standard LLM files. Skipping.');
|
|
314
|
+
return;
|
|
315
|
+
}
|
|
239
316
|
// Generate individual markdown files if requested
|
|
240
|
-
if (generateMarkdownFiles
|
|
241
|
-
|
|
242
|
-
processedDocs = await generateIndividualMarkdownFiles(processedDocs, outDir, siteUrl
|
|
317
|
+
if (generateMarkdownFiles) {
|
|
318
|
+
utils_1.logger.info('Generating individual markdown files...');
|
|
319
|
+
processedDocs = await generateIndividualMarkdownFiles(processedDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || [], context.options.preserveDirectoryStructure !== false // Default to true
|
|
320
|
+
);
|
|
243
321
|
}
|
|
244
322
|
// Generate llms.txt
|
|
245
323
|
if (generateLLMsTxt) {
|
|
246
324
|
const llmsTxtPath = path.join(outDir, llmsTxtFilename);
|
|
247
325
|
await generateLLMFile(processedDocs, llmsTxtPath, docTitle, docDescription, false, // links only
|
|
248
|
-
version, rootContent);
|
|
326
|
+
version, rootContent, processingBatchSize);
|
|
249
327
|
}
|
|
250
328
|
// Generate llms-full.txt
|
|
251
329
|
if (generateLLMsFullTxt) {
|
|
252
330
|
const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
|
|
253
331
|
await generateLLMFile(processedDocs, llmsFullTxtPath, docTitle, docDescription, true, // full content
|
|
254
|
-
version, fullRootContent);
|
|
332
|
+
version, fullRootContent, processingBatchSize);
|
|
255
333
|
}
|
|
256
334
|
}
|
|
257
335
|
/**
|
|
@@ -261,13 +339,14 @@ async function generateStandardLLMFiles(context, allDocFiles) {
|
|
|
261
339
|
*/
|
|
262
340
|
async function generateCustomLLMFiles(context, allDocFiles) {
|
|
263
341
|
const { outDir, siteUrl, docTitle, docDescription, options } = context;
|
|
264
|
-
const { customLLMFiles = [], ignoreFiles = [], generateMarkdownFiles = false } = options;
|
|
342
|
+
const { customLLMFiles = [], ignoreFiles = [], generateMarkdownFiles = false, processingBatchSize = 100 } = options;
|
|
265
343
|
if (customLLMFiles.length === 0) {
|
|
344
|
+
utils_1.logger.warn('No custom LLM files configured. Skipping.');
|
|
266
345
|
return;
|
|
267
346
|
}
|
|
268
|
-
|
|
347
|
+
utils_1.logger.info(`Generating ${customLLMFiles.length} custom LLM files...`);
|
|
269
348
|
for (const customFile of customLLMFiles) {
|
|
270
|
-
|
|
349
|
+
utils_1.logger.verbose(`Processing custom file: ${customFile.filename}, version: ${customFile.version || 'undefined'}`);
|
|
271
350
|
// Combine global ignores with custom ignores
|
|
272
351
|
const combinedIgnores = [...ignoreFiles];
|
|
273
352
|
if (customFile.ignorePatterns) {
|
|
@@ -278,19 +357,20 @@ async function generateCustomLLMFiles(context, allDocFiles) {
|
|
|
278
357
|
if (customDocs.length > 0) {
|
|
279
358
|
// Generate individual markdown files if requested
|
|
280
359
|
if (generateMarkdownFiles) {
|
|
281
|
-
|
|
282
|
-
customDocs = await generateIndividualMarkdownFiles(customDocs, outDir, siteUrl
|
|
360
|
+
utils_1.logger.info(`Generating individual markdown files for custom file: ${customFile.filename}...`);
|
|
361
|
+
customDocs = await generateIndividualMarkdownFiles(customDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || [], context.options.preserveDirectoryStructure !== false // Default to true
|
|
362
|
+
);
|
|
283
363
|
}
|
|
284
364
|
// Use custom title/description or fall back to defaults
|
|
285
365
|
const customTitle = customFile.title || docTitle;
|
|
286
366
|
const customDescription = customFile.description || docDescription;
|
|
287
367
|
// Generate the custom LLM file
|
|
288
368
|
const customFilePath = path.join(outDir, customFile.filename);
|
|
289
|
-
await generateLLMFile(customDocs, customFilePath, customTitle, customDescription, customFile.fullContent, customFile.version, customFile.rootContent);
|
|
290
|
-
|
|
369
|
+
await generateLLMFile(customDocs, customFilePath, customTitle, customDescription, customFile.fullContent, customFile.version, customFile.rootContent, processingBatchSize);
|
|
370
|
+
utils_1.logger.info(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);
|
|
291
371
|
}
|
|
292
372
|
else {
|
|
293
|
-
|
|
373
|
+
utils_1.logger.warn(`No matching documents found for custom LLM file: ${customFile.filename}`);
|
|
294
374
|
}
|
|
295
375
|
}
|
|
296
376
|
}
|
|
@@ -301,18 +381,18 @@ async function generateCustomLLMFiles(context, allDocFiles) {
|
|
|
301
381
|
*/
|
|
302
382
|
async function collectDocFiles(context) {
|
|
303
383
|
const { siteDir, docsDir, options } = context;
|
|
304
|
-
const { ignoreFiles = [], includeBlog = false } = options;
|
|
384
|
+
const { ignoreFiles = [], includeBlog = false, warnOnIgnoredFiles = false } = options;
|
|
305
385
|
const allDocFiles = [];
|
|
306
386
|
// Process docs directory
|
|
307
387
|
const fullDocsDir = path.join(siteDir, docsDir);
|
|
308
388
|
try {
|
|
309
389
|
await fs.access(fullDocsDir);
|
|
310
390
|
// Collect all markdown files from docs directory
|
|
311
|
-
const docFiles = await (0, utils_1.readMarkdownFiles)(fullDocsDir, siteDir, ignoreFiles);
|
|
391
|
+
const docFiles = await (0, utils_1.readMarkdownFiles)(fullDocsDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
312
392
|
allDocFiles.push(...docFiles);
|
|
313
393
|
}
|
|
314
394
|
catch (err) {
|
|
315
|
-
|
|
395
|
+
utils_1.logger.warn(`Docs directory not found: ${fullDocsDir}`);
|
|
316
396
|
}
|
|
317
397
|
// Process blog if enabled
|
|
318
398
|
if (includeBlog) {
|
|
@@ -320,11 +400,11 @@ async function collectDocFiles(context) {
|
|
|
320
400
|
try {
|
|
321
401
|
await fs.access(blogDir);
|
|
322
402
|
// Collect all markdown files from blog directory
|
|
323
|
-
const blogFiles = await (0, utils_1.readMarkdownFiles)(blogDir, siteDir, ignoreFiles);
|
|
403
|
+
const blogFiles = await (0, utils_1.readMarkdownFiles)(blogDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
|
|
324
404
|
allDocFiles.push(...blogFiles);
|
|
325
405
|
}
|
|
326
406
|
catch (err) {
|
|
327
|
-
|
|
407
|
+
utils_1.logger.warn(`Blog directory not found: ${blogDir}`);
|
|
328
408
|
}
|
|
329
409
|
}
|
|
330
410
|
return allDocFiles;
|