docusaurus-plugin-llms 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,398 @@
1
+ "use strict";
2
+ /**
3
+ * LLM file generation functions for the docusaurus-plugin-llms plugin
4
+ */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
9
+ desc = { enumerable: true, get: function() { return m[k]; } };
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
17
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
18
+ }) : function(o, v) {
19
+ o["default"] = v;
20
+ });
21
+ var __importStar = (this && this.__importStar) || (function () {
22
+ var ownKeys = function(o) {
23
+ ownKeys = Object.getOwnPropertyNames || function (o) {
24
+ var ar = [];
25
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
26
+ return ar;
27
+ };
28
+ return ownKeys(o);
29
+ };
30
+ return function (mod) {
31
+ if (mod && mod.__esModule) return mod;
32
+ var result = {};
33
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
34
+ __setModuleDefault(result, mod);
35
+ return result;
36
+ };
37
+ })();
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.generateLLMFile = generateLLMFile;
40
+ exports.generateIndividualMarkdownFiles = generateIndividualMarkdownFiles;
41
+ exports.generateStandardLLMFiles = generateStandardLLMFiles;
42
+ exports.generateCustomLLMFiles = generateCustomLLMFiles;
43
+ exports.collectDocFiles = collectDocFiles;
44
+ const path = __importStar(require("path"));
45
+ const fs = __importStar(require("fs/promises"));
46
+ const utils_1 = require("./utils");
47
+ const processor_1 = require("./processor");
/**
 * Clean a description for use in a TOC item.
 *
 * Only the first line of the description is kept. A leading Markdown heading
 * marker (one or more `#` followed by whitespace) is stripped; `#` characters
 * anywhere else on the line (for example inline hashtags) are left alone.
 * The result is capped at 150 characters; when it is cut, the tail is
 * replaced by `...`.
 *
 * @param description - The original description (may be empty or undefined)
 * @returns Cleaned single-line description suitable for TOC
 */
function cleanDescriptionForToc(description) {
    if (!description)
        return '';
    // Accept both LF and CRLF so a Windows line ending does not leave a
    // stray '\r' at the end of the TOC entry.
    const firstLine = description.split(/\r?\n/, 1)[0];
    // Anchored at the start of the line: only a real heading marker matches.
    const cleaned = firstLine.replace(/^#+\s+/, '');
    if (cleaned.length <= 150)
        return cleaned;
    // Keep 147 UTF-16 units plus the ellipsis, but never end on the first
    // half of a surrogate pair (that would emit a broken character).
    let cut = 147;
    const lastUnit = cleaned.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        cut -= 1;
    return cleaned.substring(0, cut) + '...';
}
66
+ /**
67
+ * Generate an LLM-friendly file
68
+ * @param docs - Processed document information
69
+ * @param outputPath - Path to write the output file
70
+ * @param fileTitle - Title for the file
71
+ * @param fileDescription - Description for the file
72
+ * @param includeFullContent - Whether to include full content or just links
73
+ * @param version - Version of the file
74
+ * @param customRootContent - Optional custom content to include at the root level
75
+ */
76
+ async function generateLLMFile(docs, outputPath, fileTitle, fileDescription, includeFullContent, version, customRootContent) {
77
+ // Validate path length before proceeding
78
+ if (!(0, utils_1.validatePathLength)(outputPath)) {
79
+ throw new Error(`Output path exceeds maximum length: ${outputPath}`);
80
+ }
81
+ utils_1.logger.verbose(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
82
+ const versionInfo = version ? `\n\nVersion: ${version}` : '';
83
+ if (includeFullContent) {
84
+ // Generate full content file with header deduplication
85
+ const usedHeaders = new Set();
86
+ const fullContentSections = docs.map(doc => {
87
+ // Check if content already starts with the same heading to avoid duplication
88
+ const trimmedContent = doc.content.trim();
89
+ const contentLines = trimmedContent.split('\n');
90
+ const firstLine = contentLines.length > 0 ? contentLines[0] : '';
91
+ // Check if the first line is a heading that matches our title
92
+ const headingMatch = firstLine.match(/^#+\s+(.+)$/);
93
+ const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
94
+ // Generate unique header using the utility function
95
+ const uniqueHeader = (0, utils_1.ensureUniqueIdentifier)(doc.title, usedHeaders, (counter, base) => {
96
+ // Try to make it more descriptive by adding the file path info if available
97
+ if (doc.path && counter === 2) {
98
+ const pathParts = doc.path.split('/');
99
+ const folderName = pathParts.length >= 2 ? pathParts[pathParts.length - 2] : '';
100
+ if (folderName) {
101
+ return `(${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
102
+ }
103
+ }
104
+ return `(${counter})`;
105
+ });
106
+ if (firstHeadingText === doc.title) {
107
+ // Content already has the same heading, replace it with our unique header
108
+ const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
109
+ return `## ${uniqueHeader}
110
+
111
+ ${restOfContent}`;
112
+ }
113
+ else {
114
+ // Content doesn't have the same heading, add our unique H2 header
115
+ return `## ${uniqueHeader}
116
+
117
+ ${doc.content}`;
118
+ }
119
+ });
120
+ // Use custom root content or default message
121
+ const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
122
+ const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n${fullContentSections.join('\n\n---\n\n')}`, true // include metadata (description)
123
+ );
124
+ try {
125
+ await (0, utils_1.writeFile)(outputPath, llmFileContent);
126
+ }
127
+ catch (error) {
128
+ throw new Error(`Failed to write file ${outputPath}: ${getErrorMessage(error)}`);
129
+ }
130
+ }
131
+ else {
132
+ // Generate links-only file
133
+ const tocItems = docs.map(doc => {
134
+ // Clean and format the description for TOC
135
+ const cleanedDescription = cleanDescriptionForToc(doc.description);
136
+ return `- [${doc.title}](${doc.url})${cleanedDescription ? `: ${cleanedDescription}` : ''}`;
137
+ });
138
+ // Use custom root content or default message
139
+ const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
140
+ const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n## Table of Contents\n\n${tocItems.join('\n')}`, true // include metadata (description)
141
+ );
142
+ try {
143
+ await (0, utils_1.writeFile)(outputPath, llmFileContent);
144
+ }
145
+ catch (error) {
146
+ throw new Error(`Failed to write file ${outputPath}: ${getErrorMessage(error)}`);
147
+ }
148
+ }
149
+ utils_1.logger.info(`Generated: ${outputPath}`);
150
+ }
151
+ /**
152
+ * Generate individual markdown files for each document
153
+ * @param docs - Processed document information
154
+ * @param outputDir - Directory to write the markdown files
155
+ * @param siteUrl - Base site URL
156
+ * @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
157
+ * @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
158
+ * @param preserveDirectoryStructure - Whether to preserve the full directory structure (default: true)
159
+ * @returns Updated docs with new URLs pointing to generated markdown files
160
+ */
161
+ async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl, docsDir = 'docs', keepFrontMatter = [], preserveDirectoryStructure = true) {
162
+ const updatedDocs = [];
163
+ const usedPaths = new Set();
164
+ for (const doc of docs) {
165
+ // Use the original path structure as default filename.
166
+ let relativePath = doc.path
167
+ .replace(/^\/+/, '') // Remove leading slashes
168
+ .replace(/\.mdx?$/, '.md'); // Ensure .md extension
169
+ // Strip the docsDir prefix only if preserveDirectoryStructure is false
170
+ if (!preserveDirectoryStructure) {
171
+ relativePath = relativePath
172
+ .replace(new RegExp(`^${docsDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/`), ''); // Remove configured docs dir prefix
173
+ }
174
+ // If frontmatter has slug, use that.
175
+ if (doc.frontMatter?.slug && typeof doc.frontMatter.slug === 'string') {
176
+ const slug = doc.frontMatter.slug.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
177
+ if (slug) { // Only process if slug is not empty after trimming
178
+ if (slug.includes('/')) {
179
+ // Nested slug: create directory structure
180
+ relativePath = slug + '.md';
181
+ }
182
+ else {
183
+ // Simple slug: replace just the filename
184
+ const pathParts = relativePath.replace(/\.md$/, '').split('/');
185
+ pathParts[pathParts.length - 1] = slug;
186
+ relativePath = pathParts.join('/') + '.md';
187
+ }
188
+ }
189
+ }
190
+ // Otherwise, if frontmatter has id, use that.
191
+ else if (doc.frontMatter?.id && typeof doc.frontMatter.id === 'string') {
192
+ const id = doc.frontMatter.id.trim().replace(/^\/+|\/+$/g, ''); // Trim whitespace and slashes
193
+ if (id) { // Only process if id is not empty after trimming
194
+ if (id.includes('/')) {
195
+ // Nested id: create directory structure
196
+ relativePath = id + '.md';
197
+ }
198
+ else {
199
+ // Simple id: replace just the filename
200
+ const pathParts = relativePath.replace(/\.md$/, '').split('/');
201
+ pathParts[pathParts.length - 1] = id;
202
+ relativePath = pathParts.join('/') + '.md';
203
+ }
204
+ }
205
+ }
206
+ // Trim any leading/trailing whitespace from the path
207
+ relativePath = relativePath.trim();
208
+ // If path is empty or invalid, create a fallback path
209
+ if (!relativePath || relativePath === '.md' || relativePath === '') {
210
+ const sanitizedTitle = (0, utils_1.sanitizeForFilename)(doc.title, 'untitled');
211
+ relativePath = `${sanitizedTitle}.md`;
212
+ }
213
+ // Ensure path uniqueness
214
+ let uniquePath = relativePath;
215
+ let counter = 1;
216
+ const MAX_PATH_ITERATIONS = 10000;
217
+ let pathIterations = 0;
218
+ while (usedPaths.has(uniquePath.toLowerCase())) {
219
+ counter++;
220
+ const pathParts = relativePath.split('.');
221
+ const extension = pathParts.pop() || 'md';
222
+ const basePath = pathParts.join('.');
223
+ uniquePath = `${basePath}-${counter}.${extension}`;
224
+ pathIterations++;
225
+ if (pathIterations >= MAX_PATH_ITERATIONS) {
226
+ // Fallback to timestamp
227
+ const timestamp = Date.now();
228
+ uniquePath = `${basePath}-${timestamp}.${extension}`;
229
+ utils_1.logger.warn(`Maximum iterations reached for unique path. Using timestamp: ${uniquePath}`);
230
+ break;
231
+ }
232
+ }
233
+ usedPaths.add(uniquePath.toLowerCase());
234
+ // Create the full file path and validate/shorten if needed
235
+ let fullPath = path.join(outputDir, uniquePath);
236
+ fullPath = (0, utils_1.shortenPathIfNeeded)(fullPath, outputDir, uniquePath);
237
+ // Update uniquePath to reflect the shortened path if it was changed
238
+ if (fullPath !== path.join(outputDir, uniquePath)) {
239
+ uniquePath = path.relative(outputDir, fullPath);
240
+ }
241
+ const directory = path.dirname(fullPath);
242
+ // Create directory structure if it doesn't exist
243
+ try {
244
+ await fs.mkdir(directory, { recursive: true });
245
+ }
246
+ catch (error) {
247
+ throw new Error(`Failed to create directory ${directory}: ${getErrorMessage(error)}`);
248
+ }
249
+ // Extract preserved frontmatter if specified
250
+ let preservedFrontMatter = {};
251
+ if (keepFrontMatter.length > 0 && doc.frontMatter) {
252
+ for (const key of keepFrontMatter) {
253
+ if (key in doc.frontMatter) {
254
+ preservedFrontMatter[key] = doc.frontMatter[key];
255
+ }
256
+ }
257
+ }
258
+ // Create markdown content using the utility function
259
+ const markdownContent = (0, utils_1.createMarkdownContent)(doc.title, doc.description, doc.content, true, // includeMetadata
260
+ Object.keys(preservedFrontMatter).length > 0 ? preservedFrontMatter : undefined);
261
+ // Write the markdown file
262
+ try {
263
+ await (0, utils_1.writeFile)(fullPath, markdownContent);
264
+ }
265
+ catch (error) {
266
+ throw new Error(`Failed to write file ${fullPath}: ${getErrorMessage(error)}`);
267
+ }
268
+ // Create updated DocInfo with new URL pointing to the generated markdown file
269
+ // Convert file path to URL path (use forward slashes)
270
+ const urlPath = (0, utils_1.normalizePath)(uniquePath);
271
+ const newUrl = `${siteUrl}/${urlPath}`;
272
+ updatedDocs.push({
273
+ ...doc,
274
+ url: newUrl,
275
+ path: `/${urlPath}` // Update path to the new markdown file
276
+ });
277
+ utils_1.logger.verbose(`Generated markdown file: ${uniquePath}`);
278
+ }
279
+ return updatedDocs;
280
+ }
281
+ /**
282
+ * Generate standard LLM files (llms.txt and llms-full.txt)
283
+ * @param context - Plugin context
284
+ * @param allDocFiles - Array of all document files
285
+ */
286
+ async function generateStandardLLMFiles(context, allDocFiles) {
287
+ const { outDir, siteUrl, docTitle, docDescription, options } = context;
288
+ const { generateLLMsTxt, generateLLMsFullTxt, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeOrder = [], includeUnmatchedLast = true, version, generateMarkdownFiles = false, rootContent, fullRootContent, processingBatchSize = 100 } = options;
289
+ if (!generateLLMsTxt && !generateLLMsFullTxt) {
290
+ utils_1.logger.warn('No standard LLM files configured for generation. Skipping.');
291
+ return;
292
+ }
293
+ // Process files for the standard outputs
294
+ let processedDocs = await (0, processor_1.processFilesWithPatterns)(context, allDocFiles, [], // No specific include patterns - include all
295
+ [], // No additional ignore patterns beyond global ignoreFiles
296
+ includeOrder, includeUnmatchedLast);
297
+ utils_1.logger.verbose(`Processed ${processedDocs.length} documentation files for standard LLM files`);
298
+ // Check if we have documents to process
299
+ if (processedDocs.length === 0) {
300
+ utils_1.logger.warn('No documents found matching patterns for standard LLM files. Skipping.');
301
+ return;
302
+ }
303
+ // Generate individual markdown files if requested
304
+ if (generateMarkdownFiles) {
305
+ utils_1.logger.info('Generating individual markdown files...');
306
+ processedDocs = await generateIndividualMarkdownFiles(processedDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || [], context.options.preserveDirectoryStructure !== false // Default to true
307
+ );
308
+ }
309
+ // Generate llms.txt
310
+ if (generateLLMsTxt) {
311
+ const llmsTxtPath = path.join(outDir, llmsTxtFilename);
312
+ await generateLLMFile(processedDocs, llmsTxtPath, docTitle, docDescription, false, // links only
313
+ version, rootContent);
314
+ }
315
+ // Generate llms-full.txt
316
+ if (generateLLMsFullTxt) {
317
+ const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
318
+ await generateLLMFile(processedDocs, llmsFullTxtPath, docTitle, docDescription, true, // full content
319
+ version, fullRootContent);
320
+ }
321
+ }
/**
 * Generate custom LLM files based on configuration.
 *
 * Each entry of `options.customLLMFiles` is handled independently: the
 * collected files are filtered through `processFilesWithPatterns` using the
 * entry's include/order patterns and the global ignore list extended with the
 * entry's own ignore patterns. Entries that match no document are skipped
 * with a warning; the others are written to `<outDir>/<filename>`.
 *
 * @param context - Plugin context
 * @param allDocFiles - Array of all document files
 */
async function generateCustomLLMFiles(context, allDocFiles) {
    const { outDir, siteUrl, docTitle, docDescription, options } = context;
    const { customLLMFiles = [], ignoreFiles = [], generateMarkdownFiles = false } = options;
    if (customLLMFiles.length === 0) {
        utils_1.logger.warn('No custom LLM files configured. Skipping.');
        return;
    }
    utils_1.logger.info(`Generating ${customLLMFiles.length} custom LLM files...`);
    for (const entry of customLLMFiles) {
        utils_1.logger.verbose(`Processing custom file: ${entry.filename}, version: ${entry.version || 'undefined'}`);
        // Global ignores first, then the entry's own ignores
        const ignoreList = [...ignoreFiles, ...(entry.ignorePatterns || [])];
        let matchedDocs = await (0, processor_1.processFilesWithPatterns)(context, allDocFiles, entry.includePatterns, ignoreList, entry.orderPatterns || [], entry.includeUnmatchedLast ?? false);
        if (matchedDocs.length === 0) {
            utils_1.logger.warn(`No matching documents found for custom LLM file: ${entry.filename}`);
            continue;
        }
        if (generateMarkdownFiles) {
            // Per-document markdown files were requested as well
            utils_1.logger.info(`Generating individual markdown files for custom file: ${entry.filename}...`);
            const keptKeys = context.options.keepFrontMatter || [];
            const keepStructure = context.options.preserveDirectoryStructure !== false; // Default to true
            matchedDocs = await generateIndividualMarkdownFiles(matchedDocs, outDir, siteUrl, context.docsDir, keptKeys, keepStructure);
        }
        // Entry-specific title/description win over the site-wide defaults
        const title = entry.title || docTitle;
        const description = entry.description || docDescription;
        const targetPath = path.join(outDir, entry.filename);
        await generateLLMFile(matchedDocs, targetPath, title, description, entry.fullContent, entry.version, entry.rootContent);
        utils_1.logger.info(`Generated custom LLM file: ${entry.filename} with ${matchedDocs.length} documents`);
    }
}
364
+ /**
365
+ * Collect all document files from docs directory and optionally blog
366
+ * @param context - Plugin context
367
+ * @returns Array of file paths
368
+ */
369
+ async function collectDocFiles(context) {
370
+ const { siteDir, docsDir, options } = context;
371
+ const { ignoreFiles = [], includeBlog = false, warnOnIgnoredFiles = false } = options;
372
+ const allDocFiles = [];
373
+ // Process docs directory
374
+ const fullDocsDir = path.join(siteDir, docsDir);
375
+ try {
376
+ await fs.access(fullDocsDir);
377
+ // Collect all markdown files from docs directory
378
+ const docFiles = await (0, utils_1.readMarkdownFiles)(fullDocsDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
379
+ allDocFiles.push(...docFiles);
380
+ }
381
+ catch (err) {
382
+ utils_1.logger.warn(`Docs directory not found: ${fullDocsDir}`);
383
+ }
384
+ // Process blog if enabled
385
+ if (includeBlog) {
386
+ const blogDir = path.join(siteDir, 'blog');
387
+ try {
388
+ await fs.access(blogDir);
389
+ // Collect all markdown files from blog directory
390
+ const blogFiles = await (0, utils_1.readMarkdownFiles)(blogDir, siteDir, ignoreFiles, docsDir, warnOnIgnoredFiles);
391
+ allDocFiles.push(...blogFiles);
392
+ }
393
+ catch (err) {
394
+ utils_1.logger.warn(`Blog directory not found: ${blogDir}`);
395
+ }
396
+ }
397
+ return allDocFiles;
398
+ }
@@ -11,8 +11,9 @@ import { DocInfo, PluginContext } from './types';
11
11
  * @param includeFullContent - Whether to include full content or just links
12
12
  * @param version - Version of the file
13
13
  * @param customRootContent - Optional custom content to include at the root level
14
+ * @param batchSize - Batch size for processing documents (default: 100)
14
15
  */
15
- export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fileTitle: string, fileDescription: string, includeFullContent: boolean, version?: string, customRootContent?: string): Promise<void>;
16
+ export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fileTitle: string, fileDescription: string, includeFullContent: boolean, version?: string, customRootContent?: string, batchSize?: number): Promise<void>;
16
17
  /**
17
18
  * Generate individual markdown files for each document
18
19
  * @param docs - Processed document information
@@ -20,9 +21,10 @@ export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fil
20
21
  * @param siteUrl - Base site URL
21
22
  * @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
22
23
  * @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
24
+ * @param preserveDirectoryStructure - Whether to preserve the full directory structure (default: true)
23
25
  * @returns Updated docs with new URLs pointing to generated markdown files
24
26
  */
25
- export declare function generateIndividualMarkdownFiles(docs: DocInfo[], outputDir: string, siteUrl: string, docsDir?: string, keepFrontMatter?: string[]): Promise<DocInfo[]>;
27
+ export declare function generateIndividualMarkdownFiles(docs: DocInfo[], outputDir: string, siteUrl: string, docsDir?: string, keepFrontMatter?: string[], preserveDirectoryStructure?: boolean): Promise<DocInfo[]>;
26
28
  /**
27
29
  * Generate standard LLM files (llms.txt and llms-full.txt)
28
30
  * @param context - Plugin context