docusaurus-plugin-llms 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +306 -17
- package/lib/generator.d.ts +32 -0
- package/lib/generator.js +212 -0
- package/lib/index.d.ts +1 -24
- package/lib/index.js +39 -288
- package/lib/processor.d.ts +28 -0
- package/lib/processor.js +211 -0
- package/lib/utils.d.ts +53 -0
- package/lib/utils.js +177 -0
- package/package.json +4 -2
- package/src/generator.ts +266 -0
- package/src/index.ts +48 -348
- package/src/processor.ts +236 -0
- package/src/types.ts +113 -0
- package/src/utils.ts +165 -0
package/lib/utils.js
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
"use strict";
|
2
|
+
/**
|
3
|
+
* Utility functions for the docusaurus-plugin-llms plugin
|
4
|
+
*/
|
5
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
6
|
+
if (k2 === undefined) k2 = k;
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
8
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
10
|
+
}
|
11
|
+
Object.defineProperty(o, k2, desc);
|
12
|
+
}) : (function(o, m, k, k2) {
|
13
|
+
if (k2 === undefined) k2 = k;
|
14
|
+
o[k2] = m[k];
|
15
|
+
}));
|
16
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
18
|
+
}) : function(o, v) {
|
19
|
+
o["default"] = v;
|
20
|
+
});
|
21
|
+
var __importStar = (this && this.__importStar) || (function () {
|
22
|
+
var ownKeys = function(o) {
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
24
|
+
var ar = [];
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
26
|
+
return ar;
|
27
|
+
};
|
28
|
+
return ownKeys(o);
|
29
|
+
};
|
30
|
+
return function (mod) {
|
31
|
+
if (mod && mod.__esModule) return mod;
|
32
|
+
var result = {};
|
33
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
34
|
+
__setModuleDefault(result, mod);
|
35
|
+
return result;
|
36
|
+
};
|
37
|
+
})();
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
39
|
+
exports.writeFile = writeFile;
|
40
|
+
exports.readFile = readFile;
|
41
|
+
exports.shouldIgnoreFile = shouldIgnoreFile;
|
42
|
+
exports.readMarkdownFiles = readMarkdownFiles;
|
43
|
+
exports.extractTitle = extractTitle;
|
44
|
+
exports.cleanMarkdownContent = cleanMarkdownContent;
|
45
|
+
exports.applyPathTransformations = applyPathTransformations;
|
46
|
+
const fs = __importStar(require("fs/promises"));
|
47
|
+
const path = __importStar(require("path"));
|
48
|
+
const minimatch_1 = require("minimatch");
|
49
|
+
/**
 * Persist text content to disk at the given location (UTF-8).
 * @param filePath - Destination path for the file
 * @param data - Text content to store
 */
async function writeFile(filePath, data) {
    await fs.writeFile(filePath, data, 'utf8');
}
|
57
|
+
/**
 * Load the full text of a file from disk (UTF-8).
 * @param filePath - Location of the file to load
 * @returns The file's text content
 */
async function readFile(filePath) {
    const contents = await fs.readFile(filePath, 'utf8');
    return contents;
}
|
65
|
+
/**
 * Decide whether a file is excluded by the configured ignore globs.
 *
 * The path is made relative to `baseDir` before matching, and patterns are
 * matched with `matchBase` so a bare filename pattern matches at any depth.
 *
 * @param filePath - Absolute or relative path of the candidate file
 * @param baseDir - Directory the ignore patterns are relative to
 * @param ignorePatterns - Glob patterns for files to ignore
 * @returns true when any pattern matches the file
 */
function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
    if (!ignorePatterns.length) {
        return false;
    }
    const relative = path.relative(baseDir, filePath);
    for (const pattern of ignorePatterns) {
        if ((0, minimatch_1.minimatch)(relative, pattern, { matchBase: true })) {
            return true;
        }
    }
    return false;
}
|
79
|
+
/**
 * Walk a directory tree and collect every Markdown file in it.
 *
 * Ignored entries (per `shouldIgnoreFile`) are skipped entirely, including
 * whole subdirectories, so ignored folders are never descended into.
 *
 * @param dir - Directory to scan
 * @param baseDir - Base directory the ignore patterns are relative to
 * @param ignorePatterns - Glob patterns for files to ignore
 * @returns Paths of all .md / .mdx files found
 */
async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
    const collected = [];
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
        const entryPath = path.join(dir, entry.name);
        if (shouldIgnoreFile(entryPath, baseDir, ignorePatterns)) {
            continue;
        }
        if (entry.isDirectory()) {
            // Recurse into subdirectories and merge their results.
            collected.push(...await readMarkdownFiles(entryPath, baseDir, ignorePatterns));
        }
        else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
            collected.push(entryPath);
        }
    }
    return collected;
}
|
104
|
+
/**
 * Derive a document title, trying sources in priority order:
 * frontmatter `title`, then the first level-1 Markdown heading,
 * then a title-cased version of the file name.
 *
 * @param data - Frontmatter data
 * @param content - Markdown content
 * @param filePath - Path to the file
 * @returns The extracted title
 */
function extractTitle(data, content, filePath) {
    // Frontmatter title wins when present.
    if (data.title) {
        return data.title;
    }
    // Otherwise use the first "# ..." heading in the body.
    const heading = /^#\s+(.*)/m.exec(content);
    if (heading) {
        return heading[1].trim();
    }
    // Fall back to the file name: dashes become spaces, words are capitalized.
    const base = path.basename(filePath, path.extname(filePath));
    return base
        .replace(/-/g, ' ')
        .replace(/\b\w/g, ch => ch.toUpperCase());
}
|
126
|
+
/**
 * Normalize markdown content for LLM consumption.
 *
 * Strips anything that looks like an HTML tag, converts CRLF line endings
 * to LF, collapses runs of 3+ newlines to a single blank line, and trims
 * surrounding whitespace.
 *
 * @param content - Raw markdown content
 * @returns The cleaned content
 */
function cleanMarkdownContent(content) {
    const withoutTags = content.replace(/<[^>]*>/g, '');
    return withoutTags
        .replace(/\r\n/g, '\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
|
140
|
+
/**
 * Apply path transformations according to configuration.
 *
 * First removes every configured `ignorePaths` segment wherever it appears
 * as a complete path segment, then prepends each `addPaths` entry (in the
 * configured order) unless it is already present at the front.
 *
 * @param urlPath - Original URL path
 * @param pathTransformation - Path transformation configuration
 * @returns Transformed URL path
 */
function applyPathTransformations(urlPath, pathTransformation) {
    if (!pathTransformation) {
        return urlPath;
    }
    let transformedPath = urlPath;
    // Remove ignored path segments
    if (pathTransformation.ignorePaths?.length) {
        for (const ignorePath of pathTransformation.ignorePaths) {
            // Escape regex metacharacters so a segment like "v1.0" matches
            // literally ("." previously matched any character) and a segment
            // containing "(" or "[" cannot throw a SyntaxError at runtime.
            const escaped = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            // Match the ignored segment at the beginning, middle, or end of
            // the path; the surrounding separators anchor complete segments.
            const ignoreRegex = new RegExp(`(^|/)(${escaped})(/|$)`, 'g');
            transformedPath = transformedPath.replace(ignoreRegex, '$1$3');
        }
        // Clean up any double slashes that might have been created
        transformedPath = transformedPath.replace(/\/+/g, '/');
        // Remove leading slash if present
        transformedPath = transformedPath.replace(/^\//, '');
    }
    // Add path segments if they're not already present
    if (pathTransformation.addPaths?.length) {
        // Process in reverse order to maintain the specified order in the
        // final path, because each path is prepended to the front.
        const pathsToAdd = [...pathTransformation.addPaths].reverse();
        for (const addPath of pathsToAdd) {
            // Only add if not already present at the beginning
            if (!transformedPath.startsWith(addPath + '/') && transformedPath !== addPath) {
                transformedPath = `${addPath}/${transformedPath}`;
            }
        }
    }
    return transformedPath;
}
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "docusaurus-plugin-llms",
|
3
|
-
"version": "0.1.1",
|
3
|
+
"version": "0.1.3",
|
4
4
|
"description": "Docusaurus plugin for generating LLM-friendly documentation following the llmtxt.org standard",
|
5
5
|
"main": "lib/index.js",
|
6
6
|
"scripts": {
|
@@ -8,7 +8,9 @@
|
|
8
8
|
"watch": "tsc --watch",
|
9
9
|
"cleanup": "node cleanup.js",
|
10
10
|
"prepublishOnly": "npm run build && npm run cleanup",
|
11
|
-
"test": "
|
11
|
+
"test:unit": "node tests/test-path-transforms.js",
|
12
|
+
"test:integration": "node tests/test-path-transformation.js",
|
13
|
+
"test": "npm run build && npm run test:unit && npm run test:integration"
|
12
14
|
},
|
13
15
|
"files": [
|
14
16
|
"lib",
|
package/src/generator.ts
ADDED
@@ -0,0 +1,266 @@
|
|
1
|
+
/**
|
2
|
+
* LLM file generation functions for the docusaurus-plugin-llms plugin
|
3
|
+
*/
|
4
|
+
|
5
|
+
import * as path from 'path';
|
6
|
+
import * as fs from 'fs/promises';
|
7
|
+
import { DocInfo, PluginContext, CustomLLMFile } from './types';
|
8
|
+
import { writeFile, readMarkdownFiles } from './utils';
|
9
|
+
import { processFilesWithPatterns } from './processor';
|
10
|
+
|
11
|
+
/**
|
12
|
+
* Clean a description for use in a TOC item
|
13
|
+
* @param description - The original description
|
14
|
+
* @returns Cleaned description suitable for TOC
|
15
|
+
*/
|
16
|
+
function cleanDescriptionForToc(description: string): string {
|
17
|
+
if (!description) return '';
|
18
|
+
|
19
|
+
// Get just the first line for TOC display
|
20
|
+
const firstLine = description.split('\n')[0];
|
21
|
+
|
22
|
+
// Remove heading markers only at the beginning of the line
|
23
|
+
// Be careful to only remove actual heading markers (# followed by space at beginning)
|
24
|
+
// and not hashtag symbols that are part of the content (inline hashtags)
|
25
|
+
const cleaned = firstLine.replace(/^(#+)\s+/g, '');
|
26
|
+
|
27
|
+
// Truncate if too long (150 characters max with ellipsis)
|
28
|
+
return cleaned.length > 150 ? cleaned.substring(0, 147) + '...' : cleaned;
|
29
|
+
}
|
30
|
+
|
31
|
+
/**
|
32
|
+
* Generate an LLM-friendly file
|
33
|
+
* @param docs - Processed document information
|
34
|
+
* @param outputPath - Path to write the output file
|
35
|
+
* @param fileTitle - Title for the file
|
36
|
+
* @param fileDescription - Description for the file
|
37
|
+
* @param includeFullContent - Whether to include full content or just links
|
38
|
+
* @param version - Version of the file
|
39
|
+
*/
|
40
|
+
export async function generateLLMFile(
|
41
|
+
docs: DocInfo[],
|
42
|
+
outputPath: string,
|
43
|
+
fileTitle: string,
|
44
|
+
fileDescription: string,
|
45
|
+
includeFullContent: boolean,
|
46
|
+
version?: string
|
47
|
+
): Promise<void> {
|
48
|
+
console.log(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
|
49
|
+
const versionInfo = version ? `\n\nVersion: ${version}` : '';
|
50
|
+
|
51
|
+
if (includeFullContent) {
|
52
|
+
// Generate full content file
|
53
|
+
const fullContentSections = docs.map(doc => {
|
54
|
+
return `## ${doc.title}
|
55
|
+
|
56
|
+
${doc.content}`;
|
57
|
+
});
|
58
|
+
|
59
|
+
const llmFileContent = `# ${fileTitle}
|
60
|
+
|
61
|
+
> ${fileDescription}${versionInfo}
|
62
|
+
|
63
|
+
This file contains all documentation content in a single document following the llmtxt.org standard.
|
64
|
+
|
65
|
+
${fullContentSections.join('\n\n---\n\n')}
|
66
|
+
`;
|
67
|
+
|
68
|
+
await writeFile(outputPath, llmFileContent);
|
69
|
+
} else {
|
70
|
+
// Generate links-only file
|
71
|
+
const tocItems = docs.map(doc => {
|
72
|
+
// Clean and format the description for TOC
|
73
|
+
const cleanedDescription = cleanDescriptionForToc(doc.description);
|
74
|
+
|
75
|
+
return `- [${doc.title}](${doc.url})${cleanedDescription ? `: ${cleanedDescription}` : ''}`;
|
76
|
+
});
|
77
|
+
|
78
|
+
const llmFileContent = `# ${fileTitle}
|
79
|
+
|
80
|
+
> ${fileDescription}${versionInfo}
|
81
|
+
|
82
|
+
This file contains links to documentation sections following the llmtxt.org standard.
|
83
|
+
|
84
|
+
## Table of Contents
|
85
|
+
|
86
|
+
${tocItems.join('\n')}
|
87
|
+
`;
|
88
|
+
|
89
|
+
await writeFile(outputPath, llmFileContent);
|
90
|
+
}
|
91
|
+
|
92
|
+
console.log(`Generated: ${outputPath}`);
|
93
|
+
}
|
94
|
+
|
95
|
+
/**
|
96
|
+
* Generate standard LLM files (llms.txt and llms-full.txt)
|
97
|
+
* @param context - Plugin context
|
98
|
+
* @param allDocFiles - Array of all document files
|
99
|
+
*/
|
100
|
+
export async function generateStandardLLMFiles(
|
101
|
+
context: PluginContext,
|
102
|
+
allDocFiles: string[]
|
103
|
+
): Promise<void> {
|
104
|
+
const {
|
105
|
+
outDir,
|
106
|
+
docTitle,
|
107
|
+
docDescription,
|
108
|
+
options
|
109
|
+
} = context;
|
110
|
+
|
111
|
+
const {
|
112
|
+
generateLLMsTxt,
|
113
|
+
generateLLMsFullTxt,
|
114
|
+
llmsTxtFilename = 'llms.txt',
|
115
|
+
llmsFullTxtFilename = 'llms-full.txt',
|
116
|
+
includeOrder = [],
|
117
|
+
includeUnmatchedLast = true,
|
118
|
+
version
|
119
|
+
} = options;
|
120
|
+
|
121
|
+
if (!generateLLMsTxt && !generateLLMsFullTxt) {
|
122
|
+
return;
|
123
|
+
}
|
124
|
+
|
125
|
+
// Process files for the standard outputs
|
126
|
+
const processedDocs = await processFilesWithPatterns(
|
127
|
+
context,
|
128
|
+
allDocFiles,
|
129
|
+
[], // No specific include patterns - include all
|
130
|
+
[], // No additional ignore patterns beyond global ignoreFiles
|
131
|
+
includeOrder,
|
132
|
+
includeUnmatchedLast
|
133
|
+
);
|
134
|
+
|
135
|
+
console.log(`Processed ${processedDocs.length} documentation files for standard LLM files`);
|
136
|
+
|
137
|
+
// Generate llms.txt
|
138
|
+
if (generateLLMsTxt) {
|
139
|
+
const llmsTxtPath = path.join(outDir, llmsTxtFilename);
|
140
|
+
await generateLLMFile(
|
141
|
+
processedDocs,
|
142
|
+
llmsTxtPath,
|
143
|
+
docTitle,
|
144
|
+
docDescription,
|
145
|
+
false, // links only
|
146
|
+
version
|
147
|
+
);
|
148
|
+
}
|
149
|
+
|
150
|
+
// Generate llms-full.txt
|
151
|
+
if (generateLLMsFullTxt) {
|
152
|
+
const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
|
153
|
+
await generateLLMFile(
|
154
|
+
processedDocs,
|
155
|
+
llmsFullTxtPath,
|
156
|
+
docTitle,
|
157
|
+
docDescription,
|
158
|
+
true, // full content
|
159
|
+
version
|
160
|
+
);
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
/**
|
165
|
+
* Generate custom LLM files based on configuration
|
166
|
+
* @param context - Plugin context
|
167
|
+
* @param allDocFiles - Array of all document files
|
168
|
+
*/
|
169
|
+
export async function generateCustomLLMFiles(
|
170
|
+
context: PluginContext,
|
171
|
+
allDocFiles: string[]
|
172
|
+
): Promise<void> {
|
173
|
+
const { outDir, docTitle, docDescription, options } = context;
|
174
|
+
const { customLLMFiles = [], ignoreFiles = [] } = options;
|
175
|
+
|
176
|
+
if (customLLMFiles.length === 0) {
|
177
|
+
return;
|
178
|
+
}
|
179
|
+
|
180
|
+
console.log(`Generating ${customLLMFiles.length} custom LLM files...`);
|
181
|
+
|
182
|
+
for (const customFile of customLLMFiles) {
|
183
|
+
console.log(`Processing custom file: ${customFile.filename}, version: ${customFile.version || 'undefined'}`);
|
184
|
+
|
185
|
+
// Combine global ignores with custom ignores
|
186
|
+
const combinedIgnores = [...ignoreFiles];
|
187
|
+
if (customFile.ignorePatterns) {
|
188
|
+
combinedIgnores.push(...customFile.ignorePatterns);
|
189
|
+
}
|
190
|
+
|
191
|
+
// Process files according to the custom configuration
|
192
|
+
const customDocs = await processFilesWithPatterns(
|
193
|
+
context,
|
194
|
+
allDocFiles,
|
195
|
+
customFile.includePatterns,
|
196
|
+
combinedIgnores,
|
197
|
+
customFile.orderPatterns || [],
|
198
|
+
customFile.includeUnmatchedLast ?? false
|
199
|
+
);
|
200
|
+
|
201
|
+
if (customDocs.length > 0) {
|
202
|
+
// Use custom title/description or fall back to defaults
|
203
|
+
const customTitle = customFile.title || docTitle;
|
204
|
+
const customDescription = customFile.description || docDescription;
|
205
|
+
|
206
|
+
// Generate the custom LLM file
|
207
|
+
const customFilePath = path.join(outDir, customFile.filename);
|
208
|
+
await generateLLMFile(
|
209
|
+
customDocs,
|
210
|
+
customFilePath,
|
211
|
+
customTitle,
|
212
|
+
customDescription,
|
213
|
+
customFile.fullContent,
|
214
|
+
customFile.version
|
215
|
+
);
|
216
|
+
|
217
|
+
console.log(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);
|
218
|
+
} else {
|
219
|
+
console.warn(`No matching documents found for custom LLM file: ${customFile.filename}`);
|
220
|
+
}
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
/**
|
225
|
+
* Collect all document files from docs directory and optionally blog
|
226
|
+
* @param context - Plugin context
|
227
|
+
* @returns Array of file paths
|
228
|
+
*/
|
229
|
+
export async function collectDocFiles(context: PluginContext): Promise<string[]> {
|
230
|
+
const { siteDir, docsDir, options } = context;
|
231
|
+
const { ignoreFiles = [], includeBlog = false } = options;
|
232
|
+
|
233
|
+
const allDocFiles: string[] = [];
|
234
|
+
|
235
|
+
// Process docs directory
|
236
|
+
const fullDocsDir = path.join(siteDir, docsDir);
|
237
|
+
|
238
|
+
try {
|
239
|
+
await fs.access(fullDocsDir);
|
240
|
+
|
241
|
+
// Collect all markdown files from docs directory
|
242
|
+
const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);
|
243
|
+
allDocFiles.push(...docFiles);
|
244
|
+
|
245
|
+
} catch (err) {
|
246
|
+
console.warn(`Docs directory not found: ${fullDocsDir}`);
|
247
|
+
}
|
248
|
+
|
249
|
+
// Process blog if enabled
|
250
|
+
if (includeBlog) {
|
251
|
+
const blogDir = path.join(siteDir, 'blog');
|
252
|
+
|
253
|
+
try {
|
254
|
+
await fs.access(blogDir);
|
255
|
+
|
256
|
+
// Collect all markdown files from blog directory
|
257
|
+
const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);
|
258
|
+
allDocFiles.push(...blogFiles);
|
259
|
+
|
260
|
+
} catch (err) {
|
261
|
+
console.warn(`Blog directory not found: ${blogDir}`);
|
262
|
+
}
|
263
|
+
}
|
264
|
+
|
265
|
+
return allDocFiles;
|
266
|
+
}
|