docusaurus-plugin-llms 0.1.0
- package/README.md +129 -0
- package/lib/index.d.ts +43 -0
- package/lib/index.js +330 -0
- package/package.json +52 -0
- package/src/declarations.d.ts +0 -0
- package/src/index.ts +415 -0
package/README.md
ADDED
@@ -0,0 +1,129 @@
# docusaurus-plugin-llms

A Docusaurus plugin for generating LLM-friendly documentation following the [llmtxt standard](https://llmtxt.org/).

## Installation

There are two ways to use this plugin:

### 1. Direct Integration (Simplest Method)

For quick integration, create a plugin file directly in your Docusaurus project:

```bash
mkdir -p src/plugins/llms
```

Then create a file at `src/plugins/llms/index.js` with the plugin code. Finally, add it to your `docusaurus.config.js`:

```js
module.exports = {
  // ... your existing Docusaurus config
  plugins: [
    require('./src/plugins/llms'),
    // ... your other plugins
  ],
};
```
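If you go this route, the local plugin file can start from a skeleton like the following (a sketch only; the complete implementation is the `lib/index.js` shown later in this diff):

```js
// src/plugins/llms/index.js — minimal local-plugin skeleton (illustrative)
module.exports = function docusaurusPluginLLMs(context, options = {}) {
  return {
    name: 'docusaurus-plugin-llms',
    // Runs after the static site is built; the real implementation writes
    // llms.txt and llms-full.txt into context.outDir here.
    async postBuild() {
      // see package/lib/index.js below for the full logic
    },
  };
};
```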
### 2. As a Package (Not Yet Published)

```bash
npm install docusaurus-plugin-llms --save-dev
```

Then add to your Docusaurus configuration:

```js
module.exports = {
  // ... your existing Docusaurus config
  plugins: [
    'docusaurus-plugin-llms',
    // ... your other plugins
  ],
};
```

## Configuration Options

You can configure the plugin by passing options:

```js
module.exports = {
  // ... your existing Docusaurus config
  plugins: [
    [
      'docusaurus-plugin-llms',
      {
        // Options here
        generateLLMsTxt: true,
        generateLLMsFullTxt: true,
        docsDir: 'docs',
        ignoreFiles: ['advanced/*', 'private/*'],
        title: 'My Project Documentation',
        description: 'Complete reference documentation for My Project',
        includeBlog: true,
      },
    ],
    // ... your other plugins
  ],
};
```

### Available Options

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `generateLLMsTxt` | boolean | `true` | Whether to generate the links file |
| `generateLLMsFullTxt` | boolean | `true` | Whether to generate the full content file |
| `docsDir` | string | `'docs'` | Base directory for documentation files |
| `ignoreFiles` | string[] | `[]` | Array of glob patterns for files to ignore |
| `title` | string | Site title | Custom title to use in generated files |
| `description` | string | Site tagline | Custom description to use in generated files |
| `llmsTxtFilename` | string | `'llms.txt'` | Custom filename for the links file |
| `llmsFullTxtFilename` | string | `'llms-full.txt'` | Custom filename for the full content file |
| `includeBlog` | boolean | `false` | Whether to include blog content |
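The two filename options are not exercised by the example above; overriding them would look like this (filenames are illustrative):

```js
module.exports = {
  plugins: [
    [
      'docusaurus-plugin-llms',
      {
        // Write the generated files under custom names
        llmsTxtFilename: 'llms-links.txt',
        llmsFullTxtFilename: 'llms-everything.txt',
      },
    ],
  ],
};
```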
## How It Works

This plugin automatically generates the following files during the build process:

- **llms.txt**: Contains links to all sections of your documentation
- **llms-full.txt**: Contains all documentation content in a single file

These files follow the [llmtxt standard](https://llmtxt.org/), making your documentation optimized for use with Large Language Models (LLMs).
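As a concrete illustration, the generated `llms.txt` follows the template in the plugin source shown later in this diff; for a site titled "My Project" it would be shaped like this (titles, URLs, and descriptions below are invented):

```markdown
# My Project

> Complete reference documentation for My Project

This file contains links to all documentation sections following the llmtxt.org standard.

## Table of Contents

- [Getting Started](https://example.com/docs/getting-started): A quick introduction.
- [Installation](https://example.com/docs/installation): How to install the project.
```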
## Features

- ⚡️ Easy integration with Docusaurus
- ✅ Zero config required, works out of the box
- ⚙️ Highly customizable with multiple options
- Creates `llms.txt` with section links
- Produces `llms-full.txt` with all content in one file
- 🧹 Cleans HTML and normalizes content for optimal LLM consumption
- Provides statistics about generated documentation
- Option to include blog posts

## Implementation Details

The plugin:

1. Scans your `docs` directory recursively for all Markdown files
2. Optionally includes blog content
3. Extracts metadata, titles, and content from each file
4. Creates proper URL links to each document section (see the sketch below)
5. Generates a table of contents in `llms.txt`
6. Combines all documentation content in `llms-full.txt`
7. Provides statistics about the generated documentation
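Step 4 relies on the standard WHATWG `URL` constructor; a standalone sketch with illustrative values:

```js
// Sketch of step 4: mapping a doc file path to its site URL (values illustrative)
const linkPath = 'guide/intro.md'.replace(/\.mdx?$/, ''); // strip .md/.mdx extension
const url = new URL(`docs/${linkPath}`, 'https://example.com').toString();
console.log(url); // → https://example.com/docs/guide/intro
```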
## Future Enhancements

Planned features for future versions:

- Advanced glob pattern matching for file filtering
- Support for i18n content
- Specific content tags for LLM-only sections

## License

MIT
package/lib/index.d.ts
ADDED
@@ -0,0 +1,43 @@
```ts
/**
 * @fileoverview Docusaurus plugin that generates LLM-friendly documentation following the llmtxt.org standard.
 *
 * This plugin creates two files:
 * - llms.txt: Contains links to all sections of documentation
 * - llms-full.txt: Contains all documentation content in a single file
 *
 * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
 */
import type { LoadContext, Plugin } from '@docusaurus/types';
/**
 * Plugin options interface
 */
interface PluginOptions {
    /** Whether to generate the llms.txt file (default: true) */
    generateLLMsTxt?: boolean;
    /** Whether to generate the llms-full.txt file (default: true) */
    generateLLMsFullTxt?: boolean;
    /** Base directory for documentation files (default: 'docs') */
    docsDir?: string;
    /** Array of glob patterns for files to ignore */
    ignoreFiles?: string[];
    /** Custom title to use in generated files (defaults to site title) */
    title?: string;
    /** Custom description to use in generated files (defaults to site tagline) */
    description?: string;
    /** Custom file name for the links file (default: 'llms.txt') */
    llmsTxtFilename?: string;
    /** Custom file name for the full content file (default: 'llms-full.txt') */
    llmsFullTxtFilename?: string;
    /** Whether to include blog content (default: false) */
    includeBlog?: boolean;
}
/**
 * A Docusaurus plugin to generate LLM-friendly documentation following
 * the llmtxt.org standard
 *
 * @param context - Docusaurus context
 * @param options - Plugin options
 * @returns Plugin object
 */
export default function docusaurusPluginLLMs(context: LoadContext, options?: PluginOptions): Plugin<void>;
export {};
```
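Given this signature, the factory can also be invoked directly; a minimal sketch that passes only the context fields the implementation actually destructures (`siteDir`, `siteConfig`, `outDir` — all values illustrative):

```js
// Minimal sketch: calling the plugin factory outside Docusaurus (illustrative values)
const docusaurusPluginLLMs = require('docusaurus-plugin-llms').default;

const plugin = docusaurusPluginLLMs(
  {
    siteDir: '/path/to/site',
    outDir: '/path/to/site/build',
    siteConfig: { title: 'My Site', tagline: 'Docs', url: 'https://example.com', baseUrl: '/' },
  },
  { includeBlog: true }
);

console.log(plugin.name); // → 'docusaurus-plugin-llms'
```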
package/lib/index.js
ADDED
@@ -0,0 +1,330 @@
```js
"use strict";
/**
 * @fileoverview Docusaurus plugin that generates LLM-friendly documentation following the llmtxt.org standard.
 *
 * This plugin creates two files:
 * - llms.txt: Contains links to all sections of documentation
 * - llms-full.txt: Contains all documentation content in a single file
 *
 * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.default = docusaurusPluginLLMs;
const fs = __importStar(require("fs/promises"));
const path = __importStar(require("path"));
const gray_matter_1 = __importDefault(require("gray-matter"));
const minimatch_1 = require("minimatch");
/**
 * Write content to a file
 * @param filePath - Path to write the file to
 * @param data - Content to write
 */
async function writeFile(filePath, data) {
    return fs.writeFile(filePath, data, 'utf8');
}
/**
 * Read content from a file
 * @param filePath - Path of the file to read
 * @returns Content of the file
 */
async function readFile(filePath) {
    return fs.readFile(filePath, 'utf8');
}
/**
 * Check if a file should be ignored based on glob patterns
 * @param filePath - Path to the file
 * @param baseDir - Base directory for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @returns Whether the file should be ignored
 */
function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
    if (ignorePatterns.length === 0) {
        return false;
    }
    const relativePath = path.relative(baseDir, filePath);
    return ignorePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
}
/**
 * Recursively reads all Markdown files in a directory
 * @param dir - Directory to scan
 * @param baseDir - Base directory for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @returns Array of file paths
 */
async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
    const files = [];
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
            continue;
        }
        if (entry.isDirectory()) {
            const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
            files.push(...subDirFiles);
        }
        else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
            files.push(fullPath);
        }
    }
    return files;
}
/**
 * Extract title from content or use the filename
 * @param data - Frontmatter data
 * @param content - Markdown content
 * @param filePath - Path to the file
 * @returns Extracted title
 */
function extractTitle(data, content, filePath) {
    // First try frontmatter
    if (data.title) {
        return data.title;
    }
    // Then try first heading
    const headingMatch = content.match(/^#\s+(.*)/m);
    if (headingMatch) {
        return headingMatch[1].trim();
    }
    // Finally use filename
    return path.basename(filePath, path.extname(filePath))
        .replace(/-/g, ' ')
        .replace(/\b\w/g, c => c.toUpperCase());
}
/**
 * Clean markdown content for LLM consumption
 * @param content - Raw markdown content
 * @returns Cleaned content
 */
function cleanMarkdownContent(content) {
    // Remove HTML tags
    let cleaned = content.replace(/<[^>]*>/g, '');
    // Normalize whitespace
    cleaned = cleaned.replace(/\r\n/g, '\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
    return cleaned;
}
/**
 * Process a markdown file and extract its metadata and content
 * @param filePath - Path to the markdown file
 * @param baseDir - Base directory
 * @param siteUrl - Base URL of the site
 * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
 * @returns Processed file data
 */
async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs') {
    const content = await readFile(filePath);
    const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
    const relativePath = path.relative(baseDir, filePath);
    // Convert to URL path format (replace backslashes with forward slashes on Windows)
    const normalizedPath = relativePath.replace(/\\/g, '/');
    // Convert .md extension to appropriate path
    const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
    // Handle index files specially
    const linkPath = linkPathBase.endsWith('index')
        ? linkPathBase.replace(/\/index$/, '')
        : linkPathBase;
    // Generate full URL
    const fullUrl = new URL(`${pathPrefix}/${linkPath}`, siteUrl).toString();
    // Extract title
    const title = extractTitle(data, markdownContent, filePath);
    // Get description from frontmatter or first paragraph
    let description = data.description || '';
    if (!description) {
        const paragraphs = markdownContent.split('\n\n');
        for (const para of paragraphs) {
            if (para.trim() && !para.startsWith('#')) {
                description = para.trim();
                break;
            }
        }
    }
    // Clean and process content
    const cleanedContent = cleanMarkdownContent(markdownContent);
    return {
        title,
        path: normalizedPath,
        url: fullUrl,
        content: cleanedContent,
        description: description || '',
    };
}
/**
 * A Docusaurus plugin to generate LLM-friendly documentation following
 * the llmtxt.org standard
 *
 * @param context - Docusaurus context
 * @param options - Plugin options
 * @returns Plugin object
 */
function docusaurusPluginLLMs(context, options = {}) {
    // Set default options
    const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, } = options;
    const { siteDir, siteConfig, outDir, } = context;
    return {
        name: 'docusaurus-plugin-llms',
        /**
         * Generates LLM-friendly documentation files after the build is complete
         */
        async postBuild() {
            console.log('Generating LLM-friendly documentation...');
            // Custom title and description or fallback to site values
            const docTitle = title || siteConfig.title;
            const docDescription = description || siteConfig.tagline || '';
            // Build the site URL, stripping any trailing slash from the baseUrl
            const siteUrl = siteConfig.url + (siteConfig.baseUrl.endsWith('/')
                ? siteConfig.baseUrl.slice(0, -1)
                : siteConfig.baseUrl || '');
            // Initialize docs collection
            const allDocs = [];
            try {
                // Process docs directory
                const fullDocsDir = path.join(siteDir, docsDir);
                try {
                    await fs.access(fullDocsDir);
                    // Collect all markdown files from docs directory
                    const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);
                    if (docFiles.length > 0) {
                        // Process each file
                        for (const filePath of docFiles) {
                            try {
                                const docInfo = await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs');
                                allDocs.push(docInfo);
                            }
                            catch (err) {
                                console.warn(`Error processing ${filePath}: ${err.message}`);
                            }
                        }
                        console.log(`Processed ${docFiles.length} documentation files`);
                    }
                    else {
                        console.warn('No markdown files found in docs directory.');
                    }
                }
                catch (err) {
                    console.warn(`Docs directory not found: ${fullDocsDir}`);
                }
                // Process blog if enabled
                if (includeBlog) {
                    const blogDir = path.join(siteDir, 'blog');
                    try {
                        await fs.access(blogDir);
                        // Collect all markdown files from blog directory
                        const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);
                        if (blogFiles.length > 0) {
                            // Process each file
                            for (const filePath of blogFiles) {
                                try {
                                    const docInfo = await processMarkdownFile(filePath, blogDir, siteUrl, 'blog');
                                    allDocs.push(docInfo);
                                }
                                catch (err) {
                                    console.warn(`Error processing ${filePath}: ${err.message}`);
                                }
                            }
                            console.log(`Processed ${blogFiles.length} blog files`);
                        }
                        else {
                            console.warn('No markdown files found in blog directory.');
                        }
                    }
                    catch (err) {
                        console.warn(`Blog directory not found: ${blogDir}`);
                    }
                }
                // Skip further processing if no documents were found
                if (allDocs.length === 0) {
                    console.warn('No documents found to process.');
                    return;
                }
                // Sort files to ensure consistent ordering
                allDocs.sort((a, b) => a.title.localeCompare(b.title));
                // Generate llms.txt
                if (generateLLMsTxt) {
                    const llmsTxtPath = path.join(outDir, llmsTxtFilename);
                    const tocItems = allDocs.map(doc => {
                        return `- [${doc.title}](${doc.url})${doc.description ? `: ${doc.description.split('\n')[0]}` : ''}`;
                    });
                    const llmsTxtContent = `# ${docTitle}

> ${docDescription}

This file contains links to all documentation sections following the llmtxt.org standard.

## Table of Contents

${tocItems.join('\n')}
`;
                    await writeFile(llmsTxtPath, llmsTxtContent);
                    console.log(`Generated ${llmsTxtFilename}: ${llmsTxtPath}`);
                }
                // Generate llms-full.txt with all content
                if (generateLLMsFullTxt) {
                    const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
                    const fullContentSections = allDocs.map(doc => {
                        return `## ${doc.title}

${doc.content}`;
                    });
                    const llmsFullTxtContent = `# ${docTitle}

> ${docDescription}

This file contains all documentation content in a single document following the llmtxt.org standard.

${fullContentSections.join('\n\n---\n\n')}
`;
                    await writeFile(llmsFullTxtPath, llmsFullTxtContent);
                    console.log(`Generated ${llmsFullTxtFilename}: ${llmsFullTxtPath}`);
                }
                // Output statistics
                const stats = {
                    totalDocuments: allDocs.length,
                    totalBytes: allDocs.reduce((sum, doc) => sum + doc.content.length, 0),
                    approxTokens: Math.round(allDocs.reduce((sum, doc) => sum + doc.content.length, 0) / 4), // Rough token estimate
                };
                console.log(`Stats: ${stats.totalDocuments} documents, ${Math.round(stats.totalBytes / 1024)}KB, ~${stats.approxTokens} tokens`);
            }
            catch (err) {
                console.error('Error generating LLM documentation:', err);
            }
        },
    };
}
```
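The ignore check above passes `matchBase: true` to minimatch; a quick standalone illustration of what that flag does (standard minimatch behavior, paths illustrative):

```js
const { minimatch } = require('minimatch');

// With matchBase, slash-free patterns are tested against the basename only...
console.log(minimatch('docs/guide/intro.md', '*.md', { matchBase: true }));    // true
// ...while patterns containing a slash are tested against the whole path.
console.log(minimatch('docs/guide/intro.md', 'guide/*', { matchBase: true })); // false
```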
package/package.json
ADDED
@@ -0,0 +1,52 @@
```json
{
  "name": "docusaurus-plugin-llms",
  "version": "0.1.0",
  "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmtxt.org standard",
  "main": "lib/index.js",
  "scripts": {
    "build": "tsc",
    "watch": "tsc --watch",
    "cleanup": "node cleanup.js",
    "prepublishOnly": "npm run build && npm run cleanup",
    "test": "echo \"No tests specified\""
  },
  "files": [
    "lib",
    "src"
  ],
  "repository": {
    "type": "git",
    "url": "https://github.com/rachfop/docusaurus-plugin-llms.git"
  },
  "bugs": {
    "url": "https://github.com/rachfop/docusaurus-plugin-llms/issues"
  },
  "homepage": "https://github.com/rachfop/docusaurus-plugin-llms#readme",
  "keywords": [
    "docusaurus",
    "docusaurus-plugin",
    "documentation",
    "llm",
    "llms",
    "llmtxt"
  ],
  "author": "Patrick Rachford",
  "email": "prachford@icloud.com",
  "license": "MIT",
  "dependencies": {
    "gray-matter": "^4.0.3",
    "minimatch": "^9.0.3"
  },
  "peerDependencies": {
    "@docusaurus/core": "^3.0.0"
  },
  "devDependencies": {
    "@docusaurus/types": "^3.0.0",
    "@types/minimatch": "^5.1.2",
    "@types/node": "^20.6.0",
    "typescript": "^5.2.2"
  },
  "engines": {
    "node": ">=18.0"
  }
}
```
package/src/declarations.d.ts
File without changes
package/src/index.ts
ADDED
@@ -0,0 +1,415 @@
```ts
/**
 * @fileoverview Docusaurus plugin that generates LLM-friendly documentation following the llmtxt.org standard.
 *
 * This plugin creates two files:
 * - llms.txt: Contains links to all sections of documentation
 * - llms-full.txt: Contains all documentation content in a single file
 *
 * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
 */

import * as fs from 'fs/promises';
import * as path from 'path';
import matter from 'gray-matter';
import { minimatch } from 'minimatch';
import type { LoadContext, Plugin } from '@docusaurus/types';

/**
 * Interface for processed document information
 */
interface DocInfo {
  title: string;
  path: string;
  url: string;
  content: string;
  description: string;
}

/**
 * Plugin options interface
 */
interface PluginOptions {
  /** Whether to generate the llms.txt file (default: true) */
  generateLLMsTxt?: boolean;

  /** Whether to generate the llms-full.txt file (default: true) */
  generateLLMsFullTxt?: boolean;

  /** Base directory for documentation files (default: 'docs') */
  docsDir?: string;

  /** Array of glob patterns for files to ignore */
  ignoreFiles?: string[];

  /** Custom title to use in generated files (defaults to site title) */
  title?: string;

  /** Custom description to use in generated files (defaults to site tagline) */
  description?: string;

  /** Custom file name for the links file (default: 'llms.txt') */
  llmsTxtFilename?: string;

  /** Custom file name for the full content file (default: 'llms-full.txt') */
  llmsFullTxtFilename?: string;

  /** Whether to include blog content (default: false) */
  includeBlog?: boolean;
}

/**
 * Write content to a file
 * @param filePath - Path to write the file to
 * @param data - Content to write
 */
async function writeFile(filePath: string, data: string): Promise<void> {
  return fs.writeFile(filePath, data, 'utf8');
}

/**
 * Read content from a file
 * @param filePath - Path of the file to read
 * @returns Content of the file
 */
async function readFile(filePath: string): Promise<string> {
  return fs.readFile(filePath, 'utf8');
}

/**
 * Check if a file should be ignored based on glob patterns
 * @param filePath - Path to the file
 * @param baseDir - Base directory for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @returns Whether the file should be ignored
 */
function shouldIgnoreFile(filePath: string, baseDir: string, ignorePatterns: string[]): boolean {
  if (ignorePatterns.length === 0) {
    return false;
  }

  const relativePath = path.relative(baseDir, filePath);

  return ignorePatterns.some(pattern =>
    minimatch(relativePath, pattern, { matchBase: true })
  );
}

/**
 * Recursively reads all Markdown files in a directory
 * @param dir - Directory to scan
 * @param baseDir - Base directory for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @returns Array of file paths
 */
async function readMarkdownFiles(dir: string, baseDir: string, ignorePatterns: string[] = []): Promise<string[]> {
  const files: string[] = [];
  const entries = await fs.readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);

    if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
      continue;
    }

    if (entry.isDirectory()) {
      const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
      files.push(...subDirFiles);
    } else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
      files.push(fullPath);
    }
  }

  return files;
}

/**
 * Extract title from content or use the filename
 * @param data - Frontmatter data
 * @param content - Markdown content
 * @param filePath - Path to the file
 * @returns Extracted title
 */
function extractTitle(data: any, content: string, filePath: string): string {
  // First try frontmatter
  if (data.title) {
    return data.title;
  }

  // Then try first heading
  const headingMatch = content.match(/^#\s+(.*)/m);
  if (headingMatch) {
    return headingMatch[1].trim();
  }

  // Finally use filename
  return path.basename(filePath, path.extname(filePath))
    .replace(/-/g, ' ')
    .replace(/\b\w/g, c => c.toUpperCase());
}

/**
 * Clean markdown content for LLM consumption
 * @param content - Raw markdown content
 * @returns Cleaned content
 */
function cleanMarkdownContent(content: string): string {
  // Remove HTML tags
  let cleaned = content.replace(/<[^>]*>/g, '');

  // Normalize whitespace
  cleaned = cleaned.replace(/\r\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();

  return cleaned;
}

/**
 * Process a markdown file and extract its metadata and content
 * @param filePath - Path to the markdown file
 * @param baseDir - Base directory
 * @param siteUrl - Base URL of the site
 * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
 * @returns Processed file data
 */
async function processMarkdownFile(
  filePath: string,
  baseDir: string,
  siteUrl: string,
  pathPrefix: string = 'docs'
): Promise<DocInfo> {
  const content = await readFile(filePath);
  const { data, content: markdownContent } = matter(content);

  const relativePath = path.relative(baseDir, filePath);
  // Convert to URL path format (replace backslashes with forward slashes on Windows)
  const normalizedPath = relativePath.replace(/\\/g, '/');

  // Convert .md extension to appropriate path
  const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');

  // Handle index files specially
  const linkPath = linkPathBase.endsWith('index')
    ? linkPathBase.replace(/\/index$/, '')
    : linkPathBase;

  // Generate full URL
  const fullUrl = new URL(`${pathPrefix}/${linkPath}`, siteUrl).toString();

  // Extract title
  const title = extractTitle(data, markdownContent, filePath);

  // Get description from frontmatter or first paragraph
  let description = data.description || '';
  if (!description) {
    const paragraphs = markdownContent.split('\n\n');
    for (const para of paragraphs) {
      if (para.trim() && !para.startsWith('#')) {
        description = para.trim();
        break;
      }
    }
  }

  // Clean and process content
  const cleanedContent = cleanMarkdownContent(markdownContent);

  return {
    title,
    path: normalizedPath,
    url: fullUrl,
    content: cleanedContent,
    description: description || '',
  };
}

/**
 * A Docusaurus plugin to generate LLM-friendly documentation following
 * the llmtxt.org standard
 *
 * @param context - Docusaurus context
 * @param options - Plugin options
 * @returns Plugin object
 */
export default function docusaurusPluginLLMs(
  context: LoadContext,
  options: PluginOptions = {}
): Plugin<void> {
  // Set default options
  const {
    generateLLMsTxt = true,
    generateLLMsFullTxt = true,
    docsDir = 'docs',
    ignoreFiles = [],
    title,
    description,
    llmsTxtFilename = 'llms.txt',
    llmsFullTxtFilename = 'llms-full.txt',
    includeBlog = false,
  } = options;

  const {
    siteDir,
    siteConfig,
    outDir,
  } = context;

  return {
    name: 'docusaurus-plugin-llms',

    /**
     * Generates LLM-friendly documentation files after the build is complete
     */
    async postBuild(): Promise<void> {
      console.log('Generating LLM-friendly documentation...');

      // Custom title and description or fallback to site values
      const docTitle = title || siteConfig.title;
      const docDescription = description || siteConfig.tagline || '';

      // Build the site URL, stripping any trailing slash from the baseUrl
      const siteUrl = siteConfig.url + (
        siteConfig.baseUrl.endsWith('/')
          ? siteConfig.baseUrl.slice(0, -1)
          : siteConfig.baseUrl || ''
      );

      // Initialize docs collection
      const allDocs: DocInfo[] = [];

      try {
        // Process docs directory
        const fullDocsDir = path.join(siteDir, docsDir);

        try {
          await fs.access(fullDocsDir);

          // Collect all markdown files from docs directory
          const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);

          if (docFiles.length > 0) {
            // Process each file
            for (const filePath of docFiles) {
              try {
                const docInfo = await processMarkdownFile(
                  filePath,
                  fullDocsDir,
                  siteUrl,
                  'docs'
                );
                allDocs.push(docInfo);
              } catch (err: any) {
                console.warn(`Error processing ${filePath}: ${err.message}`);
              }
            }
            console.log(`Processed ${docFiles.length} documentation files`);
          } else {
            console.warn('No markdown files found in docs directory.');
          }
        } catch (err) {
          console.warn(`Docs directory not found: ${fullDocsDir}`);
        }

        // Process blog if enabled
        if (includeBlog) {
          const blogDir = path.join(siteDir, 'blog');

          try {
            await fs.access(blogDir);

            // Collect all markdown files from blog directory
            const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);

            if (blogFiles.length > 0) {
              // Process each file
              for (const filePath of blogFiles) {
                try {
                  const docInfo = await processMarkdownFile(
                    filePath,
                    blogDir,
                    siteUrl,
                    'blog'
                  );
                  allDocs.push(docInfo);
                } catch (err: any) {
                  console.warn(`Error processing ${filePath}: ${err.message}`);
                }
              }
              console.log(`Processed ${blogFiles.length} blog files`);
            } else {
              console.warn('No markdown files found in blog directory.');
            }
          } catch (err) {
            console.warn(`Blog directory not found: ${blogDir}`);
          }
        }

        // Skip further processing if no documents were found
        if (allDocs.length === 0) {
          console.warn('No documents found to process.');
          return;
        }

        // Sort files to ensure consistent ordering
        allDocs.sort((a, b) => a.title.localeCompare(b.title));

        // Generate llms.txt
        if (generateLLMsTxt) {
          const llmsTxtPath = path.join(outDir, llmsTxtFilename);
          const tocItems = allDocs.map(doc => {
            return `- [${doc.title}](${doc.url})${doc.description ? `: ${doc.description.split('\n')[0]}` : ''}`;
          });

          const llmsTxtContent = `# ${docTitle}

> ${docDescription}

This file contains links to all documentation sections following the llmtxt.org standard.

## Table of Contents

${tocItems.join('\n')}
`;

          await writeFile(llmsTxtPath, llmsTxtContent);
          console.log(`Generated ${llmsTxtFilename}: ${llmsTxtPath}`);
        }

        // Generate llms-full.txt with all content
        if (generateLLMsFullTxt) {
          const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);

          const fullContentSections = allDocs.map(doc => {
            return `## ${doc.title}

${doc.content}`;
          });

          const llmsFullTxtContent = `# ${docTitle}

> ${docDescription}

This file contains all documentation content in a single document following the llmtxt.org standard.

${fullContentSections.join('\n\n---\n\n')}
`;

          await writeFile(llmsFullTxtPath, llmsFullTxtContent);
          console.log(`Generated ${llmsFullTxtFilename}: ${llmsFullTxtPath}`);
        }

        // Output statistics
        const stats = {
          totalDocuments: allDocs.length,
          totalBytes: allDocs.reduce((sum, doc) => sum + doc.content.length, 0),
          approxTokens: Math.round(allDocs.reduce((sum, doc) => sum + doc.content.length, 0) / 4), // Rough token estimate
        };

        console.log(`Stats: ${stats.totalDocuments} documents, ${Math.round(stats.totalBytes / 1024)}KB, ~${stats.approxTokens} tokens`);
      } catch (err: any) {
        console.error('Error generating LLM documentation:', err);
      }
    },
  };
}
```
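The three-step title fallback in `extractTitle` (frontmatter, then first heading, then Title-Cased filename) can be exercised on its own; a minimal sketch mirroring the source logic (the `titleFor` helper and sample inputs are illustrative):

```js
// Sketch of the title fallback chain used by extractTitle
const matter = require('gray-matter');
const path = require('path');

function titleFor(filePath, raw) {
  const { data, content } = matter(raw);
  if (data.title) return data.title;                       // 1. frontmatter `title:`
  const heading = content.match(/^#\s+(.*)/m);
  if (heading) return heading[1].trim();                   // 2. first `#` heading
  return path.basename(filePath, path.extname(filePath))   // 3. Title-Cased filename
    .replace(/-/g, ' ')
    .replace(/\b\w/g, c => c.toUpperCase());
}

console.log(titleFor('getting-started.md', 'Just body text.')); // → "Getting Started"
```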