docusaurus-plugin-llms 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,129 @@
1
+ # 📜 docusaurus-plugin-llms
2
+
3
+ A Docusaurus plugin for generating LLM-friendly documentation following the [llms.txt standard](https://llmstxt.org/).
4
+
5
+ ## Installation
6
+
7
+ There are two ways to use this plugin:
8
+
9
+ ### 1. Direct Integration (Simplest Method)
10
+
11
+ For quick integration, create a plugin file directly in your Docusaurus project:
12
+
13
+ ```bash
14
+ mkdir -p src/plugins/llms
15
+ ```
16
+
17
+ Then create a file at `src/plugins/llms/index.js` with the plugin code. Finally, add it to your `docusaurus.config.js`:
18
+
19
+ ```js
20
+ module.exports = {
21
+ // ... your existing Docusaurus config
22
+ plugins: [
23
+ require('./src/plugins/llms'),
24
+ // ... your other plugins
25
+ ],
26
+ };
27
+ ```
28
+
29
+ ### 2. As a Package (Not Yet Published)
30
+
31
+ ```bash
32
+ npm install docusaurus-plugin-llms --save-dev
33
+ ```
34
+
35
+ Then add to your Docusaurus configuration:
36
+
37
+ ```js
38
+ module.exports = {
39
+ // ... your existing Docusaurus config
40
+ plugins: [
41
+ 'docusaurus-plugin-llms',
42
+ // ... your other plugins
43
+ ],
44
+ };
45
+ ```
46
+
47
+ ## Configuration Options
48
+
49
+ You can configure the plugin by passing options:
50
+
51
+ ```js
52
+ module.exports = {
53
+ // ... your existing Docusaurus config
54
+ plugins: [
55
+ [
56
+ 'docusaurus-plugin-llms',
57
+ {
58
+ // Options here
59
+ generateLLMsTxt: true,
60
+ generateLLMsFullTxt: true,
61
+ docsDir: 'docs',
62
+ ignoreFiles: ['advanced/*', 'private/*'],
63
+ title: 'My Project Documentation',
64
+ description: 'Complete reference documentation for My Project',
65
+ includeBlog: true,
66
+ },
67
+ ],
68
+ // ... your other plugins
69
+ ],
70
+ };
71
+ ```
72
+
73
+ ### Available Options
74
+
75
+ | Option | Type | Default | Description |
76
+ |--------|------|---------|-------------|
77
+ | `generateLLMsTxt` | boolean | `true` | Whether to generate the links file |
78
+ | `generateLLMsFullTxt` | boolean | `true` | Whether to generate the full content file |
79
+ | `docsDir` | string | `'docs'` | Base directory for documentation files |
80
+ | `ignoreFiles` | string[] | `[]` | Array of glob patterns for files to ignore |
81
+ | `title` | string | Site title | Custom title to use in generated files |
82
+ | `description` | string | Site tagline | Custom description to use in generated files |
83
+ | `llmsTxtFilename` | string | `'llms.txt'` | Custom filename for the links file |
84
+ | `llmsFullTxtFilename` | string | `'llms-full.txt'` | Custom filename for the full content file |
85
+ | `includeBlog` | boolean | `false` | Whether to include blog content |
86
+
87
+ ## How It Works
88
+
89
+ This plugin automatically generates the following files during the build process:
90
+
91
+ - **llms.txt**: Contains links to all sections of your documentation
92
+ - **llms-full.txt**: Contains all documentation content in a single file
93
+
94
+ These files follow the [llms.txt standard](https://llmstxt.org/), making your documentation optimized for use with Large Language Models (LLMs).
95
+
96
+ ## Features
97
+
98
+ - โšก๏ธ Easy integration with Docusaurus
99
+ - โœ… Zero config required, works out of the box
100
+ - โš™๏ธ Highly customizable with multiple options
101
+ - ๐Ÿ“ Creates `llms.txt` with section links
102
+ - ๐Ÿ“– Produces `llms-full.txt` with all content in one file
103
+ - ๐Ÿงน Cleans HTML and normalizes content for optimal LLM consumption
104
+ - ๐Ÿ“Š Provides statistics about generated documentation
105
+ - ๐Ÿ“š Option to include blog posts
106
+
107
+ ## Implementation Details
108
+
109
+ The plugin:
110
+
111
+ 1. Scans your `docs` directory recursively for all Markdown files
112
+ 2. Optionally includes blog content
113
+ 3. Extracts metadata, titles, and content from each file
114
+ 4. Creates proper URL links to each document section
115
+ 5. Generates a table of contents in `llms.txt`
116
+ 6. Combines all documentation content in `llms-full.txt`
117
+ 7. Provides statistics about the generated documentation
118
+
119
+ ## Future Enhancements
120
+
121
+ Planned features for future versions:
122
+
123
+ - Advanced glob pattern matching for file filtering
124
+ - Support for i18n content
125
+ - Specific content tags for LLM-only sections
126
+
127
+ ## License
128
+
129
+ MIT
package/lib/index.d.ts ADDED
@@ -0,0 +1,43 @@
1
+ /**
2
+ * @fileoverview Docusaurus plugin that generates LLM-friendly documentation following the llmtxt.org standard.
3
+ *
4
+ * This plugin creates two files:
5
+ * - llms.txt: Contains links to all sections of documentation
6
+ * - llms-full.txt: Contains all documentation content in a single file
7
+ *
8
+ * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
9
+ */
10
+ import type { LoadContext, Plugin } from '@docusaurus/types';
11
+ /**
12
+ * Plugin options interface
13
+ */
14
+ interface PluginOptions {
15
+ /** Whether to generate the llms.txt file (default: true) */
16
+ generateLLMsTxt?: boolean;
17
+ /** Whether to generate the llms-full.txt file (default: true) */
18
+ generateLLMsFullTxt?: boolean;
19
+ /** Base directory for documentation files (default: 'docs') */
20
+ docsDir?: string;
21
+ /** Array of glob patterns for files to ignore */
22
+ ignoreFiles?: string[];
23
+ /** Custom title to use in generated files (defaults to site title) */
24
+ title?: string;
25
+ /** Custom description to use in generated files (defaults to site tagline) */
26
+ description?: string;
27
+ /** Custom file name for the links file (default: 'llms.txt') */
28
+ llmsTxtFilename?: string;
29
+ /** Custom file name for the full content file (default: 'llms-full.txt') */
30
+ llmsFullTxtFilename?: string;
31
+ /** Whether to include blog content (default: false) */
32
+ includeBlog?: boolean;
33
+ }
34
+ /**
35
+ * A Docusaurus plugin to generate LLM-friendly documentation following
36
+ * the llmtxt.org standard
37
+ *
38
+ * @param context - Docusaurus context
39
+ * @param options - Plugin options
40
+ * @returns Plugin object
41
+ */
42
+ export default function docusaurusPluginLLMs(context: LoadContext, options?: PluginOptions): Plugin<void>;
43
+ export {};
package/lib/index.js ADDED
@@ -0,0 +1,330 @@
1
+ "use strict";
2
+ /**
3
+ * @fileoverview Docusaurus plugin that generates LLM-friendly documentation following the llmtxt.org standard.
4
+ *
5
+ * This plugin creates two files:
6
+ * - llms.txt: Contains links to all sections of documentation
7
+ * - llms-full.txt: Contains all documentation content in a single file
8
+ *
9
+ * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
10
+ */
11
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
12
+ if (k2 === undefined) k2 = k;
13
+ var desc = Object.getOwnPropertyDescriptor(m, k);
14
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
15
+ desc = { enumerable: true, get: function() { return m[k]; } };
16
+ }
17
+ Object.defineProperty(o, k2, desc);
18
+ }) : (function(o, m, k, k2) {
19
+ if (k2 === undefined) k2 = k;
20
+ o[k2] = m[k];
21
+ }));
22
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
23
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
24
+ }) : function(o, v) {
25
+ o["default"] = v;
26
+ });
27
+ var __importStar = (this && this.__importStar) || (function () {
28
+ var ownKeys = function(o) {
29
+ ownKeys = Object.getOwnPropertyNames || function (o) {
30
+ var ar = [];
31
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
32
+ return ar;
33
+ };
34
+ return ownKeys(o);
35
+ };
36
+ return function (mod) {
37
+ if (mod && mod.__esModule) return mod;
38
+ var result = {};
39
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
40
+ __setModuleDefault(result, mod);
41
+ return result;
42
+ };
43
+ })();
44
+ var __importDefault = (this && this.__importDefault) || function (mod) {
45
+ return (mod && mod.__esModule) ? mod : { "default": mod };
46
+ };
47
+ Object.defineProperty(exports, "__esModule", { value: true });
48
+ exports.default = docusaurusPluginLLMs;
49
+ const fs = __importStar(require("fs/promises"));
50
+ const path = __importStar(require("path"));
51
+ const gray_matter_1 = __importDefault(require("gray-matter"));
52
+ const minimatch_1 = require("minimatch");
53
+ /**
54
+ * Write content to a file
55
+ * @param filePath - Path to write the file to
56
+ * @param data - Content to write
57
+ */
58
+ async function writeFile(filePath, data) {
59
+ return fs.writeFile(filePath, data, 'utf8');
60
+ }
61
+ /**
62
+ * Read content from a file
63
+ * @param filePath - Path of the file to read
64
+ * @returns Content of the file
65
+ */
66
+ async function readFile(filePath) {
67
+ return fs.readFile(filePath, 'utf8');
68
+ }
69
+ /**
70
+ * Check if a file should be ignored based on glob patterns
71
+ * @param filePath - Path to the file
72
+ * @param baseDir - Base directory for relative paths
73
+ * @param ignorePatterns - Glob patterns for files to ignore
74
+ * @returns Whether the file should be ignored
75
+ */
76
+ function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
77
+ if (ignorePatterns.length === 0) {
78
+ return false;
79
+ }
80
+ const relativePath = path.relative(baseDir, filePath);
81
+ return ignorePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
82
+ }
83
+ /**
84
+ * Recursively reads all Markdown files in a directory
85
+ * @param dir - Directory to scan
86
+ * @param baseDir - Base directory for relative paths
87
+ * @param ignorePatterns - Glob patterns for files to ignore
88
+ * @returns Array of file paths
89
+ */
90
+ async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
91
+ const files = [];
92
+ const entries = await fs.readdir(dir, { withFileTypes: true });
93
+ for (const entry of entries) {
94
+ const fullPath = path.join(dir, entry.name);
95
+ if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
96
+ continue;
97
+ }
98
+ if (entry.isDirectory()) {
99
+ const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
100
+ files.push(...subDirFiles);
101
+ }
102
+ else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
103
+ files.push(fullPath);
104
+ }
105
+ }
106
+ return files;
107
+ }
108
+ /**
109
+ * Extract title from content or use the filename
110
+ * @param data - Frontmatter data
111
+ * @param content - Markdown content
112
+ * @param filePath - Path to the file
113
+ * @returns Extracted title
114
+ */
115
+ function extractTitle(data, content, filePath) {
116
+ // First try frontmatter
117
+ if (data.title) {
118
+ return data.title;
119
+ }
120
+ // Then try first heading
121
+ const headingMatch = content.match(/^#\s+(.*)/m);
122
+ if (headingMatch) {
123
+ return headingMatch[1].trim();
124
+ }
125
+ // Finally use filename
126
+ return path.basename(filePath, path.extname(filePath))
127
+ .replace(/-/g, ' ')
128
+ .replace(/\b\w/g, c => c.toUpperCase());
129
+ }
130
+ /**
131
+ * Clean markdown content for LLM consumption
132
+ * @param content - Raw markdown content
133
+ * @returns Cleaned content
134
+ */
135
+ function cleanMarkdownContent(content) {
136
+ // Remove HTML tags
137
+ let cleaned = content.replace(/<[^>]*>/g, '');
138
+ // Normalize whitespace
139
+ cleaned = cleaned.replace(/\r\n/g, '\n')
140
+ .replace(/\n{3,}/g, '\n\n')
141
+ .trim();
142
+ return cleaned;
143
+ }
144
/**
 * Process a markdown file and extract its metadata and content.
 * @param filePath - Path to the markdown file
 * @param baseDir - Base directory used to compute the doc's relative path
 * @param siteUrl - Base URL of the site
 * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
 * @returns Processed file data
 */
async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs') {
    const content = await readFile(filePath);
    const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
    const relativePath = path.relative(baseDir, filePath);
    // Convert to URL path format (replace backslashes with forward slashes on Windows)
    const normalizedPath = relativePath.replace(/\\/g, '/');
    // Drop the markdown extension to get the routable path
    const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
    // Index files map to their containing directory. The old
    // endsWith('index') + /\/index$/ combination never stripped a
    // root-level "index" page; handle that case explicitly.
    const linkPath = linkPathBase === 'index'
        ? ''
        : linkPathBase.replace(/\/index$/, '');
    // Resolve against a base that is guaranteed to end in '/': otherwise
    // new URL() treats the last segment of siteUrl (the Docusaurus baseUrl)
    // as a file name and silently discards it.
    const urlBase = siteUrl.endsWith('/') ? siteUrl : `${siteUrl}/`;
    const fullUrl = new URL(`${pathPrefix}/${linkPath}`, urlBase).toString();
    // Title preference: frontmatter > first heading > filename
    const title = extractTitle(data, markdownContent, filePath);
    // Description: frontmatter, else the first non-heading paragraph
    let description = data.description || '';
    if (!description) {
        const paragraphs = markdownContent.split('\n\n');
        for (const para of paragraphs) {
            if (para.trim() && !para.startsWith('#')) {
                description = para.trim();
                break;
            }
        }
    }
    // Strip HTML and normalize whitespace for LLM consumption
    const cleanedContent = cleanMarkdownContent(markdownContent);
    return {
        title,
        path: normalizedPath,
        url: fullUrl,
        content: cleanedContent,
        description: description || '',
    };
}
189
/**
 * A Docusaurus plugin to generate LLM-friendly documentation following
 * the llmtxt.org standard.
 *
 * @param context - Docusaurus context
 * @param options - Plugin options
 * @returns Plugin object whose postBuild hook writes llms.txt / llms-full.txt
 */
function docusaurusPluginLLMs(context, options = {}) {
    // Set default options
    const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, } = options;
    const { siteDir, siteConfig, outDir } = context;
    return {
        name: 'docusaurus-plugin-llms',
        /**
         * Generates LLM-friendly documentation files after the build is complete
         */
        async postBuild() {
            console.log('Generating LLM-friendly documentation...');
            // Custom title and description or fallback to site values
            const docTitle = title || siteConfig.title;
            const docDescription = description || siteConfig.tagline || '';
            // Join url + baseUrl and KEEP the trailing slash. The previous code
            // stripped it, which made new URL('docs/x', siteUrl) resolve against
            // the parent and silently drop the baseUrl path segment.
            const joinedUrl = new URL(siteConfig.baseUrl || '/', siteConfig.url).toString();
            const siteUrl = joinedUrl.endsWith('/') ? joinedUrl : `${joinedUrl}/`;
            // Collected metadata for every processed markdown file
            const allDocs = [];
            try {
                // Process docs directory
                const fullDocsDir = path.join(siteDir, docsDir);
                try {
                    await fs.access(fullDocsDir);
                    // Ignore patterns are matched relative to the docs directory
                    // itself so 'advanced/*' works as documented (previously they
                    // were relative to the site dir and required a 'docs/' prefix).
                    const docFiles = await readMarkdownFiles(fullDocsDir, fullDocsDir, ignoreFiles);
                    if (docFiles.length > 0) {
                        for (const filePath of docFiles) {
                            try {
                                allDocs.push(await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs'));
                            }
                            catch (err) {
                                const msg = err instanceof Error ? err.message : String(err);
                                console.warn(`Error processing ${filePath}: ${msg}`);
                            }
                        }
                        console.log(`Processed ${docFiles.length} documentation files`);
                    }
                    else {
                        console.warn('No markdown files found in docs directory.');
                    }
                }
                catch (err) {
                    console.warn(`Docs directory not found: ${fullDocsDir}`);
                }
                // Process blog if enabled
                if (includeBlog) {
                    const blogDir = path.join(siteDir, 'blog');
                    try {
                        await fs.access(blogDir);
                        const blogFiles = await readMarkdownFiles(blogDir, blogDir, ignoreFiles);
                        if (blogFiles.length > 0) {
                            for (const filePath of blogFiles) {
                                try {
                                    allDocs.push(await processMarkdownFile(filePath, blogDir, siteUrl, 'blog'));
                                }
                                catch (err) {
                                    const msg = err instanceof Error ? err.message : String(err);
                                    console.warn(`Error processing ${filePath}: ${msg}`);
                                }
                            }
                            console.log(`Processed ${blogFiles.length} blog files`);
                        }
                        else {
                            console.warn('No markdown files found in blog directory.');
                        }
                    }
                    catch (err) {
                        console.warn(`Blog directory not found: ${blogDir}`);
                    }
                }
                // Skip further processing if no documents were found
                if (allDocs.length === 0) {
                    console.warn('No documents found to process.');
                    return;
                }
                // Sort by title so output is deterministic between builds
                allDocs.sort((a, b) => a.title.localeCompare(b.title));
                // Generate llms.txt (table of contents with links)
                if (generateLLMsTxt) {
                    const llmsTxtPath = path.join(outDir, llmsTxtFilename);
                    const tocItems = allDocs.map(doc => {
                        return `- [${doc.title}](${doc.url})${doc.description ? `: ${doc.description.split('\n')[0]}` : ''}`;
                    });
                    const llmsTxtContent = `# ${docTitle}

> ${docDescription}

This file contains links to all documentation sections following the llmtxt.org standard.

## Table of Contents

${tocItems.join('\n')}
`;
                    await writeFile(llmsTxtPath, llmsTxtContent);
                    console.log(`Generated ${llmsTxtFilename}: ${llmsTxtPath}`);
                }
                // Generate llms-full.txt with all content in one document
                if (generateLLMsFullTxt) {
                    const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
                    const fullContentSections = allDocs.map(doc => {
                        return `## ${doc.title}

${doc.content}`;
                    });
                    const llmsFullTxtContent = `# ${docTitle}

> ${docDescription}

This file contains all documentation content in a single document following the llmtxt.org standard.

${fullContentSections.join('\n\n---\n\n')}
`;
                    await writeFile(llmsFullTxtPath, llmsFullTxtContent);
                    console.log(`Generated ${llmsFullTxtFilename}: ${llmsFullTxtPath}`);
                }
                // Output statistics (reduce once instead of twice)
                const totalBytes = allDocs.reduce((sum, doc) => sum + doc.content.length, 0);
                const stats = {
                    totalDocuments: allDocs.length,
                    totalBytes,
                    approxTokens: Math.round(totalBytes / 4), // rough estimate: ~4 chars per token
                };
                console.log(`Stats: ${stats.totalDocuments} documents, ${Math.round(stats.totalBytes / 1024)}KB, ~${stats.approxTokens} tokens`);
            }
            catch (err) {
                console.error('Error generating LLM documentation:', err);
            }
        },
    };
}
package/package.json ADDED
@@ -0,0 +1,52 @@
1
+ {
2
+ "name": "docusaurus-plugin-llms",
3
+ "version": "0.1.0",
4
+ "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmtxt.org standard",
5
+ "main": "lib/index.js",
6
+ "scripts": {
7
+ "build": "tsc",
8
+ "watch": "tsc --watch",
9
+ "cleanup": "node cleanup.js",
10
+ "prepublishOnly": "npm run build && npm run cleanup",
11
+ "test": "echo \"No tests specified\""
12
+ },
13
+ "files": [
14
+ "lib",
15
+ "src"
16
+ ],
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "https://github.com/rachfop/docusaurus-plugin-llms.git"
20
+ },
21
+ "bugs": {
22
+ "url": "https://github.com/rachfop/docusaurus-plugin-llms/issues"
23
+ },
24
+ "homepage": "https://github.com/rachfop/docusaurus-plugin-llms#readme",
25
+ "keywords": [
26
+ "docusaurus",
27
+ "docusaurus-plugin",
28
+ "documentation",
29
+ "llm",
30
+ "llms",
31
+ "llmtxt"
32
+ ],
33
+ "author": "Patrick Rachford",
34
+ "email": "prachford@icloud.com",
35
+ "license": "MIT",
36
+ "dependencies": {
37
+ "gray-matter": "^4.0.3",
38
+ "minimatch": "^9.0.3"
39
+ },
40
+ "peerDependencies": {
41
+ "@docusaurus/core": "^3.0.0"
42
+ },
43
+ "devDependencies": {
44
+ "@docusaurus/types": "^3.0.0",
45
+ "@types/minimatch": "^5.1.2",
46
+ "@types/node": "^20.6.0",
47
+ "typescript": "^5.2.2"
48
+ },
49
+ "engines": {
50
+ "node": ">=18.0"
51
+ }
52
+ }
File without changes
package/src/index.ts ADDED
@@ -0,0 +1,415 @@
1
+ /**
2
+ * @fileoverview Docusaurus plugin that generates LLM-friendly documentation following the llmtxt.org standard.
3
+ *
4
+ * This plugin creates two files:
5
+ * - llms.txt: Contains links to all sections of documentation
6
+ * - llms-full.txt: Contains all documentation content in a single file
7
+ *
8
+ * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
9
+ */
10
+
11
+ import * as fs from 'fs/promises';
12
+ import * as path from 'path';
13
+ import matter from 'gray-matter';
14
+ import { minimatch } from 'minimatch';
15
+ import type { LoadContext, Plugin } from '@docusaurus/types';
16
+
17
+ /**
18
+ * Interface for processed document information
19
+ */
20
+ interface DocInfo {
21
+ title: string;
22
+ path: string;
23
+ url: string;
24
+ content: string;
25
+ description: string;
26
+ }
27
+
28
+ /**
29
+ * Plugin options interface
30
+ */
31
+ interface PluginOptions {
32
+ /** Whether to generate the llms.txt file (default: true) */
33
+ generateLLMsTxt?: boolean;
34
+
35
+ /** Whether to generate the llms-full.txt file (default: true) */
36
+ generateLLMsFullTxt?: boolean;
37
+
38
+ /** Base directory for documentation files (default: 'docs') */
39
+ docsDir?: string;
40
+
41
+ /** Array of glob patterns for files to ignore */
42
+ ignoreFiles?: string[];
43
+
44
+ /** Custom title to use in generated files (defaults to site title) */
45
+ title?: string;
46
+
47
+ /** Custom description to use in generated files (defaults to site tagline) */
48
+ description?: string;
49
+
50
+ /** Custom file name for the links file (default: 'llms.txt') */
51
+ llmsTxtFilename?: string;
52
+
53
+ /** Custom file name for the full content file (default: 'llms-full.txt') */
54
+ llmsFullTxtFilename?: string;
55
+
56
+ /** Whether to include blog content (default: false) */
57
+ includeBlog?: boolean;
58
+ }
59
+
60
+ /**
61
+ * Write content to a file
62
+ * @param filePath - Path to write the file to
63
+ * @param data - Content to write
64
+ */
65
+ async function writeFile(filePath: string, data: string): Promise<void> {
66
+ return fs.writeFile(filePath, data, 'utf8');
67
+ }
68
+
69
+ /**
70
+ * Read content from a file
71
+ * @param filePath - Path of the file to read
72
+ * @returns Content of the file
73
+ */
74
+ async function readFile(filePath: string): Promise<string> {
75
+ return fs.readFile(filePath, 'utf8');
76
+ }
77
+
78
+ /**
79
+ * Check if a file should be ignored based on glob patterns
80
+ * @param filePath - Path to the file
81
+ * @param baseDir - Base directory for relative paths
82
+ * @param ignorePatterns - Glob patterns for files to ignore
83
+ * @returns Whether the file should be ignored
84
+ */
85
+ function shouldIgnoreFile(filePath: string, baseDir: string, ignorePatterns: string[]): boolean {
86
+ if (ignorePatterns.length === 0) {
87
+ return false;
88
+ }
89
+
90
+ const relativePath = path.relative(baseDir, filePath);
91
+
92
+ return ignorePatterns.some(pattern =>
93
+ minimatch(relativePath, pattern, { matchBase: true })
94
+ );
95
+ }
96
+
97
+ /**
98
+ * Recursively reads all Markdown files in a directory
99
+ * @param dir - Directory to scan
100
+ * @param baseDir - Base directory for relative paths
101
+ * @param ignorePatterns - Glob patterns for files to ignore
102
+ * @returns Array of file paths
103
+ */
104
+ async function readMarkdownFiles(dir: string, baseDir: string, ignorePatterns: string[] = []): Promise<string[]> {
105
+ const files: string[] = [];
106
+ const entries = await fs.readdir(dir, { withFileTypes: true });
107
+
108
+ for (const entry of entries) {
109
+ const fullPath = path.join(dir, entry.name);
110
+
111
+ if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
112
+ continue;
113
+ }
114
+
115
+ if (entry.isDirectory()) {
116
+ const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
117
+ files.push(...subDirFiles);
118
+ } else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
119
+ files.push(fullPath);
120
+ }
121
+ }
122
+
123
+ return files;
124
+ }
125
+
126
+ /**
127
+ * Extract title from content or use the filename
128
+ * @param data - Frontmatter data
129
+ * @param content - Markdown content
130
+ * @param filePath - Path to the file
131
+ * @returns Extracted title
132
+ */
133
+ function extractTitle(data: any, content: string, filePath: string): string {
134
+ // First try frontmatter
135
+ if (data.title) {
136
+ return data.title;
137
+ }
138
+
139
+ // Then try first heading
140
+ const headingMatch = content.match(/^#\s+(.*)/m);
141
+ if (headingMatch) {
142
+ return headingMatch[1].trim();
143
+ }
144
+
145
+ // Finally use filename
146
+ return path.basename(filePath, path.extname(filePath))
147
+ .replace(/-/g, ' ')
148
+ .replace(/\b\w/g, c => c.toUpperCase());
149
+ }
150
+
151
+ /**
152
+ * Clean markdown content for LLM consumption
153
+ * @param content - Raw markdown content
154
+ * @returns Cleaned content
155
+ */
156
+ function cleanMarkdownContent(content: string): string {
157
+ // Remove HTML tags
158
+ let cleaned = content.replace(/<[^>]*>/g, '');
159
+
160
+ // Normalize whitespace
161
+ cleaned = cleaned.replace(/\r\n/g, '\n')
162
+ .replace(/\n{3,}/g, '\n\n')
163
+ .trim();
164
+
165
+ return cleaned;
166
+ }
167
+
168
+ /**
169
+ * Process a markdown file and extract its metadata and content
170
+ * @param filePath - Path to the markdown file
171
+ * @param baseDir - Base directory
172
+ * @param siteUrl - Base URL of the site
173
+ * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
174
+ * @returns Processed file data
175
+ */
176
+ async function processMarkdownFile(
177
+ filePath: string,
178
+ baseDir: string,
179
+ siteUrl: string,
180
+ pathPrefix: string = 'docs'
181
+ ): Promise<DocInfo> {
182
+ const content = await readFile(filePath);
183
+ const { data, content: markdownContent } = matter(content);
184
+
185
+ const relativePath = path.relative(baseDir, filePath);
186
+ // Convert to URL path format (replace backslashes with forward slashes on Windows)
187
+ const normalizedPath = relativePath.replace(/\\/g, '/');
188
+
189
+ // Convert .md extension to appropriate path
190
+ const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
191
+
192
+ // Handle index files specially
193
+ const linkPath = linkPathBase.endsWith('index')
194
+ ? linkPathBase.replace(/\/index$/, '')
195
+ : linkPathBase;
196
+
197
+ // Generate full URL
198
+ const fullUrl = new URL(`${pathPrefix}/${linkPath}`, siteUrl).toString();
199
+
200
+ // Extract title
201
+ const title = extractTitle(data, markdownContent, filePath);
202
+
203
+ // Get description from frontmatter or first paragraph
204
+ let description = data.description || '';
205
+ if (!description) {
206
+ const paragraphs = markdownContent.split('\n\n');
207
+ for (const para of paragraphs) {
208
+ if (para.trim() && !para.startsWith('#')) {
209
+ description = para.trim();
210
+ break;
211
+ }
212
+ }
213
+ }
214
+
215
+ // Clean and process content
216
+ const cleanedContent = cleanMarkdownContent(markdownContent);
217
+
218
+ return {
219
+ title,
220
+ path: normalizedPath,
221
+ url: fullUrl,
222
+ content: cleanedContent,
223
+ description: description || '',
224
+ };
225
+ }
226
+
227
+ /**
228
+ * A Docusaurus plugin to generate LLM-friendly documentation following
229
+ * the llmtxt.org standard
230
+ *
231
+ * @param context - Docusaurus context
232
+ * @param options - Plugin options
233
+ * @returns Plugin object
234
+ */
235
+ export default function docusaurusPluginLLMs(
236
+ context: LoadContext,
237
+ options: PluginOptions = {}
238
+ ): Plugin<void> {
239
+ // Set default options
240
+ const {
241
+ generateLLMsTxt = true,
242
+ generateLLMsFullTxt = true,
243
+ docsDir = 'docs',
244
+ ignoreFiles = [],
245
+ title,
246
+ description,
247
+ llmsTxtFilename = 'llms.txt',
248
+ llmsFullTxtFilename = 'llms-full.txt',
249
+ includeBlog = false,
250
+ } = options;
251
+
252
+ const {
253
+ siteDir,
254
+ siteConfig,
255
+ outDir,
256
+ } = context;
257
+
258
+ return {
259
+ name: 'docusaurus-plugin-llms',
260
+
261
+ /**
262
+ * Generates LLM-friendly documentation files after the build is complete
263
+ */
264
+ async postBuild(): Promise<void> {
265
+ console.log('Generating LLM-friendly documentation...');
266
+
267
+ // Custom title and description or fallback to site values
268
+ const docTitle = title || siteConfig.title;
269
+ const docDescription = description || siteConfig.tagline || '';
270
+
271
+ // Build the site URL with proper trailing slash
272
+ const siteUrl = siteConfig.url + (
273
+ siteConfig.baseUrl.endsWith('/')
274
+ ? siteConfig.baseUrl.slice(0, -1)
275
+ : siteConfig.baseUrl || ''
276
+ );
277
+
278
+ // Initialize docs collection
279
+ const allDocs: DocInfo[] = [];
280
+
281
+ try {
282
+ // Process docs directory
283
+ const fullDocsDir = path.join(siteDir, docsDir);
284
+
285
+ try {
286
+ await fs.access(fullDocsDir);
287
+
288
+ // Collect all markdown files from docs directory
289
+ const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);
290
+
291
+ if (docFiles.length > 0) {
292
+ // Process each file
293
+ for (const filePath of docFiles) {
294
+ try {
295
+ const docInfo = await processMarkdownFile(
296
+ filePath,
297
+ fullDocsDir,
298
+ siteUrl,
299
+ 'docs'
300
+ );
301
+ allDocs.push(docInfo);
302
+ } catch (err: any) {
303
+ console.warn(`Error processing ${filePath}: ${err.message}`);
304
+ }
305
+ }
306
+ console.log(`Processed ${docFiles.length} documentation files`);
307
+ } else {
308
+ console.warn('No markdown files found in docs directory.');
309
+ }
310
+ } catch (err) {
311
+ console.warn(`Docs directory not found: ${fullDocsDir}`);
312
+ }
313
+
314
+ // Process blog if enabled
315
+ if (includeBlog) {
316
+ const blogDir = path.join(siteDir, 'blog');
317
+
318
+ try {
319
+ await fs.access(blogDir);
320
+
321
+ // Collect all markdown files from blog directory
322
+ const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);
323
+
324
+ if (blogFiles.length > 0) {
325
+ // Process each file
326
+ for (const filePath of blogFiles) {
327
+ try {
328
+ const docInfo = await processMarkdownFile(
329
+ filePath,
330
+ blogDir,
331
+ siteUrl,
332
+ 'blog'
333
+ );
334
+ allDocs.push(docInfo);
335
+ } catch (err: any) {
336
+ console.warn(`Error processing ${filePath}: ${err.message}`);
337
+ }
338
+ }
339
+ console.log(`Processed ${blogFiles.length} blog files`);
340
+ } else {
341
+ console.warn('No markdown files found in blog directory.');
342
+ }
343
+ } catch (err) {
344
+ console.warn(`Blog directory not found: ${blogDir}`);
345
+ }
346
+ }
347
+
348
+ // Skip further processing if no documents were found
349
+ if (allDocs.length === 0) {
350
+ console.warn('No documents found to process.');
351
+ return;
352
+ }
353
+
354
+ // Sort files to ensure consistent ordering
355
+ allDocs.sort((a, b) => a.title.localeCompare(b.title));
356
+
357
+ // Generate llms.txt
358
+ if (generateLLMsTxt) {
359
+ const llmsTxtPath = path.join(outDir, llmsTxtFilename);
360
+ const tocItems = allDocs.map(doc => {
361
+ return `- [${doc.title}](${doc.url})${doc.description ? `: ${doc.description.split('\n')[0]}` : ''}`;
362
+ });
363
+
364
+ const llmsTxtContent = `# ${docTitle}
365
+
366
+ > ${docDescription}
367
+
368
+ This file contains links to all documentation sections following the llmtxt.org standard.
369
+
370
+ ## Table of Contents
371
+
372
+ ${tocItems.join('\n')}
373
+ `;
374
+
375
+ await writeFile(llmsTxtPath, llmsTxtContent);
376
+ console.log(`Generated ${llmsTxtFilename}: ${llmsTxtPath}`);
377
+ }
378
+
379
+ // Generate llms-full.txt with all content
380
+ if (generateLLMsFullTxt) {
381
+ const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
382
+
383
+ const fullContentSections = allDocs.map(doc => {
384
+ return `## ${doc.title}
385
+
386
+ ${doc.content}`;
387
+ });
388
+
389
+ const llmsFullTxtContent = `# ${docTitle}
390
+
391
+ > ${docDescription}
392
+
393
+ This file contains all documentation content in a single document following the llmtxt.org standard.
394
+
395
+ ${fullContentSections.join('\n\n---\n\n')}
396
+ `;
397
+
398
+ await writeFile(llmsFullTxtPath, llmsFullTxtContent);
399
+ console.log(`Generated ${llmsFullTxtFilename}: ${llmsFullTxtPath}`);
400
+ }
401
+
402
+ // Output statistics
403
+ const stats = {
404
+ totalDocuments: allDocs.length,
405
+ totalBytes: allDocs.reduce((sum, doc) => sum + doc.content.length, 0),
406
+ approxTokens: Math.round(allDocs.reduce((sum, doc) => sum + doc.content.length, 0) / 4), // Rough token estimate
407
+ };
408
+
409
+ console.log(`Stats: ${stats.totalDocuments} documents, ${Math.round(stats.totalBytes / 1024)}KB, ~${stats.approxTokens} tokens`);
410
+ } catch (err: any) {
411
+ console.error('Error generating LLM documentation:', err);
412
+ }
413
+ },
414
+ };
415
+ }