docusaurus-plugin-llms 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/utils.js ADDED
@@ -0,0 +1,177 @@
1
+ "use strict";
2
+ /**
3
+ * Utility functions for the docusaurus-plugin-llms plugin
4
+ */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
9
+ desc = { enumerable: true, get: function() { return m[k]; } };
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
17
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
18
+ }) : function(o, v) {
19
+ o["default"] = v;
20
+ });
21
+ var __importStar = (this && this.__importStar) || (function () {
22
+ var ownKeys = function(o) {
23
+ ownKeys = Object.getOwnPropertyNames || function (o) {
24
+ var ar = [];
25
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
26
+ return ar;
27
+ };
28
+ return ownKeys(o);
29
+ };
30
+ return function (mod) {
31
+ if (mod && mod.__esModule) return mod;
32
+ var result = {};
33
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
34
+ __setModuleDefault(result, mod);
35
+ return result;
36
+ };
37
+ })();
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.writeFile = writeFile;
40
+ exports.readFile = readFile;
41
+ exports.shouldIgnoreFile = shouldIgnoreFile;
42
+ exports.readMarkdownFiles = readMarkdownFiles;
43
+ exports.extractTitle = extractTitle;
44
+ exports.cleanMarkdownContent = cleanMarkdownContent;
45
+ exports.applyPathTransformations = applyPathTransformations;
46
+ const fs = __importStar(require("fs/promises"));
47
+ const path = __importStar(require("path"));
48
+ const minimatch_1 = require("minimatch");
/**
 * Write text to a file using UTF-8 encoding.
 * @param filePath - Destination path of the file
 * @param data - Text to store in the file
 */
async function writeFile(filePath, data) {
    const completion = await fs.writeFile(filePath, data, { encoding: 'utf8' });
    return completion;
}
/**
 * Load a file from disk and decode it as UTF-8 text.
 * @param filePath - Location of the file to load
 * @returns The decoded text of the file
 */
async function readFile(filePath) {
    const text = await fs.readFile(filePath, { encoding: 'utf8' });
    return text;
}
/**
 * Decide whether a file is excluded by any of the given glob patterns.
 *
 * The file path is made relative to `baseDir` before matching, and every
 * pattern is evaluated with minimatch's `matchBase` option enabled.
 * @param filePath - Path of the file being tested
 * @param baseDir - Directory the path is made relative to before matching
 * @param ignorePatterns - Glob patterns describing files to skip
 * @returns `true` when at least one pattern matches, otherwise `false`
 */
function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
    // No patterns means nothing can be excluded; skip the path computation.
    if (ignorePatterns.length === 0) {
        return false;
    }
    const { minimatch } = minimatch_1;
    const candidate = path.relative(baseDir, filePath);
    const matchesCandidate = glob => minimatch(candidate, glob, { matchBase: true });
    return ignorePatterns.some(matchesCandidate);
}
/**
 * Walk a directory tree and gather the paths of its Markdown files.
 *
 * Entries matched by `ignorePatterns` are skipped (ignored directories are
 * not descended into). Files are kept when their name ends in `.md` or `.mdx`.
 * Results appear in directory-listing order, with the contents of each
 * sub-directory inserted at the position of that sub-directory.
 * @param dir - Directory to walk
 * @param baseDir - Directory that ignore patterns are evaluated relative to
 * @param ignorePatterns - Glob patterns describing entries to skip
 * @returns Paths of every Markdown file found
 */
async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
    const dirents = await fs.readdir(dir, { withFileTypes: true });
    const collected = [];
    for (const dirent of dirents) {
        const entryPath = path.join(dir, dirent.name);
        if (shouldIgnoreFile(entryPath, baseDir, ignorePatterns)) {
            continue;
        }
        if (dirent.isDirectory()) {
            // Recurse one sub-directory at a time so the output order stays stable.
            const nested = await readMarkdownFiles(entryPath, baseDir, ignorePatterns);
            for (const nestedPath of nested) {
                collected.push(nestedPath);
            }
            continue;
        }
        const isMarkdown = ['.md', '.mdx'].some(suffix => dirent.name.endsWith(suffix));
        if (isMarkdown) {
            collected.push(entryPath);
        }
    }
    return collected;
}
/**
 * Resolve a display title for a document.
 *
 * Sources are tried in order:
 *   1. the frontmatter `title` value, when truthy;
 *   2. the first line starting with `# ` (single hash) that has text after it;
 *   3. the file name without its extension, with hyphens turned into spaces
 *      and the first character of every word upper-cased.
 * @param data - Frontmatter data (may be null/undefined)
 * @param content - Markdown content
 * @param filePath - Path to the file
 * @returns Extracted title
 */
function extractTitle(data, content, filePath) {
    // First try frontmatter
    if (data?.title) {
        return data.title;
    }
    // Then try the first heading. The separator is restricted to spaces/tabs:
    // `\s+` would also swallow newlines, so an empty "#" line would capture
    // the *next* line of the document as the title.
    const headingMatch = content.match(/^#[ \t]+(.*)/m);
    const headingText = headingMatch ? headingMatch[1].trim() : '';
    if (headingText) {
        return headingText;
    }
    // Finally derive a title from the file name (also used for blank headings)
    return path.basename(filePath, path.extname(filePath))
        .replace(/-/g, ' ')
        .replace(/\b\w/g, c => c.toUpperCase());
}
/**
 * Prepare raw markdown for LLM consumption.
 *
 * Every `<...>` span is stripped, Windows line endings are converted to `\n`,
 * runs of three or more newlines are collapsed to a single blank line, and
 * leading/trailing whitespace is trimmed.
 * @param content - Raw markdown content
 * @returns The cleaned markdown
 */
function cleanMarkdownContent(content) {
    const withoutTags = content.replace(/<[^>]*>/g, '');
    const unixNewlines = withoutTags.replace(/\r\n/g, '\n');
    const collapsedBlankLines = unixNewlines.replace(/\n{3,}/g, '\n\n');
    return collapsedBlankLines.trim();
}
/**
 * Apply path transformations according to configuration.
 *
 * `ignorePaths` entries are removed wherever they appear as complete path
 * segments (start, middle or end, including consecutive repeats). Entries are
 * matched literally: regex metacharacters such as `.` or `+` have no special
 * meaning. Afterwards repeated slashes are collapsed and a leading slash is
 * dropped. `addPaths` entries are then prepended, in the configured order,
 * unless the path already starts with that entry.
 * @param urlPath - Original URL path
 * @param pathTransformation - Path transformation configuration (optional)
 * @returns Transformed URL path
 */
function applyPathTransformations(urlPath, pathTransformation) {
    if (!pathTransformation) {
        return urlPath;
    }
    let transformedPath = urlPath;
    // Remove ignored path segments
    if (pathTransformation.ignorePaths?.length) {
        for (const ignorePath of pathTransformation.ignorePaths) {
            // Escape the entry so it is matched literally; unescaped, "v1.0"
            // would also match "v1x0" and "c++" would be an invalid pattern.
            const escaped = String(ignorePath).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            // Anchor on segment boundaries. The trailing boundary is a lookahead
            // so its "/" stays available as the leading boundary of the next
            // match, which lets consecutive repeats ("docs/docs/x") all match.
            const ignoreRegex = new RegExp(`(^|/)${escaped}(?=/|$)`, 'g');
            transformedPath = transformedPath.replace(ignoreRegex, '$1');
        }
        // Clean up any double slashes that might have been created
        transformedPath = transformedPath.replace(/\/+/g, '/');
        // Remove leading slash if present
        transformedPath = transformedPath.replace(/^\//, '');
    }
    // Add path segments if they're not already present
    if (pathTransformation.addPaths?.length) {
        // Each entry is prepended, so walk the list backwards to end up with
        // the entries in their configured order.
        const pathsToAdd = [...pathTransformation.addPaths].reverse();
        for (const addPath of pathsToAdd) {
            // Only add if not already present at the beginning
            if (!transformedPath.startsWith(addPath + '/') && transformedPath !== addPath) {
                transformedPath = `${addPath}/${transformedPath}`;
            }
        }
    }
    return transformedPath;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docusaurus-plugin-llms",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmtxt.org standard",
5
5
  "main": "lib/index.js",
6
6
  "scripts": {
@@ -8,7 +8,9 @@
8
8
  "watch": "tsc --watch",
9
9
  "cleanup": "node cleanup.js",
10
10
  "prepublishOnly": "npm run build && npm run cleanup",
11
- "test": "echo \"No tests specified\""
11
+ "test:unit": "node tests/test-path-transforms.js",
12
+ "test:integration": "node tests/test-path-transformation.js",
13
+ "test": "npm run build && npm run test:unit && npm run test:integration"
12
14
  },
13
15
  "files": [
14
16
  "lib",
@@ -0,0 +1,266 @@
1
+ /**
2
+ * LLM file generation functions for the docusaurus-plugin-llms plugin
3
+ */
4
+
5
+ import * as path from 'path';
6
+ import * as fs from 'fs/promises';
7
+ import { DocInfo, PluginContext, CustomLLMFile } from './types';
8
+ import { writeFile, readMarkdownFiles } from './utils';
9
+ import { processFilesWithPatterns } from './processor';
10
+
/**
 * Reduce a description to a single short line for a table-of-contents entry.
 *
 * Only the text before the first newline is kept; a leading run of `#`
 * followed by whitespace is stripped (hashes elsewhere in the line are left
 * alone); results longer than 150 characters are cut to 147 plus `...`.
 * @param description - The original description
 * @returns The shortened description, or an empty string for empty input
 */
function cleanDescriptionForToc(description: string): string {
  if (!description) {
    return '';
  }

  // Keep everything up to (not including) the first line break.
  const lineBreakAt = description.indexOf('\n');
  const headLine = lineBreakAt === -1 ? description : description.slice(0, lineBreakAt);

  // Strip heading markers only when they open the line.
  const withoutMarker = headLine.replace(/^(#+)\s+/g, '');

  if (withoutMarker.length <= 150) {
    return withoutMarker;
  }
  return withoutMarker.slice(0, 147) + '...';
}
30
+
/**
 * Write one LLM-oriented markdown file.
 *
 * The file opens with `# <title>` and a `> <description>` blockquote (with a
 * `Version:` paragraph appended when a version is given). It then contains
 * either every document's full content, separated by `---` rules, or a
 * "Table of Contents" list of links with short descriptions.
 * @param docs - Processed document information
 * @param outputPath - Path to write the output file
 * @param fileTitle - Title for the file
 * @param fileDescription - Description for the file
 * @param includeFullContent - `true` for full content, `false` for links only
 * @param version - Optional version string to embed in the header
 */
export async function generateLLMFile(
  docs: DocInfo[],
  outputPath: string,
  fileTitle: string,
  fileDescription: string,
  includeFullContent: boolean,
  version?: string
): Promise<void> {
  console.log(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);

  const versionSuffix = version ? `\n\nVersion: ${version}` : '';
  // Header shared by both variants; the empty strings become blank lines.
  const headerLines = [`# ${fileTitle}`, '', `> ${fileDescription}${versionSuffix}`, ''];

  let bodyLines: string[];
  if (includeFullContent) {
    const sections = docs.map(doc => `## ${doc.title}\n\n${doc.content}`);
    bodyLines = [
      'This file contains all documentation content in a single document following the llmtxt.org standard.',
      '',
      sections.join('\n\n---\n\n'),
    ];
  } else {
    const links = docs.map(doc => {
      const summary = cleanDescriptionForToc(doc.description);
      const suffix = summary ? `: ${summary}` : '';
      return `- [${doc.title}](${doc.url})${suffix}`;
    });
    bodyLines = [
      'This file contains links to documentation sections following the llmtxt.org standard.',
      '',
      '## Table of Contents',
      '',
      links.join('\n'),
    ];
  }

  // The trailing empty element yields the final newline of the file.
  const fileText = [...headerLines, ...bodyLines, ''].join('\n');
  await writeFile(outputPath, fileText);

  console.log(`Generated: ${outputPath}`);
}
94
+
/**
 * Produce the standard outputs: the links-only file (default `llms.txt`)
 * and the full-content file (default `llms-full.txt`).
 *
 * Returns without doing anything when neither output is enabled. Otherwise
 * the documents are processed once and each enabled file is written in turn,
 * links-only first.
 * @param context - Plugin context
 * @param allDocFiles - Array of all document files
 */
export async function generateStandardLLMFiles(
  context: PluginContext,
  allDocFiles: string[]
): Promise<void> {
  const { outDir, docTitle, docDescription, options } = context;
  const {
    generateLLMsTxt,
    generateLLMsFullTxt,
    llmsTxtFilename = 'llms.txt',
    llmsFullTxtFilename = 'llms-full.txt',
    includeOrder = [],
    includeUnmatchedLast = true,
    version
  } = options;

  const anythingRequested = generateLLMsTxt || generateLLMsFullTxt;
  if (!anythingRequested) {
    return;
  }

  // No include patterns (take everything) and no extra ignore patterns.
  const processedDocs = await processFilesWithPatterns(
    context,
    allDocFiles,
    [],
    [],
    includeOrder,
    includeUnmatchedLast
  );

  console.log(`Processed ${processedDocs.length} documentation files for standard LLM files`);

  const outputs = [
    { enabled: generateLLMsTxt, filename: llmsTxtFilename, fullContent: false },
    { enabled: generateLLMsFullTxt, filename: llmsFullTxtFilename, fullContent: true },
  ];

  for (const output of outputs) {
    if (!output.enabled) {
      continue;
    }
    const targetPath = path.join(outDir, output.filename);
    await generateLLMFile(
      processedDocs,
      targetPath,
      docTitle,
      docDescription,
      output.fullContent,
      version
    );
  }
}
163
+
/**
 * Produce every user-configured custom LLM file.
 *
 * For each entry the global ignore patterns are merged with the entry's own,
 * the documents are filtered and ordered by the entry's patterns, and the
 * file is written. An entry that matches no documents only logs a warning.
 * @param context - Plugin context
 * @param allDocFiles - Array of all document files
 */
export async function generateCustomLLMFiles(
  context: PluginContext,
  allDocFiles: string[]
): Promise<void> {
  const { outDir, docTitle, docDescription, options } = context;
  const { customLLMFiles = [], ignoreFiles = [] } = options;

  if (customLLMFiles.length === 0) {
    return;
  }

  console.log(`Generating ${customLLMFiles.length} custom LLM files...`);

  for (const customFile of customLLMFiles) {
    console.log(`Processing custom file: ${customFile.filename}, version: ${customFile.version || 'undefined'}`);

    // Global ignores first, then the per-file ones.
    const combinedIgnores = customFile.ignorePatterns
      ? [...ignoreFiles, ...customFile.ignorePatterns]
      : [...ignoreFiles];

    const customDocs = await processFilesWithPatterns(
      context,
      allDocFiles,
      customFile.includePatterns,
      combinedIgnores,
      customFile.orderPatterns || [],
      customFile.includeUnmatchedLast ?? false
    );

    if (customDocs.length === 0) {
      console.warn(`No matching documents found for custom LLM file: ${customFile.filename}`);
      continue;
    }

    // Per-file title/description win; site-level values are the fallback.
    const customTitle = customFile.title || docTitle;
    const customDescription = customFile.description || docDescription;
    const customFilePath = path.join(outDir, customFile.filename);

    await generateLLMFile(
      customDocs,
      customFilePath,
      customTitle,
      customDescription,
      customFile.fullContent,
      customFile.version
    );

    console.log(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);
  }
}
223
+
/**
 * Collect all document files from the docs directory and, when
 * `includeBlog` is enabled, from the `blog` directory.
 *
 * Collection is best-effort: a missing directory or a failure while reading
 * one is logged as a warning and that directory contributes no files. The
 * two cases are reported separately so a read error (for example a
 * permissions problem) is not mislabelled as "directory not found".
 * @param context - Plugin context
 * @returns Array of file paths (docs first, then blog)
 */
export async function collectDocFiles(context: PluginContext): Promise<string[]> {
  const { siteDir, docsDir, options } = context;
  const { ignoreFiles = [], includeBlog = false } = options;

  const sources: Array<{ label: string; dir: string }> = [
    { label: 'Docs', dir: path.join(siteDir, docsDir) },
  ];
  if (includeBlog) {
    sources.push({ label: 'Blog', dir: path.join(siteDir, 'blog') });
  }

  const allDocFiles: string[] = [];

  for (const { label, dir } of sources) {
    // Existence check only; kept apart from the read below so that each
    // failure mode gets an accurate message.
    try {
      await fs.access(dir);
    } catch {
      console.warn(`${label} directory not found: ${dir}`);
      continue;
    }

    try {
      // Ignore patterns are evaluated relative to the site directory.
      const found = await readMarkdownFiles(dir, siteDir, ignoreFiles);
      allDocFiles.push(...found);
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`Failed to read ${label.toLowerCase()} directory ${dir}: ${reason}`);
    }
  }

  return allDocFiles;
}