docusaurus-plugin-llms 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/utils.js CHANGED
@@ -35,17 +35,22 @@ var __importStar = (this && this.__importStar) || (function () {
35
35
  return result;
36
36
  };
37
37
  })();
38
// Interop helper for default imports: reuses an existing this.__importDefault
// when one is defined; otherwise returns the module unchanged if it is flagged
// with __esModule, or wraps it as { default: mod } so that `.default` always
// resolves. Used below to wrap require("gray-matter").
+ var __importDefault = (this && this.__importDefault) || function (mod) {
39
+ return (mod && mod.__esModule) ? mod : { "default": mod };
40
+ };
38
41
  Object.defineProperty(exports, "__esModule", { value: true });
39
42
  exports.writeFile = writeFile;
40
43
  exports.readFile = readFile;
41
44
  exports.shouldIgnoreFile = shouldIgnoreFile;
42
45
  exports.readMarkdownFiles = readMarkdownFiles;
43
46
  exports.extractTitle = extractTitle;
47
+ exports.resolvePartialImports = resolvePartialImports;
44
48
  exports.cleanMarkdownContent = cleanMarkdownContent;
45
49
  exports.applyPathTransformations = applyPathTransformations;
46
50
  const fs = __importStar(require("fs/promises"));
47
51
  const path = __importStar(require("path"));
48
52
  const minimatch_1 = require("minimatch");
53
+ const gray_matter_1 = __importDefault(require("gray-matter"));
49
54
  /**
50
55
  * Write content to a file
51
56
  * @param filePath - Path to write the file to
@@ -96,7 +101,10 @@ async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
96
101
  files.push(...subDirFiles);
97
102
  }
98
103
  else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
99
- files.push(fullPath);
104
+ // Skip partial files (those starting with underscore)
105
+ if (!entry.name.startsWith('_')) {
106
+ files.push(fullPath);
107
+ }
100
108
  }
101
109
  }
102
110
  return files;
@@ -123,14 +131,117 @@ function extractTitle(data, content, filePath) {
123
131
  .replace(/-/g, ' ')
124
132
  .replace(/\b\w/g, c => c.toUpperCase());
125
133
  }
134
/**
 * Resolve and inline partial imports in markdown content.
 *
 * Scans `content` for default (`import Name from '...'`) or single-named
 * (`import { Name } from '...'`) imports whose path contains an underscore and
 * ends in `.md`/`.mdx`. Each referenced file is read relative to `filePath`,
 * its front matter is stripped, the import statement is removed and every JSX
 * usage of the imported component is replaced with the partial's body.
 *
 * @param content - The markdown content with import statements
 * @param filePath - The path of the file containing the imports
 * @returns Content with partials resolved. A partial that cannot be read is
 *   reported via `console.warn`, and its import and usages are left untouched.
 */
async function resolvePartialImports(content, filePath) {
    let resolved = content;
    // Pattern 1: import PartialName from './_partial.mdx'
    // Pattern 2: import { PartialName } from './_partial.mdx'
    // The path group itself requires an underscore, so no extra check is needed.
    const importRegex = /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;
    // First pass: collect component name -> import path
    const imports = new Map();
    let match;
    while ((match = importRegex.exec(content)) !== null) {
        imports.set(match[1] || match[2], match[3]);
    }
    // Partials are resolved relative to the importing file; this is loop-invariant.
    const dir = path.dirname(filePath);
    for (const [componentName, importPath] of imports) {
        try {
            const partialPath = path.resolve(dir, importPath);
            const partialContent = await readFile(partialPath);
            // Drop the partial's front matter; only its markdown body is inlined
            const { content: partialMarkdown } = (0, gray_matter_1.default)(partialContent);
            const inlined = partialMarkdown.trim();
            // Remove the import statement (componentName is \w+, so only the path needs escaping)
            const escapedPath = importPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const importLineRegex = new RegExp(`^\\s*import\\s+(?:${componentName}|{\\s*${componentName}\\s*})\\s+from\\s+['"]${escapedPath}['"];?\\s*$`, 'gm');
            resolved = resolved.replace(importLineRegex, '');
            // Replace JSX usage: <Name />, <Name></Name> or <Name>text</Name>.
            // The negative lookahead stops a component called `Note` from also
            // matching `<NoteBox />` or `<Note.Item />`.
            const jsxRegex = new RegExp(`<${componentName}(?![\\w.-])[^>]*?(?:/>|>[^<]*</${componentName}>)`, 'g');
            // A replacer function inserts the text literally; a replacement string
            // would expand `$&`, `$1`, `$$` etc. found inside the partial's content.
            resolved = resolved.replace(jsxRegex, () => inlined);
        }
        catch (error) {
            console.warn(`Failed to resolve partial import "${importPath}" in ${filePath}: ${error}`);
            // Leave the import and usage as-is if we can't resolve it
        }
    }
    return resolved;
}
126
181
  /**
127
182
  * Clean markdown content for LLM consumption
128
183
  * @param content - Raw markdown content
184
+ * @param excludeImports - Whether to exclude import statements
185
+ * @param removeDuplicateHeadings - Whether to remove redundant content that duplicates heading text
129
186
  * @returns Cleaned content
130
187
  */
131
- function cleanMarkdownContent(content) {
132
- // Remove HTML tags
133
- let cleaned = content.replace(/<[^>]*>/g, '');
188
+ function cleanMarkdownContent(content, excludeImports = false, removeDuplicateHeadings = false) {
189
+ let cleaned = content;
190
+ // Remove import statements if requested
191
+ if (excludeImports) {
192
+ // Remove ES6/React import statements
193
+ // This regex matches:
194
+ // - import ... from "...";
195
+ // - import ... from '...';
196
+ // - import { ... } from "...";
197
+ // - import * as ... from "...";
198
+ // - import "..."; (side-effect imports)
199
+ cleaned = cleaned.replace(/^\s*import\s+.*?;?\s*$/gm, '');
200
+ }
201
+ // Remove HTML tags, but preserve XML content in code blocks
202
+ // We need to be selective to avoid removing XML content from code blocks
203
+ // This regex targets common HTML tags while being more conservative about XML
204
+ cleaned = cleaned.replace(/<\/?(?:div|span|p|br|hr|img|a|strong|em|b|i|u|h[1-6]|ul|ol|li|table|tr|td|th|thead|tbody)\b[^>]*>/gi, '');
205
+ // Remove redundant content that just repeats the heading (if requested)
206
+ if (removeDuplicateHeadings) {
207
+ // Split content into lines and process line by line
208
+ const lines = cleaned.split('\n');
209
+ const processedLines = [];
210
+ let i = 0;
211
+ while (i < lines.length) {
212
+ const currentLine = lines[i];
213
+ // Check if current line is a heading (accounting for leading whitespace)
214
+ const headingMatch = currentLine.match(/^\s*(#+)\s+(.+)$/);
215
+ if (headingMatch) {
216
+ const headingLevel = headingMatch[1];
217
+ const headingText = headingMatch[2].trim();
218
+ processedLines.push(currentLine);
219
+ i++;
220
+ // Look ahead for potential redundant content
221
+ // Skip empty lines
222
+ while (i < lines.length && lines[i].trim() === '') {
223
+ processedLines.push(lines[i]);
224
+ i++;
225
+ }
226
+ // Check if the next non-empty line just repeats the heading text
227
+ // but is NOT itself a heading (to avoid removing valid headings of different levels)
228
+ if (i < lines.length) {
229
+ const nextLine = lines[i].trim();
230
+ const nextLineIsHeading = /^\s*#+\s+/.test(nextLine);
231
+ // Only remove if it exactly matches the heading text AND is not a heading itself
232
+ if (nextLine === headingText && !nextLineIsHeading) {
233
+ // Skip this redundant line
234
+ i++;
235
+ }
236
+ }
237
+ }
238
+ else {
239
+ processedLines.push(currentLine);
240
+ i++;
241
+ }
242
+ }
243
+ cleaned = processedLines.join('\n');
244
+ }
134
245
  // Normalize whitespace
135
246
  cleaned = cleaned.replace(/\r\n/g, '\n')
136
247
  .replace(/\n{3,}/g, '\n\n')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docusaurus-plugin-llms",
3
- "version": "0.1.5",
3
+ "version": "0.2.0",
4
4
  "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmstxt.org standard",
5
5
  "main": "lib/index.js",
6
6
  "types": "lib/index.d.ts",
@@ -9,7 +9,7 @@
9
9
  "watch": "tsc --watch",
10
10
  "cleanup": "node cleanup.js",
11
11
  "prepublishOnly": "npm run build && npm run cleanup",
12
- "test:unit": "node tests/test-path-transforms.js",
12
+ "test:unit": "node tests/test-path-transforms.js && node tests/test-header-deduplication.js && node tests/test-import-removal.js && node tests/test-partials.js && node tests/test-root-content.js",
13
13
  "test:integration": "node tests/test-path-transformation.js",
14
14
  "test": "npm run build && npm run test:unit && npm run test:integration"
15
15
  },
package/src/generator.ts CHANGED
@@ -36,6 +36,7 @@ function cleanDescriptionForToc(description: string): string {
36
36
  * @param fileDescription - Description for the file
37
37
  * @param includeFullContent - Whether to include full content or just links
38
38
  * @param version - Version of the file
39
+ * @param customRootContent - Optional custom content to include at the root level
39
40
  */
40
41
  export async function generateLLMFile(
41
42
  docs: DocInfo[],
@@ -43,24 +44,78 @@ export async function generateLLMFile(
43
44
  fileTitle: string,
44
45
  fileDescription: string,
45
46
  includeFullContent: boolean,
46
- version?: string
47
+ version?: string,
48
+ customRootContent?: string
47
49
  ): Promise<void> {
48
50
  console.log(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
49
51
  const versionInfo = version ? `\n\nVersion: ${version}` : '';
50
52
 
51
53
  if (includeFullContent) {
52
- // Generate full content file
54
+ // Generate full content file with header deduplication
55
+ const usedHeaders = new Set<string>();
53
56
  const fullContentSections = docs.map(doc => {
54
- return `## ${doc.title}
57
+ // Check if content already starts with the same heading to avoid duplication
58
+ const trimmedContent = doc.content.trim();
59
+ const firstLine = trimmedContent.split('\n')[0];
60
+
61
+ // Check if the first line is a heading that matches our title
62
+ const headingMatch = firstLine.match(/^#+\s+(.+)$/);
63
+ const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
64
+
65
+ // Determine the header text to use (original title or make it unique)
66
+ let headerText = doc.title;
67
+ let uniqueHeader = headerText;
68
+ let counter = 1;
69
+
70
+ // If this header has been used before, make it unique by adding a suffix
71
+ while (usedHeaders.has(uniqueHeader.toLowerCase())) {
72
+ counter++;
73
+ // Try to make it more descriptive by adding the file path info if available
74
+ if (doc.path && counter === 2) {
75
+ const pathParts = doc.path.split('/');
76
+ const folderName = pathParts.length > 1 ? pathParts[pathParts.length - 2] : '';
77
+ if (folderName) {
78
+ uniqueHeader = `${headerText} (${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
79
+ } else {
80
+ uniqueHeader = `${headerText} (${counter})`;
81
+ }
82
+ } else {
83
+ uniqueHeader = `${headerText} (${counter})`;
84
+ }
85
+ }
86
+
87
+ usedHeaders.add(uniqueHeader.toLowerCase());
88
+
89
+ if (firstHeadingText === doc.title) {
90
+ // Content already has the same heading, replace it with our unique header if needed
91
+ if (uniqueHeader !== doc.title) {
92
+ const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
93
+ return `## ${uniqueHeader}
94
+
95
+ ${restOfContent}`;
96
+ } else {
97
+ // Replace the existing H1 with H2 to comply with llmstxt.org standard
98
+ const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
99
+ return `## ${uniqueHeader}
100
+
101
+ ${restOfContent}`;
102
+ }
103
+ } else {
104
+ // Content doesn't have the same heading, add our unique H2 header
105
+ return `## ${uniqueHeader}
55
106
 
56
107
  ${doc.content}`;
108
+ }
57
109
  });
58
110
 
111
+ // Use custom root content or default message
112
+ const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
113
+
59
114
  const llmFileContent = `# ${fileTitle}
60
115
 
61
116
  > ${fileDescription}${versionInfo}
62
117
 
63
- This file contains all documentation content in a single document following the llmstxt.org standard.
118
+ ${rootContent}
64
119
 
65
120
  ${fullContentSections.join('\n\n---\n\n')}
66
121
  `;
@@ -75,11 +130,14 @@ ${fullContentSections.join('\n\n---\n\n')}
75
130
  return `- [${doc.title}](${doc.url})${cleanedDescription ? `: ${cleanedDescription}` : ''}`;
76
131
  });
77
132
 
133
+ // Use custom root content or default message
134
+ const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
135
+
78
136
  const llmFileContent = `# ${fileTitle}
79
137
 
80
138
  > ${fileDescription}${versionInfo}
81
139
 
82
- This file contains links to documentation sections following the llmstxt.org standard.
140
+ ${rootContent}
83
141
 
84
142
  ## Table of Contents
85
143
 
@@ -92,6 +150,75 @@ ${tocItems.join('\n')}
92
150
  console.log(`Generated: ${outputPath}`);
93
151
  }
94
152
 
153
/**
 * Generate individual markdown files for each document.
 *
 * For every doc a filename is derived from its title (falling back to its
 * path, then to `untitled`), made unique within this call by appending a
 * numeric suffix, and written to `outputDir` as `# title`, an optional
 * `> description` blockquote and the doc content.
 *
 * @param docs - Processed document information
 * @param outputDir - Directory to write the markdown files
 * @param siteUrl - Base site URL; trailing slashes are ignored
 * @returns Copies of the docs whose `url` and `path` point to the generated markdown files
 */
export async function generateIndividualMarkdownFiles(
  docs: DocInfo[],
  outputDir: string,
  siteUrl: string
): Promise<DocInfo[]> {
  const updatedDocs: DocInfo[] = [];

  // Filenames already handed out in this call, to keep them unique
  const usedFilenames = new Set<string>();

  // Strip trailing slashes so `${baseUrl}/${filename}` never contains `//`
  const baseUrl = siteUrl.replace(/\/+$/, '');

  for (const doc of docs) {
    // Generate a filename from the document title
    let baseFilename = doc.title
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');

    // Fallback to URL path if the title yields an empty filename
    // (e.g. a title made only of non-ASCII characters)
    if (!baseFilename) {
      baseFilename = doc.path
        .replace(/^\/+|\/+$/g, '') // Remove leading/trailing slashes
        .replace(/\//g, '-')
        .replace(/[^a-z0-9-]/gi, '-')
        .toLowerCase();
    }

    // Last resort: never write a file that is literally named ".md"
    if (!baseFilename) {
      baseFilename = 'untitled';
    }

    // Ensure filename uniqueness: name.md, name-1.md, name-2.md, ...
    let filename = `${baseFilename}.md`;
    let counter = 1;
    while (usedFilenames.has(filename)) {
      filename = `${baseFilename}-${counter}.md`;
      counter++;
    }
    usedFilenames.add(filename);

    // Create markdown content following llmstxt.org standard; the blockquote
    // is omitted when there is no description, to avoid a dangling "> " line
    const sections = [`# ${doc.title}`];
    if (doc.description && doc.description.trim()) {
      sections.push(`> ${doc.description}`);
    }
    sections.push(doc.content);
    const markdownContent = `${sections.join('\n\n')}\n`;

    // Write the markdown file
    const markdownPath = path.join(outputDir, filename);
    await writeFile(markdownPath, markdownContent);

    // Point the doc at the generated markdown file instead of its original page
    updatedDocs.push({
      ...doc,
      url: `${baseUrl}/${filename}`,
      path: `/${filename}`
    });

    console.log(`Generated markdown file: ${filename}`);
  }

  return updatedDocs;
}
221
+
95
222
  /**
96
223
  * Generate standard LLM files (llms.txt and llms-full.txt)
97
224
  * @param context - Plugin context
@@ -103,6 +230,7 @@ export async function generateStandardLLMFiles(
103
230
  ): Promise<void> {
104
231
  const {
105
232
  outDir,
233
+ siteUrl,
106
234
  docTitle,
107
235
  docDescription,
108
236
  options
@@ -115,7 +243,10 @@ export async function generateStandardLLMFiles(
115
243
  llmsFullTxtFilename = 'llms-full.txt',
116
244
  includeOrder = [],
117
245
  includeUnmatchedLast = true,
118
- version
246
+ version,
247
+ generateMarkdownFiles = false,
248
+ rootContent,
249
+ fullRootContent
119
250
  } = options;
120
251
 
121
252
  if (!generateLLMsTxt && !generateLLMsFullTxt) {
@@ -123,7 +254,7 @@ export async function generateStandardLLMFiles(
123
254
  }
124
255
 
125
256
  // Process files for the standard outputs
126
- const processedDocs = await processFilesWithPatterns(
257
+ let processedDocs = await processFilesWithPatterns(
127
258
  context,
128
259
  allDocFiles,
129
260
  [], // No specific include patterns - include all
@@ -134,6 +265,16 @@ export async function generateStandardLLMFiles(
134
265
 
135
266
  console.log(`Processed ${processedDocs.length} documentation files for standard LLM files`);
136
267
 
268
+ // Generate individual markdown files if requested
269
+ if (generateMarkdownFiles && processedDocs.length > 0) {
270
+ console.log('Generating individual markdown files...');
271
+ processedDocs = await generateIndividualMarkdownFiles(
272
+ processedDocs,
273
+ outDir,
274
+ siteUrl
275
+ );
276
+ }
277
+
137
278
  // Generate llms.txt
138
279
  if (generateLLMsTxt) {
139
280
  const llmsTxtPath = path.join(outDir, llmsTxtFilename);
@@ -143,7 +284,8 @@ export async function generateStandardLLMFiles(
143
284
  docTitle,
144
285
  docDescription,
145
286
  false, // links only
146
- version
287
+ version,
288
+ rootContent
147
289
  );
148
290
  }
149
291
 
@@ -156,7 +298,8 @@ export async function generateStandardLLMFiles(
156
298
  docTitle,
157
299
  docDescription,
158
300
  true, // full content
159
- version
301
+ version,
302
+ fullRootContent
160
303
  );
161
304
  }
162
305
  }
@@ -170,8 +313,8 @@ export async function generateCustomLLMFiles(
170
313
  context: PluginContext,
171
314
  allDocFiles: string[]
172
315
  ): Promise<void> {
173
- const { outDir, docTitle, docDescription, options } = context;
174
- const { customLLMFiles = [], ignoreFiles = [] } = options;
316
+ const { outDir, siteUrl, docTitle, docDescription, options } = context;
317
+ const { customLLMFiles = [], ignoreFiles = [], generateMarkdownFiles = false } = options;
175
318
 
176
319
  if (customLLMFiles.length === 0) {
177
320
  return;
@@ -189,7 +332,7 @@ export async function generateCustomLLMFiles(
189
332
  }
190
333
 
191
334
  // Process files according to the custom configuration
192
- const customDocs = await processFilesWithPatterns(
335
+ let customDocs = await processFilesWithPatterns(
193
336
  context,
194
337
  allDocFiles,
195
338
  customFile.includePatterns,
@@ -199,6 +342,16 @@ export async function generateCustomLLMFiles(
199
342
  );
200
343
 
201
344
  if (customDocs.length > 0) {
345
+ // Generate individual markdown files if requested
346
+ if (generateMarkdownFiles) {
347
+ console.log(`Generating individual markdown files for custom file: ${customFile.filename}...`);
348
+ customDocs = await generateIndividualMarkdownFiles(
349
+ customDocs,
350
+ outDir,
351
+ siteUrl
352
+ );
353
+ }
354
+
202
355
  // Use custom title/description or fall back to defaults
203
356
  const customTitle = customFile.title || docTitle;
204
357
  const customDescription = customFile.description || docDescription;
@@ -211,7 +364,8 @@ export async function generateCustomLLMFiles(
211
364
  customTitle,
212
365
  customDescription,
213
366
  customFile.fullContent,
214
- customFile.version
367
+ customFile.version,
368
+ customFile.rootContent
215
369
  );
216
370
 
217
371
  console.log(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);
package/src/index.ts CHANGED
@@ -9,7 +9,7 @@
9
9
  */
10
10
 
11
11
  import * as path from 'path';
12
- import type { LoadContext, Plugin } from '@docusaurus/types';
12
+ import type { LoadContext, Plugin, Props, RouteConfig } from '@docusaurus/types';
13
13
  import { PluginOptions, PluginContext } from './types';
14
14
  import { collectDocFiles, generateStandardLLMFiles, generateCustomLLMFiles } from './generator';
15
15
 
@@ -40,6 +40,11 @@ export default function docusaurusPluginLLMs(
40
40
  includeOrder = [],
41
41
  includeUnmatchedLast = true,
42
42
  customLLMFiles = [],
43
+ excludeImports = false,
44
+ removeDuplicateHeadings = false,
45
+ generateMarkdownFiles = false,
46
+ rootContent,
47
+ fullRootContent,
43
48
  } = options;
44
49
 
45
50
  const {
@@ -77,6 +82,11 @@ export default function docusaurusPluginLLMs(
77
82
  includeOrder,
78
83
  includeUnmatchedLast,
79
84
  customLLMFiles,
85
+ excludeImports,
86
+ removeDuplicateHeadings,
87
+ generateMarkdownFiles,
88
+ rootContent,
89
+ fullRootContent,
80
90
  }
81
91
  };
82
92
 
@@ -86,12 +96,46 @@ export default function docusaurusPluginLLMs(
86
96
  /**
87
97
  * Generates LLM-friendly documentation files after the build is complete
88
98
  */
89
- async postBuild(): Promise<void> {
99
+ async postBuild(props?: Props & { content: unknown }): Promise<void> {
90
100
  console.log('Generating LLM-friendly documentation...');
91
101
 
92
102
  try {
103
+ let enhancedContext = pluginContext;
104
+
105
+ // If props are provided (Docusaurus 3.x+), use the resolved routes
106
+ if (props?.routes) {
107
+ // Create a map of file paths to their resolved URLs
108
+ const routeMap = new Map<string, string>();
109
+
110
+ // Helper function to recursively process routes
111
+ const processRoutes = (routes: RouteConfig[]) => {
112
+ routes.forEach(route => {
113
+ if (route.path) {
114
+ // Store the actual resolved path
115
+ routeMap.set(route.path, route.path);
116
+ }
117
+
118
+ // Process nested routes recursively
119
+ if (route.routes) {
120
+ processRoutes(route.routes);
121
+ }
122
+ });
123
+ };
124
+
125
+ // Process all routes (cast to RouteConfig[] for recursive processing)
126
+ processRoutes(props.routes as RouteConfig[]);
127
+
128
+ // Pass the resolved routes to the plugin context
129
+ enhancedContext = {
130
+ ...pluginContext,
131
+ routesPaths: props.routesPaths,
132
+ routes: props.routes,
133
+ routeMap,
134
+ };
135
+ }
136
+
93
137
  // Collect all document files
94
- const allDocFiles = await collectDocFiles(pluginContext);
138
+ const allDocFiles = await collectDocFiles(enhancedContext);
95
139
 
96
140
  // Skip further processing if no documents were found
97
141
  if (allDocFiles.length === 0) {
@@ -100,10 +144,10 @@ export default function docusaurusPluginLLMs(
100
144
  }
101
145
 
102
146
  // Process standard LLM files (llms.txt and llms-full.txt)
103
- await generateStandardLLMFiles(pluginContext, allDocFiles);
147
+ await generateStandardLLMFiles(enhancedContext, allDocFiles);
104
148
 
105
149
  // Process custom LLM files
106
- await generateCustomLLMFiles(pluginContext, allDocFiles);
150
+ await generateCustomLLMFiles(enhancedContext, allDocFiles);
107
151
 
108
152
  // Output overall statistics
109
153
  console.log(`Stats: ${allDocFiles.length} total available documents processed`);