docusaurus-plugin-llms 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -137,6 +137,7 @@ module.exports = {
137
137
  | `version` | string | `undefined` | Global version to include in all generated files |
138
138
  | `customLLMFiles` | array | `[]` | Array of custom LLM file configurations |
139
139
  | `generateMarkdownFiles` | boolean | `false` | Generate individual markdown files and link to them from llms.txt |
140
+ | `keepFrontMatter` | string[] | `[]` | Preserve selected front matter items when generating individual markdown files |
140
141
  | `rootContent` | string | (see below) | Custom content to include at the root level of llms.txt |
141
142
  | `fullRootContent` | string | (see below) | Custom content to include at the root level of llms-full.txt |
142
143
 
@@ -18,9 +18,11 @@ export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fil
18
18
  * @param docs - Processed document information
19
19
  * @param outputDir - Directory to write the markdown files
20
20
  * @param siteUrl - Base site URL
21
+ * @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
22
+ * @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
21
23
  * @returns Updated docs with new URLs pointing to generated markdown files
22
24
  */
23
- export declare function generateIndividualMarkdownFiles(docs: DocInfo[], outputDir: string, siteUrl: string): Promise<DocInfo[]>;
25
+ export declare function generateIndividualMarkdownFiles(docs: DocInfo[], outputDir: string, siteUrl: string, docsDir?: string, keepFrontMatter?: string[]): Promise<DocInfo[]>;
24
26
  /**
25
27
  * Generate standard LLM files (llms.txt and llms-full.txt)
26
28
  * @param context - Plugin context
package/lib/generator.js CHANGED
@@ -85,29 +85,18 @@ async function generateLLMFile(docs, outputPath, fileTitle, fileDescription, inc
85
85
  // Check if the first line is a heading that matches our title
86
86
  const headingMatch = firstLine.match(/^#+\s+(.+)$/);
87
87
  const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
88
- // Determine the header text to use (original title or make it unique)
89
- let headerText = doc.title;
90
- let uniqueHeader = headerText;
91
- let counter = 1;
92
- // If this header has been used before, make it unique by adding a suffix
93
- while (usedHeaders.has(uniqueHeader.toLowerCase())) {
94
- counter++;
88
+ // Generate unique header using the utility function
89
+ const uniqueHeader = (0, utils_1.ensureUniqueIdentifier)(doc.title, usedHeaders, (counter, base) => {
95
90
  // Try to make it more descriptive by adding the file path info if available
96
91
  if (doc.path && counter === 2) {
97
92
  const pathParts = doc.path.split('/');
98
93
  const folderName = pathParts.length > 1 ? pathParts[pathParts.length - 2] : '';
99
94
  if (folderName) {
100
- uniqueHeader = `${headerText} (${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
95
+ return `(${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
101
96
  }
102
- else {
103
- uniqueHeader = `${headerText} (${counter})`;
104
- }
105
- }
106
- else {
107
- uniqueHeader = `${headerText} (${counter})`;
108
97
  }
109
- }
110
- usedHeaders.add(uniqueHeader.toLowerCase());
98
+ return `(${counter})`;
99
+ });
111
100
  if (firstHeadingText === doc.title) {
112
101
  // Content already has the same heading, replace it with our unique header if needed
113
102
  if (uniqueHeader !== doc.title) {
@@ -133,14 +122,8 @@ ${doc.content}`;
133
122
  });
134
123
  // Use custom root content or default message
135
124
  const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
136
- const llmFileContent = `# ${fileTitle}
137
-
138
- > ${fileDescription}${versionInfo}
139
-
140
- ${rootContent}
141
-
142
- ${fullContentSections.join('\n\n---\n\n')}
143
- `;
125
+ const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n${fullContentSections.join('\n\n---\n\n')}`, true // include metadata (description)
126
+ );
144
127
  await (0, utils_1.writeFile)(outputPath, llmFileContent);
145
128
  }
146
129
  else {
@@ -152,16 +135,8 @@ ${fullContentSections.join('\n\n---\n\n')}
152
135
  });
153
136
  // Use custom root content or default message
154
137
  const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
155
- const llmFileContent = `# ${fileTitle}
156
-
157
- > ${fileDescription}${versionInfo}
158
-
159
- ${rootContent}
160
-
161
- ## Table of Contents
162
-
163
- ${tocItems.join('\n')}
164
- `;
138
+ const llmFileContent = (0, utils_1.createMarkdownContent)(fileTitle, `${fileDescription}${versionInfo}`, `${rootContent}\n\n## Table of Contents\n\n${tocItems.join('\n')}`, true // include metadata (description)
139
+ );
165
140
  await (0, utils_1.writeFile)(outputPath, llmFileContent);
166
141
  }
167
142
  console.log(`Generated: ${outputPath}`);
@@ -171,52 +146,65 @@ ${tocItems.join('\n')}
171
146
  * @param docs - Processed document information
172
147
  * @param outputDir - Directory to write the markdown files
173
148
  * @param siteUrl - Base site URL
149
+ * @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
150
+ * @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
174
151
  * @returns Updated docs with new URLs pointing to generated markdown files
175
152
  */
176
- async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl) {
153
+ async function generateIndividualMarkdownFiles(docs, outputDir, siteUrl, docsDir = 'docs', keepFrontMatter = []) {
177
154
  const updatedDocs = [];
178
- // Create a map to ensure unique filenames
179
- const usedFilenames = new Set();
155
+ const usedPaths = new Set();
180
156
  for (const doc of docs) {
181
- // Generate a filename from the document title or URL path
182
- let baseFilename = doc.title
183
- .toLowerCase()
184
- .replace(/[^a-z0-9]+/g, '-')
185
- .replace(/^-+|-+$/g, '');
186
- // Fallback to URL path if title generates empty filename
187
- if (!baseFilename) {
188
- baseFilename = doc.path
189
- .replace(/^\/+|\/+$/g, '') // Remove leading/trailing slashes
190
- .replace(/\//g, '-')
191
- .replace(/[^a-z0-9-]/gi, '-')
192
- .toLowerCase();
157
+ // Use the original path structure, cleaning it up for file system use
158
+ let relativePath = doc.path
159
+ .replace(/^\/+/, '') // Remove leading slashes
160
+ .replace(/\.mdx?$/, '.md'); // Ensure .md extension
161
+ relativePath = relativePath
162
+ .replace(new RegExp(`^${docsDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/`), ''); // Remove configured docs dir prefix
163
+ // If path is empty or invalid, create a fallback path
164
+ if (!relativePath || relativePath === '.md') {
165
+ const sanitizedTitle = (0, utils_1.sanitizeForFilename)(doc.title, 'untitled');
166
+ relativePath = `${sanitizedTitle}.md`;
193
167
  }
194
- // Ensure filename uniqueness
195
- let filename = `${baseFilename}.md`;
168
+ // Ensure path uniqueness
169
+ let uniquePath = relativePath;
196
170
  let counter = 1;
197
- while (usedFilenames.has(filename)) {
198
- filename = `${baseFilename}-${counter}.md`;
171
+ while (usedPaths.has(uniquePath.toLowerCase())) {
199
172
  counter++;
173
+ const pathParts = relativePath.split('.');
174
+ const extension = pathParts.pop() || 'md';
175
+ const basePath = pathParts.join('.');
176
+ uniquePath = `${basePath}-${counter}.${extension}`;
200
177
  }
201
- usedFilenames.add(filename);
202
- // Create markdown content following llmstxt.org standard
203
- const markdownContent = `# ${doc.title}
204
-
205
- > ${doc.description}
206
-
207
- ${doc.content}
208
- `;
178
+ usedPaths.add(uniquePath.toLowerCase());
179
+ // Create the full file path and ensure directory exists
180
+ const fullPath = path.join(outputDir, uniquePath);
181
+ const directory = path.dirname(fullPath);
182
+ // Create directory structure if it doesn't exist
183
+ await fs.mkdir(directory, { recursive: true });
184
+ // Extract preserved frontmatter if specified
185
+ let preservedFrontMatter = {};
186
+ if (keepFrontMatter.length > 0 && doc.frontMatter) {
187
+ for (const key of keepFrontMatter) {
188
+ if (key in doc.frontMatter) {
189
+ preservedFrontMatter[key] = doc.frontMatter[key];
190
+ }
191
+ }
192
+ }
193
+ // Create markdown content using the utility function
194
+ const markdownContent = (0, utils_1.createMarkdownContent)(doc.title, doc.description, doc.content, true, // includeMetadata
195
+ Object.keys(preservedFrontMatter).length > 0 ? preservedFrontMatter : undefined);
209
196
  // Write the markdown file
210
- const markdownPath = path.join(outputDir, filename);
211
- await (0, utils_1.writeFile)(markdownPath, markdownContent);
197
+ await (0, utils_1.writeFile)(fullPath, markdownContent);
212
198
  // Create updated DocInfo with new URL pointing to the generated markdown file
213
- const newUrl = `${siteUrl}/${filename}`;
199
+ // Convert file path to URL path (use forward slashes)
200
+ const urlPath = uniquePath.replace(/\\/g, '/');
201
+ const newUrl = `${siteUrl}/${urlPath}`;
214
202
  updatedDocs.push({
215
203
  ...doc,
216
204
  url: newUrl,
217
- path: `/${filename}` // Update path to the new markdown file
205
+ path: `/${urlPath}` // Update path to the new markdown file
218
206
  });
219
- console.log(`Generated markdown file: ${filename}`);
207
+ console.log(`Generated markdown file: ${uniquePath}`);
220
208
  }
221
209
  return updatedDocs;
222
210
  }
@@ -239,7 +227,7 @@ async function generateStandardLLMFiles(context, allDocFiles) {
239
227
  // Generate individual markdown files if requested
240
228
  if (generateMarkdownFiles && processedDocs.length > 0) {
241
229
  console.log('Generating individual markdown files...');
242
- processedDocs = await generateIndividualMarkdownFiles(processedDocs, outDir, siteUrl);
230
+ processedDocs = await generateIndividualMarkdownFiles(processedDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || []);
243
231
  }
244
232
  // Generate llms.txt
245
233
  if (generateLLMsTxt) {
@@ -279,7 +267,7 @@ async function generateCustomLLMFiles(context, allDocFiles) {
279
267
  // Generate individual markdown files if requested
280
268
  if (generateMarkdownFiles) {
281
269
  console.log(`Generating individual markdown files for custom file: ${customFile.filename}...`);
282
- customDocs = await generateIndividualMarkdownFiles(customDocs, outDir, siteUrl);
270
+ customDocs = await generateIndividualMarkdownFiles(customDocs, outDir, siteUrl, context.docsDir, context.options.keepFrontMatter || []);
283
271
  }
284
272
  // Use custom title/description or fall back to defaults
285
273
  const customTitle = customFile.title || docTitle;
package/lib/index.js CHANGED
@@ -21,7 +21,7 @@ const generator_1 = require("./generator");
21
21
  */
22
22
  function docusaurusPluginLLMs(context, options = {}) {
23
23
  // Set default options
24
- const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, includeOrder = [], includeUnmatchedLast = true, customLLMFiles = [], excludeImports = false, removeDuplicateHeadings = false, generateMarkdownFiles = false, rootContent, fullRootContent, } = options;
24
+ const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, includeOrder = [], includeUnmatchedLast = true, customLLMFiles = [], excludeImports = false, removeDuplicateHeadings = false, generateMarkdownFiles = false, keepFrontMatter = [], rootContent, fullRootContent, } = options;
25
25
  const { siteDir, siteConfig, outDir, } = context;
26
26
  // Build the site URL with proper trailing slash
27
27
  const siteUrl = siteConfig.url + (siteConfig.baseUrl.endsWith('/')
@@ -52,6 +52,7 @@ function docusaurusPluginLLMs(context, options = {}) {
52
52
  excludeImports,
53
53
  removeDuplicateHeadings,
54
54
  generateMarkdownFiles,
55
+ keepFrontMatter,
55
56
  rootContent,
56
57
  fullRootContent,
57
58
  }
package/lib/processor.js CHANGED
@@ -76,10 +76,18 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
76
76
  // Convert .md extension to appropriate path
77
77
  const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
78
78
  // Handle index files specially
79
- const linkPath = linkPathBase.endsWith('index')
79
+ let linkPath = linkPathBase.endsWith('index')
80
80
  ? linkPathBase.replace(/\/index$/, '')
81
81
  : linkPathBase;
82
- // Apply path transformations to the link path
82
+ // linkPath might include the pathPrefix (e.g., "docs/api/core")
83
+ // We need to remove the pathPrefix before applying transformations, then add it back later
84
+ if (pathPrefix && linkPath.startsWith(`${pathPrefix}/`)) {
85
+ linkPath = linkPath.substring(`${pathPrefix}/`.length);
86
+ }
87
+ else if (pathPrefix && linkPath === pathPrefix) {
88
+ linkPath = '';
89
+ }
90
+ // Apply path transformations to the clean link path (without pathPrefix)
83
91
  const transformedLinkPath = (0, utils_1.applyPathTransformations)(linkPath, pathTransformation);
84
92
  // Also apply path transformations to the pathPrefix if it's not empty
85
93
  // This allows removing 'docs' from the path when specified in ignorePaths
@@ -153,6 +161,7 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
153
161
  url: fullUrl,
154
162
  content: cleanedContent,
155
163
  description: description || '',
164
+ frontMatter: data,
156
165
  };
157
166
  }
158
167
  /**
@@ -212,7 +221,8 @@ async function processFilesWithPatterns(context, allFiles, includePatterns = [],
212
221
  try {
213
222
  // Determine if this is a blog or docs file
214
223
  const isBlogFile = filePath.includes(path.join(siteDir, 'blog'));
215
- const baseDir = isBlogFile ? path.join(siteDir, 'blog') : path.join(siteDir, docsDir);
224
+ // Use siteDir as baseDir to preserve full directory structure (docs/path/file.md instead of just path/file.md)
225
+ const baseDir = siteDir;
216
226
  const pathPrefix = isBlogFile ? 'blog' : 'docs';
217
227
  // Try to find the resolved URL for this file from the route map
218
228
  let resolvedUrl;
package/lib/types.d.ts CHANGED
@@ -11,6 +11,7 @@ export interface DocInfo {
11
11
  url: string;
12
12
  content: string;
13
13
  description: string;
14
+ frontMatter?: Record<string, any>;
14
15
  }
15
16
  /**
16
17
  * Interface for custom LLM file configuration
@@ -80,6 +81,8 @@ export interface PluginOptions {
80
81
  removeDuplicateHeadings?: boolean;
81
82
  /** Whether to generate individual markdown files and link to them from llms.txt instead of original docs (default: false) */
82
83
  generateMarkdownFiles?: boolean;
84
+ /** Array of frontmatter keys to preserve in generated individual markdown files (only used when generateMarkdownFiles is true) */
85
+ keepFrontMatter?: string[];
83
86
  /** Custom content to include at the root level of llms.txt (after title/description, before TOC) */
84
87
  rootContent?: string;
85
88
  /** Custom content to include at the root level of llms-full.txt (after title/description, before content sections) */
package/lib/utils.d.ts CHANGED
@@ -60,3 +60,28 @@ export declare function cleanMarkdownContent(content: string, excludeImports?: b
60
60
  * @returns Transformed URL path
61
61
  */
62
62
  export declare function applyPathTransformations(urlPath: string, pathTransformation?: PluginOptions['pathTransformation']): string;
63
+ /**
64
+ * Sanitize a string to create a safe filename
65
+ * @param input - Input string (typically a title)
66
+ * @param fallback - Fallback string if input becomes empty after sanitization
67
+ * @returns Sanitized filename (without extension)
68
+ */
69
+ export declare function sanitizeForFilename(input: string, fallback?: string): string;
70
+ /**
71
+ * Ensure a unique identifier from a set of used identifiers
72
+ * @param baseIdentifier - Base identifier to make unique
73
+ * @param usedIdentifiers - Set of already used identifiers
74
+ * @param suffix - Suffix pattern (default: number in parentheses)
75
+ * @returns Unique identifier
76
+ */
77
+ export declare function ensureUniqueIdentifier(baseIdentifier: string, usedIdentifiers: Set<string>, suffix?: (counter: number, base: string) => string): string;
78
+ /**
79
+ * Create standardized markdown content template
80
+ * @param title - Document title
81
+ * @param description - Document description
82
+ * @param content - Document content
83
+ * @param includeMetadata - Whether to include description metadata
84
+ * @param frontMatter - Optional frontmatter to include at the top
85
+ * @returns Formatted markdown content
86
+ */
87
+ export declare function createMarkdownContent(title: string, description?: string, content?: string, includeMetadata?: boolean, frontMatter?: Record<string, any>): string;
package/lib/utils.js CHANGED
@@ -47,10 +47,14 @@ exports.extractTitle = extractTitle;
47
47
  exports.resolvePartialImports = resolvePartialImports;
48
48
  exports.cleanMarkdownContent = cleanMarkdownContent;
49
49
  exports.applyPathTransformations = applyPathTransformations;
50
+ exports.sanitizeForFilename = sanitizeForFilename;
51
+ exports.ensureUniqueIdentifier = ensureUniqueIdentifier;
52
+ exports.createMarkdownContent = createMarkdownContent;
50
53
  const fs = __importStar(require("fs/promises"));
51
54
  const path = __importStar(require("path"));
52
55
  const minimatch_1 = require("minimatch");
53
56
  const gray_matter_1 = __importDefault(require("gray-matter"));
57
+ const YAML = __importStar(require("yaml"));
54
58
  /**
55
59
  * Write content to a file
56
60
  * @param filePath - Path to write the file to
@@ -129,7 +133,7 @@ function extractTitle(data, content, filePath) {
129
133
  // Finally use filename
130
134
  return path.basename(filePath, path.extname(filePath))
131
135
  .replace(/-/g, ' ')
132
- .replace(/\b\w/g, c => c.toUpperCase());
136
+ .replace(/\b\w/g, (c) => c.toUpperCase());
133
137
  }
134
138
  /**
135
139
  * Resolve and inline partial imports in markdown content
@@ -286,3 +290,57 @@ function applyPathTransformations(urlPath, pathTransformation) {
286
290
  }
287
291
  return transformedPath;
288
292
  }
293
+ /**
294
+ * Sanitize a string to create a safe filename
295
+ * @param input - Input string (typically a title)
296
+ * @param fallback - Fallback string if input becomes empty after sanitization
297
+ * @returns Sanitized filename (without extension)
298
+ */
299
+ function sanitizeForFilename(input, fallback = 'untitled') {
300
+ if (!input)
301
+ return fallback;
302
+ const sanitized = input
303
+ .toLowerCase()
304
+ .replace(/[^a-z0-9]+/g, '-')
305
+ .replace(/^-+|-+$/g, '');
306
+ return sanitized || fallback;
307
+ }
308
+ /**
309
+ * Ensure a unique identifier from a set of used identifiers
310
+ * @param baseIdentifier - Base identifier to make unique
311
+ * @param usedIdentifiers - Set of already used identifiers
312
+ * @param suffix - Suffix pattern (default: number in parentheses)
313
+ * @returns Unique identifier
314
+ */
315
+ function ensureUniqueIdentifier(baseIdentifier, usedIdentifiers, suffix = (counter) => `(${counter})`) {
316
+ let uniqueIdentifier = baseIdentifier;
317
+ let counter = 1;
318
+ while (usedIdentifiers.has(uniqueIdentifier.toLowerCase())) {
319
+ counter++;
320
+ uniqueIdentifier = `${baseIdentifier}${suffix(counter, baseIdentifier)}`;
321
+ }
322
+ usedIdentifiers.add(uniqueIdentifier.toLowerCase());
323
+ return uniqueIdentifier;
324
+ }
325
+ /**
326
+ * Create standardized markdown content template
327
+ * @param title - Document title
328
+ * @param description - Document description
329
+ * @param content - Document content
330
+ * @param includeMetadata - Whether to include description metadata
331
+ * @param frontMatter - Optional frontmatter to include at the top
332
+ * @returns Formatted markdown content
333
+ */
334
+ function createMarkdownContent(title, description = '', content = '', includeMetadata = true, frontMatter) {
335
+ let result = '';
336
+ // Add frontmatter if provided
337
+ if (frontMatter && Object.keys(frontMatter).length > 0) {
338
+ result += '---\n';
339
+ result += YAML.stringify(frontMatter);
340
+ result += '---\n\n';
341
+ }
342
+ const descriptionLine = includeMetadata && description ? `\n\n> ${description}\n` : '\n';
343
+ result += `# ${title}${descriptionLine}
344
+ ${content}`.trim() + '\n';
345
+ return result;
346
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docusaurus-plugin-llms",
3
- "version": "0.2.0",
3
+ "version": "0.2.2",
4
4
  "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmstxt.org standard",
5
5
  "main": "lib/index.js",
6
6
  "types": "lib/index.d.ts",
@@ -38,13 +38,15 @@
38
38
  "license": "MIT",
39
39
  "dependencies": {
40
40
  "gray-matter": "^4.0.3",
41
- "minimatch": "^9.0.3"
41
+ "minimatch": "^9.0.3",
42
+ "yaml": "^2.8.1"
42
43
  },
43
44
  "peerDependencies": {
44
45
  "@docusaurus/core": "^3.0.0"
45
46
  },
46
47
  "devDependencies": {
47
48
  "@docusaurus/types": "^3.0.0",
49
+ "@types/js-yaml": "^4.0.9",
48
50
  "@types/minimatch": "^5.1.2",
49
51
  "@types/node": "^20.6.0",
50
52
  "typescript": "^5.2.2"
package/src/generator.ts CHANGED
@@ -5,7 +5,13 @@
5
5
  import * as path from 'path';
6
6
  import * as fs from 'fs/promises';
7
7
  import { DocInfo, PluginContext, CustomLLMFile } from './types';
8
- import { writeFile, readMarkdownFiles } from './utils';
8
+ import {
9
+ writeFile,
10
+ readMarkdownFiles,
11
+ sanitizeForFilename,
12
+ ensureUniqueIdentifier,
13
+ createMarkdownContent
14
+ } from './utils';
9
15
  import { processFilesWithPatterns } from './processor';
10
16
 
11
17
  /**
@@ -62,29 +68,22 @@ export async function generateLLMFile(
62
68
  const headingMatch = firstLine.match(/^#+\s+(.+)$/);
63
69
  const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
64
70
 
65
- // Determine the header text to use (original title or make it unique)
66
- let headerText = doc.title;
67
- let uniqueHeader = headerText;
68
- let counter = 1;
69
-
70
- // If this header has been used before, make it unique by adding a suffix
71
- while (usedHeaders.has(uniqueHeader.toLowerCase())) {
72
- counter++;
73
- // Try to make it more descriptive by adding the file path info if available
74
- if (doc.path && counter === 2) {
75
- const pathParts = doc.path.split('/');
76
- const folderName = pathParts.length > 1 ? pathParts[pathParts.length - 2] : '';
77
- if (folderName) {
78
- uniqueHeader = `${headerText} (${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
79
- } else {
80
- uniqueHeader = `${headerText} (${counter})`;
71
+ // Generate unique header using the utility function
72
+ const uniqueHeader = ensureUniqueIdentifier(
73
+ doc.title,
74
+ usedHeaders,
75
+ (counter, base) => {
76
+ // Try to make it more descriptive by adding the file path info if available
77
+ if (doc.path && counter === 2) {
78
+ const pathParts = doc.path.split('/');
79
+ const folderName = pathParts.length > 1 ? pathParts[pathParts.length - 2] : '';
80
+ if (folderName) {
81
+ return `(${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
82
+ }
81
83
  }
82
- } else {
83
- uniqueHeader = `${headerText} (${counter})`;
84
+ return `(${counter})`;
84
85
  }
85
- }
86
-
87
- usedHeaders.add(uniqueHeader.toLowerCase());
86
+ );
88
87
 
89
88
  if (firstHeadingText === doc.title) {
90
89
  // Content already has the same heading, replace it with our unique header if needed
@@ -111,14 +110,12 @@ ${doc.content}`;
111
110
  // Use custom root content or default message
112
111
  const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
113
112
 
114
- const llmFileContent = `# ${fileTitle}
115
-
116
- > ${fileDescription}${versionInfo}
117
-
118
- ${rootContent}
119
-
120
- ${fullContentSections.join('\n\n---\n\n')}
121
- `;
113
+ const llmFileContent = createMarkdownContent(
114
+ fileTitle,
115
+ `${fileDescription}${versionInfo}`,
116
+ `${rootContent}\n\n${fullContentSections.join('\n\n---\n\n')}`,
117
+ true // include metadata (description)
118
+ );
122
119
 
123
120
  await writeFile(outputPath, llmFileContent);
124
121
  } else {
@@ -133,16 +130,12 @@ ${fullContentSections.join('\n\n---\n\n')}
133
130
  // Use custom root content or default message
134
131
  const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
135
132
 
136
- const llmFileContent = `# ${fileTitle}
137
-
138
- > ${fileDescription}${versionInfo}
139
-
140
- ${rootContent}
141
-
142
- ## Table of Contents
143
-
144
- ${tocItems.join('\n')}
145
- `;
133
+ const llmFileContent = createMarkdownContent(
134
+ fileTitle,
135
+ `${fileDescription}${versionInfo}`,
136
+ `${rootContent}\n\n## Table of Contents\n\n${tocItems.join('\n')}`,
137
+ true // include metadata (description)
138
+ );
146
139
 
147
140
  await writeFile(outputPath, llmFileContent);
148
141
  }
@@ -155,65 +148,90 @@ ${tocItems.join('\n')}
155
148
  * @param docs - Processed document information
156
149
  * @param outputDir - Directory to write the markdown files
157
150
  * @param siteUrl - Base site URL
151
+ * @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
152
+ * @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
158
153
  * @returns Updated docs with new URLs pointing to generated markdown files
159
154
  */
160
155
  export async function generateIndividualMarkdownFiles(
161
156
  docs: DocInfo[],
162
157
  outputDir: string,
163
- siteUrl: string
158
+ siteUrl: string,
159
+ docsDir: string = 'docs',
160
+ keepFrontMatter: string[] = []
164
161
  ): Promise<DocInfo[]> {
165
162
  const updatedDocs: DocInfo[] = [];
163
+ const usedPaths = new Set<string>();
166
164
 
167
- // Create a map to ensure unique filenames
168
- const usedFilenames = new Set<string>();
169
165
 
170
166
  for (const doc of docs) {
171
- // Generate a filename from the document title or URL path
172
- let baseFilename = doc.title
173
- .toLowerCase()
174
- .replace(/[^a-z0-9]+/g, '-')
175
- .replace(/^-+|-+$/g, '');
167
+ // Use the original path structure, cleaning it up for file system use
168
+ let relativePath = doc.path
169
+ .replace(/^\/+/, '') // Remove leading slashes
170
+ .replace(/\.mdx?$/, '.md'); // Ensure .md extension
171
+
176
172
 
177
- // Fallback to URL path if title generates empty filename
178
- if (!baseFilename) {
179
- baseFilename = doc.path
180
- .replace(/^\/+|\/+$/g, '') // Remove leading/trailing slashes
181
- .replace(/\//g, '-')
182
- .replace(/[^a-z0-9-]/gi, '-')
183
- .toLowerCase();
173
+ relativePath = relativePath
174
+ .replace(new RegExp(`^${docsDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/`), '');// Remove configured docs dir prefix
175
+
176
+ // If path is empty or invalid, create a fallback path
177
+ if (!relativePath || relativePath === '.md') {
178
+ const sanitizedTitle = sanitizeForFilename(doc.title, 'untitled');
179
+ relativePath = `${sanitizedTitle}.md`;
184
180
  }
185
181
 
186
- // Ensure filename uniqueness
187
- let filename = `${baseFilename}.md`;
182
+ // Ensure path uniqueness
183
+ let uniquePath = relativePath;
188
184
  let counter = 1;
189
- while (usedFilenames.has(filename)) {
190
- filename = `${baseFilename}-${counter}.md`;
185
+ while (usedPaths.has(uniquePath.toLowerCase())) {
191
186
  counter++;
187
+ const pathParts = relativePath.split('.');
188
+ const extension = pathParts.pop() || 'md';
189
+ const basePath = pathParts.join('.');
190
+ uniquePath = `${basePath}-${counter}.${extension}`;
192
191
  }
193
- usedFilenames.add(filename);
192
+ usedPaths.add(uniquePath.toLowerCase());
194
193
 
195
- // Create markdown content following llmstxt.org standard
196
- const markdownContent = `# ${doc.title}
197
-
198
- > ${doc.description}
194
+ // Create the full file path and ensure directory exists
195
+ const fullPath = path.join(outputDir, uniquePath);
196
+ const directory = path.dirname(fullPath);
197
+
198
+ // Create directory structure if it doesn't exist
199
+ await fs.mkdir(directory, { recursive: true });
200
+
201
+ // Extract preserved frontmatter if specified
202
+ let preservedFrontMatter: Record<string, any> = {};
203
+ if (keepFrontMatter.length > 0 && doc.frontMatter) {
204
+ for (const key of keepFrontMatter) {
205
+ if (key in doc.frontMatter) {
206
+ preservedFrontMatter[key] = doc.frontMatter[key];
207
+ }
208
+ }
209
+ }
199
210
 
200
- ${doc.content}
201
- `;
211
+ // Create markdown content using the utility function
212
+ const markdownContent = createMarkdownContent(
213
+ doc.title,
214
+ doc.description,
215
+ doc.content,
216
+ true, // includeMetadata
217
+ Object.keys(preservedFrontMatter).length > 0 ? preservedFrontMatter : undefined
218
+ );
202
219
 
203
220
  // Write the markdown file
204
- const markdownPath = path.join(outputDir, filename);
205
- await writeFile(markdownPath, markdownContent);
221
+ await writeFile(fullPath, markdownContent);
206
222
 
207
223
  // Create updated DocInfo with new URL pointing to the generated markdown file
208
- const newUrl = `${siteUrl}/${filename}`;
224
+ // Convert file path to URL path (use forward slashes)
225
+ const urlPath = uniquePath.replace(/\\/g, '/');
226
+ const newUrl = `${siteUrl}/${urlPath}`;
209
227
 
210
228
  updatedDocs.push({
211
229
  ...doc,
212
230
  url: newUrl,
213
- path: `/${filename}` // Update path to the new markdown file
231
+ path: `/${urlPath}` // Update path to the new markdown file
214
232
  });
215
233
 
216
- console.log(`Generated markdown file: ${filename}`);
234
+ console.log(`Generated markdown file: ${uniquePath}`);
217
235
  }
218
236
 
219
237
  return updatedDocs;
@@ -271,7 +289,9 @@ export async function generateStandardLLMFiles(
271
289
  processedDocs = await generateIndividualMarkdownFiles(
272
290
  processedDocs,
273
291
  outDir,
274
- siteUrl
292
+ siteUrl,
293
+ context.docsDir,
294
+ context.options.keepFrontMatter || []
275
295
  );
276
296
  }
277
297
 
@@ -348,7 +368,9 @@ export async function generateCustomLLMFiles(
348
368
  customDocs = await generateIndividualMarkdownFiles(
349
369
  customDocs,
350
370
  outDir,
351
- siteUrl
371
+ siteUrl,
372
+ context.docsDir,
373
+ context.options.keepFrontMatter || []
352
374
  );
353
375
  }
354
376
 
package/src/index.ts CHANGED
@@ -43,6 +43,7 @@ export default function docusaurusPluginLLMs(
43
43
  excludeImports = false,
44
44
  removeDuplicateHeadings = false,
45
45
  generateMarkdownFiles = false,
46
+ keepFrontMatter = [],
46
47
  rootContent,
47
48
  fullRootContent,
48
49
  } = options;
@@ -85,6 +86,7 @@ export default function docusaurusPluginLLMs(
85
86
  excludeImports,
86
87
  removeDuplicateHeadings,
87
88
  generateMarkdownFiles,
89
+ keepFrontMatter,
88
90
  rootContent,
89
91
  fullRootContent,
90
92
  }
package/src/processor.ts CHANGED
@@ -62,11 +62,19 @@ export async function processMarkdownFile(
62
62
  const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
63
63
 
64
64
  // Handle index files specially
65
- const linkPath = linkPathBase.endsWith('index')
65
+ let linkPath = linkPathBase.endsWith('index')
66
66
  ? linkPathBase.replace(/\/index$/, '')
67
67
  : linkPathBase;
68
68
 
69
- // Apply path transformations to the link path
69
+ // linkPath might include the pathPrefix (e.g., "docs/api/core")
70
+ // We need to remove the pathPrefix before applying transformations, then add it back later
71
+ if (pathPrefix && linkPath.startsWith(`${pathPrefix}/`)) {
72
+ linkPath = linkPath.substring(`${pathPrefix}/`.length);
73
+ } else if (pathPrefix && linkPath === pathPrefix) {
74
+ linkPath = '';
75
+ }
76
+
77
+ // Apply path transformations to the clean link path (without pathPrefix)
70
78
  const transformedLinkPath = applyPathTransformations(linkPath, pathTransformation);
71
79
 
72
80
  // Also apply path transformations to the pathPrefix if it's not empty
@@ -156,6 +164,7 @@ export async function processMarkdownFile(
156
164
  url: fullUrl,
157
165
  content: cleanedContent,
158
166
  description: description || '',
167
+ frontMatter: data,
159
168
  };
160
169
  }
161
170
 
@@ -236,7 +245,8 @@ export async function processFilesWithPatterns(
236
245
  try {
237
246
  // Determine if this is a blog or docs file
238
247
  const isBlogFile = filePath.includes(path.join(siteDir, 'blog'));
239
- const baseDir = isBlogFile ? path.join(siteDir, 'blog') : path.join(siteDir, docsDir);
248
+ // Use siteDir as baseDir to preserve full directory structure (docs/path/file.md instead of just path/file.md)
249
+ const baseDir = siteDir;
240
250
  const pathPrefix = isBlogFile ? 'blog' : 'docs';
241
251
 
242
252
  // Try to find the resolved URL for this file from the route map
package/src/types.ts CHANGED
@@ -13,6 +13,7 @@ export interface DocInfo {
13
13
  url: string;
14
14
  content: string;
15
15
  description: string;
16
+ frontMatter?: Record<string, any>;
16
17
  }
17
18
 
18
19
  /**
@@ -109,7 +110,10 @@ export interface PluginOptions {
109
110
 
110
111
  /** Whether to generate individual markdown files and link to them from llms.txt instead of original docs (default: false) */
111
112
  generateMarkdownFiles?: boolean;
112
-
113
+
114
+ /** Array of frontmatter keys to preserve in generated individual markdown files (only used when generateMarkdownFiles is true) */
115
+ keepFrontMatter?: string[];
116
+
113
117
  /** Custom content to include at the root level of llms.txt (after title/description, before TOC) */
114
118
  rootContent?: string;
115
119
 
package/src/utils.ts CHANGED
@@ -6,6 +6,7 @@ import * as fs from 'fs/promises';
6
6
  import * as path from 'path';
7
7
  import { minimatch } from 'minimatch';
8
8
  import matter from 'gray-matter';
9
+ import * as YAML from 'yaml';
9
10
  import { PluginOptions } from './types';
10
11
 
11
12
  /**
@@ -99,7 +100,7 @@ export function extractTitle(data: any, content: string, filePath: string): stri
99
100
  // Finally use filename
100
101
  return path.basename(filePath, path.extname(filePath))
101
102
  .replace(/-/g, ' ')
102
- .replace(/\b\w/g, c => c.toUpperCase());
103
+ .replace(/\b\w/g, (c: string) => c.toUpperCase());
103
104
  }
104
105
 
105
106
  /**
@@ -290,4 +291,78 @@ export function applyPathTransformations(
290
291
  }
291
292
 
292
293
  return transformedPath;
294
+ }
295
+
296
+ /**
297
+ * Sanitize a string to create a safe filename
298
+ * @param input - Input string (typically a title)
299
+ * @param fallback - Fallback string if input becomes empty after sanitization
300
+ * @returns Sanitized filename (without extension)
301
+ */
302
+ export function sanitizeForFilename(input: string, fallback: string = 'untitled'): string {
303
+ if (!input) return fallback;
304
+
305
+ const sanitized = input
306
+ .toLowerCase()
307
+ .replace(/[^a-z0-9]+/g, '-')
308
+ .replace(/^-+|-+$/g, '');
309
+
310
+ return sanitized || fallback;
311
+ }
312
+
313
+ /**
314
+ * Ensure a unique identifier from a set of used identifiers
315
+ * @param baseIdentifier - Base identifier to make unique
316
+ * @param usedIdentifiers - Set of already used identifiers
317
+ * @param suffix - Suffix pattern (default: number in parentheses)
318
+ * @returns Unique identifier
319
+ */
320
+ export function ensureUniqueIdentifier(
321
+ baseIdentifier: string,
322
+ usedIdentifiers: Set<string>,
323
+ suffix: (counter: number, base: string) => string = (counter) => `(${counter})`
324
+ ): string {
325
+ let uniqueIdentifier = baseIdentifier;
326
+ let counter = 1;
327
+
328
+ while (usedIdentifiers.has(uniqueIdentifier.toLowerCase())) {
329
+ counter++;
330
+ uniqueIdentifier = `${baseIdentifier}${suffix(counter, baseIdentifier)}`;
331
+ }
332
+
333
+ usedIdentifiers.add(uniqueIdentifier.toLowerCase());
334
+ return uniqueIdentifier;
335
+ }
336
+
337
+ /**
338
+ * Create standardized markdown content template
339
+ * @param title - Document title
340
+ * @param description - Document description
341
+ * @param content - Document content
342
+ * @param includeMetadata - Whether to include description metadata
343
+ * @param frontMatter - Optional frontmatter to include at the top
344
+ * @returns Formatted markdown content
345
+ */
346
+ export function createMarkdownContent(
347
+ title: string,
348
+ description: string = '',
349
+ content: string = '',
350
+ includeMetadata: boolean = true,
351
+ frontMatter?: Record<string, any>
352
+ ): string {
353
+ let result = '';
354
+
355
+ // Add frontmatter if provided
356
+ if (frontMatter && Object.keys(frontMatter).length > 0) {
357
+ result += '---\n';
358
+ result += YAML.stringify(frontMatter);
359
+ result += '---\n\n';
360
+ }
361
+
362
+ const descriptionLine = includeMetadata && description ? `\n\n> ${description}\n` : '\n';
363
+
364
+ result += `# ${title}${descriptionLine}
365
+ ${content}`.trim() + '\n';
366
+
367
+ return result;
293
368
  }