docusaurus-plugin-llms 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -8,231 +8,9 @@
8
8
  *
9
9
  * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
10
10
  */
11
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
12
- if (k2 === undefined) k2 = k;
13
- var desc = Object.getOwnPropertyDescriptor(m, k);
14
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
15
- desc = { enumerable: true, get: function() { return m[k]; } };
16
- }
17
- Object.defineProperty(o, k2, desc);
18
- }) : (function(o, m, k, k2) {
19
- if (k2 === undefined) k2 = k;
20
- o[k2] = m[k];
21
- }));
22
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
23
- Object.defineProperty(o, "default", { enumerable: true, value: v });
24
- }) : function(o, v) {
25
- o["default"] = v;
26
- });
27
- var __importStar = (this && this.__importStar) || (function () {
28
- var ownKeys = function(o) {
29
- ownKeys = Object.getOwnPropertyNames || function (o) {
30
- var ar = [];
31
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
32
- return ar;
33
- };
34
- return ownKeys(o);
35
- };
36
- return function (mod) {
37
- if (mod && mod.__esModule) return mod;
38
- var result = {};
39
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
40
- __setModuleDefault(result, mod);
41
- return result;
42
- };
43
- })();
44
- var __importDefault = (this && this.__importDefault) || function (mod) {
45
- return (mod && mod.__esModule) ? mod : { "default": mod };
46
- };
47
11
  Object.defineProperty(exports, "__esModule", { value: true });
48
12
  exports.default = docusaurusPluginLLMs;
49
- const fs = __importStar(require("fs/promises"));
50
- const path = __importStar(require("path"));
51
- const gray_matter_1 = __importDefault(require("gray-matter"));
52
- const minimatch_1 = require("minimatch");
53
- /**
54
- * Write content to a file
55
- * @param filePath - Path to write the file to
56
- * @param data - Content to write
57
- */
58
- async function writeFile(filePath, data) {
59
- return fs.writeFile(filePath, data, 'utf8');
60
- }
61
- /**
62
- * Read content from a file
63
- * @param filePath - Path of the file to read
64
- * @returns Content of the file
65
- */
66
- async function readFile(filePath) {
67
- return fs.readFile(filePath, 'utf8');
68
- }
69
- /**
70
- * Check if a file should be ignored based on glob patterns
71
- * @param filePath - Path to the file
72
- * @param baseDir - Base directory for relative paths
73
- * @param ignorePatterns - Glob patterns for files to ignore
74
- * @returns Whether the file should be ignored
75
- */
76
- function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
77
- if (ignorePatterns.length === 0) {
78
- return false;
79
- }
80
- const relativePath = path.relative(baseDir, filePath);
81
- return ignorePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
82
- }
83
- /**
84
- * Recursively reads all Markdown files in a directory
85
- * @param dir - Directory to scan
86
- * @param baseDir - Base directory for relative paths
87
- * @param ignorePatterns - Glob patterns for files to ignore
88
- * @returns Array of file paths
89
- */
90
- async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
91
- const files = [];
92
- const entries = await fs.readdir(dir, { withFileTypes: true });
93
- for (const entry of entries) {
94
- const fullPath = path.join(dir, entry.name);
95
- if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
96
- continue;
97
- }
98
- if (entry.isDirectory()) {
99
- const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
100
- files.push(...subDirFiles);
101
- }
102
- else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
103
- files.push(fullPath);
104
- }
105
- }
106
- return files;
107
- }
108
- /**
109
- * Extract title from content or use the filename
110
- * @param data - Frontmatter data
111
- * @param content - Markdown content
112
- * @param filePath - Path to the file
113
- * @returns Extracted title
114
- */
115
- function extractTitle(data, content, filePath) {
116
- // First try frontmatter
117
- if (data.title) {
118
- return data.title;
119
- }
120
- // Then try first heading
121
- const headingMatch = content.match(/^#\s+(.*)/m);
122
- if (headingMatch) {
123
- return headingMatch[1].trim();
124
- }
125
- // Finally use filename
126
- return path.basename(filePath, path.extname(filePath))
127
- .replace(/-/g, ' ')
128
- .replace(/\b\w/g, c => c.toUpperCase());
129
- }
130
- /**
131
- * Clean markdown content for LLM consumption
132
- * @param content - Raw markdown content
133
- * @returns Cleaned content
134
- */
135
- function cleanMarkdownContent(content) {
136
- // Remove HTML tags
137
- let cleaned = content.replace(/<[^>]*>/g, '');
138
- // Normalize whitespace
139
- cleaned = cleaned.replace(/\r\n/g, '\n')
140
- .replace(/\n{3,}/g, '\n\n')
141
- .trim();
142
- return cleaned;
143
- }
144
- /**
145
- * Apply path transformations according to configuration
146
- * @param urlPath - Original URL path
147
- * @param pathTransformation - Path transformation configuration
148
- * @returns Transformed URL path
149
- */
150
- function applyPathTransformations(urlPath, pathTransformation) {
151
- if (!pathTransformation) {
152
- return urlPath;
153
- }
154
- let transformedPath = urlPath;
155
- // Remove ignored path segments
156
- if (pathTransformation.ignorePaths?.length) {
157
- for (const ignorePath of pathTransformation.ignorePaths) {
158
- // Create a regex that matches the ignore path at the beginning, middle, or end of the path
159
- // We use word boundaries to ensure we match complete path segments
160
- const ignoreRegex = new RegExp(`(^|/)(${ignorePath})(/|$)`, 'g');
161
- transformedPath = transformedPath.replace(ignoreRegex, '$1$3');
162
- }
163
- // Clean up any double slashes that might have been created
164
- transformedPath = transformedPath.replace(/\/+/g, '/');
165
- // Remove leading slash if present
166
- transformedPath = transformedPath.replace(/^\//, '');
167
- }
168
- // Add path segments if they're not already present
169
- if (pathTransformation.addPaths?.length) {
170
- // Process in reverse order to maintain the specified order in the final path
171
- // This is because each path is prepended to the front
172
- const pathsToAdd = [...pathTransformation.addPaths].reverse();
173
- for (const addPath of pathsToAdd) {
174
- // Only add if not already present at the beginning
175
- if (!transformedPath.startsWith(addPath + '/') && transformedPath !== addPath) {
176
- transformedPath = `${addPath}/${transformedPath}`;
177
- }
178
- }
179
- }
180
- return transformedPath;
181
- }
182
- /**
183
- * Process a markdown file and extract its metadata and content
184
- * @param filePath - Path to the markdown file
185
- * @param baseDir - Base directory
186
- * @param siteUrl - Base URL of the site
187
- * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
188
- * @param pathTransformation - Path transformation configuration
189
- * @returns Processed file data
190
- */
191
- async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs', pathTransformation) {
192
- const content = await readFile(filePath);
193
- const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
194
- const relativePath = path.relative(baseDir, filePath);
195
- // Convert to URL path format (replace backslashes with forward slashes on Windows)
196
- const normalizedPath = relativePath.replace(/\\/g, '/');
197
- // Convert .md extension to appropriate path
198
- const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
199
- // Handle index files specially
200
- const linkPath = linkPathBase.endsWith('index')
201
- ? linkPathBase.replace(/\/index$/, '')
202
- : linkPathBase;
203
- // Apply path transformations to the link path
204
- const transformedLinkPath = applyPathTransformations(linkPath, pathTransformation);
205
- // Also apply path transformations to the pathPrefix if it's not empty
206
- // This allows removing 'docs' from the path when specified in ignorePaths
207
- let transformedPathPrefix = pathPrefix;
208
- if (pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix)) {
209
- transformedPathPrefix = '';
210
- }
211
- // Generate full URL with transformed path and path prefix
212
- const fullUrl = new URL(`${transformedPathPrefix ? `${transformedPathPrefix}/` : ''}${transformedLinkPath}`, siteUrl).toString();
213
- // Extract title
214
- const title = extractTitle(data, markdownContent, filePath);
215
- // Get description from frontmatter or first paragraph
216
- let description = data.description || '';
217
- if (!description) {
218
- const paragraphs = markdownContent.split('\n\n');
219
- for (const para of paragraphs) {
220
- if (para.trim() && !para.startsWith('#')) {
221
- description = para.trim();
222
- break;
223
- }
224
- }
225
- }
226
- // Clean and process content
227
- const cleanedContent = cleanMarkdownContent(markdownContent);
228
- return {
229
- title,
230
- path: normalizedPath,
231
- url: fullUrl,
232
- content: cleanedContent,
233
- description: description || '',
234
- };
235
- }
13
+ const generator_1 = require("./generator");
236
14
  /**
237
15
  * A Docusaurus plugin to generate LLM-friendly documentation following
238
16
  * the llmtxt.org standard
@@ -243,8 +21,36 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
243
21
  */
244
22
  function docusaurusPluginLLMs(context, options = {}) {
245
23
  // Set default options
246
- const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, } = options;
24
+ const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, includeOrder = [], includeUnmatchedLast = true, customLLMFiles = [], } = options;
247
25
  const { siteDir, siteConfig, outDir, } = context;
26
+ // Build the site URL (site url + baseUrl) WITHOUT a trailing slash
27
+ const siteUrl = siteConfig.url + (siteConfig.baseUrl.endsWith('/')
28
+ ? siteConfig.baseUrl.slice(0, -1)
29
+ : siteConfig.baseUrl || '');
30
+ // Create a plugin context object with processed options
31
+ const pluginContext = {
32
+ siteDir,
33
+ outDir,
34
+ siteUrl,
35
+ docsDir,
36
+ docTitle: title || siteConfig.title,
37
+ docDescription: description || siteConfig.tagline || '',
38
+ options: {
39
+ generateLLMsTxt,
40
+ generateLLMsFullTxt,
41
+ docsDir,
42
+ ignoreFiles,
43
+ title,
44
+ description,
45
+ llmsTxtFilename,
46
+ llmsFullTxtFilename,
47
+ includeBlog,
48
+ pathTransformation,
49
+ includeOrder,
50
+ includeUnmatchedLast,
51
+ customLLMFiles,
52
+ }
53
+ };
248
54
  return {
249
55
  name: 'docusaurus-plugin-llms',
250
56
  /**
@@ -252,122 +58,20 @@ function docusaurusPluginLLMs(context, options = {}) {
252
58
  */
253
59
  async postBuild() {
254
60
  console.log('Generating LLM-friendly documentation...');
255
- // Custom title and description or fallback to site values
256
- const docTitle = title || siteConfig.title;
257
- const docDescription = description || siteConfig.tagline || '';
258
- // Build the site URL with proper trailing slash
259
- const siteUrl = siteConfig.url + (siteConfig.baseUrl.endsWith('/')
260
- ? siteConfig.baseUrl.slice(0, -1)
261
- : siteConfig.baseUrl || '');
262
- // Initialize docs collection
263
- const allDocs = [];
264
61
  try {
265
- // Process docs directory
266
- const fullDocsDir = path.join(siteDir, docsDir);
267
- try {
268
- await fs.access(fullDocsDir);
269
- // Collect all markdown files from docs directory
270
- const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);
271
- if (docFiles.length > 0) {
272
- // Process each file
273
- for (const filePath of docFiles) {
274
- try {
275
- const docInfo = await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs', pathTransformation);
276
- allDocs.push(docInfo);
277
- }
278
- catch (err) {
279
- console.warn(`Error processing ${filePath}: ${err.message}`);
280
- }
281
- }
282
- console.log(`Processed ${docFiles.length} documentation files`);
283
- }
284
- else {
285
- console.warn('No markdown files found in docs directory.');
286
- }
287
- }
288
- catch (err) {
289
- console.warn(`Docs directory not found: ${fullDocsDir}`);
290
- }
291
- // Process blog if enabled
292
- if (includeBlog) {
293
- const blogDir = path.join(siteDir, 'blog');
294
- try {
295
- await fs.access(blogDir);
296
- // Collect all markdown files from blog directory
297
- const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);
298
- if (blogFiles.length > 0) {
299
- // Process each file
300
- for (const filePath of blogFiles) {
301
- try {
302
- const docInfo = await processMarkdownFile(filePath, blogDir, siteUrl, 'blog', pathTransformation);
303
- allDocs.push(docInfo);
304
- }
305
- catch (err) {
306
- console.warn(`Error processing ${filePath}: ${err.message}`);
307
- }
308
- }
309
- console.log(`Processed ${blogFiles.length} blog files`);
310
- }
311
- else {
312
- console.warn('No markdown files found in blog directory.');
313
- }
314
- }
315
- catch (err) {
316
- console.warn(`Blog directory not found: ${blogDir}`);
317
- }
318
- }
62
+ // Collect all document files
63
+ const allDocFiles = await (0, generator_1.collectDocFiles)(pluginContext);
319
64
  // Skip further processing if no documents were found
320
- if (allDocs.length === 0) {
65
+ if (allDocFiles.length === 0) {
321
66
  console.warn('No documents found to process.');
322
67
  return;
323
68
  }
324
- // Sort files to ensure consistent ordering
325
- allDocs.sort((a, b) => a.title.localeCompare(b.title));
326
- // Generate llms.txt
327
- if (generateLLMsTxt) {
328
- const llmsTxtPath = path.join(outDir, llmsTxtFilename);
329
- const tocItems = allDocs.map(doc => {
330
- return `- [${doc.title}](${doc.url})${doc.description ? `: ${doc.description.split('\n')[0]}` : ''}`;
331
- });
332
- const llmsTxtContent = `# ${docTitle}
333
-
334
- > ${docDescription}
335
-
336
- This file contains links to all documentation sections following the llmtxt.org standard.
337
-
338
- ## Table of Contents
339
-
340
- ${tocItems.join('\n')}
341
- `;
342
- await writeFile(llmsTxtPath, llmsTxtContent);
343
- console.log(`Generated ${llmsTxtFilename}: ${llmsTxtPath}`);
344
- }
345
- // Generate llms-full.txt with all content
346
- if (generateLLMsFullTxt) {
347
- const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
348
- const fullContentSections = allDocs.map(doc => {
349
- return `## ${doc.title}
350
-
351
- ${doc.content}`;
352
- });
353
- const llmsFullTxtContent = `# ${docTitle}
354
-
355
- > ${docDescription}
356
-
357
- This file contains all documentation content in a single document following the llmtxt.org standard.
358
-
359
- ${fullContentSections.join('\n\n---\n\n')}
360
- `;
361
- await writeFile(llmsFullTxtPath, llmsFullTxtContent);
362
- console.log(`Generated ${llmsFullTxtFilename}: ${llmsFullTxtPath}`);
363
- }
364
- // Output statistics
365
- const stats = {
366
- totalDocuments: allDocs.length,
367
- totalBytes: allDocs.reduce((sum, doc) => sum + doc.content.length, 0),
368
- approxTokens: Math.round(allDocs.reduce((sum, doc) => sum + doc.content.length, 0) / 4), // Rough token estimate
369
- };
370
- console.log(`Stats: ${stats.totalDocuments} documents, ${Math.round(stats.totalBytes / 1024)}KB, ~${stats.approxTokens} tokens`);
69
+ // Process standard LLM files (llms.txt and llms-full.txt)
70
+ await (0, generator_1.generateStandardLLMFiles)(pluginContext, allDocFiles);
71
+ // Process custom LLM files
72
+ await (0, generator_1.generateCustomLLMFiles)(pluginContext, allDocFiles);
73
+ // Output overall statistics
74
+ console.log(`Stats: ${allDocFiles.length} total available documents processed`);
371
75
  }
372
76
  catch (err) {
373
77
  console.error('Error generating LLM documentation:', err);
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Document processing functions for the docusaurus-plugin-llms plugin
3
+ */
4
+ import { DocInfo, PluginContext } from './types';
5
+ /**
6
+ * Process a markdown file and extract its metadata and content
7
+ * @param filePath - Path to the markdown file
8
+ * @param baseDir - Base directory
9
+ * @param siteUrl - Base URL of the site
10
+ * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
11
+ * @param pathTransformation - Path transformation configuration
12
+ * @returns Processed file data
13
+ */
14
+ export declare function processMarkdownFile(filePath: string, baseDir: string, siteUrl: string, pathPrefix?: string, pathTransformation?: {
15
+ ignorePaths?: string[];
16
+ addPaths?: string[];
17
+ }): Promise<DocInfo>;
18
+ /**
19
+ * Process files based on include patterns, ignore patterns, and ordering
20
+ * @param context - Plugin context
21
+ * @param allFiles - All available files
22
+ * @param includePatterns - Patterns for files to include
23
+ * @param ignorePatterns - Patterns for files to ignore
24
+ * @param orderPatterns - Patterns for ordering files
25
+ * @param includeUnmatched - Whether to include unmatched files
26
+ * @returns Processed files
27
+ */
28
+ export declare function processFilesWithPatterns(context: PluginContext, allFiles: string[], includePatterns?: string[], ignorePatterns?: string[], orderPatterns?: string[], includeUnmatched?: boolean): Promise<DocInfo[]>;
@@ -0,0 +1,211 @@
1
+ "use strict";
2
+ /**
3
+ * Document processing functions for the docusaurus-plugin-llms plugin
4
+ */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
9
+ desc = { enumerable: true, get: function() { return m[k]; } };
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
17
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
18
+ }) : function(o, v) {
19
+ o["default"] = v;
20
+ });
21
+ var __importStar = (this && this.__importStar) || (function () {
22
+ var ownKeys = function(o) {
23
+ ownKeys = Object.getOwnPropertyNames || function (o) {
24
+ var ar = [];
25
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
26
+ return ar;
27
+ };
28
+ return ownKeys(o);
29
+ };
30
+ return function (mod) {
31
+ if (mod && mod.__esModule) return mod;
32
+ var result = {};
33
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
34
+ __setModuleDefault(result, mod);
35
+ return result;
36
+ };
37
+ })();
38
+ var __importDefault = (this && this.__importDefault) || function (mod) {
39
+ return (mod && mod.__esModule) ? mod : { "default": mod };
40
+ };
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.processMarkdownFile = processMarkdownFile;
43
+ exports.processFilesWithPatterns = processFilesWithPatterns;
44
+ const path = __importStar(require("path"));
45
+ const gray_matter_1 = __importDefault(require("gray-matter"));
46
+ const minimatch_1 = require("minimatch");
47
+ const utils_1 = require("./utils");
/**
 * Resolve a site-relative path against the site URL without losing the base path.
 * @param siteUrl - Base URL of the site, with or without a trailing slash
 * @param relativeUrlPath - Path relative to the site root (no leading slash)
 * @returns Absolute URL as a string
 */
function resolveDocUrl(siteUrl, relativeUrlPath) {
    // Relative URL resolution replaces the last path segment of a base that does
    // not end in '/', so 'https://host/base' + 'docs/x' would silently drop '/base'.
    const base = siteUrl.endsWith('/') ? siteUrl : `${siteUrl}/`;
    return new URL(relativeUrlPath, base).toString();
}
/**
 * Pick a description for a document and strip markdown heading markers from it.
 * Priority: frontmatter `description`, then the first non-heading paragraph,
 * then the text of the first level-1 heading.
 * @param data - Parsed frontmatter data
 * @param markdownContent - Markdown body without the frontmatter
 * @param title - Document title, used only in warning messages
 * @returns Description text, or an empty string when none could be found
 */
function extractDescription(data, markdownContent, title) {
    // Frontmatter values are not guaranteed to be strings (YAML may yield numbers,
    // arrays, ...); coerce so the string operations below cannot throw.
    const frontmatterDescription = data.description ? String(data.description) : '';
    let description = frontmatterDescription;
    if (!description) {
        // Paragraphs are separated by blank lines; accept both LF and CRLF files.
        const firstParagraph = markdownContent
            .split(/(?:\r?\n){2,}/)
            .map(para => para.trim())
            .find(para => para && !para.startsWith('#'));
        if (firstParagraph) {
            description = firstParagraph;
        }
        else {
            const firstHeadingMatch = markdownContent.match(/^#\s+(.*?)$/m);
            if (firstHeadingMatch && firstHeadingMatch[1]) {
                description = firstHeadingMatch[1].trim();
            }
        }
    }
    if (!description) {
        return '';
    }
    // Remove heading markers ('#', '##', ... followed by whitespace) at the start of
    // each line. '#' characters inside the text (e.g. hashtags) are left alone.
    description = description.replace(/^#+\s+/gm, '');
    // A frontmatter description that began with '#' may carry a second, nested
    // marker (e.g. '# # Title'); strip that one as well.
    if (frontmatterDescription.startsWith('#')) {
        description = description.replace(/^#+\s+/, '');
    }
    // Sanity warnings only; none of these alter the description.
    if (/^#+\s+/m.test(description)) {
        console.warn(`Warning: Description for "${title}" may still contain heading markers`);
    }
    if (/<[^>]+>/.test(description)) {
        console.warn(`Warning: Description for "${title}" contains HTML tags`);
    }
    if (description.length > 500) {
        console.warn(`Warning: Description for "${title}" is very long (${description.length} characters)`);
    }
    return description;
}
/**
 * Process a markdown file and extract its metadata and content
 * @param filePath - Path to the markdown file
 * @param baseDir - Base directory the file's URL path is computed relative to
 * @param siteUrl - Base URL of the site (a trailing slash is optional)
 * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
 * @param pathTransformation - Path transformation configuration
 * @returns Processed file data: title, path, url, content and description
 */
async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs', pathTransformation) {
    const content = await (0, utils_1.readFile)(filePath);
    const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
    const relativePath = path.relative(baseDir, filePath);
    // Convert to URL path format (replace backslashes with forward slashes on Windows)
    const normalizedPath = relativePath.replace(/\\/g, '/');
    // Drop the .md / .mdx extension
    const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
    // An index file stands for its directory. Match 'index' as a whole segment so a
    // root-level 'index' collapses too, while names like 'reindex' are untouched.
    const linkPath = linkPathBase.replace(/(^|\/)index$/, '');
    // Apply path transformations to the link path
    const transformedLinkPath = (0, utils_1.applyPathTransformations)(linkPath, pathTransformation);
    // The prefix itself is dropped when it is listed in ignorePaths
    // (this allows removing 'docs' from the generated URLs).
    const prefixIgnored = Boolean(pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix));
    const transformedPathPrefix = prefixIgnored ? '' : pathPrefix;
    // Join only the non-empty parts so no stray '/' is produced
    const relativeUrlPath = [transformedPathPrefix, transformedLinkPath].filter(Boolean).join('/');
    const fullUrl = resolveDocUrl(siteUrl, relativeUrlPath);
    const title = (0, utils_1.extractTitle)(data, markdownContent, filePath);
    const description = extractDescription(data, markdownContent, title);
    // Clean and process content
    const cleanedContent = (0, utils_1.cleanMarkdownContent)(markdownContent);
    return {
        title,
        path: normalizedPath,
        url: fullUrl,
        content: cleanedContent,
        description,
    };
}
/**
 * Tell whether a path lies strictly inside a directory.
 * Compares whole path segments, so '<site>/blog-docs/x.md' is NOT inside '<site>/blog'.
 * @param parentDir - Directory that should contain the path
 * @param candidate - Path to test
 * @returns true when candidate is located below parentDir
 */
function isPathInsideDir(parentDir, candidate) {
    const rel = path.relative(parentDir, candidate);
    return rel !== ''
        && rel !== '..'
        && !rel.startsWith(`..${path.sep}`)
        && !path.isAbsolute(rel); // different drive on Windows
}
/**
 * Process files based on include patterns, ignore patterns, and ordering
 * @param context - Plugin context (reads siteDir, siteUrl, docsDir and options.pathTransformation)
 * @param allFiles - All available files
 * @param includePatterns - Patterns for files to include (empty = include everything)
 * @param ignorePatterns - Patterns for files to ignore
 * @param orderPatterns - Patterns for ordering files; files are emitted in the order of
 *   the first pattern they match
 * @param includeUnmatched - When orderPatterns is non-empty, whether files matching none
 *   of the order patterns are appended afterwards (otherwise they are dropped)
 * @returns Processed files; files whose processing throws are skipped with a warning
 */
async function processFilesWithPatterns(context, allFiles, includePatterns = [], ignorePatterns = [], orderPatterns = [], includeUnmatched = false) {
    const { siteDir, siteUrl, docsDir } = context;
    const pathTransformation = context.options?.pathTransformation;
    // Site-relative path per file, computed once and always with '/' separators:
    // glob patterns are written with forward slashes and minimatch treats '\' as an escape.
    const relativePathCache = new Map();
    const toGlobPath = (file) => {
        let globPath = relativePathCache.get(file);
        if (globPath === undefined) {
            globPath = path.relative(siteDir, file).split(path.sep).join('/');
            relativePathCache.set(file, globPath);
        }
        return globPath;
    };
    const matchesPattern = (file, pattern) => (0, minimatch_1.minimatch)(toGlobPath(file), pattern, { matchBase: true });
    const matchesAny = (file, patterns) => patterns.some(pattern => matchesPattern(file, pattern));
    // Filter files based on include patterns, then apply ignore patterns
    let filteredFiles = allFiles;
    if (includePatterns.length > 0) {
        filteredFiles = filteredFiles.filter(file => matchesAny(file, includePatterns));
    }
    if (ignorePatterns.length > 0) {
        filteredFiles = filteredFiles.filter(file => !matchesAny(file, ignorePatterns));
    }
    // Order files according to orderPatterns
    let filesToProcess = filteredFiles;
    if (orderPatterns.length > 0) {
        // A Set keeps insertion order and guarantees each file is taken by the
        // first pattern that matches it only.
        const orderedFiles = new Set();
        for (const pattern of orderPatterns) {
            for (const file of filteredFiles) {
                if (!orderedFiles.has(file) && matchesPattern(file, pattern)) {
                    orderedFiles.add(file);
                }
            }
        }
        filesToProcess = [...orderedFiles];
        if (includeUnmatched) {
            filesToProcess.push(...filteredFiles.filter(file => !orderedFiles.has(file)));
        }
    }
    // Process each file to generate DocInfo
    const blogDir = path.join(siteDir, 'blog');
    const docsRoot = path.join(siteDir, docsDir);
    const processedDocs = [];
    for (const filePath of filesToProcess) {
        try {
            // Determine if this is a blog or docs file by directory containment
            // (a substring test would also catch e.g. '<site>/blog-docs').
            const isBlogFile = isPathInsideDir(blogDir, filePath);
            const baseDir = isBlogFile ? blogDir : docsRoot;
            const pathPrefix = isBlogFile ? 'blog' : 'docs';
            const docInfo = await processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix, pathTransformation);
            processedDocs.push(docInfo);
        }
        catch (err) {
            // Best effort: one unreadable/invalid file must not abort the whole run.
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`Error processing ${filePath}: ${reason}`);
        }
    }
    return processedDocs;
}