docusaurus-plugin-llms 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/utils.js CHANGED
@@ -39,6 +39,19 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
39
39
  return (mod && mod.__esModule) ? mod : { "default": mod };
40
40
  };
41
41
  Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.logger = exports.LogLevel = exports.ValidationError = void 0;
43
+ exports.isDefined = isDefined;
44
+ exports.isNonEmptyString = isNonEmptyString;
45
+ exports.isNonEmptyArray = isNonEmptyArray;
46
+ exports.getErrorMessage = getErrorMessage;
47
+ exports.getErrorStack = getErrorStack;
48
+ exports.validateRequired = validateRequired;
49
+ exports.validateString = validateString;
50
+ exports.validateArray = validateArray;
51
+ exports.setLogLevel = setLogLevel;
52
+ exports.normalizePath = normalizePath;
53
+ exports.validatePathLength = validatePathLength;
54
+ exports.shortenPathIfNeeded = shortenPathIfNeeded;
42
55
  exports.writeFile = writeFile;
43
56
  exports.readFile = readFile;
44
57
  exports.shouldIgnoreFile = shouldIgnoreFile;
@@ -47,10 +60,240 @@ exports.extractTitle = extractTitle;
47
60
  exports.resolvePartialImports = resolvePartialImports;
48
61
  exports.cleanMarkdownContent = cleanMarkdownContent;
49
62
  exports.applyPathTransformations = applyPathTransformations;
63
+ exports.sanitizeForFilename = sanitizeForFilename;
64
+ exports.ensureUniqueIdentifier = ensureUniqueIdentifier;
65
+ exports.createMarkdownContent = createMarkdownContent;
50
66
  const fs = __importStar(require("fs/promises"));
51
67
  const path = __importStar(require("path"));
68
+ const crypto = __importStar(require("crypto"));
52
69
  const minimatch_1 = require("minimatch");
53
70
  const gray_matter_1 = __importDefault(require("gray-matter"));
71
+ const YAML = __importStar(require("yaml"));
72
+ /**
73
+ * Null/Undefined Handling Guidelines:
74
+ *
75
+ * 1. For required parameters: Throw early if null/undefined
76
+ * 2. For optional parameters: Use optional chaining `value?.property`
77
+ * 3. For explicit null checks: Use `!== null` and `!== undefined` or the isDefined type guard
78
+ * 4. For string validation: Use isNonEmptyString() type guard
79
+ * 5. For truthy checks on booleans: Use explicit comparison or Boolean(value)
80
+ *
81
+ * Avoid: `if (value)` when value could be 0, '', or false legitimately
82
+ * Use: Type guards for consistent, type-safe checks
83
+ */
84
/**
 * Type guard reporting whether a value is present.
 * Falsy-but-real values such as 0, '' and false count as defined.
 * @param value - Value to check
 * @returns False for null and undefined, true for anything else
 */
function isDefined(value) {
    if (value === null || value === undefined) {
        return false;
    }
    return true;
}
92
/**
 * Type guard for strings that carry visible content.
 * @param value - Value to check
 * @returns True only for a string that is non-empty after trimming whitespace
 */
function isNonEmptyString(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return value.trim() !== '';
}
100
/**
 * Type guard for arrays holding at least one element.
 * @param value - Value to check
 * @returns True only when value is a real array with a non-zero length
 */
function isNonEmptyArray(value) {
    if (!Array.isArray(value)) {
        return false;
    }
    return value.length !== 0;
}
108
/**
 * Turn any thrown value into a printable message.
 * @param error - The thrown value (Error instance, string, or anything else)
 * @returns The Error's message, the string itself, or the JSON form of the
 *          value; 'Unknown error' when the value cannot be serialised
 */
function getErrorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    if (typeof error === 'string') {
        return error;
    }
    let serialized;
    try {
        serialized = JSON.stringify(error);
    }
    catch {
        // e.g. circular structures or BigInt values
        return 'Unknown error';
    }
    // JSON.stringify yields undefined (not a string) for undefined, functions and symbols
    return serialized === undefined ? 'Unknown error' : serialized;
}
129
/**
 * Read the stack trace off a thrown value when it has one.
 * @param error - The thrown value (Error instance or anything else)
 * @returns The stack of an Error instance; undefined for every other value
 */
function getErrorStack(error) {
    return error instanceof Error ? error.stack : undefined;
}
140
/**
 * Error type thrown by the validate* helpers when an argument is rejected.
 * Carries the usual Error message and reports its name as 'ValidationError'.
 */
class ValidationError extends Error {
    /**
     * @param message - Human-readable description of the failed validation
     */
    constructor(message) {
        super(message);
        // Override the inherited 'Error' name on this instance
        this.name = 'ValidationError';
    }
}
149
+ exports.ValidationError = ValidationError;
150
/**
 * Guard for mandatory parameters.
 * @param value - The value to validate
 * @param paramName - Parameter name used in the error message
 * @returns The same value, unchanged, when it is present
 * @throws ValidationError if the value is null or undefined
 */
function validateRequired(value, paramName) {
    // Loose equality with null matches exactly null and undefined
    const isMissing = value == null;
    if (isMissing) {
        throw new ValidationError(`Required parameter '${paramName}' is null or undefined`);
    }
    return value;
}
163
/**
 * Validates that a value is a string and optionally checks its properties
 * @param value - The value to validate
 * @param paramName - The parameter name for error messages
 * @param options - Validation options: minLength, maxLength (both inclusive
 *                  bounds on value.length) and pattern (a RegExp the value must match)
 * @returns The validated string
 * @throws ValidationError if the value is not a string or any option check fails
 */
function validateString(value, paramName, options = {}) {
    if (typeof value !== 'string') {
        throw new ValidationError(`Parameter '${paramName}' must be a string, got ${typeof value}`);
    }
    const { minLength, maxLength, pattern } = options;
    if (minLength !== undefined && value.length < minLength) {
        throw new ValidationError(`Parameter '${paramName}' must be at least ${minLength} characters`);
    }
    if (maxLength !== undefined && value.length > maxLength) {
        throw new ValidationError(`Parameter '${paramName}' exceeds maximum length of ${maxLength}`);
    }
    if (pattern) {
        // A /g or /y RegExp resumes matching from its own lastIndex, so a pattern
        // object reused across calls could reject a valid value. Always start at 0.
        pattern.lastIndex = 0;
        if (!pattern.test(value)) {
            throw new ValidationError(`Parameter '${paramName}' does not match required pattern`);
        }
    }
    return value;
}
186
/**
 * Checks that a value is an array and, when a validator is supplied, that
 * every element passes it.
 * @param value - The value to validate
 * @param paramName - Parameter name used in error messages
 * @param elementValidator - Optional predicate called with each element
 * @returns The same array, unchanged
 * @throws ValidationError if value is not an array or an element is rejected
 */
function validateArray(value, paramName, elementValidator) {
    if (!Array.isArray(value)) {
        throw new ValidationError(`Parameter '${paramName}' must be an array`);
    }
    if (!elementValidator) {
        return value;
    }
    const length = value.length;
    for (let index = 0; index < length; index++) {
        // Holes in sparse arrays are not visited
        if (!(index in value)) {
            continue;
        }
        if (!elementValidator(value[index])) {
            throw new ValidationError(`Element at index ${index} in '${paramName}' failed validation`);
        }
    }
    return value;
}
207
/**
 * Logging verbosity levels, stored as a two-way map:
 * name -> number (QUIET = 0, NORMAL = 1, VERBOSE = 2) and number -> name.
 */
const LogLevel = {};
exports.LogLevel = LogLevel;
['QUIET', 'NORMAL', 'VERBOSE'].forEach((label, ordinal) => {
    LogLevel[label] = ordinal;
    LogLevel[ordinal] = label;
});
// Module-wide verbosity read by the logger; starts at NORMAL
let currentLogLevel = LogLevel.NORMAL;
/**
 * Change the verbosity used by the plugin's logger.
 * @param level - One of the LogLevel values
 */
function setLogLevel(level) {
    currentLogLevel = level;
}
224
+ /**
225
+ * Logger utility for consistent logging across the plugin
226
+ */
227
+ exports.logger = {
228
+ error: (message) => {
229
+ console.error(`[docusaurus-plugin-llms] ERROR: ${message}`);
230
+ },
231
+ warn: (message) => {
232
+ if (currentLogLevel >= LogLevel.NORMAL) {
233
+ console.warn(`[docusaurus-plugin-llms] ${message}`);
234
+ }
235
+ },
236
+ info: (message) => {
237
+ if (currentLogLevel >= LogLevel.NORMAL) {
238
+ console.log(`[docusaurus-plugin-llms] ${message}`);
239
+ }
240
+ },
241
+ verbose: (message) => {
242
+ if (currentLogLevel >= LogLevel.VERBOSE) {
243
+ console.log(`[docusaurus-plugin-llms] ${message}`);
244
+ }
245
+ }
246
+ };
247
+ /**
248
+ * Constants for path length limits
249
+ */
250
+ const MAX_PATH_LENGTH_WINDOWS = 260;
251
+ const MAX_PATH_LENGTH_UNIX = 4096;
252
/**
 * Rewrite a path so that it uses forward slashes only, giving Windows-style
 * and Unix-style paths one common form.
 *
 * @param filePath - The file path to normalize
 * @returns The path with every backslash replaced by a forward slash
 * @throws ValidationError if filePath is not a string
 */
function normalizePath(filePath) {
    validateString(filePath, 'filePath');
    const segments = filePath.split('\\');
    return segments.join('/');
}
264
/**
 * Check a path against the length limit of the current platform
 * (MAX_PATH_LENGTH_WINDOWS on win32, MAX_PATH_LENGTH_UNIX elsewhere).
 * An over-long path is reported through the logger's error channel.
 * @param filePath - The file path to validate
 * @returns True if the path is within the limit, false otherwise
 */
function validatePathLength(filePath) {
    const onWindows = process.platform === 'win32';
    const limit = onWindows ? MAX_PATH_LENGTH_WINDOWS : MAX_PATH_LENGTH_UNIX;
    const withinLimit = !(filePath.length > limit);
    if (!withinLimit) {
        exports.logger.error(`Path exceeds maximum length (${limit}): ${filePath}`);
    }
    return withinLimit;
}
279
/**
 * Replace an over-long output path with a short hash-named file placed
 * directly inside the output directory.
 * @param fullPath - The full file path that may be too long
 * @param outputDir - The output directory base path
 * @param relativePath - The path relative to the output directory; hashed to name the short file
 * @returns fullPath when it passes validatePathLength, otherwise
 *          `<outputDir>/<first 8 hex chars of md5(relativePath)>.md`
 */
function shortenPathIfNeeded(fullPath, outputDir, relativePath) {
    const fitsLimit = validatePathLength(fullPath);
    if (fitsLimit) {
        return fullPath;
    }
    // Derive the short name from the relative path so the same input always maps to the same file
    const digest = crypto.createHash('md5').update(relativePath).digest('hex');
    const shortName = `${digest.slice(0, 8)}.md`;
    const shortenedPath = path.join(outputDir, shortName);
    exports.logger.warn(`Path too long, using shortened path: ${shortenedPath}`);
    exports.logger.verbose(`Original path: ${fullPath}`);
    return shortenedPath;
}
54
297
  /**
55
298
  * Write content to a file
56
299
  * @param filePath - Path to write the file to
@@ -62,50 +305,121 @@ async function writeFile(filePath, data) {
62
305
  /**
63
306
  * Read content from a file
64
307
  * @param filePath - Path of the file to read
65
- * @returns Content of the file
308
+ * @returns Content of the file with BOM removed if present
66
309
  */
67
310
  async function readFile(filePath) {
68
- return fs.readFile(filePath, 'utf8');
311
+ let content = await fs.readFile(filePath, 'utf8');
312
+ // Remove UTF-8 BOM if present
313
+ // UTF-8 BOM is the character U+FEFF at the start of the file
314
+ if (content.charCodeAt(0) === 0xFEFF) {
315
+ content = content.slice(1);
316
+ }
317
+ return content;
69
318
  }
70
319
  /**
71
320
  * Check if a file should be ignored based on glob patterns
321
+ * Matches against both site-relative and docs-relative paths
72
322
  * @param filePath - Path to the file
73
- * @param baseDir - Base directory for relative paths
323
+ * @param baseDir - Base directory (site root) for relative paths
74
324
  * @param ignorePatterns - Glob patterns for files to ignore
325
+ * @param docsDir - Docs directory name (e.g., 'docs')
75
326
  * @returns Whether the file should be ignored
76
327
  */
77
- function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
78
- if (ignorePatterns.length === 0) {
328
+ function shouldIgnoreFile(filePath, baseDir, ignorePatterns, docsDir = 'docs') {
329
+ if (!isNonEmptyArray(ignorePatterns)) {
79
330
  return false;
80
331
  }
81
- const relativePath = path.relative(baseDir, filePath);
82
- return ignorePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
332
+ const minimatchOptions = { matchBase: true };
333
+ // Get site-relative path (e.g., "docs/quickstart/file.md")
334
+ const siteRelativePath = normalizePath(path.relative(baseDir, filePath));
335
+ // Get docs-relative path (e.g., "quickstart/file.md")
336
+ const docsBaseDir = path.resolve(path.join(baseDir, docsDir));
337
+ const resolvedFile = path.resolve(filePath);
338
+ const docsRelativePath = resolvedFile.startsWith(docsBaseDir)
339
+ ? normalizePath(path.relative(docsBaseDir, resolvedFile))
340
+ : null;
341
+ return ignorePatterns.some(pattern => {
342
+ // Try matching against site-relative path
343
+ if ((0, minimatch_1.minimatch)(siteRelativePath, pattern, minimatchOptions)) {
344
+ return true;
345
+ }
346
+ // Try matching against docs-relative path if available
347
+ if (docsRelativePath && (0, minimatch_1.minimatch)(docsRelativePath, pattern, minimatchOptions)) {
348
+ return true;
349
+ }
350
+ return false;
351
+ });
83
352
  }
84
353
  /**
85
354
  * Recursively reads all Markdown files in a directory
86
355
  * @param dir - Directory to scan
87
- * @param baseDir - Base directory for relative paths
356
+ * @param baseDir - Base directory (site root) for relative paths
88
357
  * @param ignorePatterns - Glob patterns for files to ignore
358
+ * @param docsDir - Docs directory name (e.g., 'docs')
359
+ * @param warnOnIgnoredFiles - Whether to warn about ignored files
360
+ * @param visitedPaths - Set of already visited real paths to detect symlink loops (internal use)
89
361
  * @returns Array of file paths
90
362
  */
91
- async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
363
+ async function readMarkdownFiles(dir, baseDir, ignorePatterns = [], docsDir = 'docs', warnOnIgnoredFiles = false, visitedPaths = new Set()) {
364
+ // Get real path to detect symlink loops
365
+ let realPath;
366
+ try {
367
+ realPath = await fs.realpath(dir);
368
+ }
369
+ catch (error) {
370
+ exports.logger.warn(`Failed to resolve real path for ${dir}: ${getErrorMessage(error)}`);
371
+ return [];
372
+ }
373
+ // Check if we've already visited this path (symlink loop detection)
374
+ if (visitedPaths.has(realPath)) {
375
+ exports.logger.warn(`Skipping already visited path (possible symlink loop): ${dir}`);
376
+ return [];
377
+ }
378
+ // Add to visited paths
379
+ visitedPaths.add(realPath);
92
380
  const files = [];
93
381
  const entries = await fs.readdir(dir, { withFileTypes: true });
94
382
  for (const entry of entries) {
95
383
  const fullPath = path.join(dir, entry.name);
96
- if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
384
+ if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns, docsDir)) {
97
385
  continue;
98
386
  }
99
- if (entry.isDirectory()) {
100
- const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
387
+ // Handle both regular directories and symlinked directories
388
+ let isDir = entry.isDirectory();
389
+ if (!isDir && entry.isSymbolicLink()) {
390
+ // Check if symlink points to a directory
391
+ try {
392
+ const stats = await fs.stat(fullPath);
393
+ isDir = stats.isDirectory();
394
+ }
395
+ catch (error) {
396
+ // Broken symlink, warn and skip it
397
+ exports.logger.warn(`Skipping broken symlink: ${fullPath}`);
398
+ continue;
399
+ }
400
+ }
401
+ if (isDir) {
402
+ const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns, docsDir, warnOnIgnoredFiles, visitedPaths);
101
403
  files.push(...subDirFiles);
102
404
  }
405
+ else if (!entry.name.includes('.')) {
406
+ // File without extension
407
+ if (warnOnIgnoredFiles) {
408
+ exports.logger.warn(`Ignoring file without extension: ${fullPath}`);
409
+ }
410
+ }
103
411
  else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
104
412
  // Skip partial files (those starting with underscore)
105
413
  if (!entry.name.startsWith('_')) {
106
414
  files.push(fullPath);
107
415
  }
108
416
  }
417
+ else {
418
+ // Other extension
419
+ if (warnOnIgnoredFiles) {
420
+ exports.logger.warn(`Ignoring file with unsupported extension: ${fullPath}`);
421
+ }
422
+ }
109
423
  }
110
424
  return files;
111
425
  }
@@ -117,35 +431,46 @@ async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
117
431
  * @returns Extracted title
118
432
  */
119
433
  function extractTitle(data, content, filePath) {
120
- // First try frontmatter
121
- if (data.title) {
434
+ // First try frontmatter (check for valid non-empty string)
435
+ if (isNonEmptyString(data.title)) {
122
436
  return data.title;
123
437
  }
124
438
  // Then try first heading
125
439
  const headingMatch = content.match(/^#\s+(.*)/m);
126
- if (headingMatch) {
440
+ if (isNonEmptyString(headingMatch?.[1])) {
127
441
  return headingMatch[1].trim();
128
442
  }
129
443
  // Finally use filename
130
444
  return path.basename(filePath, path.extname(filePath))
131
445
  .replace(/-/g, ' ')
132
- .replace(/\b\w/g, c => c.toUpperCase());
446
+ .replace(/\b\w/g, (c) => c.toUpperCase());
447
+ }
448
/**
 * Backslash-escape every regex metacharacter in a string so the result
 * matches the original text literally when placed inside a RegExp.
 * @param str - String to escape
 * @returns Escaped string safe for use in regex
 */
function escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`);
}
134
456
  /**
135
457
  * Resolve and inline partial imports in markdown content
136
458
  * @param content - The markdown content with import statements
137
459
  * @param filePath - The path of the file containing the imports
460
+ * @param importChain - Set of file paths in the current import chain (for circular dependency detection)
138
461
  * @returns Content with partials resolved
139
462
  */
140
- async function resolvePartialImports(content, filePath) {
463
+ async function resolvePartialImports(content, filePath, importChain = new Set()) {
141
464
  let resolved = content;
142
465
  // Match import statements for partials and JSX usage
143
466
  // Pattern 1: import PartialName from './_partial.mdx'
144
467
  // Pattern 2: import { PartialName } from './_partial.mdx'
145
- const importRegex = /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;
468
+ // Create a fresh regex for each invocation to avoid lastIndex state leakage
469
+ const createImportRegex = () => /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;
146
470
  const imports = new Map();
147
471
  // First pass: collect all imports
148
472
  let match;
473
+ const importRegex = createImportRegex();
149
474
  while ((match = importRegex.exec(content)) !== null) {
150
475
  const componentName = match[1] || match[2];
151
476
  const importPath = match[3];
@@ -160,20 +485,52 @@ async function resolvePartialImports(content, filePath) {
160
485
  // Resolve the partial file path relative to the current file
161
486
  const dir = path.dirname(filePath);
162
487
  const partialPath = path.resolve(dir, importPath);
488
+ // Check for circular import
489
+ if (importChain.has(partialPath)) {
490
+ const chain = Array.from(importChain).join(' -> ');
491
+ exports.logger.error(`Circular import detected: ${chain} -> ${partialPath}`);
492
+ // Escape special regex characters in component name and import path
493
+ const escapedComponentName = escapeRegex(componentName);
494
+ const escapedImportPath = escapeRegex(importPath);
495
+ // Remove the import statement to prevent infinite recursion
496
+ resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${escapedComponentName}|{\\s*${escapedComponentName}\\s*})\\s+from\\s+['"]${escapedImportPath}['"];?\\s*$`, 'gm'), '');
497
+ // Remove JSX usage of this component
498
+ const jsxRegex = new RegExp(`<${escapedComponentName}(?:\\s+[^>]*)?\\s*\\/?>(?:[\\s\\S]*?<\\/${escapedComponentName}>)?`, 'gm');
499
+ resolved = resolved.replace(jsxRegex, '');
500
+ continue;
501
+ }
502
+ // Add to chain before recursive call
503
+ const newChain = new Set(importChain);
504
+ newChain.add(partialPath);
163
505
  // Read the partial file
164
- const partialContent = await readFile(partialPath);
506
+ let partialContent = await readFile(partialPath);
165
507
  const { content: partialMarkdown } = (0, gray_matter_1.default)(partialContent);
508
+ // Recursively resolve imports in the partial with the updated chain
509
+ const resolvedPartial = await resolvePartialImports(partialMarkdown, partialPath, newChain);
510
+ // Escape special regex characters in component name and import path
511
+ const escapedComponentName = escapeRegex(componentName);
512
+ const escapedImportPath = escapeRegex(importPath);
166
513
  // Remove the import statement
167
- resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${componentName}|{\\s*${componentName}\\s*})\\s+from\\s+['"]${importPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}['"];?\\s*$`, 'gm'), '');
514
+ resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${escapedComponentName}|{\\s*${escapedComponentName}\\s*})\\s+from\\s+['"]${escapedImportPath}['"];?\\s*$`, 'gm'), '');
168
515
  // Replace JSX usage with the partial content
169
516
  // Handle both self-closing tags and tags with content
170
517
  // <PartialName /> or <PartialName></PartialName> or <PartialName>...</PartialName>
171
- const jsxRegex = new RegExp(`<${componentName}\\s*(?:[^>]*?)(?:/>|>[^<]*</${componentName}>)`, 'g');
172
- resolved = resolved.replace(jsxRegex, partialMarkdown.trim());
518
+ const jsxRegex = new RegExp(`<${escapedComponentName}\\s*(?:[^>]*?)(?:/>|>[^<]*</${escapedComponentName}>)`, 'g');
519
+ resolved = resolved.replace(jsxRegex, resolvedPartial.trim());
173
520
  }
174
521
  catch (error) {
175
- console.warn(`Failed to resolve partial import "${importPath}" in ${filePath}: ${error}`);
176
- // Leave the import and usage as-is if we can't resolve it
522
+ exports.logger.warn(`Failed to resolve partial import from ${importPath}: ${getErrorMessage(error)}`);
523
+ // Remove both the import statement AND the JSX usage even if partial can't be resolved
524
+ // This prevents leaving broken references in the output
525
+ // Escape special regex characters in component name and import path
526
+ const escapedComponentName = escapeRegex(componentName);
527
+ const escapedImportPath = escapeRegex(importPath);
528
+ // Remove the import statement
529
+ resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${escapedComponentName}|{\\s*${escapedComponentName}\\s*})\\s+from\\s+['"]${escapedImportPath}['"];?\\s*$`, 'gm'), '');
530
+ // Remove JSX usage of this component
531
+ // Handle both self-closing tags (<Component />) and regular tags with content (<Component>...</Component>)
532
+ const jsxRegex = new RegExp(`<${escapedComponentName}(?:\\s+[^>]*)?\\s*\\/?>(?:[\\s\\S]*?<\\/${escapedComponentName}>)?`, 'gm');
533
+ resolved = resolved.replace(jsxRegex, '');
177
534
  }
178
535
  }
179
536
  return resolved;
@@ -255,16 +612,18 @@ function cleanMarkdownContent(content, excludeImports = false, removeDuplicateHe
255
612
  * @returns Transformed URL path
256
613
  */
257
614
  function applyPathTransformations(urlPath, pathTransformation) {
258
- if (!pathTransformation) {
615
+ if (!isDefined(pathTransformation)) {
259
616
  return urlPath;
260
617
  }
261
618
  let transformedPath = urlPath;
262
619
  // Remove ignored path segments
263
- if (pathTransformation.ignorePaths?.length) {
620
+ if (isNonEmptyArray(pathTransformation.ignorePaths)) {
264
621
  for (const ignorePath of pathTransformation.ignorePaths) {
265
622
  // Create a regex that matches the ignore path at the beginning, middle, or end of the path
266
623
  // We use word boundaries to ensure we match complete path segments
267
- const ignoreRegex = new RegExp(`(^|/)(${ignorePath})(/|$)`, 'g');
624
+ // Escape special regex characters in ignorePath to prevent regex injection
625
+ const escapedIgnorePath = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
626
+ const ignoreRegex = new RegExp(`(^|/)(${escapedIgnorePath})(/|$)`, 'g');
268
627
  transformedPath = transformedPath.replace(ignoreRegex, '$1$3');
269
628
  }
270
629
  // Clean up any double slashes that might have been created
@@ -273,7 +632,7 @@ function applyPathTransformations(urlPath, pathTransformation) {
273
632
  transformedPath = transformedPath.replace(/^\//, '');
274
633
  }
275
634
  // Add path segments if they're not already present
276
- if (pathTransformation.addPaths?.length) {
635
+ if (isNonEmptyArray(pathTransformation.addPaths)) {
277
636
  // Process in reverse order to maintain the specified order in the final path
278
637
  // This is because each path is prepended to the front
279
638
  const pathsToAdd = [...pathTransformation.addPaths].reverse();
@@ -286,3 +645,97 @@ function applyPathTransformations(urlPath, pathTransformation) {
286
645
  }
287
646
  return transformedPath;
288
647
  }
648
/**
 * Sanitize a string to create a safe filename
 * @param input - Input string (typically a title)
 * @param fallback - Fallback string if input becomes empty after sanitization
 * @param options - Optional flags:
 *   preserveUnicode (default true): only replace filesystem-unsafe characters
 *   and whitespace; when false, keep only ASCII letters, digits, '_', '.', '-'.
 *   preserveCase (default false): keep the original letter case instead of lowercasing.
 * @returns Sanitized filename (without extension)
 * @throws ValidationError if input or fallback are not strings
 */
function sanitizeForFilename(input, fallback = 'untitled', options = {}) {
    validateString(input, 'input');
    validateString(fallback, 'fallback', { minLength: 1 });
    if (!isNonEmptyString(input)) {
        return fallback;
    }
    const { preserveUnicode = true, preserveCase = false } = options;
    let sanitized = preserveCase ? input : input.toLowerCase();
    if (preserveUnicode) {
        // Replace filesystem-unsafe characters (/ \ : * ? " < > |) and whitespace with dashes
        sanitized = sanitized.replace(/[/\\:*?"<>|\s]+/g, '-');
    }
    else {
        // ASCII-only mode. Uppercase letters are allowed too, otherwise
        // preserveCase would have its capitals replaced by dashes.
        sanitized = sanitized.replace(/[^a-zA-Z0-9_.-]+/g, '-');
    }
    sanitized = sanitized
        .replace(/-+/g, '-')
        // Strip leading dots and dashes together: removing them one after the
        // other lets "-.name" end up as the hidden file ".name"
        .replace(/^[.-]+/, '')
        .replace(/-+$/, '');
    return sanitized || fallback;
}
681
/**
 * Derive an identifier that is not yet in a set of used identifiers and
 * register it there. Comparison and registration are case-insensitive
 * (the lowercased form is stored); the returned value keeps its case.
 * @param baseIdentifier - Base identifier to make unique
 * @param usedIdentifiers - Set of already used (lowercased) identifiers; updated in place
 * @param suffix - Called as suffix(counter, baseIdentifier); counter starts at 2.
 *                 Default renders the counter in parentheses, e.g. "(2)"
 * @returns Unique identifier
 * @throws ValidationError if baseIdentifier is not a string or usedIdentifiers is not a Set
 */
function ensureUniqueIdentifier(baseIdentifier, usedIdentifiers, suffix = (counter) => `(${counter})`) {
    validateString(baseIdentifier, 'baseIdentifier', { minLength: 1 });
    validateRequired(usedIdentifiers, 'usedIdentifiers');
    if (!(usedIdentifiers instanceof Set)) {
        throw new ValidationError(`Parameter 'usedIdentifiers' must be a Set`);
    }
    const MAX_ITERATIONS = 10000;
    let candidate = baseIdentifier;
    for (let attempt = 1; usedIdentifiers.has(candidate.toLowerCase()); attempt++) {
        candidate = `${baseIdentifier}${suffix(attempt + 1, baseIdentifier)}`;
        if (attempt >= MAX_ITERATIONS) {
            // Give up on the suffix scheme (e.g. a suffix function that never varies)
            // and fall back to a timestamp plus random identifier
            const timestamp = Date.now().toString(36);
            const random = Math.random().toString(36).substring(2, 8);
            candidate = `${baseIdentifier}-${timestamp}-${random}`;
            exports.logger.warn(`Maximum iterations reached for unique identifier. Using fallback: ${candidate}`);
            break;
        }
    }
    usedIdentifiers.add(candidate.toLowerCase());
    return candidate;
}
716
/**
 * Assemble a Markdown document: optional YAML frontmatter, an H1 title,
 * an optional blockquote description, then the content. The title/body part
 * is trimmed and the result always ends with a single newline.
 * @param title - Document title
 * @param description - Document description
 * @param content - Document content
 * @param includeMetadata - Whether to include the description blockquote
 * @param frontMatter - Optional frontmatter object; emitted only when it has at least one key
 * @returns Formatted markdown content
 */
function createMarkdownContent(title, description = '', content = '', includeMetadata = true, frontMatter) {
    const sections = [];
    if (isDefined(frontMatter) && Object.keys(frontMatter).length > 0) {
        const yamlBlock = YAML.stringify(frontMatter, {
            lineWidth: 0,
            defaultStringType: 'QUOTE_DOUBLE',
            defaultKeyType: 'PLAIN'
        });
        sections.push('---\n', yamlBlock, '---\n\n');
    }
    const showDescription = includeMetadata && description;
    const descriptionLine = showDescription ? `\n\n> ${description}\n` : '\n';
    const body = [`# ${title}${descriptionLine}`, `${content}`].join('\n');
    sections.push(body.trim() + '\n');
    return sections.join('');
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docusaurus-plugin-llms",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmstxt.org standard",
5
5
  "main": "lib/index.js",
6
6
  "types": "lib/index.d.ts",
@@ -9,7 +9,7 @@
9
9
  "watch": "tsc --watch",
10
10
  "cleanup": "node cleanup.js",
11
11
  "prepublishOnly": "npm run build && npm run cleanup",
12
- "test:unit": "node tests/test-path-transforms.js && node tests/test-header-deduplication.js && node tests/test-import-removal.js && node tests/test-partials.js && node tests/test-root-content.js",
12
+ "test:unit": "node tests/test-plugin-options-validation.js && node tests/test-plugin-validation-integration.js && node tests/test-regex-escaping.js && node tests/test-baseurl-handling.js && node tests/test-path-transforms.js && node tests/test-header-deduplication.js && node tests/test-import-removal.js && node tests/test-partials.js && node tests/test-missing-partials.js && node tests/test-circular-imports.js && node tests/test-root-content.js && node tests/test-filenames.js && node tests/test-url-encoding.js && node tests/test-nested-paths.js && node tests/test-filename-sanitization.js && node tests/test-yaml-encoding.js && node tests/test-url-error-handling.js && node tests/test-regex-lastindex.js && node tests/test-whitespace-paths.js && node tests/test-unique-identifier-iteration-limit.js && node tests/test-error-handling.js && node tests/test-file-io-error-handling.js && node tests/test-parallel-processing.js && node tests/test-path-length-validation.js && node tests/test-bom-handling.js && node tests/test-batch-processing.js && node tests/test-input-validation.js",
13
13
  "test:integration": "node tests/test-path-transformation.js",
14
14
  "test": "npm run build && npm run test:unit && npm run test:integration"
15
15
  },
@@ -38,13 +38,15 @@
38
38
  "license": "MIT",
39
39
  "dependencies": {
40
40
  "gray-matter": "^4.0.3",
41
- "minimatch": "^9.0.3"
41
+ "minimatch": "^9.0.3",
42
+ "yaml": "^2.8.1"
42
43
  },
43
44
  "peerDependencies": {
44
45
  "@docusaurus/core": "^3.0.0"
45
46
  },
46
47
  "devDependencies": {
47
48
  "@docusaurus/types": "^3.0.0",
49
+ "@types/js-yaml": "^4.0.9",
48
50
  "@types/minimatch": "^5.1.2",
49
51
  "@types/node": "^20.6.0",
50
52
  "typescript": "^5.2.2"