docusaurus-plugin-llms 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/utils.js CHANGED
@@ -39,6 +39,19 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
39
39
  return (mod && mod.__esModule) ? mod : { "default": mod };
40
40
  };
41
41
  Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.logger = exports.LogLevel = exports.ValidationError = void 0;
43
+ exports.isDefined = isDefined;
44
+ exports.isNonEmptyString = isNonEmptyString;
45
+ exports.isNonEmptyArray = isNonEmptyArray;
46
+ exports.getErrorMessage = getErrorMessage;
47
+ exports.getErrorStack = getErrorStack;
48
+ exports.validateRequired = validateRequired;
49
+ exports.validateString = validateString;
50
+ exports.validateArray = validateArray;
51
+ exports.setLogLevel = setLogLevel;
52
+ exports.normalizePath = normalizePath;
53
+ exports.validatePathLength = validatePathLength;
54
+ exports.shortenPathIfNeeded = shortenPathIfNeeded;
42
55
  exports.writeFile = writeFile;
43
56
  exports.readFile = readFile;
44
57
  exports.shouldIgnoreFile = shouldIgnoreFile;
@@ -52,9 +65,235 @@ exports.ensureUniqueIdentifier = ensureUniqueIdentifier;
52
65
  exports.createMarkdownContent = createMarkdownContent;
53
66
  const fs = __importStar(require("fs/promises"));
54
67
  const path = __importStar(require("path"));
68
+ const crypto = __importStar(require("crypto"));
55
69
  const minimatch_1 = require("minimatch");
56
70
  const gray_matter_1 = __importDefault(require("gray-matter"));
57
71
  const YAML = __importStar(require("yaml"));
72
+ /**
73
+ * Null/Undefined Handling Guidelines:
74
+ *
75
+ * 1. For required parameters: Throw early if null/undefined
76
+ * 2. For optional parameters: Use optional chaining `value?.property`
77
+ * 3. For explicit null checks: Use `!== null` and `!== undefined` or the isDefined type guard
78
+ * 4. For string validation: Use isNonEmptyString() type guard
79
+ * 5. For truthy checks on booleans: Use explicit comparison or Boolean(value)
80
+ *
81
+ * Avoid: `if (value)` when value could be 0, '', or false legitimately
82
+ * Use: Type guards for consistent, type-safe checks
83
+ */
84
+ /**
85
+ * Type guard to check if a value is defined (not null or undefined)
86
+ * @param value - Value to check
87
+ * @returns True if value is not null or undefined
88
+ */
89
+ function isDefined(value) {
90
+ return value !== null && value !== undefined;
91
+ }
92
+ /**
93
+ * Type guard to check if a value is a non-empty string
94
+ * @param value - Value to check
95
+ * @returns True if value is a string with at least one non-whitespace character
96
+ */
97
+ function isNonEmptyString(value) {
98
+ return typeof value === 'string' && value.trim().length > 0;
99
+ }
100
+ /**
101
+ * Type guard to check if a value is a non-empty array
102
+ * @param value - Value to check
103
+ * @returns True if value is an array with at least one element
104
+ */
105
+ function isNonEmptyArray(value) {
106
+ return Array.isArray(value) && value.length > 0;
107
+ }
108
+ /**
109
+ * Safely extract an error message from an unknown error value
110
+ * @param error - The error value (can be Error, string, or any other type)
111
+ * @returns A string representation of the error
112
+ */
113
+ function getErrorMessage(error) {
114
+ if (error instanceof Error) {
115
+ return error.message;
116
+ }
117
+ if (typeof error === 'string') {
118
+ return error;
119
+ }
120
+ try {
121
+ const stringified = JSON.stringify(error);
122
+ // JSON.stringify returns undefined for undefined values, handle that case
123
+ return stringified !== undefined ? stringified : 'Unknown error';
124
+ }
125
+ catch {
126
+ return 'Unknown error';
127
+ }
128
+ }
129
+ /**
130
+ * Extract stack trace from unknown error types
131
+ * @param error - The error value (can be Error or any other type)
132
+ * @returns Stack trace if available, undefined otherwise
133
+ */
134
+ function getErrorStack(error) {
135
+ if (error instanceof Error) {
136
+ return error.stack;
137
+ }
138
+ return undefined;
139
+ }
140
+ /**
141
+ * Custom error class for validation errors
142
+ */
143
+ class ValidationError extends Error {
144
+ constructor(message) {
145
+ super(message);
146
+ this.name = 'ValidationError';
147
+ }
148
+ }
149
+ exports.ValidationError = ValidationError;
150
+ /**
151
+ * Validates that a value is not null or undefined
152
+ * @param value - The value to validate
153
+ * @param paramName - The parameter name for error messages
154
+ * @returns The validated value
155
+ * @throws ValidationError if the value is null or undefined
156
+ */
157
+ function validateRequired(value, paramName) {
158
+ if (value === null || value === undefined) {
159
+ throw new ValidationError(`Required parameter '${paramName}' is null or undefined`);
160
+ }
161
+ return value;
162
+ }
163
+ /**
164
+ * Validates that a value is a string and optionally checks its properties
165
+ * @param value - The value to validate
166
+ * @param paramName - The parameter name for error messages
167
+ * @param options - Validation options for min/max length and pattern
168
+ * @returns The validated string
169
+ * @throws ValidationError if validation fails
170
+ */
171
+ function validateString(value, paramName, options = {}) {
172
+ if (typeof value !== 'string') {
173
+ throw new ValidationError(`Parameter '${paramName}' must be a string, got ${typeof value}`);
174
+ }
175
+ if (options.minLength !== undefined && value.length < options.minLength) {
176
+ throw new ValidationError(`Parameter '${paramName}' must be at least ${options.minLength} characters`);
177
+ }
178
+ if (options.maxLength !== undefined && value.length > options.maxLength) {
179
+ throw new ValidationError(`Parameter '${paramName}' exceeds maximum length of ${options.maxLength}`);
180
+ }
181
+ if (options.pattern && !options.pattern.test(value)) {
182
+ throw new ValidationError(`Parameter '${paramName}' does not match required pattern`);
183
+ }
184
+ return value;
185
+ }
186
+ /**
187
+ * Validates that a value is an array and optionally validates elements
188
+ * @param value - The value to validate
189
+ * @param paramName - The parameter name for error messages
190
+ * @param elementValidator - Optional function to validate each element
191
+ * @returns The validated array
192
+ * @throws ValidationError if validation fails
193
+ */
194
+ function validateArray(value, paramName, elementValidator) {
195
+ if (!Array.isArray(value)) {
196
+ throw new ValidationError(`Parameter '${paramName}' must be an array`);
197
+ }
198
+ if (elementValidator) {
199
+ value.forEach((item, index) => {
200
+ if (!elementValidator(item)) {
201
+ throw new ValidationError(`Element at index ${index} in '${paramName}' failed validation`);
202
+ }
203
+ });
204
+ }
205
+ return value;
206
+ }
207
+ /**
208
+ * Logging level enumeration
209
+ */
210
+ var LogLevel;
211
+ (function (LogLevel) {
212
+ LogLevel[LogLevel["QUIET"] = 0] = "QUIET";
213
+ LogLevel[LogLevel["NORMAL"] = 1] = "NORMAL";
214
+ LogLevel[LogLevel["VERBOSE"] = 2] = "VERBOSE";
215
+ })(LogLevel || (exports.LogLevel = LogLevel = {}));
216
+ let currentLogLevel = LogLevel.NORMAL;
217
+ /**
218
+ * Set the logging level for the plugin
219
+ * @param level - The logging level to use
220
+ */
221
+ function setLogLevel(level) {
222
+ currentLogLevel = level;
223
+ }
224
+ /**
225
+ * Logger utility for consistent logging across the plugin
226
+ */
227
+ exports.logger = {
228
+ error: (message) => {
229
+ console.error(`[docusaurus-plugin-llms] ERROR: ${message}`);
230
+ },
231
+ warn: (message) => {
232
+ if (currentLogLevel >= LogLevel.NORMAL) {
233
+ console.warn(`[docusaurus-plugin-llms] ${message}`);
234
+ }
235
+ },
236
+ info: (message) => {
237
+ if (currentLogLevel >= LogLevel.NORMAL) {
238
+ console.log(`[docusaurus-plugin-llms] ${message}`);
239
+ }
240
+ },
241
+ verbose: (message) => {
242
+ if (currentLogLevel >= LogLevel.VERBOSE) {
243
+ console.log(`[docusaurus-plugin-llms] ${message}`);
244
+ }
245
+ }
246
+ };
247
+ /**
248
+ * Constants for path length limits
249
+ */
250
+ const MAX_PATH_LENGTH_WINDOWS = 260;
251
+ const MAX_PATH_LENGTH_UNIX = 4096;
252
+ /**
253
+ * Normalizes a file path by converting all backslashes to forward slashes.
254
+ * This ensures consistent path handling across Windows and Unix systems.
255
+ *
256
+ * @param filePath - The file path to normalize
257
+ * @returns The normalized path with forward slashes
258
+ * @throws ValidationError if filePath is not a string
259
+ */
260
+ function normalizePath(filePath) {
261
+ validateString(filePath, 'filePath');
262
+ return filePath.replace(/\\/g, '/');
263
+ }
264
+ /**
265
+ * Validates that a file path does not exceed the platform-specific maximum length
266
+ * @param filePath - The file path to validate
267
+ * @returns True if the path is within limits, false otherwise
268
+ */
269
+ function validatePathLength(filePath) {
270
+ const maxLength = process.platform === 'win32'
271
+ ? MAX_PATH_LENGTH_WINDOWS
272
+ : MAX_PATH_LENGTH_UNIX;
273
+ if (filePath.length > maxLength) {
274
+ exports.logger.error(`Path exceeds maximum length (${maxLength}): ${filePath}`);
275
+ return false;
276
+ }
277
+ return true;
278
+ }
279
+ /**
280
+ * Shortens a file path by creating a hash-based filename if the path is too long
281
+ * @param fullPath - The full file path that may be too long
282
+ * @param outputDir - The output directory base path
283
+ * @param relativePath - The relative path from the output directory
284
+ * @returns A shortened path if necessary, or the original path if it's within limits
285
+ */
286
+ function shortenPathIfNeeded(fullPath, outputDir, relativePath) {
287
+ if (validatePathLength(fullPath)) {
288
+ return fullPath;
289
+ }
290
+ // Create a hash of the relative path to ensure uniqueness
291
+ const hash = crypto.createHash('md5').update(relativePath).digest('hex').substring(0, 8);
292
+ const shortenedPath = path.join(outputDir, `${hash}.md`);
293
+ exports.logger.warn(`Path too long, using shortened path: ${shortenedPath}`);
294
+ exports.logger.verbose(`Original path: ${fullPath}`);
295
+ return shortenedPath;
296
+ }
58
297
  /**
59
298
  * Write content to a file
60
299
  * @param filePath - Path to write the file to
@@ -66,50 +305,121 @@ async function writeFile(filePath, data) {
66
305
  /**
67
306
  * Read content from a file
68
307
  * @param filePath - Path of the file to read
69
- * @returns Content of the file
308
+ * @returns Content of the file with BOM removed if present
70
309
  */
71
310
  async function readFile(filePath) {
72
- return fs.readFile(filePath, 'utf8');
311
+ let content = await fs.readFile(filePath, 'utf8');
312
+ // Remove UTF-8 BOM if present
313
+ // UTF-8 BOM is the character U+FEFF at the start of the file
314
+ if (content.charCodeAt(0) === 0xFEFF) {
315
+ content = content.slice(1);
316
+ }
317
+ return content;
73
318
  }
74
319
  /**
75
320
  * Check if a file should be ignored based on glob patterns
321
+ * Matches against both site-relative and docs-relative paths
76
322
  * @param filePath - Path to the file
77
- * @param baseDir - Base directory for relative paths
323
+ * @param baseDir - Base directory (site root) for relative paths
78
324
  * @param ignorePatterns - Glob patterns for files to ignore
325
+ * @param docsDir - Docs directory name (e.g., 'docs')
79
326
  * @returns Whether the file should be ignored
80
327
  */
81
- function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
82
- if (ignorePatterns.length === 0) {
328
+ function shouldIgnoreFile(filePath, baseDir, ignorePatterns, docsDir = 'docs') {
329
+ if (!isNonEmptyArray(ignorePatterns)) {
83
330
  return false;
84
331
  }
85
- const relativePath = path.relative(baseDir, filePath);
86
- return ignorePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
332
+ const minimatchOptions = { matchBase: true };
333
+ // Get site-relative path (e.g., "docs/quickstart/file.md")
334
+ const siteRelativePath = normalizePath(path.relative(baseDir, filePath));
335
+ // Get docs-relative path (e.g., "quickstart/file.md")
336
+ const docsBaseDir = path.resolve(path.join(baseDir, docsDir));
337
+ const resolvedFile = path.resolve(filePath);
338
+ const docsRelativePath = resolvedFile.startsWith(docsBaseDir)
339
+ ? normalizePath(path.relative(docsBaseDir, resolvedFile))
340
+ : null;
341
+ return ignorePatterns.some(pattern => {
342
+ // Try matching against site-relative path
343
+ if ((0, minimatch_1.minimatch)(siteRelativePath, pattern, minimatchOptions)) {
344
+ return true;
345
+ }
346
+ // Try matching against docs-relative path if available
347
+ if (docsRelativePath && (0, minimatch_1.minimatch)(docsRelativePath, pattern, minimatchOptions)) {
348
+ return true;
349
+ }
350
+ return false;
351
+ });
87
352
  }
88
353
  /**
89
354
  * Recursively reads all Markdown files in a directory
90
355
  * @param dir - Directory to scan
91
- * @param baseDir - Base directory for relative paths
356
+ * @param baseDir - Base directory (site root) for relative paths
92
357
  * @param ignorePatterns - Glob patterns for files to ignore
358
+ * @param docsDir - Docs directory name (e.g., 'docs')
359
+ * @param warnOnIgnoredFiles - Whether to warn about ignored files
360
+ * @param visitedPaths - Set of already visited real paths to detect symlink loops (internal use)
93
361
  * @returns Array of file paths
94
362
  */
95
- async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
363
+ async function readMarkdownFiles(dir, baseDir, ignorePatterns = [], docsDir = 'docs', warnOnIgnoredFiles = false, visitedPaths = new Set()) {
364
+ // Get real path to detect symlink loops
365
+ let realPath;
366
+ try {
367
+ realPath = await fs.realpath(dir);
368
+ }
369
+ catch (error) {
370
+ exports.logger.warn(`Failed to resolve real path for ${dir}: ${getErrorMessage(error)}`);
371
+ return [];
372
+ }
373
+ // Check if we've already visited this path (symlink loop detection)
374
+ if (visitedPaths.has(realPath)) {
375
+ exports.logger.warn(`Skipping already visited path (possible symlink loop): ${dir}`);
376
+ return [];
377
+ }
378
+ // Add to visited paths
379
+ visitedPaths.add(realPath);
96
380
  const files = [];
97
381
  const entries = await fs.readdir(dir, { withFileTypes: true });
98
382
  for (const entry of entries) {
99
383
  const fullPath = path.join(dir, entry.name);
100
- if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
384
+ if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns, docsDir)) {
101
385
  continue;
102
386
  }
103
- if (entry.isDirectory()) {
104
- const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
387
+ // Handle both regular directories and symlinked directories
388
+ let isDir = entry.isDirectory();
389
+ if (!isDir && entry.isSymbolicLink()) {
390
+ // Check if symlink points to a directory
391
+ try {
392
+ const stats = await fs.stat(fullPath);
393
+ isDir = stats.isDirectory();
394
+ }
395
+ catch (error) {
396
+ // Broken symlink, warn and skip it
397
+ exports.logger.warn(`Skipping broken symlink: ${fullPath}`);
398
+ continue;
399
+ }
400
+ }
401
+ if (isDir) {
402
+ const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns, docsDir, warnOnIgnoredFiles, visitedPaths);
105
403
  files.push(...subDirFiles);
106
404
  }
405
+ else if (!entry.name.includes('.')) {
406
+ // File without extension
407
+ if (warnOnIgnoredFiles) {
408
+ exports.logger.warn(`Ignoring file without extension: ${fullPath}`);
409
+ }
410
+ }
107
411
  else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
108
412
  // Skip partial files (those starting with underscore)
109
413
  if (!entry.name.startsWith('_')) {
110
414
  files.push(fullPath);
111
415
  }
112
416
  }
417
+ else {
418
+ // Other extension
419
+ if (warnOnIgnoredFiles) {
420
+ exports.logger.warn(`Ignoring file with unsupported extension: ${fullPath}`);
421
+ }
422
+ }
113
423
  }
114
424
  return files;
115
425
  }
@@ -121,13 +431,13 @@ async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
121
431
  * @returns Extracted title
122
432
  */
123
433
  function extractTitle(data, content, filePath) {
124
- // First try frontmatter
125
- if (data.title) {
434
+ // First try frontmatter (check for valid non-empty string)
435
+ if (isNonEmptyString(data.title)) {
126
436
  return data.title;
127
437
  }
128
438
  // Then try first heading
129
439
  const headingMatch = content.match(/^#\s+(.*)/m);
130
- if (headingMatch) {
440
+ if (isNonEmptyString(headingMatch?.[1])) {
131
441
  return headingMatch[1].trim();
132
442
  }
133
443
  // Finally use filename
@@ -135,21 +445,32 @@ function extractTitle(data, content, filePath) {
135
445
  .replace(/-/g, ' ')
136
446
  .replace(/\b\w/g, (c) => c.toUpperCase());
137
447
  }
448
+ /**
449
+ * Escape special regex characters in a string
450
+ * @param str - String to escape
451
+ * @returns Escaped string safe for use in regex
452
+ */
453
+ function escapeRegex(str) {
454
+ return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
455
+ }
138
456
  /**
139
457
  * Resolve and inline partial imports in markdown content
140
458
  * @param content - The markdown content with import statements
141
459
  * @param filePath - The path of the file containing the imports
460
+ * @param importChain - Set of file paths in the current import chain (for circular dependency detection)
142
461
  * @returns Content with partials resolved
143
462
  */
144
- async function resolvePartialImports(content, filePath) {
463
+ async function resolvePartialImports(content, filePath, importChain = new Set()) {
145
464
  let resolved = content;
146
465
  // Match import statements for partials and JSX usage
147
466
  // Pattern 1: import PartialName from './_partial.mdx'
148
467
  // Pattern 2: import { PartialName } from './_partial.mdx'
149
- const importRegex = /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;
468
+ // Create a fresh regex for each invocation to avoid lastIndex state leakage
469
+ const createImportRegex = () => /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;
150
470
  const imports = new Map();
151
471
  // First pass: collect all imports
152
472
  let match;
473
+ const importRegex = createImportRegex();
153
474
  while ((match = importRegex.exec(content)) !== null) {
154
475
  const componentName = match[1] || match[2];
155
476
  const importPath = match[3];
@@ -164,20 +485,52 @@ async function resolvePartialImports(content, filePath) {
164
485
  // Resolve the partial file path relative to the current file
165
486
  const dir = path.dirname(filePath);
166
487
  const partialPath = path.resolve(dir, importPath);
488
+ // Check for circular import
489
+ if (importChain.has(partialPath)) {
490
+ const chain = Array.from(importChain).join(' -> ');
491
+ exports.logger.error(`Circular import detected: ${chain} -> ${partialPath}`);
492
+ // Escape special regex characters in component name and import path
493
+ const escapedComponentName = escapeRegex(componentName);
494
+ const escapedImportPath = escapeRegex(importPath);
495
+ // Remove the import statement to prevent infinite recursion
496
+ resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${escapedComponentName}|{\\s*${escapedComponentName}\\s*})\\s+from\\s+['"]${escapedImportPath}['"];?\\s*$`, 'gm'), '');
497
+ // Remove JSX usage of this component
498
+ const jsxRegex = new RegExp(`<${escapedComponentName}(?:\\s+[^>]*)?\\s*\\/?>(?:[\\s\\S]*?<\\/${escapedComponentName}>)?`, 'gm');
499
+ resolved = resolved.replace(jsxRegex, '');
500
+ continue;
501
+ }
502
+ // Add to chain before recursive call
503
+ const newChain = new Set(importChain);
504
+ newChain.add(partialPath);
167
505
  // Read the partial file
168
- const partialContent = await readFile(partialPath);
506
+ let partialContent = await readFile(partialPath);
169
507
  const { content: partialMarkdown } = (0, gray_matter_1.default)(partialContent);
508
+ // Recursively resolve imports in the partial with the updated chain
509
+ const resolvedPartial = await resolvePartialImports(partialMarkdown, partialPath, newChain);
510
+ // Escape special regex characters in component name and import path
511
+ const escapedComponentName = escapeRegex(componentName);
512
+ const escapedImportPath = escapeRegex(importPath);
170
513
  // Remove the import statement
171
- resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${componentName}|{\\s*${componentName}\\s*})\\s+from\\s+['"]${importPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}['"];?\\s*$`, 'gm'), '');
514
+ resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${escapedComponentName}|{\\s*${escapedComponentName}\\s*})\\s+from\\s+['"]${escapedImportPath}['"];?\\s*$`, 'gm'), '');
172
515
  // Replace JSX usage with the partial content
173
516
  // Handle both self-closing tags and tags with content
174
517
  // <PartialName /> or <PartialName></PartialName> or <PartialName>...</PartialName>
175
- const jsxRegex = new RegExp(`<${componentName}\\s*(?:[^>]*?)(?:/>|>[^<]*</${componentName}>)`, 'g');
176
- resolved = resolved.replace(jsxRegex, partialMarkdown.trim());
518
+ const jsxRegex = new RegExp(`<${escapedComponentName}\\s*(?:[^>]*?)(?:/>|>[^<]*</${escapedComponentName}>)`, 'g');
519
+ resolved = resolved.replace(jsxRegex, resolvedPartial.trim());
177
520
  }
178
521
  catch (error) {
179
- console.warn(`Failed to resolve partial import "${importPath}" in ${filePath}: ${error}`);
180
- // Leave the import and usage as-is if we can't resolve it
522
+ exports.logger.warn(`Failed to resolve partial import from ${importPath}: ${getErrorMessage(error)}`);
523
+ // Remove both the import statement AND the JSX usage even if partial can't be resolved
524
+ // This prevents leaving broken references in the output
525
+ // Escape special regex characters in component name and import path
526
+ const escapedComponentName = escapeRegex(componentName);
527
+ const escapedImportPath = escapeRegex(importPath);
528
+ // Remove the import statement
529
+ resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${escapedComponentName}|{\\s*${escapedComponentName}\\s*})\\s+from\\s+['"]${escapedImportPath}['"];?\\s*$`, 'gm'), '');
530
+ // Remove JSX usage of this component
531
+ // Handle both self-closing tags (<Component />) and regular tags with content (<Component>...</Component>)
532
+ const jsxRegex = new RegExp(`<${escapedComponentName}(?:\\s+[^>]*)?\\s*\\/?>(?:[\\s\\S]*?<\\/${escapedComponentName}>)?`, 'gm');
533
+ resolved = resolved.replace(jsxRegex, '');
181
534
  }
182
535
  }
183
536
  return resolved;
@@ -259,16 +612,18 @@ function cleanMarkdownContent(content, excludeImports = false, removeDuplicateHe
259
612
  * @returns Transformed URL path
260
613
  */
261
614
  function applyPathTransformations(urlPath, pathTransformation) {
262
- if (!pathTransformation) {
615
+ if (!isDefined(pathTransformation)) {
263
616
  return urlPath;
264
617
  }
265
618
  let transformedPath = urlPath;
266
619
  // Remove ignored path segments
267
- if (pathTransformation.ignorePaths?.length) {
620
+ if (isNonEmptyArray(pathTransformation.ignorePaths)) {
268
621
  for (const ignorePath of pathTransformation.ignorePaths) {
269
622
  // Create a regex that matches the ignore path at the beginning, middle, or end of the path
270
623
  // We use word boundaries to ensure we match complete path segments
271
- const ignoreRegex = new RegExp(`(^|/)(${ignorePath})(/|$)`, 'g');
624
+ // Escape special regex characters in ignorePath to prevent regex injection
625
+ const escapedIgnorePath = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
626
+ const ignoreRegex = new RegExp(`(^|/)(${escapedIgnorePath})(/|$)`, 'g');
272
627
  transformedPath = transformedPath.replace(ignoreRegex, '$1$3');
273
628
  }
274
629
  // Clean up any double slashes that might have been created
@@ -277,7 +632,7 @@ function applyPathTransformations(urlPath, pathTransformation) {
277
632
  transformedPath = transformedPath.replace(/^\//, '');
278
633
  }
279
634
  // Add path segments if they're not already present
280
- if (pathTransformation.addPaths?.length) {
635
+ if (isNonEmptyArray(pathTransformation.addPaths)) {
281
636
  // Process in reverse order to maintain the specified order in the final path
282
637
  // This is because each path is prepended to the front
283
638
  const pathsToAdd = [...pathTransformation.addPaths].reverse();
@@ -295,13 +650,31 @@ function applyPathTransformations(urlPath, pathTransformation) {
295
650
  * @param input - Input string (typically a title)
296
651
  * @param fallback - Fallback string if input becomes empty after sanitization
297
652
  * @returns Sanitized filename (without extension)
653
+ * @throws ValidationError if input or fallback are not strings
298
654
  */
299
- function sanitizeForFilename(input, fallback = 'untitled') {
300
- if (!input)
655
+ function sanitizeForFilename(input, fallback = 'untitled', options = {}) {
656
+ // Validate input parameters
657
+ validateString(input, 'input');
658
+ validateString(fallback, 'fallback', { minLength: 1 });
659
+ if (!isNonEmptyString(input))
301
660
  return fallback;
302
- const sanitized = input
303
- .toLowerCase()
304
- .replace(/[^a-z0-9]+/g, '-')
661
+ const { preserveUnicode = true, preserveCase = false } = options;
662
+ let sanitized = preserveCase ? input : input.toLowerCase();
663
+ if (preserveUnicode) {
664
+ // Only remove filesystem-unsafe characters: / \ : * ? " < > |
665
+ // Keep underscores, dots (except at start), hyphens, and unicode
666
+ // Also replace spaces with dashes for better filesystem compatibility
667
+ sanitized = sanitized.replace(/[/\\:*?"<>|\s]+/g, '-');
668
+ }
669
+ else {
670
+ // Allow alphanumeric, underscores, hyphens, dots
671
+ sanitized = sanitized.replace(/[^a-z0-9_.-]+/g, '-');
672
+ }
673
+ // Remove leading dots (hidden files on Unix)
674
+ sanitized = sanitized.replace(/^\.+/, '');
675
+ // Clean up multiple dashes and trim
676
+ sanitized = sanitized
677
+ .replace(/-+/g, '-')
305
678
  .replace(/^-+|-+$/g, '');
306
679
  return sanitized || fallback;
307
680
  }
@@ -311,13 +684,31 @@ function sanitizeForFilename(input, fallback = 'untitled') {
311
684
  * @param usedIdentifiers - Set of already used identifiers
312
685
  * @param suffix - Suffix pattern (default: number in parentheses)
313
686
  * @returns Unique identifier
687
+ * @throws ValidationError if baseIdentifier is not a string or usedIdentifiers is not a Set
314
688
  */
315
689
  function ensureUniqueIdentifier(baseIdentifier, usedIdentifiers, suffix = (counter) => `(${counter})`) {
690
+ // Validate input parameters
691
+ validateString(baseIdentifier, 'baseIdentifier', { minLength: 1 });
692
+ validateRequired(usedIdentifiers, 'usedIdentifiers');
693
+ if (!(usedIdentifiers instanceof Set)) {
694
+ throw new ValidationError(`Parameter 'usedIdentifiers' must be a Set`);
695
+ }
696
+ const MAX_ITERATIONS = 10000;
316
697
  let uniqueIdentifier = baseIdentifier;
317
698
  let counter = 1;
699
+ let iterations = 0;
318
700
  while (usedIdentifiers.has(uniqueIdentifier.toLowerCase())) {
319
701
  counter++;
320
702
  uniqueIdentifier = `${baseIdentifier}${suffix(counter, baseIdentifier)}`;
703
+ iterations++;
704
+ if (iterations >= MAX_ITERATIONS) {
705
+ // Fallback to timestamp-based unique identifier
706
+ const timestamp = Date.now().toString(36);
707
+ const random = Math.random().toString(36).substring(2, 8);
708
+ uniqueIdentifier = `${baseIdentifier}-${timestamp}-${random}`;
709
+ exports.logger.warn(`Maximum iterations reached for unique identifier. Using fallback: ${uniqueIdentifier}`);
710
+ break;
711
+ }
321
712
  }
322
713
  usedIdentifiers.add(uniqueIdentifier.toLowerCase());
323
714
  return uniqueIdentifier;
@@ -334,9 +725,13 @@ function ensureUniqueIdentifier(baseIdentifier, usedIdentifiers, suffix = (count
334
725
  function createMarkdownContent(title, description = '', content = '', includeMetadata = true, frontMatter) {
335
726
  let result = '';
336
727
  // Add frontmatter if provided
337
- if (frontMatter && Object.keys(frontMatter).length > 0) {
728
+ if (isDefined(frontMatter) && Object.keys(frontMatter).length > 0) {
338
729
  result += '---\n';
339
- result += YAML.stringify(frontMatter);
730
+ result += YAML.stringify(frontMatter, {
731
+ lineWidth: 0,
732
+ defaultStringType: 'QUOTE_DOUBLE',
733
+ defaultKeyType: 'PLAIN'
734
+ });
340
735
  result += '---\n\n';
341
736
  }
342
737
  const descriptionLine = includeMetadata && description ? `\n\n> ${description}\n` : '\n';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docusaurus-plugin-llms",
3
- "version": "0.2.2",
3
+ "version": "0.3.0",
4
4
  "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmstxt.org standard",
5
5
  "main": "lib/index.js",
6
6
  "types": "lib/index.d.ts",
@@ -9,7 +9,7 @@
9
9
  "watch": "tsc --watch",
10
10
  "cleanup": "node cleanup.js",
11
11
  "prepublishOnly": "npm run build && npm run cleanup",
12
- "test:unit": "node tests/test-path-transforms.js && node tests/test-header-deduplication.js && node tests/test-import-removal.js && node tests/test-partials.js && node tests/test-root-content.js",
12
+ "test:unit": "node tests/test-plugin-options-validation.js && node tests/test-plugin-validation-integration.js && node tests/test-regex-escaping.js && node tests/test-baseurl-handling.js && node tests/test-path-transforms.js && node tests/test-header-deduplication.js && node tests/test-import-removal.js && node tests/test-partials.js && node tests/test-missing-partials.js && node tests/test-circular-imports.js && node tests/test-root-content.js && node tests/test-filenames.js && node tests/test-url-encoding.js && node tests/test-nested-paths.js && node tests/test-filename-sanitization.js && node tests/test-yaml-encoding.js && node tests/test-url-error-handling.js && node tests/test-regex-lastindex.js && node tests/test-whitespace-paths.js && node tests/test-unique-identifier-iteration-limit.js && node tests/test-error-handling.js && node tests/test-file-io-error-handling.js && node tests/test-parallel-processing.js && node tests/test-path-length-validation.js && node tests/test-bom-handling.js && node tests/test-batch-processing.js && node tests/test-input-validation.js",
13
13
  "test:integration": "node tests/test-path-transformation.js",
14
14
  "test": "npm run build && npm run test:unit && npm run test:integration"
15
15
  },