docusaurus-plugin-llms 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/utils.ts CHANGED
@@ -4,10 +4,277 @@
4
4
 
5
5
  import * as fs from 'fs/promises';
6
6
  import * as path from 'path';
7
+ import * as crypto from 'crypto';
7
8
  import { minimatch } from 'minimatch';
8
9
  import matter from 'gray-matter';
10
+ import * as YAML from 'yaml';
9
11
  import { PluginOptions } from './types';
10
12
 
13
+ /**
14
+ * Null/Undefined Handling Guidelines:
15
+ *
16
+ * 1. For required parameters: Throw early if null/undefined
17
+ * 2. For optional parameters: Use optional chaining `value?.property`
18
+ * 3. For explicit null checks: Use `!== null` and `!== undefined` or the isDefined type guard
19
+ * 4. For string validation: Use isNonEmptyString() type guard
20
+ * 5. For truthy checks on booleans: Use explicit comparison or Boolean(value)
21
+ *
22
+ * Avoid: `if (value)` when value could be 0, '', or false legitimately
23
+ * Use: Type guards for consistent, type-safe checks
24
+ */
25
+
26
/**
 * Type guard that rules out both `null` and `undefined`.
 *
 * Falsy-but-present values such as `0`, `''` and `false` count as defined.
 *
 * @param value - Value to check
 * @returns True when the value is neither null nor undefined
 */
export function isDefined<T>(value: T | null | undefined): value is T {
  // Loose inequality against null matches exactly null and undefined.
  return value != null;
}
34
+
35
/**
 * Type guard for strings that contain something other than whitespace.
 *
 * @param value - Value to check
 * @returns True when the value is a string with at least one non-whitespace character
 */
export function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== 'string') {
    return false;
  }
  return value.trim() !== '';
}
43
+
44
/**
 * Type guard for arrays that hold at least one element.
 *
 * Element types are not inspected; `T` is asserted by the caller.
 *
 * @param value - Value to check
 * @returns True when the value is an array with length greater than zero
 */
export function isNonEmptyArray<T>(value: unknown): value is T[] {
  if (!Array.isArray(value)) {
    return false;
  }
  return value.length !== 0;
}
52
+
53
/**
 * Turn any thrown value into a printable message.
 *
 * @param error - The thrown value (an Error, a string, or anything else)
 * @returns The Error's message, the string itself, the JSON form of other
 *          values, or 'Unknown error' when no JSON form exists
 */
export function getErrorMessage(error: unknown): string {
  if (typeof error === 'string') {
    return error;
  }
  if (error instanceof Error) {
    return error.message;
  }

  let serialized: string | undefined;
  try {
    // JSON.stringify yields undefined for undefined/functions/symbols and
    // throws on circular structures or BigInt values.
    serialized = JSON.stringify(error);
  } catch {
    return 'Unknown error';
  }
  return serialized ?? 'Unknown error';
}
73
+
74
/**
 * Pull the stack trace off a thrown value when it has one.
 *
 * @param error - The thrown value (an Error or anything else)
 * @returns The Error's stack, or undefined for non-Error values
 */
export function getErrorStack(error: unknown): string | undefined {
  return error instanceof Error ? error.stack : undefined;
}
85
+
86
/**
 * Error thrown by the validate* helpers when an argument fails validation.
 *
 * Callers can distinguish it from other failures with
 * `error instanceof ValidationError` or `error.name === 'ValidationError'`.
 */
export class ValidationError extends Error {
  /**
   * @param message - Human-readable description of the validation failure
   */
  constructor(message: string) {
    super(message);
    this.name = 'ValidationError';
    // When compiled to ES5, calling super() on Error returns a plain Error and
    // drops this subclass's prototype; restore it so instanceof keeps working.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
95
+
96
/**
 * Assert that a required value is present and hand it back narrowed.
 *
 * @param value - The value to validate
 * @param paramName - Parameter name used in the error message
 * @returns The same value, typed without null/undefined
 * @throws ValidationError if the value is null or undefined
 */
export function validateRequired<T>(
  value: T | null | undefined,
  paramName: string
): T {
  // `== null` is true for exactly null and undefined; 0, '' and false pass.
  if (value == null) {
    throw new ValidationError(`Required parameter '${paramName}' is null or undefined`);
  }
  return value;
}
112
+
113
/**
 * Validates that a value is a string and optionally checks its length and shape.
 *
 * @param value - The value to validate
 * @param paramName - The parameter name for error messages
 * @param options - Optional constraints: `minLength` / `maxLength` are inclusive
 *                  bounds on `value.length`; `pattern` must match the value
 * @returns The validated string
 * @throws ValidationError if the value is not a string or violates a constraint
 */
export function validateString(
  value: unknown,
  paramName: string,
  options: { minLength?: number; maxLength?: number; pattern?: RegExp } = {}
): string {
  if (typeof value !== 'string') {
    throw new ValidationError(`Parameter '${paramName}' must be a string, got ${typeof value}`);
  }

  const { minLength, maxLength, pattern } = options;

  if (minLength !== undefined && value.length < minLength) {
    throw new ValidationError(`Parameter '${paramName}' must be at least ${minLength} characters`);
  }

  if (maxLength !== undefined && value.length > maxLength) {
    throw new ValidationError(`Parameter '${paramName}' exceeds maximum length of ${maxLength}`);
  }

  if (pattern) {
    // RegExp#test is stateful for global/sticky patterns: it resumes from
    // lastIndex. Reset before and after so a shared pattern always matches
    // from the start and no state leaks back to the caller.
    pattern.lastIndex = 0;
    const matches = pattern.test(value);
    pattern.lastIndex = 0;
    if (!matches) {
      throw new ValidationError(`Parameter '${paramName}' does not match required pattern`);
    }
  }

  return value;
}
144
+
145
/**
 * Assert that a value is an array, optionally checking every element.
 *
 * The element type `T` is asserted, not proven: only `elementValidator`
 * (when supplied) inspects the items.
 *
 * @param value - The value to validate
 * @param paramName - Parameter name used in error messages
 * @param elementValidator - Optional predicate each element must satisfy
 * @returns The same array, typed as `T[]`
 * @throws ValidationError if the value is not an array or an element is rejected
 */
export function validateArray<T>(
  value: unknown,
  paramName: string,
  elementValidator?: (item: unknown) => boolean
): T[] {
  if (!Array.isArray(value)) {
    throw new ValidationError(`Parameter '${paramName}' must be an array`);
  }

  if (!elementValidator) {
    return value as T[];
  }

  // forEach (rather than for...of) so holes in sparse arrays are skipped.
  value.forEach((element, position) => {
    const accepted = elementValidator(element);
    if (!accepted) {
      throw new ValidationError(`Element at index ${position} in '${paramName}' failed validation`);
    }
  });

  return value as T[];
}
172
+
173
/**
 * Verbosity levels for plugin logging, ordered from least to most output.
 */
export enum LogLevel {
  /** Only errors are printed. */
  QUIET = 0,
  /** Errors, warnings and informational messages are printed. */
  NORMAL = 1,
  /** Everything, including verbose diagnostics, is printed. */
  VERBOSE = 2
}

// Module-wide verbosity consulted by every logger method; change via setLogLevel().
let currentLogLevel = LogLevel.NORMAL;

/**
 * Change the verbosity used by `logger` from now on.
 *
 * @param level - The logging level to use
 */
export function setLogLevel(level: LogLevel): void {
  currentLogLevel = level;
}

/**
 * Prefixed console logger shared across the plugin.
 *
 * - `error` always prints (console.error).
 * - `warn` (console.warn) and `info` (console.log) print at NORMAL and above.
 * - `verbose` (console.log) prints only at VERBOSE.
 */
export const logger = {
  error: (message: string) => {
    console.error(`[docusaurus-plugin-llms] ERROR: ${message}`);
  },
  warn: (message: string) => {
    if (currentLogLevel < LogLevel.NORMAL) {
      return;
    }
    console.warn(`[docusaurus-plugin-llms] ${message}`);
  },
  info: (message: string) => {
    if (currentLogLevel < LogLevel.NORMAL) {
      return;
    }
    console.log(`[docusaurus-plugin-llms] ${message}`);
  },
  verbose: (message: string) => {
    if (currentLogLevel < LogLevel.VERBOSE) {
      return;
    }
    console.log(`[docusaurus-plugin-llms] ${message}`);
  }
};
215
+
216
/**
 * Path length ceilings (in characters) used by validatePathLength.
 */
/** Limit applied when process.platform is 'win32'. */
const MAX_PATH_LENGTH_WINDOWS = 260;
/** Limit applied on every other platform. */
const MAX_PATH_LENGTH_UNIX = 4096;
221
+
222
/**
 * Rewrite every backslash in a path as a forward slash so paths compare and
 * match the same way on Windows and Unix.
 *
 * @param filePath - The file path to normalize
 * @returns The path with all `\` replaced by `/`
 * @throws ValidationError if filePath is not a string
 */
export function normalizePath(filePath: string): string {
  validateString(filePath, 'filePath');
  const segments = filePath.split('\\');
  return segments.join('/');
}
234
+
235
/**
 * Check a path against the platform's maximum path length.
 *
 * Logs an error (and returns false) when the limit is exceeded; it never throws.
 *
 * @param filePath - The file path to validate
 * @returns True if the path length is within the limit, false otherwise
 */
export function validatePathLength(filePath: string): boolean {
  const onWindows = process.platform === 'win32';
  const limit = onWindows ? MAX_PATH_LENGTH_WINDOWS : MAX_PATH_LENGTH_UNIX;

  const withinLimit = filePath.length <= limit;
  if (!withinLimit) {
    logger.error(`Path exceeds maximum length (${limit}): ${filePath}`);
  }
  return withinLimit;
}
251
+
252
/**
 * Return a usable output path, swapping in a short hash-named file when the
 * full path exceeds the platform limit.
 *
 * The replacement is `<outputDir>/<8 hex chars>.md`, where the hex digits are
 * the start of the MD5 digest of `relativePath`; any directory structure in
 * `relativePath` is dropped.
 *
 * @param fullPath - The full file path that may be too long
 * @param outputDir - The output directory base path
 * @param relativePath - The relative path from the output directory (hash input)
 * @returns `fullPath` when it is within limits, otherwise the shortened path
 */
export function shortenPathIfNeeded(
  fullPath: string,
  outputDir: string,
  relativePath: string
): string {
  const fits = validatePathLength(fullPath);
  if (!fits) {
    // Same relativePath always hashes to the same name, keeping output stable across builds.
    const digest = crypto.createHash('md5').update(relativePath).digest('hex');
    const shortenedPath = path.join(outputDir, `${digest.substring(0, 8)}.md`);

    logger.warn(`Path too long, using shortened path: ${shortenedPath}`);
    logger.verbose(`Original path: ${fullPath}`);

    return shortenedPath;
  }

  return fullPath;
}
277
+
11
278
  /**
12
279
  * Write content to a file
13
280
  * @param filePath - Path to write the file to
@@ -20,63 +287,146 @@ export async function writeFile(filePath: string, data: string): Promise<void> {
20
287
/**
 * Read a UTF-8 text file, dropping a leading byte-order mark if there is one.
 *
 * @param filePath - Path of the file to read
 * @returns Content of the file with BOM removed if present
 */
export async function readFile(filePath: string): Promise<string> {
  const raw = await fs.readFile(filePath, 'utf8');

  // A UTF-8 BOM decodes to the single character U+FEFF at index 0.
  const BOM = 0xFEFF;
  return raw.charCodeAt(0) === BOM ? raw.slice(1) : raw;
}
28
303
 
29
304
/**
 * Check if a file should be ignored based on glob patterns.
 *
 * Each pattern is tried against two forms of the path, both with forward
 * slashes: the site-relative path (e.g. "docs/quickstart/file.md") and, when
 * the file lives inside the docs directory, the docs-relative path
 * (e.g. "quickstart/file.md"). Matching uses minimatch with `matchBase`, so a
 * slash-free pattern is compared against the basename.
 *
 * @param filePath - Path to the file
 * @param baseDir - Base directory (site root) for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @param docsDir - Docs directory name (e.g., 'docs')
 * @returns Whether the file should be ignored
 */
export function shouldIgnoreFile(
  filePath: string,
  baseDir: string,
  ignorePatterns: string[],
  docsDir: string = 'docs'
): boolean {
  if (!isNonEmptyArray(ignorePatterns)) {
    return false;
  }

  const minimatchOptions = { matchBase: true };

  // Site-relative path (e.g., "docs/quickstart/file.md")
  const siteRelativePath = normalizePath(path.relative(baseDir, filePath));

  // Docs-relative path (e.g., "quickstart/file.md"), only for files inside docsDir.
  // Containment is decided from path.relative rather than a string prefix test:
  // "/site/docs-old/x.md" starts with "/site/docs" but is NOT inside it.
  const docsBaseDir = path.resolve(path.join(baseDir, docsDir));
  const relativeToDocs = path.relative(docsBaseDir, path.resolve(filePath));
  const isInsideDocs =
    relativeToDocs !== '' &&
    relativeToDocs !== '..' &&
    !relativeToDocs.startsWith(`..${path.sep}`) &&
    !path.isAbsolute(relativeToDocs); // different drive on Windows
  const docsRelativePath = isInsideDocs ? normalizePath(relativeToDocs) : null;

  return ignorePatterns.some(pattern =>
    minimatch(siteRelativePath, pattern, minimatchOptions) ||
    (docsRelativePath !== null && minimatch(docsRelativePath, pattern, minimatchOptions))
  );
}
47
344
 
48
345
/**
 * Recursively collect the Markdown (.md / .mdx) files under a directory.
 *
 * Entries matching `ignorePatterns` are skipped, as are partials (file names
 * starting with an underscore). Symlinked directories are followed; each
 * directory's real path is recorded in `visitedPaths` so a directory is
 * scanned at most once, which also breaks symlink loops.
 *
 * @param dir - Directory to scan
 * @param baseDir - Base directory (site root) for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @param docsDir - Docs directory name (e.g., 'docs')
 * @param warnOnIgnoredFiles - Whether to warn about files skipped for having no or an unsupported extension
 * @param visitedPaths - Set of already visited real paths to detect symlink loops (internal use)
 * @returns Array of file paths
 */
export async function readMarkdownFiles(
  dir: string,
  baseDir: string,
  ignorePatterns: string[] = [],
  docsDir: string = 'docs',
  warnOnIgnoredFiles: boolean = false,
  visitedPaths: Set<string> = new Set()
): Promise<string[]> {
  // Resolve symlinks so the same directory reached by two routes has one identity.
  let realPath: string;
  try {
    realPath = await fs.realpath(dir);
  } catch (error: unknown) {
    logger.warn(`Failed to resolve real path for ${dir}: ${getErrorMessage(error)}`);
    return [];
  }

  if (visitedPaths.has(realPath)) {
    logger.warn(`Skipping already visited path (possible symlink loop): ${dir}`);
    return [];
  }
  visitedPaths.add(realPath);

  const collected: string[] = [];
  const entries = await fs.readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    const entryPath = path.join(dir, entry.name);

    if (shouldIgnoreFile(entryPath, baseDir, ignorePatterns, docsDir)) {
      continue;
    }

    // A symlink is treated as a directory when its target is one.
    let treatAsDirectory = entry.isDirectory();
    if (!treatAsDirectory && entry.isSymbolicLink()) {
      try {
        const targetStats = await fs.stat(entryPath);
        treatAsDirectory = targetStats.isDirectory();
      } catch {
        // stat() follows the link, so failure here means the target is missing.
        logger.warn(`Skipping broken symlink: ${entryPath}`);
        continue;
      }
    }

    if (treatAsDirectory) {
      const nested = await readMarkdownFiles(entryPath, baseDir, ignorePatterns, docsDir, warnOnIgnoredFiles, visitedPaths);
      collected.push(...nested);
      continue;
    }

    const fileName = entry.name;

    if (!fileName.includes('.')) {
      if (warnOnIgnoredFiles) {
        logger.warn(`Ignoring file without extension: ${entryPath}`);
      }
      continue;
    }

    const isMarkdown = fileName.endsWith('.md') || fileName.endsWith('.mdx');
    if (!isMarkdown) {
      if (warnOnIgnoredFiles) {
        logger.warn(`Ignoring file with unsupported extension: ${entryPath}`);
      }
      continue;
    }

    // Partials (leading underscore) are only meant to be imported, never listed.
    if (!fileName.startsWith('_')) {
      collected.push(entryPath);
    }
  }

  return collected;
}
79
428
 
429
+
80
430
  /**
81
431
  * Extract title from content or use the filename
82
432
  * @param data - Frontmatter data
@@ -85,79 +435,145 @@ export async function readMarkdownFiles(dir: string, baseDir: string, ignorePatt
85
435
  * @returns Extracted title
86
436
  */
87
437
export function extractTitle(data: any, content: string, filePath: string): string {
  // 1. Frontmatter title, when it is a non-blank string. Optional chaining keeps
  //    a missing frontmatter object from throwing.
  const frontmatterTitle: unknown = data?.title;
  if (isNonEmptyString(frontmatterTitle)) {
    return frontmatterTitle;
  }

  // 2. First level-1 heading. Only spaces/tabs may separate '#' from the text:
  //    with \s+ an empty "#" line would swallow the newline and capture the
  //    following line as the title.
  const headingMatch = /^#[ \t]+(.*)/m.exec(content);
  const headingText = headingMatch?.[1];
  if (isNonEmptyString(headingText)) {
    return headingText.trim();
  }

  // 3. File name without extension: dashes become spaces, each word is capitalized.
  return path.basename(filePath, path.extname(filePath))
    .replace(/-/g, ' ')
    .replace(/\b\w/g, (c: string) => c.toUpperCase());
}
454
+
455
/**
 * Backslash-escape every regex metacharacter in a string so it can be embedded
 * in a RegExp source and match literally.
 *
 * @param str - String to escape
 * @returns Escaped string safe for use in regex
 */
function escapeRegex(str: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (character) => `\\${character}`);
}
104
463
 
105
464
/**
 * Build a regex matching the import statement(s) that bind `componentName`
 * (default or single named import) from exactly `importPath`.
 */
function createPartialImportStatementRegex(componentName: string, importPath: string): RegExp {
  const name = escapeRegex(componentName);
  const source = escapeRegex(importPath);
  return new RegExp(`^\\s*import\\s+(?:${name}|{\\s*${name}\\s*})\\s+from\\s+['"]${source}['"];?\\s*$`, 'gm');
}

/**
 * Remove a partial's import statement and all JSX usages of it, for partials
 * that cannot be inlined (circular import, unreadable file).
 */
function stripUnresolvedPartial(text: string, componentName: string, importPath: string): string {
  const name = escapeRegex(componentName);
  // Self-closing tags (<Component />) and tags with content (<Component>...</Component>)
  const usageRegex = new RegExp(`<${name}(?:\\s+[^>]*)?\\s*\\/?>(?:[\\s\\S]*?<\\/${name}>)?`, 'gm');
  return text
    .replace(createPartialImportStatementRegex(componentName, importPath), '')
    .replace(usageRegex, '');
}

/**
 * Resolve and inline partial imports in markdown content.
 *
 * Handles `import Name from './_partial.mdx'` and
 * `import { Name } from './_partial.mdx'` (the path must contain an
 * underscore). For each such import the partial is read, its frontmatter is
 * dropped, its own partial imports are resolved recursively, and the result
 * replaces every `<Name />` / `<Name>...</Name>` usage. The import line is
 * removed. If a partial is part of an import cycle or cannot be read, both its
 * import line and its usages are removed instead.
 *
 * @param content - The markdown content with import statements
 * @param filePath - The path of the file containing the imports
 * @param importChain - Set of file paths in the current import chain (for circular dependency detection)
 * @returns Content with partials resolved
 */
export async function resolvePartialImports(
  content: string,
  filePath: string,
  importChain: Set<string> = new Set()
): Promise<string> {
  let resolved = content;

  // Built per call: a /g regex carries lastIndex state between exec() calls.
  const importRegex = /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;
  const imports = new Map<string, string>();

  // First pass: collect all partial imports (component name -> import path)
  let match: RegExpExecArray | null;
  while ((match = importRegex.exec(content)) !== null) {
    const componentName = match[1] || match[2];
    const importPath = match[3];

    // Only process imports for partial files (containing underscore)
    if (importPath.includes('_')) {
      imports.set(componentName, importPath);
    }
  }

  // Second pass: inline (or strip) each partial
  for (const [componentName, importPath] of imports) {
    try {
      // Resolve the partial file path relative to the current file
      const partialPath = path.resolve(path.dirname(filePath), importPath);

      if (importChain.has(partialPath)) {
        const chain = Array.from(importChain).join(' -> ');
        logger.error(`Circular import detected: ${chain} -> ${partialPath}`);
        // Drop the reference entirely to prevent infinite recursion
        resolved = stripUnresolvedPartial(resolved, componentName, importPath);
        continue;
      }

      // Copy the chain so sibling imports do not see each other's entries
      const nestedChain = new Set(importChain).add(partialPath);

      const partialSource = await readFile(partialPath);
      const { content: partialMarkdown } = matter(partialSource);
      const inlined = (await resolvePartialImports(partialMarkdown, partialPath, nestedChain)).trim();

      // Remove the import statement
      resolved = resolved.replace(createPartialImportStatementRegex(componentName, importPath), '');

      // Replace JSX usage with the partial content:
      // <PartialName /> or <PartialName></PartialName> or <PartialName>...</PartialName>
      // The lookahead stops "<Foo" from matching a longer tag such as "<FooBar />".
      const name = escapeRegex(componentName);
      const usageRegex = new RegExp(`<${name}(?=[\\s/>])\\s*(?:[^>]*?)(?:/>|>[^<]*</${name}>)`, 'g');
      // Function replacer: a replacement *string* would expand "$&", "$1", "$$"
      // occurring in the partial's markdown (shell snippets, LaTeX, prices).
      resolved = resolved.replace(usageRegex, () => inlined);
    } catch (error: unknown) {
      logger.warn(`Failed to resolve partial import from ${importPath}: ${getErrorMessage(error)}`);

      // Remove both the import statement AND the JSX usage even if the partial
      // can't be resolved, so no broken references are left in the output
      resolved = stripUnresolvedPartial(resolved, componentName, importPath);
    }
  }

  return resolved;
}
163
579
 
@@ -253,18 +669,20 @@ export function applyPathTransformations(
253
669
  urlPath: string,
254
670
  pathTransformation?: PluginOptions['pathTransformation']
255
671
  ): string {
256
- if (!pathTransformation) {
672
+ if (!isDefined(pathTransformation)) {
257
673
  return urlPath;
258
674
  }
259
675
 
260
676
  let transformedPath = urlPath;
261
-
677
+
262
678
  // Remove ignored path segments
263
- if (pathTransformation.ignorePaths?.length) {
679
+ if (isNonEmptyArray(pathTransformation.ignorePaths)) {
264
680
  for (const ignorePath of pathTransformation.ignorePaths) {
265
681
  // Create a regex that matches the ignore path at the beginning, middle, or end of the path
266
682
  // We use word boundaries to ensure we match complete path segments
267
- const ignoreRegex = new RegExp(`(^|/)(${ignorePath})(/|$)`, 'g');
683
+ // Escape special regex characters in ignorePath to prevent regex injection
684
+ const escapedIgnorePath = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
685
+ const ignoreRegex = new RegExp(`(^|/)(${escapedIgnorePath})(/|$)`, 'g');
268
686
  transformedPath = transformedPath.replace(ignoreRegex, '$1$3');
269
687
  }
270
688
 
@@ -276,7 +694,7 @@ export function applyPathTransformations(
276
694
  }
277
695
 
278
696
  // Add path segments if they're not already present
279
- if (pathTransformation.addPaths?.length) {
697
+ if (isNonEmptyArray(pathTransformation.addPaths)) {
280
698
  // Process in reverse order to maintain the specified order in the final path
281
699
  // This is because each path is prepended to the front
282
700
  const pathsToAdd = [...pathTransformation.addPaths].reverse();
@@ -290,4 +708,132 @@ export function applyPathTransformations(
290
708
  }
291
709
 
292
710
  return transformedPath;
293
- }
711
+ }
712
+
713
/**
 * Sanitize a string to create a safe filename.
 *
 * Runs of unsafe characters become a single dash, and the result never starts
 * with a dot or dash nor ends with a dash.
 *
 * @param input - Input string (typically a title)
 * @param fallback - Fallback string if input is blank or becomes empty after sanitization
 * @param options - `preserveUnicode` (default true): only replace
 *                  filesystem-unsafe characters (/ \ : * ? " < > |) and
 *                  whitespace; when false, replace everything except ASCII
 *                  letters, digits, `_`, `.` and `-`.
 *                  `preserveCase` (default false): keep the input's letter case
 *                  instead of lowercasing.
 * @returns Sanitized filename (without extension)
 * @throws ValidationError if input or fallback are not strings, or fallback is empty
 */
export function sanitizeForFilename(
  input: string,
  fallback: string = 'untitled',
  options: {
    preserveUnicode?: boolean;
    preserveCase?: boolean;
  } = {}
): string {
  // Validate input parameters
  validateString(input, 'input');
  validateString(fallback, 'fallback', { minLength: 1 });

  if (!isNonEmptyString(input)) return fallback;

  const { preserveUnicode = true, preserveCase = false } = options;

  const cased = preserveCase ? input : input.toLowerCase();

  // The ASCII-only class must accept A-Z as well: with preserveCase the input
  // is not lowercased, and a lowercase-only class would strip every capital.
  const unsafeCharacters = preserveUnicode
    ? /[/\\:*?"<>|\s]+/g
    : /[^a-zA-Z0-9_.-]+/g;

  const sanitized = cased
    .replace(unsafeCharacters, '-')
    // Collapse runs of dashes
    .replace(/-+/g, '-')
    // Strip leading dots and dashes together so inputs like " .env" cannot
    // end up as a hidden ".env" file once the dash is trimmed
    .replace(/^[.-]+/, '')
    .replace(/-+$/, '');

  return sanitized || fallback;
}
758
+
759
/**
 * Derive an identifier that is not yet in `usedIdentifiers` and reserve it.
 *
 * Uniqueness is case-insensitive: identifiers are compared and stored in lower
 * case. The base is tried first; after that, `suffix(counter, base)` is
 * appended with the counter starting at 2. After 10000 failed attempts a
 * timestamp-plus-random suffix is used instead.
 *
 * @param baseIdentifier - Base identifier to make unique
 * @param usedIdentifiers - Set of already used identifiers (mutated: the result is added)
 * @param suffix - Suffix pattern (default: number in parentheses)
 * @returns Unique identifier
 * @throws ValidationError if baseIdentifier is not a string or usedIdentifiers is not a Set
 */
export function ensureUniqueIdentifier(
  baseIdentifier: string,
  usedIdentifiers: Set<string>,
  suffix: (counter: number, base: string) => string = (counter) => `(${counter})`
): string {
  // Validate input parameters
  validateString(baseIdentifier, 'baseIdentifier', { minLength: 1 });
  validateRequired(usedIdentifiers, 'usedIdentifiers');

  if (!(usedIdentifiers instanceof Set)) {
    throw new ValidationError(`Parameter 'usedIdentifiers' must be a Set`);
  }

  const MAX_ITERATIONS = 10000;
  let candidate = baseIdentifier;

  for (let attempt = 1; usedIdentifiers.has(candidate.toLowerCase()); attempt++) {
    // The first retry is numbered 2, so the sequence reads: name, name(2), name(3), ...
    candidate = `${baseIdentifier}${suffix(attempt + 1, baseIdentifier)}`;

    if (attempt >= MAX_ITERATIONS) {
      // Guard against a suffix function that never produces a fresh value
      const timestamp = Date.now().toString(36);
      const random = Math.random().toString(36).substring(2, 8);
      candidate = `${baseIdentifier}-${timestamp}-${random}`;
      logger.warn(`Maximum iterations reached for unique identifier. Using fallback: ${candidate}`);
      break;
    }
  }

  usedIdentifiers.add(candidate.toLowerCase());
  return candidate;
}
803
+
804
/**
 * Assemble a markdown document in the plugin's standard layout:
 * optional YAML frontmatter, an H1 title, an optional blockquote description,
 * then the content, ending with exactly one newline.
 *
 * @param title - Document title
 * @param description - Document description
 * @param content - Document content
 * @param includeMetadata - Whether to include description metadata
 * @param frontMatter - Optional frontmatter to include at the top (skipped when empty)
 * @returns Formatted markdown content
 */
export function createMarkdownContent(
  title: string,
  description: string = '',
  content: string = '',
  includeMetadata: boolean = true,
  frontMatter?: Record<string, any>
): string {
  const parts: string[] = [];

  // Frontmatter block, only when there is at least one key
  if (isDefined(frontMatter) && Object.keys(frontMatter).length > 0) {
    const yamlText = YAML.stringify(frontMatter, {
      lineWidth: 0,
      defaultStringType: 'QUOTE_DOUBLE',
      defaultKeyType: 'PLAIN'
    });
    parts.push('---\n', yamlText, '---\n\n');
  }

  let descriptionLine = '\n';
  if (includeMetadata && description) {
    descriptionLine = `\n\n> ${description}\n`;
  }

  // Trim the body so empty content does not leave trailing blank lines
  const body = `# ${title}${descriptionLine}\n${content}`.trim();
  parts.push(body, '\n');

  return parts.join('');
}