@j0hanz/superfetch 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/README.md +345 -57
  2. package/dist/config/index.d.ts.map +1 -1
  3. package/dist/config/index.js +6 -10
  4. package/dist/config/index.js.map +1 -1
  5. package/dist/config/types.d.ts +256 -0
  6. package/dist/config/types.d.ts.map +1 -0
  7. package/dist/config/types.js +2 -0
  8. package/dist/config/types.js.map +1 -0
  9. package/dist/errors/app-error.d.ts +6 -20
  10. package/dist/errors/app-error.d.ts.map +1 -1
  11. package/dist/errors/app-error.js +7 -18
  12. package/dist/errors/app-error.js.map +1 -1
  13. package/dist/index.js +75 -62
  14. package/dist/index.js.map +1 -1
  15. package/dist/middleware/error-handler.d.ts +1 -5
  16. package/dist/middleware/error-handler.d.ts.map +1 -1
  17. package/dist/middleware/error-handler.js +4 -12
  18. package/dist/middleware/error-handler.js.map +1 -1
  19. package/dist/middleware/rate-limiter.d.ts +2 -20
  20. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  21. package/dist/middleware/rate-limiter.js +22 -47
  22. package/dist/middleware/rate-limiter.js.map +1 -1
  23. package/dist/prompts/index.d.ts +0 -3
  24. package/dist/prompts/index.d.ts.map +1 -1
  25. package/dist/prompts/index.js +2 -10
  26. package/dist/prompts/index.js.map +1 -1
  27. package/dist/resources/cached-content.d.ts +5 -0
  28. package/dist/resources/cached-content.d.ts.map +1 -0
  29. package/dist/resources/cached-content.js +93 -0
  30. package/dist/resources/cached-content.js.map +1 -0
  31. package/dist/resources/index.d.ts +0 -3
  32. package/dist/resources/index.d.ts.map +1 -1
  33. package/dist/resources/index.js +40 -5
  34. package/dist/resources/index.js.map +1 -1
  35. package/dist/server.d.ts +0 -4
  36. package/dist/server.d.ts.map +1 -1
  37. package/dist/server.js +11 -6
  38. package/dist/server.js.map +1 -1
  39. package/dist/services/cache.d.ts +20 -6
  40. package/dist/services/cache.d.ts.map +1 -1
  41. package/dist/services/cache.js +128 -20
  42. package/dist/services/cache.js.map +1 -1
  43. package/dist/services/card-extractor.d.ts +10 -0
  44. package/dist/services/card-extractor.d.ts.map +1 -0
  45. package/dist/services/card-extractor.js +194 -0
  46. package/dist/services/card-extractor.js.map +1 -0
  47. package/dist/services/extractor.d.ts +12 -19
  48. package/dist/services/extractor.d.ts.map +1 -1
  49. package/dist/services/extractor.js +60 -46
  50. package/dist/services/extractor.js.map +1 -1
  51. package/dist/services/fetcher.d.ts +13 -11
  52. package/dist/services/fetcher.d.ts.map +1 -1
  53. package/dist/services/fetcher.js +143 -54
  54. package/dist/services/fetcher.js.map +1 -1
  55. package/dist/services/logger.d.ts.map +1 -1
  56. package/dist/services/logger.js +4 -6
  57. package/dist/services/logger.js.map +1 -1
  58. package/dist/services/parser.d.ts +1 -6
  59. package/dist/services/parser.d.ts.map +1 -1
  60. package/dist/services/parser.js +57 -27
  61. package/dist/services/parser.js.map +1 -1
  62. package/dist/tools/handlers/fetch-links.tool.d.ts +6 -18
  63. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  64. package/dist/tools/handlers/fetch-links.tool.js +104 -79
  65. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  66. package/dist/tools/handlers/fetch-markdown.tool.d.ts +6 -10
  67. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  68. package/dist/tools/handlers/fetch-markdown.tool.js +83 -84
  69. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  70. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -12
  71. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  72. package/dist/tools/handlers/fetch-url.tool.js +51 -93
  73. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  74. package/dist/tools/handlers/fetch-urls.tool.d.ts +12 -0
  75. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
  76. package/dist/tools/handlers/fetch-urls.tool.js +184 -0
  77. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
  78. package/dist/tools/index.d.ts +0 -4
  79. package/dist/tools/index.d.ts.map +1 -1
  80. package/dist/tools/index.js +145 -15
  81. package/dist/tools/index.js.map +1 -1
  82. package/dist/tools/utils/common.d.ts +8 -0
  83. package/dist/tools/utils/common.d.ts.map +1 -0
  84. package/dist/tools/utils/common.js +35 -0
  85. package/dist/tools/utils/common.js.map +1 -0
  86. package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
  87. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
  88. package/dist/tools/utils/fetch-pipeline.js +78 -0
  89. package/dist/tools/utils/fetch-pipeline.js.map +1 -0
  90. package/dist/tools/utils/index.d.ts +4 -0
  91. package/dist/tools/utils/index.d.ts.map +1 -0
  92. package/dist/tools/utils/index.js +3 -0
  93. package/dist/tools/utils/index.js.map +1 -0
  94. package/dist/tools/utils/response-builder.d.ts +3 -0
  95. package/dist/tools/utils/response-builder.d.ts.map +1 -0
  96. package/dist/tools/utils/response-builder.js +24 -0
  97. package/dist/tools/utils/response-builder.js.map +1 -0
  98. package/dist/transformers/jsonl.transformer.d.ts +1 -1
  99. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  100. package/dist/transformers/jsonl.transformer.js +2 -1
  101. package/dist/transformers/jsonl.transformer.js.map +1 -1
  102. package/dist/transformers/markdown.transformer.d.ts +1 -1
  103. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  104. package/dist/transformers/markdown.transformer.js +99 -5
  105. package/dist/transformers/markdown.transformer.js.map +1 -1
  106. package/dist/types/content.types.d.ts +11 -11
  107. package/dist/types/content.types.d.ts.map +1 -1
  108. package/dist/types/index.d.ts +1 -2
  109. package/dist/types/index.d.ts.map +1 -1
  110. package/dist/types/index.js +1 -2
  111. package/dist/types/index.js.map +1 -1
  112. package/dist/types/schemas.d.ts +39 -12
  113. package/dist/types/schemas.d.ts.map +1 -1
  114. package/dist/utils/concurrency.d.ts +6 -0
  115. package/dist/utils/concurrency.d.ts.map +1 -0
  116. package/dist/utils/concurrency.js +38 -0
  117. package/dist/utils/concurrency.js.map +1 -0
  118. package/dist/utils/content-cleaner.d.ts +32 -0
  119. package/dist/utils/content-cleaner.d.ts.map +1 -0
  120. package/dist/utils/content-cleaner.js +238 -0
  121. package/dist/utils/content-cleaner.js.map +1 -0
  122. package/dist/utils/language-detector.d.ts +5 -0
  123. package/dist/utils/language-detector.d.ts.map +1 -0
  124. package/dist/utils/language-detector.js +50 -0
  125. package/dist/utils/language-detector.js.map +1 -0
  126. package/dist/utils/sanitizer.d.ts +0 -10
  127. package/dist/utils/sanitizer.d.ts.map +1 -1
  128. package/dist/utils/sanitizer.js +4 -12
  129. package/dist/utils/sanitizer.js.map +1 -1
  130. package/dist/utils/tool-error-handler.d.ts +1 -15
  131. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  132. package/dist/utils/tool-error-handler.js +34 -6
  133. package/dist/utils/tool-error-handler.js.map +1 -1
  134. package/dist/utils/url-validator.d.ts +0 -8
  135. package/dist/utils/url-validator.d.ts.map +1 -1
  136. package/dist/utils/url-validator.js +17 -31
  137. package/dist/utils/url-validator.js.map +1 -1
  138. package/package.json +81 -79
@@ -0,0 +1,38 @@
1
+ function createConcurrencyLimiter(limit) {
2
+ const maxConcurrency = Math.min(Math.max(1, limit), 10);
3
+ let active = 0;
4
+ const queue = [];
5
+ return async (fn) => {
6
+ while (active >= maxConcurrency) {
7
+ await new Promise((resolve) => queue.push(resolve));
8
+ }
9
+ active++;
10
+ try {
11
+ return await fn();
12
+ }
13
+ finally {
14
+ active--;
15
+ const next = queue.shift();
16
+ if (next)
17
+ next();
18
+ }
19
+ };
20
+ }
21
+ export async function runWithConcurrency(limit, tasks, options) {
22
+ const limiter = createConcurrencyLimiter(limit);
23
+ const total = tasks.length;
24
+ let completed = 0;
25
+ const wrappedTasks = tasks.map((task) => async () => {
26
+ try {
27
+ return await limiter(task);
28
+ }
29
+ finally {
30
+ completed++;
31
+ if (options?.onProgress) {
32
+ options.onProgress(completed, total);
33
+ }
34
+ }
35
+ });
36
+ return Promise.allSettled(wrappedTasks.map(async (task) => task()));
37
+ }
38
+ //# sourceMappingURL=concurrency.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAMA,SAAS,wBAAwB,CAAC,KAAa;IAC7C,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;IACxD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,KAAK,GAAmB,EAAE,CAAC;IAEjC,OAAO,KAAK,EAAK,EAAoB,EAAc,EAAE;QACnD,OAAO,MAAM,IAAI,cAAc,EAAE,CAAC;YAChC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,EAAE,CAAC;QACT,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;gBAAS,CAAC;YACT,MAAM,EAAE,CAAC;YACT,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;QACnB,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AACD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,KAAa,EACb,KAA2B,EAC3B,OAA4B;IAE5B,MAAM,OAAO,GAAG,wBAAwB,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;IAC3B,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE;QAClD,IAAI,CAAC;YACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;gBAAS,CAAC;YACT,SAAS,EAAE,CAAC;YACZ,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;gBACxB,OAAO,CAAC,UAAU,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;AACtE,CAAC"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Post-processing content cleaner for removing noise artifacts
3
+ * that slip through Readability extraction.
4
+ */
5
+ /**
6
+ * Clean paragraph text by removing noise
7
+ */
8
+ export declare function cleanParagraph(text: string): string | null;
9
+ /**
10
+ * Clean heading text by removing noise and markdown link syntax
11
+ */
12
+ export declare function cleanHeading(text: string): string | null;
13
+ /**
14
+ * Clean list items by filtering out noise
15
+ */
16
+ export declare function cleanListItems(items: string[]): string[];
17
+ /**
18
+ * Clean code block text - minimal cleaning to preserve code integrity
19
+ */
20
+ export declare function cleanCodeBlock(code: string): string | null;
21
+ /**
22
+ * Strip markdown link syntax from text for cleaner slugs/display
23
+ * [Text](#anchor) -> Text
24
+ * [Text](url) -> Text
25
+ */
26
+ export declare function stripMarkdownLinks(text: string): string;
27
+ /**
28
+ * Remove common timestamp patterns from text (inline removal)
29
+ * Use when you want to strip timestamps from within longer content
30
+ */
31
+ export declare function removeInlineTimestamps(text: string): string;
32
+ //# sourceMappingURL=content-cleaner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-cleaner.d.ts","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAsKH;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAsB1D;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAuBxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAc1D;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAsB3D"}
@@ -0,0 +1,238 @@
1
+ /**
2
+ * Post-processing content cleaner for removing noise artifacts
3
+ * that slip through Readability extraction.
4
+ */
5
+ // Pre-compiled combined pattern for optimal performance
6
+ const NOISE_PATTERN_COMBINED = new RegExp([
7
+ // Relative timestamps
8
+ '^\\d+\\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\\s*ago$',
9
+ '^(just now|recently|today|yesterday|last week|last month)$',
10
+ '^(updated|modified|edited|created|published)\\s*:?\\s*\\d+\\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\\s*ago$',
11
+ '^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\s+\\d{1,2},?\\s+\\d{4}$',
12
+ '^\\d{1,2}\\s+(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\s+\\d{4}$',
13
+ '^\\d{4}-\\d{2}-\\d{2}$',
14
+ '^last\\s+updated\\s*:?',
15
+ // Share/action buttons
16
+ '^(share|copy|like|follow|subscribe|download|print|save|bookmark|tweet|pin it|email|export)$',
17
+ '^(copy to clipboard|copied!?|copy code|copy link)$',
18
+ '^(share on|share to|share via)\\s+(twitter|facebook|linkedin|reddit|x|email)$',
19
+ // UI artifacts
20
+ '^(click to copy|expand|collapse|show more|show less|load more|view more|read more|see more|see all|view all)$',
21
+ '^(toggle|switch|enable|disable|on|off)$',
22
+ '^(edit|delete|remove|add|new|create|update|cancel|confirm|submit|reset|clear)$',
23
+ '^(open in|view in|edit in)\\s+\\w+$',
24
+ '^(try it|run|execute|play|preview|demo|live demo|playground)$',
25
+ '^(source|view source|edit this page|edit on github|improve this doc)$',
26
+ // Empty/placeholder
27
+ '^(loading\\.{0,3}|please wait\\.{0,3}|\\.{2,})$',
28
+ '^(n\\/a|tbd|todo|coming soon|placeholder|untitled)$',
29
+ // Navigation
30
+ '^(next|previous|prev|back|forward|home|menu|close|open|skip to|jump to|go to)$',
31
+ '^(table of contents|toc|contents|on this page|in this article|in this section)$',
32
+ '^(scroll to top|back to top|top)$',
33
+ // Cookie/consent
34
+ '^(accept|reject|accept all|reject all|cookie settings|privacy settings|manage preferences)$',
35
+ '^(accept cookies|decline cookies|cookie policy|privacy policy|terms of service|terms & conditions)$',
36
+ // Counts
37
+ '^\\d+\\s*(comments?|replies?|reactions?|responses?)$',
38
+ '^\\d+\\s*(likes?|shares?|views?|followers?|retweets?|stars?|forks?|claps?|upvotes?|downvotes?)$',
39
+ '^(liked by|shared by|followed by)\\s+\\d+',
40
+ // Version badges
41
+ '^v?\\d+\\.\\d+(\\.\\d+)?(-\\w+)?$',
42
+ '^(stable|beta|alpha|rc|preview|experimental|deprecated|legacy|new|updated)$',
43
+ // Structural
44
+ '^(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)$',
45
+ '^panel\\s*[a-z]?$',
46
+ // API artifacts
47
+ '^(required|optional|default|type|example|description|parameters?|returns?|response|request)$',
48
+ '^(get|post|put|patch|delete|head|options)\\s*$',
49
+ // Interactive
50
+ '^(drag|drop|resize|zoom|scroll|swipe|tap|click|hover|focus)(\\s+to\\s+\\w+)?$',
51
+ '^(drag the|move the|resize the|drag to|click to)\\s+\\w+',
52
+ // Breadcrumbs
53
+ '^[/\\\\>→»›]+$',
54
+ // Ads
55
+ '^(ad|advertisement|sponsored|promoted|partner content)$',
56
+ ].join('|'), 'i');
57
+ // Pre-compiled pattern for short text noise
58
+ const SHORT_TEXT_NOISE_PATTERN = new RegExp([
59
+ '^#\\w+$',
60
+ '^@\\w+$',
61
+ '^\\d+$',
62
+ '^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$',
63
+ '^[,;:\\-–—]+$',
64
+ '^\\[\\d+\\]$',
65
+ '^\\(\\d+\\)$',
66
+ '^fig\\.?\\s*\\d+$',
67
+ '^table\\s*\\d+$',
68
+ '^step\\s*\\d+$',
69
+ '^note:?$',
70
+ '^tip:?$',
71
+ '^warning:?$',
72
+ '^info:?$',
73
+ '^caution:?$',
74
+ ].join('|'), 'i');
75
+ // Pre-compiled pattern for UI chrome detection
76
+ const UI_CHROME_PATTERN = new RegExp([
77
+ '^(sign in|sign up|log in|log out|register|create account)$',
78
+ '^(search|search\\.\\.\\.|search docs|search documentation)$',
79
+ '^(dark mode|light mode|theme|language|locale)$',
80
+ '^(feedback|report issue|report a bug|file an issue|suggest edit)$',
81
+ '^(documentation|docs|api|reference|guide|tutorial|examples?)$',
82
+ "^(version|changelog|release notes|what's new)$",
83
+ ].join('|'), 'i');
84
+ // Minimum lengths for different content types
85
+ const MIN_PARAGRAPH_LENGTH = 20;
86
+ const MIN_HEADING_LENGTH = 2;
87
+ const MIN_LIST_ITEM_LENGTH = 3;
88
+ const SHORT_TEXT_THRESHOLD = 25;
89
+ /**
90
+ * Check if text matches any noise pattern
91
+ */
92
+ function isNoiseText(text) {
93
+ const trimmed = text.trim();
94
+ // Empty or whitespace-only
95
+ if (!trimmed) {
96
+ return true;
97
+ }
98
+ // Check combined noise pattern (single regex test)
99
+ if (NOISE_PATTERN_COMBINED.test(trimmed)) {
100
+ return true;
101
+ }
102
+ // Check short text patterns for brief content
103
+ if (trimmed.length < SHORT_TEXT_THRESHOLD) {
104
+ if (SHORT_TEXT_NOISE_PATTERN.test(trimmed)) {
105
+ return true;
106
+ }
107
+ // Also check UI chrome patterns for short text
108
+ if (UI_CHROME_PATTERN.test(trimmed)) {
109
+ return true;
110
+ }
111
+ }
112
+ return false;
113
+ }
114
+ // Pre-compiled placeholder pattern (combined for performance)
115
+ const PLACEHOLDER_PATTERN = /^(lorem ipsum|sample text|placeholder|example (text|content|data)|test (text|content|data)|your (text|content|name|email) here|enter (your|a) |type (your|a|something) )/i;
116
+ // Cache for placeholder checks to avoid repeated regex tests
117
+ const PLACEHOLDER_CACHE = new Map();
118
+ const PLACEHOLDER_CACHE_MAX_SIZE = 1000;
119
+ /**
120
+ * Check if text looks like placeholder/demo content
121
+ * Uses caching for 3-8x performance improvement on repeated patterns
122
+ */
123
+ function isPlaceholderContent(text) {
124
+ const trimmed = text.trim().toLowerCase();
125
+ // Check cache first
126
+ const cached = PLACEHOLDER_CACHE.get(trimmed);
127
+ if (cached !== undefined) {
128
+ return cached;
129
+ }
130
+ // Single regex test (faster than array iteration)
131
+ const result = PLACEHOLDER_PATTERN.test(trimmed);
132
+ // Cache result with FIFO eviction (oldest inserted entry is dropped first; lookups do not refresh recency)
133
+ if (PLACEHOLDER_CACHE.size >= PLACEHOLDER_CACHE_MAX_SIZE) {
134
+ const firstKey = PLACEHOLDER_CACHE.keys().next().value;
135
+ if (firstKey !== undefined) {
136
+ PLACEHOLDER_CACHE.delete(firstKey);
137
+ }
138
+ }
139
+ PLACEHOLDER_CACHE.set(trimmed, result);
140
+ return result;
141
+ }
142
+ /**
143
+ * Clean paragraph text by removing noise
144
+ */
145
+ export function cleanParagraph(text) {
146
+ const trimmed = text.trim();
147
+ // Too short to be meaningful
148
+ if (trimmed.length < MIN_PARAGRAPH_LENGTH) {
149
+ // Allow very short paragraphs if they end with punctuation (likely real content)
150
+ if (!/[.!?]$/.test(trimmed)) {
151
+ return null;
152
+ }
153
+ }
154
+ // Is noise content
155
+ if (isNoiseText(trimmed)) {
156
+ return null;
157
+ }
158
+ // Is placeholder content (in paragraphs, not in examples)
159
+ if (isPlaceholderContent(trimmed)) {
160
+ return null;
161
+ }
162
+ return trimmed;
163
+ }
164
+ /**
165
+ * Clean heading text by removing noise and markdown link syntax
166
+ */
167
+ export function cleanHeading(text) {
168
+ let cleaned = text.trim();
169
+ // Too short
170
+ if (cleaned.length < MIN_HEADING_LENGTH) {
171
+ return null;
172
+ }
173
+ // Remove markdown link syntax: [Text](#anchor) -> Text
174
+ cleaned = cleaned.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1');
175
+ // Remove trailing anchor links like "Link for this heading"
176
+ cleaned = cleaned.replace(/\s*Link for (this heading|[\w\s]+)\s*$/i, '');
177
+ // Remove trailing hash symbols often used for anchor links
178
+ cleaned = cleaned.replace(/\s*#+\s*$/, '');
179
+ // Is noise content
180
+ if (isNoiseText(cleaned)) {
181
+ return null;
182
+ }
183
+ return cleaned.trim();
184
+ }
185
+ /**
186
+ * Clean list items by filtering out noise
187
+ */
188
+ export function cleanListItems(items) {
189
+ return items
190
+ .map((item) => item.trim())
191
+ .filter((item) => {
192
+ if (item.length < MIN_LIST_ITEM_LENGTH)
193
+ return false;
194
+ if (isNoiseText(item))
195
+ return false;
196
+ return true;
197
+ });
198
+ }
199
+ /**
200
+ * Clean code block text - minimal cleaning to preserve code integrity
201
+ */
202
+ export function cleanCodeBlock(code) {
203
+ const trimmed = code.trim();
204
+ // Empty code block
205
+ if (trimmed.length === 0) {
206
+ return null;
207
+ }
208
+ // Very short code blocks that are likely just labels
209
+ if (trimmed.length < 3 && !/^[{}[\]();<>]$/.test(trimmed)) {
210
+ return null;
211
+ }
212
+ return trimmed;
213
+ }
214
+ /**
215
+ * Strip markdown link syntax from text for cleaner slugs/display
216
+ * [Text](#anchor) -> Text
217
+ * [Text](url) -> Text
218
+ */
219
+ export function stripMarkdownLinks(text) {
220
+ return text.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1');
221
+ }
222
+ /**
223
+ * Remove common timestamp patterns from text (inline removal)
224
+ * Use when you want to strip timestamps from within longer content
225
+ */
226
+ export function removeInlineTimestamps(text) {
227
+ return (text
228
+ // Remove "X days/hours/etc ago" patterns
229
+ .replace(/\b\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
230
+ // Remove labelled relative timestamps such as "Updated: 3 days ago"
231
+ .replace(/\b(updated|modified|edited|created|published)\s*:?\s*\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
232
+ // Remove abbreviated-month dates such as "Jan 5, 2024" wherever they appear in the text
233
+ .replace(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\s+\d{1,2},?\s+\d{4}\b/gi, '')
234
+ // Clean up extra whitespace
235
+ .replace(/\s{2,}/g, ' ')
236
+ .trim());
237
+ }
238
+ //# sourceMappingURL=content-cleaner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-cleaner.js","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,wDAAwD;AACxD,MAAM,sBAAsB,GAAG,IAAI,MAAM,CACvC;IACE,sBAAsB;IACtB,yEAAyE;IACzE,4DAA4D;IAC5D,8HAA8H;IAC9H,6EAA6E;IAC7E,2EAA2E;IAC3E,wBAAwB;IACxB,wBAAwB;IACxB,uBAAuB;IACvB,6FAA6F;IAC7F,oDAAoD;IACpD,+EAA+E;IAC/E,eAAe;IACf,+GAA+G;IAC/G,yCAAyC;IACzC,gFAAgF;IAChF,qCAAqC;IACrC,+DAA+D;IAC/D,uEAAuE;IACvE,oBAAoB;IACpB,iDAAiD;IACjD,qDAAqD;IACrD,aAAa;IACb,gFAAgF;IAChF,iFAAiF;IACjF,mCAAmC;IACnC,iBAAiB;IACjB,6FAA6F;IAC7F,qGAAqG;IACrG,SAAS;IACT,sDAAsD;IACtD,iGAAiG;IACjG,2CAA2C;IAC3C,iBAAiB;IACjB,mCAAmC;IACnC,6EAA6E;IAC7E,aAAa;IACb,yDAAyD;IACzD,mBAAmB;IACnB,gBAAgB;IAChB,8FAA8F;IAC9F,gDAAgD;IAChD,cAAc;IACd,+EAA+E;IAC/E,0DAA0D;IAC1D,cAAc;IACd,gBAAgB;IAChB,MAAM;IACN,yDAAyD;CAC1D,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,4CAA4C;AAC5C,MAAM,wBAAwB,GAAG,IAAI,MAAM,CACzC;IACE,SAAS;IACT,SAAS;IACT,QAAQ;IACR,8BAA8B;IAC9B,eAAe;IACf,cAAc;IACd,cAAc;IACd,mBAAmB;IACnB,iBAAiB;IACjB,gBAAgB;IAChB,UAAU;IACV,SAAS;IACT,aAAa;IACb,UAAU;IACV,aAAa;CACd,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,+CAA+C;AAC/C,MAAM,iBAAiB,GAAG,IAAI,MAAM,CAClC;IACE,4DAA4D;IAC5D,6DAA6D;IAC7D,gDAAgD;IAChD,mEAAmE;IACnE,+DAA+D;IAC/D,gDAAgD;CACjD,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,8CAA8C;AAC9C,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEhC;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,2BAA2B;IAC3B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAC;IACd,CAAC;IAED,mDAAmD;IACnD,IAAI,sBAAsB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,8CAA8C;IAC9C,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,IAAI,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,+CAA+C;QAC/C,IAAI,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YACpC,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,8DAA8D;AAC9D,MAA
M,mBAAmB,GACvB,2KAA2K,CAAC;AAE9K,6DAA6D;AAC7D,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAmB,CAAC;AACrD,MAAM,0BAA0B,GAAG,IAAI,CAAC;AAExC;;;GAGG;AACH,SAAS,oBAAoB,CAAC,IAAY;IACxC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE1C,oBAAoB;IACpB,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9C,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,kDAAkD;IAClD,MAAM,MAAM,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEjD,iCAAiC;IACjC,IAAI,iBAAiB,CAAC,IAAI,IAAI,0BAA0B,EAAE,CAAC;QACzD,MAAM,QAAQ,GAAG,iBAAiB,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;QACvD,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,iBAAiB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACrC,CAAC;IACH,CAAC;IACD,iBAAiB,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAEvC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,6BAA6B;IAC7B,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,iFAAiF;QACjF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0DAA0D;IAC1D,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE1B,YAAY;IACZ,IAAI,OAAO,CAAC,MAAM,GAAG,kBAAkB,EAAE,CAAC;QACxC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uDAAuD;IACvD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;IAE1D,4DAA4D;IAC5D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,yCAAyC,EAAE,EAAE,CAAC,CAAC;IAEzE,2DAA2D;IAC3D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAE3C,mBAAmB;IACnB,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,KAAe;IAC5C,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACf,IAAI,IAAI,CAAC,MAAM,GAAG,oBAAoB;YAAE,OAAO,KAAK,CAAC;QACrD,IAAI,WAAW,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,C
AAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACP,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,mBAAmB;IACnB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,qDAAqD;IACrD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,OAAO,CACL,IAAI;QACF,yCAAyC;SACxC,OAAO,CACN,0EAA0E,EAC1E,EAAE,CACH;QACD,kCAAkC;SACjC,OAAO,CACN,6HAA6H,EAC7H,EAAE,CACH;QACD,0BAA0B;SACzB,OAAO,CACN,6EAA6E,EAC7E,EAAE,CACH;QACD,4BAA4B;SAC3B,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,IAAI,EAAE,CACV,CAAC;AACJ,CAAC"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Detect programming language from code content
3
+ */
4
+ export declare function detectLanguage(code: string): string | undefined;
5
+ //# sourceMappingURL=language-detector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"language-detector.d.ts","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AA4CA;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAE/D"}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Language detection patterns for code blocks
3
+ * Shared between parser and markdown transformer
4
+ */
5
+ const LANGUAGE_PATTERNS = [
6
+ // JSX/TSX patterns
7
+ [
8
+ /^\s*import\s+.*\s+from\s+['"]react['"]|<[A-Z][a-zA-Z]*[\s/>]|jsx\s*:|className=/m,
9
+ 'jsx',
10
+ ],
11
+ // TypeScript patterns
12
+ [
13
+ /:\s*(string|number|boolean|void|any|unknown|never)\b|interface\s+\w+|type\s+\w+\s*=/m,
14
+ 'typescript',
15
+ ],
16
+ // Rust patterns
17
+ [/^\s*(fn|let\s+mut|impl|struct|enum|use\s+\w+::)/m, 'rust'],
18
+ // JavaScript patterns (generic)
19
+ [
20
+ /^\s*(export|const|let|var|function|class|async|await)\b|^\s*import\s+.*['"]/m,
21
+ 'javascript',
22
+ ],
23
+ // Python patterns
24
+ [/^\s*(def|class|import|from|if __name__|print\()/m, 'python'],
25
+ // Bash/Shell patterns
26
+ [
27
+ /^\s*(npm|yarn|pnpm|npx|brew|apt|pip|cargo|go )\s+(install|add|run|build|start)/m,
28
+ 'bash',
29
+ ],
30
+ [/^\s*[$#]\s+\w+|^\s*#!|^\s*(sudo|chmod|mkdir|cd|ls|cat|echo)\s+/m, 'bash'],
31
+ // CSS patterns
32
+ [/^\s*[.#@]?[\w-]+\s*\{[^}]*\}|@media|@import|@keyframes/m, 'css'],
33
+ // HTML patterns
34
+ [/^\s*<(!DOCTYPE|html|head|body|div|span|p|a|script|style)\b/im, 'html'],
35
+ // JSON patterns
36
+ [/^\s*\{\s*"|^\s*\[\s*("|\d|true|false|null)/m, 'json'],
37
+ // YAML patterns
38
+ [/^\s*[\w-]+:\s*.+$/m, 'yaml'],
39
+ // SQL patterns
40
+ [/^\s*(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP)\s+/im, 'sql'],
41
+ // Go patterns
42
+ [/^\s*(func|package|import\s+")/m, 'go'],
43
+ ];
44
+ /**
45
+ * Detect programming language from code content
46
+ */
47
+ export function detectLanguage(code) {
48
+ return LANGUAGE_PATTERNS.find(([pattern]) => pattern.test(code))?.[1];
49
+ }
50
+ //# sourceMappingURL=language-detector.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"language-detector.js","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,iBAAiB,GAAG;IACxB,mBAAmB;IACnB;QACE,kFAAkF;QAClF,KAAK;KACN;IACD,sBAAsB;IACtB;QACE,sFAAsF;QACtF,YAAY;KACb;IACD,gBAAgB;IAChB,CAAC,kDAAkD,EAAE,MAAM,CAAC;IAC5D,gCAAgC;IAChC;QACE,8EAA8E;QAC9E,YAAY;KACb;IACD,kBAAkB;IAClB,CAAC,kDAAkD,EAAE,QAAQ,CAAC;IAC9D,sBAAsB;IACtB;QACE,iFAAiF;QACjF,MAAM;KACP;IACD,CAAC,iEAAiE,EAAE,MAAM,CAAC;IAC3E,eAAe;IACf,CAAC,yDAAyD,EAAE,KAAK,CAAC;IAClE,gBAAgB;IAChB,CAAC,8DAA8D,EAAE,MAAM,CAAC;IACxE,gBAAgB;IAChB,CAAC,6CAA6C,EAAE,MAAM,CAAC;IACvD,gBAAgB;IAChB,CAAC,oBAAoB,EAAE,MAAM,CAAC;IAC9B,eAAe;IACf,CAAC,0DAA0D,EAAE,KAAK,CAAC;IACnE,cAAc;IACd,CAAC,gCAAgC,EAAE,IAAI,CAAC;CAChC,CAAC;AAEX;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACxE,CAAC"}
@@ -1,13 +1,3 @@
1
- /**
2
- * Sanitizes text content by collapsing whitespace and trimming
3
- * Returns empty string for null/undefined input
4
- */
5
1
  export declare function sanitizeText(text: string | null | undefined): string;
6
- /**
7
- * Truncates text to a maximum length with ellipsis
8
- * @param text - Text to truncate
9
- * @param maxLength - Maximum length (must be > 3 to accommodate ellipsis)
10
- * @returns Truncated text with ellipsis if needed
11
- */
12
2
  export declare function truncateText(text: string, maxLength: number): string;
13
3
  //# sourceMappingURL=sanitizer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,CAIpE;AAED;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAQpE"}
1
+ {"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAGA,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,CAIpE;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAQpE"}
@@ -1,20 +1,12 @@
1
- /**
2
- * Sanitizes text content by collapsing whitespace and trimming
3
- * Returns empty string for null/undefined input
4
- */
1
+ // Pre-compiled regex patterns for hot path optimization
2
+ const WHITESPACE_REGEX = /\s+/g;
5
3
  export function sanitizeText(text) {
6
4
  if (text == null)
7
5
  return '';
8
6
  if (typeof text !== 'string')
9
7
  return String(text);
10
- return text.replace(/\s+/g, ' ').trim();
8
+ return text.replace(WHITESPACE_REGEX, ' ').trim();
11
9
  }
12
- /**
13
- * Truncates text to a maximum length with ellipsis
14
- * @param text - Text to truncate
15
- * @param maxLength - Maximum length (must be > 3 to accommodate ellipsis)
16
- * @returns Truncated text with ellipsis if needed
17
- */
18
10
  export function truncateText(text, maxLength) {
19
11
  if (maxLength < 4) {
20
12
  return text.length > 0 ? text.charAt(0) : '';
@@ -22,6 +14,6 @@ export function truncateText(text, maxLength) {
22
14
  if (text.length <= maxLength) {
23
15
  return text;
24
16
  }
25
- return text.substring(0, maxLength - 3) + '...';
17
+ return `${text.substring(0, maxLength - 3)}...`;
26
18
  }
27
19
  //# sourceMappingURL=sanitizer.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAA+B;IAC1D,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,EAAE,CAAC;IAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IAClD,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC1C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC;AAClD,CAAC"}
1
+ {"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA,wDAAwD;AACxD,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,IAA+B;IAC1D,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,EAAE,CAAC;IAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IAClD,OAAO,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC;AAClD,CAAC"}
@@ -1,18 +1,4 @@
1
- /** MCP SDK-compatible error response (index signature required by SDK) */
2
- export type ToolErrorResponse = {
3
- [x: string]: unknown;
4
- content: {
5
- type: 'text';
6
- text: string;
7
- }[];
8
- structuredContent: {
9
- [x: string]: unknown;
10
- error: string;
11
- url: string;
12
- errorCode: string;
13
- };
14
- isError: true;
15
- };
1
+ import type { ToolErrorResponse } from '../config/types.js';
16
2
  export declare function createToolErrorResponse(message: string, url: string, code: string): ToolErrorResponse;
17
3
  export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string): ToolErrorResponse;
18
4
  //# sourceMappingURL=tool-error-handler.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAOA,0EAA0E;AAC1E,MAAM,MAAM,iBAAiB,GAAG;IAC9B,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IACrB,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC1C,iBAAiB,EAAE;QACjB,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;QACrB,KAAK,EAAE,MAAM,CAAC;QACd,GAAG,EAAE,MAAM,CAAC;QACZ,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,OAAO,EAAE,IAAI,CAAC;CACf,CAAC;AAEF,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACX,iBAAiB,CAOnB;AAED,wBAAgB,eAAe,CAC7B,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,MAAM,EACX,eAAe,SAAqB,GACnC,iBAAiB,CAqBnB"}
1
+ {"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAa5D,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACX,iBAAiB,CAOnB;AAED,wBAAgB,eAAe,CAC7B,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,MAAM,EACX,eAAe,SAAqB,GACnC,iBAAiB,CA8CnB"}
@@ -1,4 +1,5 @@
1
- import { AppError, UrlValidationError, FetchError, TimeoutError, } from '../errors/index.js';
1
+ import { AbortError, AppError, FetchError, RateLimitError, TimeoutError, UrlValidationError, } from '../errors/index.js';
2
+ const isDevelopment = process.env.NODE_ENV === 'development';
2
3
  export function createToolErrorResponse(message, url, code) {
3
4
  const structuredContent = { error: message, url, errorCode: code };
4
5
  return {
@@ -9,19 +10,46 @@ export function createToolErrorResponse(message, url, code) {
9
10
  }
10
11
  export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
11
12
  if (error instanceof UrlValidationError) {
12
- return createToolErrorResponse(error.message, url, 'INVALID_URL');
13
+ const message = isDevelopment
14
+ ? `${error.message}\nURL: ${error.url}\nStack: ${error.stack ?? ''}`
15
+ : error.message;
16
+ return createToolErrorResponse(message, url, 'INVALID_URL');
17
+ }
18
+ if (error instanceof AbortError) {
19
+ const message = isDevelopment
20
+ ? `Request aborted${error.reason ? `: ${error.reason}` : ''}\n${error.stack ?? ''}`
21
+ : `Request aborted${error.reason ? `: ${error.reason}` : ''}`;
22
+ return createToolErrorResponse(message, url, 'ABORTED');
13
23
  }
14
24
  if (error instanceof TimeoutError) {
15
- return createToolErrorResponse(`Request timed out after ${error.timeoutMs}ms`, url, 'TIMEOUT');
25
+ const message = isDevelopment
26
+ ? `Request timed out after ${error.timeoutMs}ms\n${error.stack ?? ''}`
27
+ : `Request timed out after ${error.timeoutMs}ms`;
28
+ return createToolErrorResponse(message, url, 'TIMEOUT');
29
+ }
30
+ if (error instanceof RateLimitError) {
31
+ const message = isDevelopment
32
+ ? `Rate limited. Retry after ${error.retryAfter}s\n${error.stack ?? ''}`
33
+ : `Rate limited. Retry after ${error.retryAfter}s`;
34
+ return createToolErrorResponse(message, url, 'RATE_LIMITED');
16
35
  }
17
36
  if (error instanceof FetchError) {
18
37
  const code = error.httpStatus ? `HTTP_${error.httpStatus}` : 'FETCH_ERROR';
19
- return createToolErrorResponse(error.message, url, code);
38
+ const message = isDevelopment
39
+ ? `${error.message}\n${error.stack ?? ''}`
40
+ : error.message;
41
+ return createToolErrorResponse(message, url, code);
20
42
  }
21
43
  if (error instanceof AppError) {
22
- return createToolErrorResponse(error.message, url, error.code);
44
+ const message = isDevelopment
45
+ ? `${error.message}\n${error.stack ?? ''}`
46
+ : error.message;
47
+ return createToolErrorResponse(message, url, error.code);
23
48
  }
24
49
  const message = error instanceof Error ? error.message : 'Unknown error';
25
- return createToolErrorResponse(`${fallbackMessage}: ${message}`, url, 'UNKNOWN_ERROR');
50
+ const fullMessage = isDevelopment && error instanceof Error
51
+ ? `${fallbackMessage}: ${message}\n${error.stack ?? ''}`
52
+ : `${fallbackMessage}: ${message}`;
53
+ return createToolErrorResponse(fullMessage, url, 'UNKNOWN_ERROR');
26
54
  }
27
55
  //# sourceMappingURL=tool-error-handler.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,kBAAkB,EAClB,UAAU,EACV,YAAY,GACb,MAAM,oBAAoB,CAAC;AAe5B,MAAM,UAAU,uBAAuB,CACrC,OAAe,EACf,GAAW,EACX,IAAY;IAEZ,MAAM,iBAAiB,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACnE,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAAE,CAAC;QACpE,iBAAiB;QACjB,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAc,EACd,GAAW,EACX,eAAe,GAAG,kBAAkB;IAEpC,IAAI,KAAK,YAAY,kBAAkB,EAAE,CAAC;QACxC,OAAO,uBAAuB,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IACpE,CAAC;IACD,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;QAClC,OAAO,uBAAuB,CAC5B,2BAA2B,KAAK,CAAC,SAAS,IAAI,EAC9C,GAAG,EACH,SAAS,CACV,CAAC;IACJ,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC;QAC3E,OAAO,uBAAuB,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;IAC3D,CAAC;IACD,IAAI,KAAK,YAAY,QAAQ,EAAE,CAAC;QAC9B,OAAO,uBAAuB,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;IACzE,OAAO,uBAAuB,CAAC,GAAG,eAAe,KAAK,OAAO,EAAE,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;AACzF,CAAC"}
1
+ {"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,UAAU,EACV,QAAQ,EACR,UAAU,EACV,cAAc,EACd,YAAY,EACZ,kBAAkB,GACnB,MAAM,oBAAoB,CAAC;AAE5B,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa,CAAC;AAE7D,MAAM,UAAU,uBAAuB,CACrC,OAAe,EACf,GAAW,EACX,IAAY;IAEZ,MAAM,iBAAiB,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACnE,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAAE,CAAC;QACpE,iBAAiB;QACjB,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAc,EACd,GAAW,EACX,eAAe,GAAG,kBAAkB;IAEpC,IAAI,KAAK,YAAY,kBAAkB,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,UAAU,KAAK,CAAC,GAAG,YAAY,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACpE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;QAClB,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC9D,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,kBAAkB,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACnF,CAAC,CAAC,kBAAkB,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAChE,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;QAClC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,2BAA2B,KAAK,CAAC,SAAS,OAAO,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACtE,CAAC,CAAC,2BAA2B,KAAK,CAAC,SAAS,IAAI,CAAC;QACnD,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,6BAA6B,KAAK,CAAC,UAAU,MAAM,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACxE,CAAC,CAAC,6BAA6B,KAAK,CAAC,UAAU,GAAG,CAAC;QACrD,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,cAAc,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC;QAC3E,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,KAAK,KAAK,CAAC,
KAAK,IAAI,EAAE,EAAE;YAC1C,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;QAClB,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,KAAK,YAAY,QAAQ,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,KAAK,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YAC1C,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;QAClB,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;IACzE,MAAM,WAAW,GACf,aAAa,IAAI,KAAK,YAAY,KAAK;QACrC,CAAC,CAAC,GAAG,eAAe,KAAK,OAAO,KAAK,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;QACxD,CAAC,CAAC,GAAG,eAAe,KAAK,OAAO,EAAE,CAAC;IAEvC,OAAO,uBAAuB,CAAC,WAAW,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;AACpE,CAAC"}
@@ -1,11 +1,3 @@
1
- /**
2
- * Validates and normalizes a URL, blocking SSRF attack vectors
3
- * @throws {ValidationError} if URL is empty or too long
4
- * @throws {UrlValidationError} if URL is invalid or blocked
5
- */
6
1
  export declare function validateAndNormalizeUrl(urlString: string): string;
7
- /**
8
- * Checks if a URL is internal (same domain)
9
- */
10
2
  export declare function isInternalUrl(url: string, baseUrl: string): boolean;
11
3
  //# sourceMappingURL=url-validator.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAyCA;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CA2EjE;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQnE"}
1
+ {"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAmCA,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CA2EjE;AAED,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQnE"}
@@ -1,44 +1,33 @@
1
1
  import { UrlValidationError, ValidationError } from '../errors/app-error.js';
2
- // Maximum URL length to prevent DoS attacks
3
2
  const MAX_URL_LENGTH = 2048;
4
- // Blocked hosts to prevent SSRF attacks
5
3
  const BLOCKED_HOSTS = new Set([
6
4
  'localhost',
7
5
  '127.0.0.1',
8
6
  '0.0.0.0',
9
7
  '::1',
10
- '169.254.169.254', // AWS metadata endpoint
11
- 'metadata.google.internal', // GCP metadata
12
- 'metadata.azure.com', // Azure metadata
13
- '100.100.100.200', // Alibaba Cloud metadata
14
- 'instance-data', // Common cloud metadata hostname
8
+ '169.254.169.254',
9
+ 'metadata.google.internal',
10
+ 'metadata.azure.com',
11
+ '100.100.100.200',
12
+ 'instance-data',
15
13
  ]);
16
- // Blocked IP patterns (private networks)
17
14
  const BLOCKED_IP_PATTERNS = [
18
- /^10\./, // Private Class A
19
- /^172\.(1[6-9]|2\d|3[01])\./, // Private Class B
20
- /^192\.168\./, // Private Class C
21
- /^127\./, // Loopback
22
- /^0\./, // Current network
23
- /^169\.254\./, // Link-local
24
- /^fc00:/i, // IPv6 unique local
25
- /^fe80:/i, // IPv6 link-local
26
- /^::ffff:127\./, // IPv4-mapped IPv6 loopback
27
- /^::ffff:10\./, // IPv4-mapped IPv6 private
28
- /^::ffff:172\.(1[6-9]|2\d|3[01])\./, // IPv4-mapped IPv6 private
29
- /^::ffff:192\.168\./, // IPv4-mapped IPv6 private
15
+ /^10\./,
16
+ /^172\.(1[6-9]|2\d|3[01])\./,
17
+ /^192\.168\./,
18
+ /^127\./,
19
+ /^0\./,
20
+ /^169\.254\./,
21
+ /^fc00:/i,
22
+ /^fe80:/i,
23
+ /^::ffff:127\./,
24
+ /^::ffff:10\./,
25
+ /^::ffff:172\.(1[6-9]|2\d|3[01])\./,
26
+ /^::ffff:192\.168\./,
30
27
  ];
31
- /**
32
- * Checks if a hostname matches blocked IP patterns
33
- */
34
28
  function isBlockedIp(hostname) {
35
29
  return BLOCKED_IP_PATTERNS.some((pattern) => pattern.test(hostname));
36
30
  }
37
- /**
38
- * Validates and normalizes a URL, blocking SSRF attack vectors
39
- * @throws {ValidationError} if URL is empty or too long
40
- * @throws {UrlValidationError} if URL is invalid or blocked
41
- */
42
31
  export function validateAndNormalizeUrl(urlString) {
43
32
  // Check for empty or whitespace-only input
44
33
  if (!urlString || typeof urlString !== 'string') {
@@ -86,9 +75,6 @@ export function validateAndNormalizeUrl(urlString) {
86
75
  }
87
76
  return url.href;
88
77
  }
89
- /**
90
- * Checks if a URL is internal (same domain)
91
- */
92
78
  export function isInternalUrl(url, baseUrl) {
93
79
  try {
94
80
  const urlObj = new URL(url, baseUrl);