@j0hanz/superfetch 1.0.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/README.md +200 -36
  2. package/dist/config/index.d.ts +5 -5
  3. package/dist/config/index.d.ts.map +1 -1
  4. package/dist/config/index.js +38 -19
  5. package/dist/config/index.js.map +1 -1
  6. package/dist/config/types.d.ts +98 -57
  7. package/dist/config/types.d.ts.map +1 -1
  8. package/dist/errors/app-error.d.ts +4 -28
  9. package/dist/errors/app-error.d.ts.map +1 -1
  10. package/dist/errors/app-error.js +10 -51
  11. package/dist/errors/app-error.js.map +1 -1
  12. package/dist/index.js +10 -55
  13. package/dist/index.js.map +1 -1
  14. package/dist/middleware/error-handler.d.ts +2 -2
  15. package/dist/middleware/error-handler.d.ts.map +1 -1
  16. package/dist/middleware/error-handler.js +12 -14
  17. package/dist/middleware/error-handler.js.map +1 -1
  18. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  19. package/dist/middleware/rate-limiter.js +0 -8
  20. package/dist/middleware/rate-limiter.js.map +1 -1
  21. package/dist/parsers/base-html-element-parser.d.ts +43 -0
  22. package/dist/parsers/base-html-element-parser.d.ts.map +1 -0
  23. package/dist/parsers/base-html-element-parser.js +59 -0
  24. package/dist/parsers/base-html-element-parser.js.map +1 -0
  25. package/dist/parsers/heading-element-parser.d.ts +14 -0
  26. package/dist/parsers/heading-element-parser.d.ts.map +1 -0
  27. package/dist/parsers/heading-element-parser.js +26 -0
  28. package/dist/parsers/heading-element-parser.js.map +1 -0
  29. package/dist/parsers/image-element-parser.d.ts +16 -0
  30. package/dist/parsers/image-element-parser.d.ts.map +1 -0
  31. package/dist/parsers/image-element-parser.js +33 -0
  32. package/dist/parsers/image-element-parser.js.map +1 -0
  33. package/dist/parsers/link-element-parser.d.ts +15 -0
  34. package/dist/parsers/link-element-parser.d.ts.map +1 -0
  35. package/dist/parsers/link-element-parser.js +28 -0
  36. package/dist/parsers/link-element-parser.js.map +1 -0
  37. package/dist/parsers/open-graph-parser.d.ts +17 -0
  38. package/dist/parsers/open-graph-parser.d.ts.map +1 -0
  39. package/dist/parsers/open-graph-parser.js +41 -0
  40. package/dist/parsers/open-graph-parser.js.map +1 -0
  41. package/dist/parsers/schema-org-parser.d.ts +17 -0
  42. package/dist/parsers/schema-org-parser.d.ts.map +1 -0
  43. package/dist/parsers/schema-org-parser.js +32 -0
  44. package/dist/parsers/schema-org-parser.js.map +1 -0
  45. package/dist/parsers/standard-meta-parser.d.ts +18 -0
  46. package/dist/parsers/standard-meta-parser.d.ts.map +1 -0
  47. package/dist/parsers/standard-meta-parser.js +32 -0
  48. package/dist/parsers/standard-meta-parser.js.map +1 -0
  49. package/dist/parsers/twitter-card-parser.d.ts +17 -0
  50. package/dist/parsers/twitter-card-parser.d.ts.map +1 -0
  51. package/dist/parsers/twitter-card-parser.js +41 -0
  52. package/dist/parsers/twitter-card-parser.js.map +1 -0
  53. package/dist/resources/cached-content.d.ts +0 -1
  54. package/dist/resources/cached-content.d.ts.map +1 -1
  55. package/dist/resources/cached-content.js +3 -9
  56. package/dist/resources/cached-content.js.map +1 -1
  57. package/dist/resources/index.d.ts.map +1 -1
  58. package/dist/resources/index.js +8 -8
  59. package/dist/resources/index.js.map +1 -1
  60. package/dist/server.d.ts.map +1 -1
  61. package/dist/server.js +10 -10
  62. package/dist/server.js.map +1 -1
  63. package/dist/services/cache.d.ts +0 -28
  64. package/dist/services/cache.d.ts.map +1 -1
  65. package/dist/services/cache.js +10 -173
  66. package/dist/services/cache.js.map +1 -1
  67. package/dist/services/extractor.d.ts +1 -11
  68. package/dist/services/extractor.d.ts.map +1 -1
  69. package/dist/services/extractor.js +86 -84
  70. package/dist/services/extractor.js.map +1 -1
  71. package/dist/services/fetcher.d.ts +2 -13
  72. package/dist/services/fetcher.d.ts.map +1 -1
  73. package/dist/services/fetcher.js +48 -79
  74. package/dist/services/fetcher.js.map +1 -1
  75. package/dist/services/logger.d.ts +5 -4
  76. package/dist/services/logger.d.ts.map +1 -1
  77. package/dist/services/logger.js +27 -42
  78. package/dist/services/logger.js.map +1 -1
  79. package/dist/services/parser.d.ts.map +1 -1
  80. package/dist/services/parser.js +35 -26
  81. package/dist/services/parser.js.map +1 -1
  82. package/dist/services/session-manager.d.ts +18 -0
  83. package/dist/services/session-manager.d.ts.map +1 -0
  84. package/dist/services/session-manager.js +73 -0
  85. package/dist/services/session-manager.js.map +1 -0
  86. package/dist/strategies/exponential-backoff-strategy.d.ts +13 -0
  87. package/dist/strategies/exponential-backoff-strategy.d.ts.map +1 -0
  88. package/dist/strategies/exponential-backoff-strategy.js +32 -0
  89. package/dist/strategies/exponential-backoff-strategy.js.map +1 -0
  90. package/dist/tools/handlers/fetch-links.tool.d.ts +2 -9
  91. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  92. package/dist/tools/handlers/fetch-links.tool.js +0 -1
  93. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  94. package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -2
  95. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  96. package/dist/tools/handlers/fetch-markdown.tool.js +23 -33
  97. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  98. package/dist/tools/handlers/fetch-url.tool.d.ts +2 -9
  99. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  100. package/dist/tools/handlers/fetch-url.tool.js +15 -20
  101. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  102. package/dist/tools/handlers/fetch-urls.tool.d.ts +2 -9
  103. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
  104. package/dist/tools/handlers/fetch-urls.tool.js +124 -105
  105. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
  106. package/dist/tools/index.d.ts.map +1 -1
  107. package/dist/tools/index.js +0 -4
  108. package/dist/tools/index.js.map +1 -1
  109. package/dist/tools/utils/common.d.ts +6 -7
  110. package/dist/tools/utils/common.d.ts.map +1 -1
  111. package/dist/tools/utils/common.js +8 -8
  112. package/dist/tools/utils/common.js.map +1 -1
  113. package/dist/tools/utils/fetch-pipeline.d.ts +8 -0
  114. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
  115. package/dist/tools/utils/fetch-pipeline.js +47 -79
  116. package/dist/tools/utils/fetch-pipeline.js.map +1 -1
  117. package/dist/transformers/jsonl.transformer.d.ts +1 -1
  118. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  119. package/dist/transformers/jsonl.transformer.js +15 -10
  120. package/dist/transformers/jsonl.transformer.js.map +1 -1
  121. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  122. package/dist/transformers/markdown.transformer.js +58 -62
  123. package/dist/transformers/markdown.transformer.js.map +1 -1
  124. package/dist/utils/concurrency.d.ts +2 -5
  125. package/dist/utils/concurrency.d.ts.map +1 -1
  126. package/dist/utils/concurrency.js +19 -19
  127. package/dist/utils/concurrency.js.map +1 -1
  128. package/dist/utils/content-cleaner.d.ts +0 -25
  129. package/dist/utils/content-cleaner.d.ts.map +1 -1
  130. package/dist/utils/content-cleaner.js +12 -187
  131. package/dist/utils/content-cleaner.js.map +1 -1
  132. package/dist/utils/html-truncator.d.ts +2 -0
  133. package/dist/utils/html-truncator.d.ts.map +1 -0
  134. package/dist/utils/html-truncator.js +14 -0
  135. package/dist/utils/html-truncator.js.map +1 -0
  136. package/dist/utils/language-detector.d.ts +0 -3
  137. package/dist/utils/language-detector.d.ts.map +1 -1
  138. package/dist/utils/language-detector.js +0 -11
  139. package/dist/utils/language-detector.js.map +1 -1
  140. package/dist/utils/sanitizer.d.ts.map +1 -1
  141. package/dist/utils/sanitizer.js +7 -5
  142. package/dist/utils/sanitizer.js.map +1 -1
  143. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  144. package/dist/utils/tool-error-handler.js +15 -42
  145. package/dist/utils/tool-error-handler.js.map +1 -1
  146. package/dist/utils/url-validator.d.ts +0 -9
  147. package/dist/utils/url-validator.d.ts.map +1 -1
  148. package/dist/utils/url-validator.js +14 -33
  149. package/dist/utils/url-validator.js.map +1 -1
  150. package/package.json +3 -4
@@ -1,208 +1,51 @@
1
- /**
2
- * Post-processing content cleaner for removing noise artifacts
3
- * that slip through Readability extraction.
4
- */
5
- // Pre-compiled combined pattern for optimal performance
6
- const NOISE_PATTERN_COMBINED = new RegExp([
7
- // Relative timestamps
8
- '^\\d+\\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\\s*ago$',
9
- '^(just now|recently|today|yesterday|last week|last month)$',
10
- '^(updated|modified|edited|created|published)\\s*:?\\s*\\d+\\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\\s*ago$',
11
- '^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\s+\\d{1,2},?\\s+\\d{4}$',
12
- '^\\d{1,2}\\s+(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\s+\\d{4}$',
13
- '^\\d{4}-\\d{2}-\\d{2}$',
14
- '^last\\s+updated\\s*:?',
15
- // Share/action buttons
16
- '^(share|copy|like|follow|subscribe|download|print|save|bookmark|tweet|pin it|email|export)$',
1
+ const NOISE_PATTERN = new RegExp([
2
+ '^(share|copy|like|follow|subscribe|download|print|save)$',
17
3
  '^(copy to clipboard|copied!?|copy code|copy link)$',
18
- '^(share on|share to|share via)\\s+(twitter|facebook|linkedin|reddit|x|email)$',
19
- // UI artifacts
20
- '^(click to copy|expand|collapse|show more|show less|load more|view more|read more|see more|see all|view all)$',
21
- '^(toggle|switch|enable|disable|on|off)$',
22
- '^(edit|delete|remove|add|new|create|update|cancel|confirm|submit|reset|clear)$',
23
- '^(open in|view in|edit in)\\s+\\w+$',
24
- '^(try it|run|execute|play|preview|demo|live demo|playground)$',
25
- '^(source|view source|edit this page|edit on github|improve this doc)$',
26
- // Empty/placeholder
4
+ '^(show more|show less|load more|view more|read more|see all|view all)$',
5
+ '^(next|previous|prev|back|forward|home|menu|close|skip to)$',
6
+ '^(table of contents|toc|on this page)$',
27
7
  '^(loading\\.{0,3}|please wait\\.{0,3}|\\.{2,})$',
28
- '^(n\\/a|tbd|todo|coming soon|placeholder|untitled)$',
29
- // Navigation
30
- '^(next|previous|prev|back|forward|home|menu|close|open|skip to|jump to|go to)$',
31
- '^(table of contents|toc|contents|on this page|in this article|in this section)$',
32
- '^(scroll to top|back to top|top)$',
33
- // Cookie/consent
34
- '^(accept|reject|accept all|reject all|cookie settings|privacy settings|manage preferences)$',
35
- '^(accept cookies|decline cookies|cookie policy|privacy policy|terms of service|terms & conditions)$',
36
- // Counts
37
- '^\\d+\\s*(comments?|replies?|reactions?|responses?)$',
38
- '^\\d+\\s*(likes?|shares?|views?|followers?|retweets?|stars?|forks?|claps?|upvotes?|downvotes?)$',
39
- '^(liked by|shared by|followed by)\\s+\\d+',
40
- // Version badges
41
- '^v?\\d+\\.\\d+(\\.\\d+)?(-\\w+)?$',
42
- '^(stable|beta|alpha|rc|preview|experimental|deprecated|legacy|new|updated)$',
43
- // Structural
44
- '^(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)$',
45
- '^panel\\s*[a-z]?$',
46
- // API artifacts
47
- '^(required|optional|default|type|example|description|parameters?|returns?|response|request)$',
48
- '^(get|post|put|patch|delete|head|options)\\s*$',
49
- // Interactive
50
- '^(drag|drop|resize|zoom|scroll|swipe|tap|click|hover|focus)(\\s+to\\s+\\w+)?$',
51
- '^(drag the|move the|resize the|drag to|click to)\\s+\\w+',
52
- // Breadcrumbs
8
+ '^(n\\/a|tbd|todo|coming soon|placeholder)$',
9
+ '^\\d+\\s*(comments?|replies?|likes?|shares?|views?)$',
53
10
  '^[/\\\\>→»›]+$',
54
- // Ads
55
- '^(ad|advertisement|sponsored|promoted|partner content)$',
56
11
  ].join('|'), 'i');
57
- // Pre-compiled pattern for short text noise
58
- const SHORT_TEXT_NOISE_PATTERN = new RegExp([
59
- '^#\\w+$',
60
- '^@\\w+$',
61
- '^\\d+$',
62
- '^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$',
63
- '^[,;:\\-–—]+$',
64
- '^\\[\\d+\\]$',
65
- '^\\(\\d+\\)$',
66
- '^fig\\.?\\s*\\d+$',
67
- '^table\\s*\\d+$',
68
- '^step\\s*\\d+$',
69
- '^note:?$',
70
- '^tip:?$',
71
- '^warning:?$',
72
- '^info:?$',
73
- '^caution:?$',
74
- ].join('|'), 'i');
75
- // Pre-compiled pattern for UI chrome detection
76
- const UI_CHROME_PATTERN = new RegExp([
77
- '^(sign in|sign up|log in|log out|register|create account)$',
78
- '^(search|search\\.\\.\\.|search docs|search documentation)$',
79
- '^(dark mode|light mode|theme|language|locale)$',
80
- '^(feedback|report issue|report a bug|file an issue|suggest edit)$',
81
- '^(documentation|docs|api|reference|guide|tutorial|examples?)$',
82
- "^(version|changelog|release notes|what's new)$",
83
- ].join('|'), 'i');
84
- // Minimum lengths for different content types
85
12
  const MIN_PARAGRAPH_LENGTH = 20;
86
13
  const MIN_HEADING_LENGTH = 2;
87
14
  const MIN_LIST_ITEM_LENGTH = 3;
88
- const SHORT_TEXT_THRESHOLD = 25;
89
- // Maximum text length to test against regex patterns (ReDoS protection)
90
15
  const MAX_REGEX_INPUT_LENGTH = 500;
91
- /**
92
- * Check if text matches any noise pattern
93
- * Protected against ReDoS by limiting input length
94
- */
95
16
  function isNoiseText(text) {
96
17
  const trimmed = text.trim();
97
- // Empty or whitespace-only
98
- if (!trimmed) {
18
+ if (!trimmed)
99
19
  return true;
100
- }
101
- // ReDoS protection: skip regex for very long strings
102
- if (trimmed.length > MAX_REGEX_INPUT_LENGTH) {
20
+ if (trimmed.length > MAX_REGEX_INPUT_LENGTH)
103
21
  return false;
104
- }
105
- // Check combined noise pattern (single regex test)
106
- if (NOISE_PATTERN_COMBINED.test(trimmed)) {
107
- return true;
108
- }
109
- // Check short text patterns for brief content
110
- if (trimmed.length < SHORT_TEXT_THRESHOLD) {
111
- if (SHORT_TEXT_NOISE_PATTERN.test(trimmed)) {
112
- return true;
113
- }
114
- // Also check UI chrome patterns for short text
115
- if (UI_CHROME_PATTERN.test(trimmed)) {
116
- return true;
117
- }
118
- }
119
- return false;
120
- }
121
- // Pre-compiled placeholder pattern (combined for performance)
122
- const PLACEHOLDER_PATTERN = /^(lorem ipsum|sample text|placeholder|example (text|content|data)|test (text|content|data)|your (text|content|name|email) here|enter (your|a) |type (your|a|something) )/i;
123
- const PLACEHOLDER_CACHE = new Map();
124
- const PLACEHOLDER_CACHE_MAX_SIZE = 1000;
125
- const PLACEHOLDER_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
126
- /**
127
- * Check if text looks like placeholder/demo content
128
- * Uses caching with TTL for performance and memory safety
129
- */
130
- function isPlaceholderContent(text) {
131
- const trimmed = text.trim().toLowerCase();
132
- const now = Date.now();
133
- // Check cache first
134
- const cached = PLACEHOLDER_CACHE.get(trimmed);
135
- if (cached !== undefined) {
136
- // Check if entry is still valid
137
- if (now - cached.timestamp < PLACEHOLDER_CACHE_TTL_MS) {
138
- return cached.value;
139
- }
140
- // Expired entry, remove it
141
- PLACEHOLDER_CACHE.delete(trimmed);
142
- }
143
- // Single regex test (faster than array iteration)
144
- const result = PLACEHOLDER_PATTERN.test(trimmed);
145
- // Cache result with LRU eviction and timestamp
146
- if (PLACEHOLDER_CACHE.size >= PLACEHOLDER_CACHE_MAX_SIZE) {
147
- // Remove oldest entries (first 10% of cache)
148
- const keysToDelete = Math.ceil(PLACEHOLDER_CACHE_MAX_SIZE * 0.1);
149
- const iterator = PLACEHOLDER_CACHE.keys();
150
- for (let i = 0; i < keysToDelete; i++) {
151
- const key = iterator.next().value;
152
- if (key !== undefined) {
153
- PLACEHOLDER_CACHE.delete(key);
154
- }
155
- }
156
- }
157
- PLACEHOLDER_CACHE.set(trimmed, { value: result, timestamp: now });
158
- return result;
22
+ return NOISE_PATTERN.test(trimmed);
159
23
  }
160
- /**
161
- * Clean paragraph text by removing noise
162
- */
163
24
  export function cleanParagraph(text) {
164
25
  const trimmed = text.trim();
165
- // Too short to be meaningful
166
26
  if (trimmed.length < MIN_PARAGRAPH_LENGTH) {
167
- // Allow very short paragraphs if they end with punctuation (likely real content)
168
27
  if (!/[.!?]$/.test(trimmed)) {
169
28
  return null;
170
29
  }
171
30
  }
172
- // Is noise content
173
31
  if (isNoiseText(trimmed)) {
174
32
  return null;
175
33
  }
176
- // Is placeholder content (in paragraphs, not in examples)
177
- if (isPlaceholderContent(trimmed)) {
178
- return null;
179
- }
180
34
  return trimmed;
181
35
  }
182
- /**
183
- * Clean heading text by removing noise and markdown link syntax
184
- */
185
36
  export function cleanHeading(text) {
186
37
  let cleaned = text.trim();
187
- // Too short
188
38
  if (cleaned.length < MIN_HEADING_LENGTH) {
189
39
  return null;
190
40
  }
191
- // Remove markdown link syntax: [Text](#anchor) -> Text
192
41
  cleaned = cleaned.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1');
193
- // Remove trailing anchor links like "Link for this heading"
194
42
  cleaned = cleaned.replace(/\s*Link for (this heading|[\w\s]+)\s*$/i, '');
195
- // Remove trailing hash symbols often used for anchor links
196
43
  cleaned = cleaned.replace(/\s*#+\s*$/, '');
197
- // Is noise content
198
44
  if (isNoiseText(cleaned)) {
199
45
  return null;
200
46
  }
201
47
  return cleaned.trim();
202
48
  }
203
- /**
204
- * Clean list items by filtering out noise
205
- */
206
49
  export function cleanListItems(items) {
207
50
  return items
208
51
  .map((item) => item.trim())
@@ -214,43 +57,25 @@ export function cleanListItems(items) {
214
57
  return true;
215
58
  });
216
59
  }
217
- /**
218
- * Clean code block text - minimal cleaning to preserve code integrity
219
- */
220
60
  export function cleanCodeBlock(code) {
221
61
  const trimmed = code.trim();
222
- // Empty code block
223
62
  if (trimmed.length === 0) {
224
63
  return null;
225
64
  }
226
- // Very short code blocks that are likely just labels
227
65
  if (trimmed.length < 3 && !/^[{}[\]();<>]$/.test(trimmed)) {
228
66
  return null;
229
67
  }
230
68
  return trimmed;
231
69
  }
232
- /**
233
- * Strip markdown link syntax from text for cleaner slugs/display
234
- * [Text](#anchor) -> Text
235
- * [Text](url) -> Text
236
- */
237
70
  export function stripMarkdownLinks(text) {
238
71
  return text.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1');
239
72
  }
240
- /**
241
- * Remove common timestamp patterns from text (inline removal)
242
- * Use when you want to strip timestamps from within longer content
243
- */
244
73
  export function removeInlineTimestamps(text) {
245
- return (text
246
- // Remove "X days/hours/etc ago" patterns
74
+ return text
247
75
  .replace(/\b\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
248
- // Remove "Updated: date" patterns
249
76
  .replace(/\b(updated|modified|edited|created|published)\s*:?\s*\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
250
- // Remove standalone dates
251
77
  .replace(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\s+\d{1,2},?\s+\d{4}\b/gi, '')
252
- // Clean up extra whitespace
253
78
  .replace(/\s{2,}/g, ' ')
254
- .trim());
79
+ .trim();
255
80
  }
256
81
  //# sourceMappingURL=content-cleaner.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"content-cleaner.js","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,wDAAwD;AACxD,MAAM,sBAAsB,GAAG,IAAI,MAAM,CACvC;IACE,sBAAsB;IACtB,yEAAyE;IACzE,4DAA4D;IAC5D,8HAA8H;IAC9H,6EAA6E;IAC7E,2EAA2E;IAC3E,wBAAwB;IACxB,wBAAwB;IACxB,uBAAuB;IACvB,6FAA6F;IAC7F,oDAAoD;IACpD,+EAA+E;IAC/E,eAAe;IACf,+GAA+G;IAC/G,yCAAyC;IACzC,gFAAgF;IAChF,qCAAqC;IACrC,+DAA+D;IAC/D,uEAAuE;IACvE,oBAAoB;IACpB,iDAAiD;IACjD,qDAAqD;IACrD,aAAa;IACb,gFAAgF;IAChF,iFAAiF;IACjF,mCAAmC;IACnC,iBAAiB;IACjB,6FAA6F;IAC7F,qGAAqG;IACrG,SAAS;IACT,sDAAsD;IACtD,iGAAiG;IACjG,2CAA2C;IAC3C,iBAAiB;IACjB,mCAAmC;IACnC,6EAA6E;IAC7E,aAAa;IACb,yDAAyD;IACzD,mBAAmB;IACnB,gBAAgB;IAChB,8FAA8F;IAC9F,gDAAgD;IAChD,cAAc;IACd,+EAA+E;IAC/E,0DAA0D;IAC1D,cAAc;IACd,gBAAgB;IAChB,MAAM;IACN,yDAAyD;CAC1D,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,4CAA4C;AAC5C,MAAM,wBAAwB,GAAG,IAAI,MAAM,CACzC;IACE,SAAS;IACT,SAAS;IACT,QAAQ;IACR,8BAA8B;IAC9B,eAAe;IACf,cAAc;IACd,cAAc;IACd,mBAAmB;IACnB,iBAAiB;IACjB,gBAAgB;IAChB,UAAU;IACV,SAAS;IACT,aAAa;IACb,UAAU;IACV,aAAa;CACd,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,+CAA+C;AAC/C,MAAM,iBAAiB,GAAG,IAAI,MAAM,CAClC;IACE,4DAA4D;IAC5D,6DAA6D;IAC7D,gDAAgD;IAChD,mEAAmE;IACnE,+DAA+D;IAC/D,gDAAgD;CACjD,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,8CAA8C;AAC9C,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEhC,wEAAwE;AACxE,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAEnC;;;GAGG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,2BAA2B;IAC3B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAC;IACd,CAAC;IAED,qDAAqD;IACrD,IAAI,OAAO,CAAC,MAAM,GAAG,sBAAsB,EAAE,CAAC;QAC5C,OAAO,KAAK,CAAC;IACf,CAAC;IAED,mDAAmD;IACnD,IAAI,sBAAsB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,8CAA8C;IAC9C,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,IAAI,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,+CAA+C;QAC/C,I
AAI,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YACpC,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,8DAA8D;AAC9D,MAAM,mBAAmB,GACvB,2KAA2K,CAAC;AAO9K,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAsB,CAAC;AACxD,MAAM,0BAA0B,GAAG,IAAI,CAAC;AACxC,MAAM,wBAAwB,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,YAAY;AAE5D;;;GAGG;AACH,SAAS,oBAAoB,CAAC,IAAY;IACxC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC1C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEvB,oBAAoB;IACpB,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9C,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,gCAAgC;QAChC,IAAI,GAAG,GAAG,MAAM,CAAC,SAAS,GAAG,wBAAwB,EAAE,CAAC;YACtD,OAAO,MAAM,CAAC,KAAK,CAAC;QACtB,CAAC;QACD,2BAA2B;QAC3B,iBAAiB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,kDAAkD;IAClD,MAAM,MAAM,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEjD,+CAA+C;IAC/C,IAAI,iBAAiB,CAAC,IAAI,IAAI,0BAA0B,EAAE,CAAC;QACzD,6CAA6C;QAC7C,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,0BAA0B,GAAG,GAAG,CAAC,CAAC;QACjE,MAAM,QAAQ,GAAG,iBAAiB,CAAC,IAAI,EAAE,CAAC;QAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YAClC,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;gBACtB,iBAAiB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;IACH,CAAC;IACD,iBAAiB,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IAElE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,6BAA6B;IAC7B,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,iFAAiF;QACjF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0DAA0D;IAC1D,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE1B,YAAY;IACZ,IAAI,OAAO,CAAC,MAAM,GAAG,kBAAkB,EAAE,CAAC;QACxC,OAAO,IAAI,CAAC;IACd,CAA
C;IAED,uDAAuD;IACvD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;IAE1D,4DAA4D;IAC5D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,yCAAyC,EAAE,EAAE,CAAC,CAAC;IAEzE,2DAA2D;IAC3D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAE3C,mBAAmB;IACnB,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,KAAe;IAC5C,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACf,IAAI,IAAI,CAAC,MAAM,GAAG,oBAAoB;YAAE,OAAO,KAAK,CAAC;QACrD,IAAI,WAAW,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACP,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,mBAAmB;IACnB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,qDAAqD;IACrD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,OAAO,CACL,IAAI;QACF,yCAAyC;SACxC,OAAO,CACN,0EAA0E,EAC1E,EAAE,CACH;QACD,kCAAkC;SACjC,OAAO,CACN,6HAA6H,EAC7H,EAAE,CACH;QACD,0BAA0B;SACzB,OAAO,CACN,6EAA6E,EAC7E,EAAE,CACH;QACD,4BAA4B;SAC3B,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,IAAI,EAAE,CACV,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"content-cleaner.js","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA,MAAM,aAAa,GAAG,IAAI,MAAM,CAC9B;IACE,0DAA0D;IAC1D,oDAAoD;IACpD,wEAAwE;IACxE,6DAA6D;IAC7D,wCAAwC;IACxC,iDAAiD;IACjD,4CAA4C;IAC5C,sDAAsD;IACtD,gBAAgB;CACjB,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAEnC,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,MAAM,GAAG,sBAAsB;QAAE,OAAO,KAAK,CAAC;IAC1D,OAAO,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE1B,IAAI,OAAO,CAAC,MAAM,GAAG,kBAAkB,EAAE,CAAC;QACxC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;IAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,yCAAyC,EAAE,EAAE,CAAC,CAAC;IACzE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAE3C,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAe;IAC5C,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACf,IAAI,IAAI,CAAC,MAAM,GAAG,oBAAoB;YAAE,OAAO,KAAK,CAAC;QACrD,IAAI,WAAW,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACP,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC
,OAAO,CAAC,EAAE,CAAC;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,OAAO,IAAI;SACR,OAAO,CACN,0EAA0E,EAC1E,EAAE,CACH;SACA,OAAO,CACN,6HAA6H,EAC7H,EAAE,CACH;SACA,OAAO,CACN,6EAA6E,EAC7E,EAAE,CACH;SACA,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,IAAI,EAAE,CAAC;AACZ,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function truncateHtml(html: string): string;
2
+ //# sourceMappingURL=html-truncator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-truncator.d.ts","sourceRoot":"","sources":["../../src/utils/html-truncator.ts"],"names":[],"mappings":"AAIA,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAajD"}
@@ -0,0 +1,14 @@
1
+ import { config } from '../config/index.js';
2
+ import { logWarn } from '../services/logger.js';
3
+ export function truncateHtml(html) {
4
+ const maxSize = config.constants.maxHtmlSize;
5
+ if (html.length <= maxSize) {
6
+ return html;
7
+ }
8
+ logWarn('HTML content exceeds maximum size, truncating', {
9
+ size: html.length,
10
+ maxSize,
11
+ });
12
+ return html.substring(0, maxSize);
13
+ }
14
+ //# sourceMappingURL=html-truncator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-truncator.js","sourceRoot":"","sources":["../../src/utils/html-truncator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAEhD,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC;IAE7C,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,CAAC,+CAA+C,EAAE;QACvD,IAAI,EAAE,IAAI,CAAC,MAAM;QACjB,OAAO;KACR,CAAC,CAAC;IAEH,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACpC,CAAC"}
@@ -1,5 +1,2 @@
1
- /**
2
- * Detect programming language from code content
3
- */
4
1
  export declare function detectLanguage(code: string): string | undefined;
5
2
  //# sourceMappingURL=language-detector.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"language-detector.d.ts","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AA4CA;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAE/D"}
1
+ {"version":3,"file":"language-detector.d.ts","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAoCA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAE/D"}
@@ -1,21 +1,13 @@
1
- /**
2
- * Language detection patterns for code blocks
3
- * Shared between parser and markdown transformer
4
- */
5
1
  const LANGUAGE_PATTERNS = [
6
- // JSX/TSX patterns
7
2
  [
8
3
  /^\s*import\s+.*\s+from\s+['"]react['"]|<[A-Z][a-zA-Z]*[\s/>]|jsx\s*:|className=/m,
9
4
  'jsx',
10
5
  ],
11
- // TypeScript patterns
12
6
  [
13
7
  /:\s*(string|number|boolean|void|any|unknown|never)\b|interface\s+\w+|type\s+\w+\s*=/m,
14
8
  'typescript',
15
9
  ],
16
- // Rust patterns
17
10
  [/^\s*(fn|let\s+mut|impl|struct|enum|use\s+\w+::)/m, 'rust'],
18
- // JavaScript patterns (generic)
19
11
  [
20
12
  /^\s*(export|const|let|var|function|class|async|await)\b|^\s*import\s+.*['"]/m,
21
13
  'javascript',
@@ -41,9 +33,6 @@ const LANGUAGE_PATTERNS = [
41
33
  // Go patterns
42
34
  [/^\s*(func|package|import\s+")/m, 'go'],
43
35
  ];
44
- /**
45
- * Detect programming language from code content
46
- */
47
36
  export function detectLanguage(code) {
48
37
  return LANGUAGE_PATTERNS.find(([pattern]) => pattern.test(code))?.[1];
49
38
  }
@@ -1 +1 @@
1
- {"version":3,"file":"language-detector.js","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,iBAAiB,GAAG;IACxB,mBAAmB;IACnB;QACE,kFAAkF;QAClF,KAAK;KACN;IACD,sBAAsB;IACtB;QACE,sFAAsF;QACtF,YAAY;KACb;IACD,gBAAgB;IAChB,CAAC,kDAAkD,EAAE,MAAM,CAAC;IAC5D,gCAAgC;IAChC;QACE,8EAA8E;QAC9E,YAAY;KACb;IACD,kBAAkB;IAClB,CAAC,kDAAkD,EAAE,QAAQ,CAAC;IAC9D,sBAAsB;IACtB;QACE,iFAAiF;QACjF,MAAM;KACP;IACD,CAAC,iEAAiE,EAAE,MAAM,CAAC;IAC3E,eAAe;IACf,CAAC,yDAAyD,EAAE,KAAK,CAAC;IAClE,gBAAgB;IAChB,CAAC,8DAA8D,EAAE,MAAM,CAAC;IACxE,gBAAgB;IAChB,CAAC,6CAA6C,EAAE,MAAM,CAAC;IACvD,gBAAgB;IAChB,CAAC,oBAAoB,EAAE,MAAM,CAAC;IAC9B,eAAe;IACf,CAAC,0DAA0D,EAAE,KAAK,CAAC;IACnE,cAAc;IACd,CAAC,gCAAgC,EAAE,IAAI,CAAC;CAChC,CAAC;AAEX;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACxE,CAAC"}
1
+ {"version":3,"file":"language-detector.js","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAAA,MAAM,iBAAiB,GAAG;IACxB;QACE,kFAAkF;QAClF,KAAK;KACN;IACD;QACE,sFAAsF;QACtF,YAAY;KACb;IACD,CAAC,kDAAkD,EAAE,MAAM,CAAC;IAC5D;QACE,8EAA8E;QAC9E,YAAY;KACb;IACD,kBAAkB;IAClB,CAAC,kDAAkD,EAAE,QAAQ,CAAC;IAC9D,sBAAsB;IACtB;QACE,iFAAiF;QACjF,MAAM;KACP;IACD,CAAC,iEAAiE,EAAE,MAAM,CAAC;IAC3E,eAAe;IACf,CAAC,yDAAyD,EAAE,KAAK,CAAC;IAClE,gBAAgB;IAChB,CAAC,8DAA8D,EAAE,MAAM,CAAC;IACxE,gBAAgB;IAChB,CAAC,6CAA6C,EAAE,MAAM,CAAC;IACvD,gBAAgB;IAChB,CAAC,oBAAoB,EAAE,MAAM,CAAC;IAC9B,eAAe;IACf,CAAC,0DAA0D,EAAE,KAAK,CAAC;IACnE,cAAc;IACd,CAAC,gCAAgC,EAAE,IAAI,CAAC;CAChC,CAAC;AAEX,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACxE,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAGA,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,CAIpE;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAQpE"}
1
+ {"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAIA,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,CAKpE;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAWpE"}
@@ -1,19 +1,21 @@
1
- // Pre-compiled regex patterns for hot path optimization
2
- const WHITESPACE_REGEX = /\s+/g;
1
+ const CONSECUTIVE_WHITESPACE = /\s+/g;
2
+ const MIN_TRUNCATION_LENGTH = 4;
3
+ const TRUNCATION_SUFFIX = '...';
3
4
  export function sanitizeText(text) {
4
5
  if (text == null)
5
6
  return '';
6
7
  if (typeof text !== 'string')
7
8
  return String(text);
8
- return text.replace(WHITESPACE_REGEX, ' ').trim();
9
+ return text.replace(CONSECUTIVE_WHITESPACE, ' ').trim();
9
10
  }
10
11
  export function truncateText(text, maxLength) {
11
- if (maxLength < 4) {
12
+ if (maxLength < MIN_TRUNCATION_LENGTH) {
12
13
  return text.length > 0 ? text.charAt(0) : '';
13
14
  }
14
15
  if (text.length <= maxLength) {
15
16
  return text;
16
17
  }
17
- return `${text.substring(0, maxLength - 3)}...`;
18
+ const truncationPoint = maxLength - TRUNCATION_SUFFIX.length;
19
+ return `${text.substring(0, truncationPoint)}${TRUNCATION_SUFFIX}`;
18
20
  }
19
21
  //# sourceMappingURL=sanitizer.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA,wDAAwD;AACxD,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,IAA+B;IAC1D,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,EAAE,CAAC;IAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IAClD,OAAO,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC;AAClD,CAAC"}
1
+ {"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA,MAAM,sBAAsB,GAAG,MAAM,CAAC;AACtC,MAAM,qBAAqB,GAAG,CAAC,CAAC;AAChC,MAAM,iBAAiB,GAAG,KAAK,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,IAA+B;IAC1D,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,EAAE,CAAC;IAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IAElD,OAAO,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,SAAS,GAAG,qBAAqB,EAAE,CAAC;QACtC,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,eAAe,GAAG,SAAS,GAAG,iBAAiB,CAAC,MAAM,CAAC;IAC7D,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,eAAe,CAAC,GAAG,iBAAiB,EAAE,CAAC;AACrE,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAgB5D,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACX,iBAAiB,CAOnB;AAED,wBAAgB,eAAe,CAC7B,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,MAAM,EACX,eAAe,SAAqB,GACnC,iBAAiB,CA8CnB"}
1
+ {"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAQ5D,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACX,iBAAiB,CAQnB;AAgBD,wBAAgB,eAAe,CAC7B,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,MAAM,EACX,eAAe,SAAqB,GACnC,iBAAiB,CAgBnB"}
@@ -1,6 +1,5 @@
1
- import { AbortError, AppError, FetchError, RateLimitError, TimeoutError, UrlValidationError, } from '../errors/app-error.js';
2
- // Stack traces only exposed when explicitly enabled in development
3
- const isDevelopment = process.env.NODE_ENV === 'development' &&
1
+ import { FetchError } from '../errors/app-error.js';
2
+ const IS_DEVELOPMENT_WITH_STACK_TRACES = process.env.NODE_ENV === 'development' &&
4
3
  process.env.EXPOSE_STACK_TRACES === 'true';
5
4
  export function createToolErrorResponse(message, url, code) {
6
5
  const structuredContent = { error: message, url, errorCode: code };
@@ -10,48 +9,22 @@ export function createToolErrorResponse(message, url, code) {
10
9
  isError: true,
11
10
  };
12
11
  }
13
- export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
14
- if (error instanceof UrlValidationError) {
15
- const message = isDevelopment
16
- ? `${error.message}\nURL: ${error.url}\nStack: ${error.stack ?? ''}`
17
- : error.message;
18
- return createToolErrorResponse(message, url, 'INVALID_URL');
19
- }
20
- if (error instanceof AbortError) {
21
- const message = isDevelopment
22
- ? `Request aborted${error.reason ? `: ${error.reason}` : ''}\n${error.stack ?? ''}`
23
- : `Request aborted${error.reason ? `: ${error.reason}` : ''}`;
24
- return createToolErrorResponse(message, url, 'ABORTED');
25
- }
26
- if (error instanceof TimeoutError) {
27
- const message = isDevelopment
28
- ? `Request timed out after ${error.timeoutMs}ms\n${error.stack ?? ''}`
29
- : `Request timed out after ${error.timeoutMs}ms`;
30
- return createToolErrorResponse(message, url, 'TIMEOUT');
31
- }
32
- if (error instanceof RateLimitError) {
33
- const message = isDevelopment
34
- ? `Rate limited. Retry after ${error.retryAfter}s\n${error.stack ?? ''}`
35
- : `Rate limited. Retry after ${error.retryAfter}s`;
36
- return createToolErrorResponse(message, url, 'RATE_LIMITED');
12
+ function formatErrorMessage(baseMessage, error, fallback) {
13
+ const message = fallback ? `${fallback}: ${error.message}` : error.message;
14
+ if (IS_DEVELOPMENT_WITH_STACK_TRACES && error.stack) {
15
+ return `${message}\n${error.stack}`;
37
16
  }
17
+ return message;
18
+ }
19
+ export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
38
20
  if (error instanceof FetchError) {
39
- const code = error.httpStatus ? `HTTP_${error.httpStatus}` : 'FETCH_ERROR';
40
- const message = isDevelopment
41
- ? `${error.message}\n${error.stack ?? ''}`
42
- : error.message;
43
- return createToolErrorResponse(message, url, code);
44
- }
45
- if (error instanceof AppError) {
46
- const message = isDevelopment
47
- ? `${error.message}\n${error.stack ?? ''}`
48
- : error.message;
21
+ const message = formatErrorMessage(error.message, error);
49
22
  return createToolErrorResponse(message, url, error.code);
50
23
  }
51
- const message = error instanceof Error ? error.message : 'Unknown error';
52
- const fullMessage = isDevelopment && error instanceof Error
53
- ? `${fallbackMessage}: ${message}\n${error.stack ?? ''}`
54
- : `${fallbackMessage}: ${message}`;
55
- return createToolErrorResponse(fullMessage, url, 'UNKNOWN_ERROR');
24
+ if (error instanceof Error) {
25
+ const message = formatErrorMessage(error.message, error, fallbackMessage);
26
+ return createToolErrorResponse(message, url, 'UNKNOWN_ERROR');
27
+ }
28
+ return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url, 'UNKNOWN_ERROR');
56
29
  }
57
30
  //# sourceMappingURL=tool-error-handler.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,UAAU,EACV,QAAQ,EACR,UAAU,EACV,cAAc,EACd,YAAY,EACZ,kBAAkB,GACnB,MAAM,wBAAwB,CAAC;AAEhC,mEAAmE;AACnE,MAAM,aAAa,GACjB,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa;IACtC,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,MAAM,CAAC;AAE7C,MAAM,UAAU,uBAAuB,CACrC,OAAe,EACf,GAAW,EACX,IAAY;IAEZ,MAAM,iBAAiB,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACnE,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAAE,CAAC;QACpE,iBAAiB;QACjB,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAc,EACd,GAAW,EACX,eAAe,GAAG,kBAAkB;IAEpC,IAAI,KAAK,YAAY,kBAAkB,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,UAAU,KAAK,CAAC,GAAG,YAAY,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACpE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;QAClB,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC9D,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,kBAAkB,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACnF,CAAC,CAAC,kBAAkB,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAChE,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;QAClC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,2BAA2B,KAAK,CAAC,SAAS,OAAO,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACtE,CAAC,CAAC,2BAA2B,KAAK,CAAC,SAAS,IAAI,CAAC;QACnD,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,6BAA6B,KAAK,CAAC,UAAU,MAAM,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YACxE,CAAC,CAAC,6BAA6B,KAAK,CAAC,UAAU,GAAG,CAAC;QACrD,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,cAAc,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC;QAC3E,MAAM,OAAO,GAAG
,aAAa;YAC3B,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,KAAK,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YAC1C,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;QAClB,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,KAAK,YAAY,QAAQ,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAG,aAAa;YAC3B,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,KAAK,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;YAC1C,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;QAClB,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;IACzE,MAAM,WAAW,GACf,aAAa,IAAI,KAAK,YAAY,KAAK;QACrC,CAAC,CAAC,GAAG,eAAe,KAAK,OAAO,KAAK,KAAK,CAAC,KAAK,IAAI,EAAE,EAAE;QACxD,CAAC,CAAC,GAAG,eAAe,KAAK,OAAO,EAAE,CAAC;IAEvC,OAAO,uBAAuB,CAAC,WAAW,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;AACpE,CAAC"}
1
+ {"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,gCAAgC,GACpC,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa;IACtC,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,MAAM,CAAC;AAE7C,MAAM,UAAU,uBAAuB,CACrC,OAAe,EACf,GAAW,EACX,IAAY;IAEZ,MAAM,iBAAiB,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IAEnE,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAAE,CAAC;QACpE,iBAAiB;QACjB,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CACzB,WAAmB,EACnB,KAAY,EACZ,QAAiB;IAEjB,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAE3E,IAAI,gCAAgC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QACpD,OAAO,GAAG,OAAO,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;IACtC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAc,EACd,GAAW,EACX,eAAe,GAAG,kBAAkB;IAEpC,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QACzD,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC;IAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC;QAC1E,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,uBAAuB,CAC5B,GAAG,eAAe,iBAAiB,EACnC,GAAG,EACH,eAAe,CAChB,CAAC;AACJ,CAAC"}
@@ -1,12 +1,3 @@
1
- /**
2
- * Check if an IP address is in a blocked private range
3
- */
4
- export declare function isBlockedIp(ip: string): boolean;
5
- /**
6
- * Validate resolved IP addresses to prevent DNS rebinding attacks.
7
- * This should be called after DNS resolution to ensure the resolved
8
- * IPs are not in blocked private ranges.
9
- */
10
1
  export declare function validateResolvedIps(hostname: string): Promise<void>;
11
2
  export declare function validateAndNormalizeUrl(urlString: string): string;
12
3
  export declare function isInternalUrl(url: string, baseUrl: string): boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAiCA;;GAEG;AACH,wBAAgB,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAE/C;AAED;;;;GAIG;AACH,wBAAsB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmCzE;AAED,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CA2EjE;AAED,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQnE"}
1
+ {"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAsCA,wBAAsB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA4BzE;AAED,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CA2DjE;AAED,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQnE"}
@@ -1,6 +1,5 @@
1
1
  import dns from 'dns/promises';
2
2
  import { config } from '../config/index.js';
3
- import { UrlValidationError, ValidationError } from '../errors/app-error.js';
4
3
  const BLOCKED_HOSTS = new Set([
5
4
  'localhost',
6
5
  '127.0.0.1',
@@ -29,87 +28,69 @@ const BLOCKED_IP_PATTERNS = [
29
28
  /**
30
29
  * Check if an IP address is in a blocked private range
31
30
  */
32
- export function isBlockedIp(ip) {
31
+ function isBlockedIp(ip) {
33
32
  return BLOCKED_IP_PATTERNS.some((pattern) => pattern.test(ip));
34
33
  }
35
- /**
36
- * Validate resolved IP addresses to prevent DNS rebinding attacks.
37
- * This should be called after DNS resolution to ensure the resolved
38
- * IPs are not in blocked private ranges.
39
- */
40
34
  export async function validateResolvedIps(hostname) {
41
- // Skip validation for direct IP addresses (already validated in validateAndNormalizeUrl)
42
35
  if (/^[\d.]+$/.test(hostname) || hostname.includes(':')) {
43
36
  return;
44
37
  }
45
38
  try {
46
- // Resolve IPv4 addresses
47
39
  const ipv4Addresses = await dns.resolve4(hostname).catch(() => []);
48
40
  for (const ip of ipv4Addresses) {
49
41
  if (isBlockedIp(ip) || BLOCKED_HOSTS.has(ip)) {
50
- throw new UrlValidationError(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`, hostname);
42
+ throw new Error(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`);
51
43
  }
52
44
  }
53
- // Resolve IPv6 addresses
54
45
  const ipv6Addresses = await dns.resolve6(hostname).catch(() => []);
55
46
  for (const ip of ipv6Addresses) {
56
47
  if (isBlockedIp(ip) || BLOCKED_HOSTS.has(ip)) {
57
- throw new UrlValidationError(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`, hostname);
48
+ throw new Error(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`);
58
49
  }
59
50
  }
60
51
  }
61
52
  catch (error) {
62
- // Re-throw UrlValidationError, ignore DNS resolution errors
63
- if (error instanceof UrlValidationError) {
53
+ if (error instanceof Error && error.message.includes('DNS rebinding')) {
64
54
  throw error;
65
55
  }
66
- // DNS resolution failed - let the actual request handle the error
67
56
  }
68
57
  }
69
58
  export function validateAndNormalizeUrl(urlString) {
70
- // Check for empty or whitespace-only input
71
59
  if (!urlString || typeof urlString !== 'string') {
72
- throw new ValidationError('URL is required');
60
+ throw new Error('URL is required');
73
61
  }
74
62
  const trimmedUrl = urlString.trim();
75
63
  if (!trimmedUrl) {
76
- throw new ValidationError('URL cannot be empty');
64
+ throw new Error('URL cannot be empty');
77
65
  }
78
- // Check URL length to prevent DoS
79
66
  if (trimmedUrl.length > config.constants.maxUrlLength) {
80
- throw new ValidationError(`URL exceeds maximum length of ${config.constants.maxUrlLength} characters`, { length: trimmedUrl.length, maxLength: config.constants.maxUrlLength });
67
+ throw new Error(`URL exceeds maximum length of ${config.constants.maxUrlLength} characters`);
81
68
  }
82
69
  let url;
83
70
  try {
84
71
  url = new URL(trimmedUrl);
85
72
  }
86
73
  catch {
87
- throw new UrlValidationError(`Invalid URL format`, trimmedUrl);
74
+ throw new Error('Invalid URL format');
88
75
  }
89
- // Only allow HTTP(S) protocols
90
76
  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
91
- throw new UrlValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`, trimmedUrl);
77
+ throw new Error(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
92
78
  }
93
- // Block URLs with credentials (user:pass@host)
94
79
  if (url.username || url.password) {
95
- throw new UrlValidationError('URLs with embedded credentials are not allowed', trimmedUrl);
80
+ throw new Error('URLs with embedded credentials are not allowed');
96
81
  }
97
82
  const hostname = url.hostname.toLowerCase();
98
- // Block empty hostname
99
83
  if (!hostname) {
100
- throw new UrlValidationError('URL must have a valid hostname', trimmedUrl);
84
+ throw new Error('URL must have a valid hostname');
101
85
  }
102
- // Block known internal/metadata hosts
103
86
  if (BLOCKED_HOSTS.has(hostname)) {
104
- throw new UrlValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`, trimmedUrl);
87
+ throw new Error(`Blocked host: ${hostname}. Internal hosts are not allowed`);
105
88
  }
106
- // Block private IP ranges
107
89
  if (isBlockedIp(hostname)) {
108
- throw new UrlValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`, trimmedUrl);
90
+ throw new Error(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
109
91
  }
110
- // Block hostnames that look like they might resolve to internal addresses
111
92
  if (hostname.endsWith('.local') || hostname.endsWith('.internal')) {
112
- throw new UrlValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`, trimmedUrl);
93
+ throw new Error(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
113
94
  }
114
95
  return url.href;
115
96
  }