@j0hanz/superfetch 1.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +200 -36
- package/dist/config/index.d.ts +10 -5
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +41 -17
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +98 -57
- package/dist/config/types.d.ts.map +1 -1
- package/dist/errors/app-error.d.ts +4 -28
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +10 -51
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +31 -46
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +2 -2
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +12 -14
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +31 -14
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/parsers/base-html-element-parser.d.ts +43 -0
- package/dist/parsers/base-html-element-parser.d.ts.map +1 -0
- package/dist/parsers/base-html-element-parser.js +59 -0
- package/dist/parsers/base-html-element-parser.js.map +1 -0
- package/dist/parsers/heading-element-parser.d.ts +14 -0
- package/dist/parsers/heading-element-parser.d.ts.map +1 -0
- package/dist/parsers/heading-element-parser.js +26 -0
- package/dist/parsers/heading-element-parser.js.map +1 -0
- package/dist/parsers/image-element-parser.d.ts +16 -0
- package/dist/parsers/image-element-parser.d.ts.map +1 -0
- package/dist/parsers/image-element-parser.js +33 -0
- package/dist/parsers/image-element-parser.js.map +1 -0
- package/dist/parsers/link-element-parser.d.ts +15 -0
- package/dist/parsers/link-element-parser.d.ts.map +1 -0
- package/dist/parsers/link-element-parser.js +28 -0
- package/dist/parsers/link-element-parser.js.map +1 -0
- package/dist/parsers/open-graph-parser.d.ts +17 -0
- package/dist/parsers/open-graph-parser.d.ts.map +1 -0
- package/dist/parsers/open-graph-parser.js +41 -0
- package/dist/parsers/open-graph-parser.js.map +1 -0
- package/dist/parsers/schema-org-parser.d.ts +17 -0
- package/dist/parsers/schema-org-parser.d.ts.map +1 -0
- package/dist/parsers/schema-org-parser.js +32 -0
- package/dist/parsers/schema-org-parser.js.map +1 -0
- package/dist/parsers/standard-meta-parser.d.ts +18 -0
- package/dist/parsers/standard-meta-parser.d.ts.map +1 -0
- package/dist/parsers/standard-meta-parser.js +32 -0
- package/dist/parsers/standard-meta-parser.js.map +1 -0
- package/dist/parsers/twitter-card-parser.d.ts +17 -0
- package/dist/parsers/twitter-card-parser.d.ts.map +1 -0
- package/dist/parsers/twitter-card-parser.js +41 -0
- package/dist/parsers/twitter-card-parser.js.map +1 -0
- package/dist/resources/cached-content.d.ts +0 -2
- package/dist/resources/cached-content.d.ts.map +1 -1
- package/dist/resources/cached-content.js +3 -34
- package/dist/resources/cached-content.js.map +1 -1
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +8 -8
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +12 -11
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +0 -28
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +10 -166
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +0 -4
- package/dist/services/card-extractor.d.ts.map +1 -1
- package/dist/services/card-extractor.js +6 -1
- package/dist/services/card-extractor.js.map +1 -1
- package/dist/services/extractor.d.ts +1 -11
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +86 -84
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +2 -13
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +79 -79
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts +5 -4
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +27 -42
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +35 -27
- package/dist/services/parser.js.map +1 -1
- package/dist/services/session-manager.d.ts +18 -0
- package/dist/services/session-manager.d.ts.map +1 -0
- package/dist/services/session-manager.js +73 -0
- package/dist/services/session-manager.js.map +1 -0
- package/dist/strategies/exponential-backoff-strategy.d.ts +13 -0
- package/dist/strategies/exponential-backoff-strategy.d.ts.map +1 -0
- package/dist/strategies/exponential-backoff-strategy.js +32 -0
- package/dist/strategies/exponential-backoff-strategy.js.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +3 -0
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -2
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +23 -33
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +15 -20
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.js +141 -108
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +0 -4
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +6 -7
- package/dist/tools/utils/common.d.ts.map +1 -1
- package/dist/tools/utils/common.js +8 -8
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/fetch-pipeline.d.ts +8 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +60 -63
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +15 -10
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +58 -62
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/concurrency.d.ts +2 -5
- package/dist/utils/concurrency.d.ts.map +1 -1
- package/dist/utils/concurrency.js +19 -19
- package/dist/utils/concurrency.js.map +1 -1
- package/dist/utils/content-cleaner.d.ts +0 -25
- package/dist/utils/content-cleaner.d.ts.map +1 -1
- package/dist/utils/content-cleaner.js +14 -171
- package/dist/utils/content-cleaner.js.map +1 -1
- package/dist/utils/html-truncator.d.ts +2 -0
- package/dist/utils/html-truncator.d.ts.map +1 -0
- package/dist/utils/html-truncator.js +14 -0
- package/dist/utils/html-truncator.js.map +1 -0
- package/dist/utils/language-detector.d.ts +0 -3
- package/dist/utils/language-detector.d.ts.map +1 -1
- package/dist/utils/language-detector.js +0 -11
- package/dist/utils/language-detector.js.map +1 -1
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +7 -5
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +16 -41
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +1 -0
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +42 -23
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +9 -8
|
@@ -1,190 +1,51 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
* that slip through Readability extraction.
|
|
4
|
-
*/
|
|
5
|
-
// Pre-compiled combined pattern for optimal performance
|
|
6
|
-
const NOISE_PATTERN_COMBINED = new RegExp([
|
|
7
|
-
// Relative timestamps
|
|
8
|
-
'^\\d+\\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\\s*ago$',
|
|
9
|
-
'^(just now|recently|today|yesterday|last week|last month)$',
|
|
10
|
-
'^(updated|modified|edited|created|published)\\s*:?\\s*\\d+\\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\\s*ago$',
|
|
11
|
-
'^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\s+\\d{1,2},?\\s+\\d{4}$',
|
|
12
|
-
'^\\d{1,2}\\s+(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\s+\\d{4}$',
|
|
13
|
-
'^\\d{4}-\\d{2}-\\d{2}$',
|
|
14
|
-
'^last\\s+updated\\s*:?',
|
|
15
|
-
// Share/action buttons
|
|
16
|
-
'^(share|copy|like|follow|subscribe|download|print|save|bookmark|tweet|pin it|email|export)$',
|
|
1
|
+
const NOISE_PATTERN = new RegExp([
|
|
2
|
+
'^(share|copy|like|follow|subscribe|download|print|save)$',
|
|
17
3
|
'^(copy to clipboard|copied!?|copy code|copy link)$',
|
|
18
|
-
'^(
|
|
19
|
-
|
|
20
|
-
'^(
|
|
21
|
-
'^(toggle|switch|enable|disable|on|off)$',
|
|
22
|
-
'^(edit|delete|remove|add|new|create|update|cancel|confirm|submit|reset|clear)$',
|
|
23
|
-
'^(open in|view in|edit in)\\s+\\w+$',
|
|
24
|
-
'^(try it|run|execute|play|preview|demo|live demo|playground)$',
|
|
25
|
-
'^(source|view source|edit this page|edit on github|improve this doc)$',
|
|
26
|
-
// Empty/placeholder
|
|
4
|
+
'^(show more|show less|load more|view more|read more|see all|view all)$',
|
|
5
|
+
'^(next|previous|prev|back|forward|home|menu|close|skip to)$',
|
|
6
|
+
'^(table of contents|toc|on this page)$',
|
|
27
7
|
'^(loading\\.{0,3}|please wait\\.{0,3}|\\.{2,})$',
|
|
28
|
-
'^(n\\/a|tbd|todo|coming soon|placeholder
|
|
29
|
-
|
|
30
|
-
'^(next|previous|prev|back|forward|home|menu|close|open|skip to|jump to|go to)$',
|
|
31
|
-
'^(table of contents|toc|contents|on this page|in this article|in this section)$',
|
|
32
|
-
'^(scroll to top|back to top|top)$',
|
|
33
|
-
// Cookie/consent
|
|
34
|
-
'^(accept|reject|accept all|reject all|cookie settings|privacy settings|manage preferences)$',
|
|
35
|
-
'^(accept cookies|decline cookies|cookie policy|privacy policy|terms of service|terms & conditions)$',
|
|
36
|
-
// Counts
|
|
37
|
-
'^\\d+\\s*(comments?|replies?|reactions?|responses?)$',
|
|
38
|
-
'^\\d+\\s*(likes?|shares?|views?|followers?|retweets?|stars?|forks?|claps?|upvotes?|downvotes?)$',
|
|
39
|
-
'^(liked by|shared by|followed by)\\s+\\d+',
|
|
40
|
-
// Version badges
|
|
41
|
-
'^v?\\d+\\.\\d+(\\.\\d+)?(-\\w+)?$',
|
|
42
|
-
'^(stable|beta|alpha|rc|preview|experimental|deprecated|legacy|new|updated)$',
|
|
43
|
-
// Structural
|
|
44
|
-
'^(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)$',
|
|
45
|
-
'^panel\\s*[a-z]?$',
|
|
46
|
-
// API artifacts
|
|
47
|
-
'^(required|optional|default|type|example|description|parameters?|returns?|response|request)$',
|
|
48
|
-
'^(get|post|put|patch|delete|head|options)\\s*$',
|
|
49
|
-
// Interactive
|
|
50
|
-
'^(drag|drop|resize|zoom|scroll|swipe|tap|click|hover|focus)(\\s+to\\s+\\w+)?$',
|
|
51
|
-
'^(drag the|move the|resize the|drag to|click to)\\s+\\w+',
|
|
52
|
-
// Breadcrumbs
|
|
8
|
+
'^(n\\/a|tbd|todo|coming soon|placeholder)$',
|
|
9
|
+
'^\\d+\\s*(comments?|replies?|likes?|shares?|views?)$',
|
|
53
10
|
'^[/\\\\>→»›]+$',
|
|
54
|
-
// Ads
|
|
55
|
-
'^(ad|advertisement|sponsored|promoted|partner content)$',
|
|
56
11
|
].join('|'), 'i');
|
|
57
|
-
// Pre-compiled pattern for short text noise
|
|
58
|
-
const SHORT_TEXT_NOISE_PATTERN = new RegExp([
|
|
59
|
-
'^#\\w+$',
|
|
60
|
-
'^@\\w+$',
|
|
61
|
-
'^\\d+$',
|
|
62
|
-
'^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$',
|
|
63
|
-
'^[,;:\\-–—]+$',
|
|
64
|
-
'^\\[\\d+\\]$',
|
|
65
|
-
'^\\(\\d+\\)$',
|
|
66
|
-
'^fig\\.?\\s*\\d+$',
|
|
67
|
-
'^table\\s*\\d+$',
|
|
68
|
-
'^step\\s*\\d+$',
|
|
69
|
-
'^note:?$',
|
|
70
|
-
'^tip:?$',
|
|
71
|
-
'^warning:?$',
|
|
72
|
-
'^info:?$',
|
|
73
|
-
'^caution:?$',
|
|
74
|
-
].join('|'), 'i');
|
|
75
|
-
// Pre-compiled pattern for UI chrome detection
|
|
76
|
-
const UI_CHROME_PATTERN = new RegExp([
|
|
77
|
-
'^(sign in|sign up|log in|log out|register|create account)$',
|
|
78
|
-
'^(search|search\\.\\.\\.|search docs|search documentation)$',
|
|
79
|
-
'^(dark mode|light mode|theme|language|locale)$',
|
|
80
|
-
'^(feedback|report issue|report a bug|file an issue|suggest edit)$',
|
|
81
|
-
'^(documentation|docs|api|reference|guide|tutorial|examples?)$',
|
|
82
|
-
"^(version|changelog|release notes|what's new)$",
|
|
83
|
-
].join('|'), 'i');
|
|
84
|
-
// Minimum lengths for different content types
|
|
85
12
|
const MIN_PARAGRAPH_LENGTH = 20;
|
|
86
13
|
const MIN_HEADING_LENGTH = 2;
|
|
87
14
|
const MIN_LIST_ITEM_LENGTH = 3;
|
|
88
|
-
const
|
|
89
|
-
/**
|
|
90
|
-
* Check if text matches any noise pattern
|
|
91
|
-
*/
|
|
15
|
+
const MAX_REGEX_INPUT_LENGTH = 500;
|
|
92
16
|
function isNoiseText(text) {
|
|
93
17
|
const trimmed = text.trim();
|
|
94
|
-
|
|
95
|
-
if (!trimmed) {
|
|
96
|
-
return true;
|
|
97
|
-
}
|
|
98
|
-
// Check combined noise pattern (single regex test)
|
|
99
|
-
if (NOISE_PATTERN_COMBINED.test(trimmed)) {
|
|
18
|
+
if (!trimmed)
|
|
100
19
|
return true;
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if (SHORT_TEXT_NOISE_PATTERN.test(trimmed)) {
|
|
105
|
-
return true;
|
|
106
|
-
}
|
|
107
|
-
// Also check UI chrome patterns for short text
|
|
108
|
-
if (UI_CHROME_PATTERN.test(trimmed)) {
|
|
109
|
-
return true;
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
return false;
|
|
20
|
+
if (trimmed.length > MAX_REGEX_INPUT_LENGTH)
|
|
21
|
+
return false;
|
|
22
|
+
return NOISE_PATTERN.test(trimmed);
|
|
113
23
|
}
|
|
114
|
-
// Pre-compiled placeholder pattern (combined for performance)
|
|
115
|
-
const PLACEHOLDER_PATTERN = /^(lorem ipsum|sample text|placeholder|example (text|content|data)|test (text|content|data)|your (text|content|name|email) here|enter (your|a) |type (your|a|something) )/i;
|
|
116
|
-
// Cache for placeholder checks to avoid repeated regex tests
|
|
117
|
-
const PLACEHOLDER_CACHE = new Map();
|
|
118
|
-
const PLACEHOLDER_CACHE_MAX_SIZE = 1000;
|
|
119
|
-
/**
|
|
120
|
-
* Check if text looks like placeholder/demo content
|
|
121
|
-
* Uses caching for 3-8x performance improvement on repeated patterns
|
|
122
|
-
*/
|
|
123
|
-
function isPlaceholderContent(text) {
|
|
124
|
-
const trimmed = text.trim().toLowerCase();
|
|
125
|
-
// Check cache first
|
|
126
|
-
const cached = PLACEHOLDER_CACHE.get(trimmed);
|
|
127
|
-
if (cached !== undefined) {
|
|
128
|
-
return cached;
|
|
129
|
-
}
|
|
130
|
-
// Single regex test (faster than array iteration)
|
|
131
|
-
const result = PLACEHOLDER_PATTERN.test(trimmed);
|
|
132
|
-
// Cache result with LRU eviction
|
|
133
|
-
if (PLACEHOLDER_CACHE.size >= PLACEHOLDER_CACHE_MAX_SIZE) {
|
|
134
|
-
const firstKey = PLACEHOLDER_CACHE.keys().next().value;
|
|
135
|
-
if (firstKey !== undefined) {
|
|
136
|
-
PLACEHOLDER_CACHE.delete(firstKey);
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
PLACEHOLDER_CACHE.set(trimmed, result);
|
|
140
|
-
return result;
|
|
141
|
-
}
|
|
142
|
-
/**
|
|
143
|
-
* Clean paragraph text by removing noise
|
|
144
|
-
*/
|
|
145
24
|
export function cleanParagraph(text) {
|
|
146
25
|
const trimmed = text.trim();
|
|
147
|
-
// Too short to be meaningful
|
|
148
26
|
if (trimmed.length < MIN_PARAGRAPH_LENGTH) {
|
|
149
|
-
// Allow very short paragraphs if they end with punctuation (likely real content)
|
|
150
27
|
if (!/[.!?]$/.test(trimmed)) {
|
|
151
28
|
return null;
|
|
152
29
|
}
|
|
153
30
|
}
|
|
154
|
-
// Is noise content
|
|
155
31
|
if (isNoiseText(trimmed)) {
|
|
156
32
|
return null;
|
|
157
33
|
}
|
|
158
|
-
// Is placeholder content (in paragraphs, not in examples)
|
|
159
|
-
if (isPlaceholderContent(trimmed)) {
|
|
160
|
-
return null;
|
|
161
|
-
}
|
|
162
34
|
return trimmed;
|
|
163
35
|
}
|
|
164
|
-
/**
|
|
165
|
-
* Clean heading text by removing noise and markdown link syntax
|
|
166
|
-
*/
|
|
167
36
|
export function cleanHeading(text) {
|
|
168
37
|
let cleaned = text.trim();
|
|
169
|
-
// Too short
|
|
170
38
|
if (cleaned.length < MIN_HEADING_LENGTH) {
|
|
171
39
|
return null;
|
|
172
40
|
}
|
|
173
|
-
// Remove markdown link syntax: [Text](#anchor) -> Text
|
|
174
41
|
cleaned = cleaned.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1');
|
|
175
|
-
// Remove trailing anchor links like "Link for this heading"
|
|
176
42
|
cleaned = cleaned.replace(/\s*Link for (this heading|[\w\s]+)\s*$/i, '');
|
|
177
|
-
// Remove trailing hash symbols often used for anchor links
|
|
178
43
|
cleaned = cleaned.replace(/\s*#+\s*$/, '');
|
|
179
|
-
// Is noise content
|
|
180
44
|
if (isNoiseText(cleaned)) {
|
|
181
45
|
return null;
|
|
182
46
|
}
|
|
183
47
|
return cleaned.trim();
|
|
184
48
|
}
|
|
185
|
-
/**
|
|
186
|
-
* Clean list items by filtering out noise
|
|
187
|
-
*/
|
|
188
49
|
export function cleanListItems(items) {
|
|
189
50
|
return items
|
|
190
51
|
.map((item) => item.trim())
|
|
@@ -196,43 +57,25 @@ export function cleanListItems(items) {
|
|
|
196
57
|
return true;
|
|
197
58
|
});
|
|
198
59
|
}
|
|
199
|
-
/**
|
|
200
|
-
* Clean code block text - minimal cleaning to preserve code integrity
|
|
201
|
-
*/
|
|
202
60
|
export function cleanCodeBlock(code) {
|
|
203
61
|
const trimmed = code.trim();
|
|
204
|
-
// Empty code block
|
|
205
62
|
if (trimmed.length === 0) {
|
|
206
63
|
return null;
|
|
207
64
|
}
|
|
208
|
-
// Very short code blocks that are likely just labels
|
|
209
65
|
if (trimmed.length < 3 && !/^[{}[\]();<>]$/.test(trimmed)) {
|
|
210
66
|
return null;
|
|
211
67
|
}
|
|
212
68
|
return trimmed;
|
|
213
69
|
}
|
|
214
|
-
/**
|
|
215
|
-
* Strip markdown link syntax from text for cleaner slugs/display
|
|
216
|
-
* [Text](#anchor) -> Text
|
|
217
|
-
* [Text](url) -> Text
|
|
218
|
-
*/
|
|
219
70
|
export function stripMarkdownLinks(text) {
|
|
220
71
|
return text.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1');
|
|
221
72
|
}
|
|
222
|
-
/**
|
|
223
|
-
* Remove common timestamp patterns from text (inline removal)
|
|
224
|
-
* Use when you want to strip timestamps from within longer content
|
|
225
|
-
*/
|
|
226
73
|
export function removeInlineTimestamps(text) {
|
|
227
|
-
return
|
|
228
|
-
// Remove "X days/hours/etc ago" patterns
|
|
74
|
+
return text
|
|
229
75
|
.replace(/\b\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
|
|
230
|
-
// Remove "Updated: date" patterns
|
|
231
76
|
.replace(/\b(updated|modified|edited|created|published)\s*:?\s*\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
|
|
232
|
-
// Remove standalone dates
|
|
233
77
|
.replace(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\s+\d{1,2},?\s+\d{4}\b/gi, '')
|
|
234
|
-
// Clean up extra whitespace
|
|
235
78
|
.replace(/\s{2,}/g, ' ')
|
|
236
|
-
.trim()
|
|
79
|
+
.trim();
|
|
237
80
|
}
|
|
238
81
|
//# sourceMappingURL=content-cleaner.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content-cleaner.js","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"content-cleaner.js","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA,MAAM,aAAa,GAAG,IAAI,MAAM,CAC9B;IACE,0DAA0D;IAC1D,oDAAoD;IACpD,wEAAwE;IACxE,6DAA6D;IAC7D,wCAAwC;IACxC,iDAAiD;IACjD,4CAA4C;IAC5C,sDAAsD;IACtD,gBAAgB;CACjB,CAAC,IAAI,CAAC,GAAG,CAAC,EACX,GAAG,CACJ,CAAC;AAEF,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAEnC,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,MAAM,GAAG,sBAAsB;QAAE,OAAO,KAAK,CAAC;IAC1D,OAAO,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE1B,IAAI,OAAO,CAAC,MAAM,GAAG,kBAAkB,EAAE,CAAC;QACxC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;IAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,yCAAyC,EAAE,EAAE,CAAC,CAAC;IACzE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAE3C,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAe;IAC5C,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACf,IAAI,IAAI,CAAC,MAAM,GAAG,oBAAoB;YAAE,OAAO,KAAK,CAAC;QACrD,IAAI,WAAW,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACP,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,OAAO,IAAI;SACR,OAAO,CACN,0EAA0E,EAC1E,EAAE,CACH;SACA,OAAO,CACN,6HAA6H,EAC7H,EAAE,CACH;SACA,OAAO,CACN,6EAA6E,EAC7E,EAAE,CACH;SACA,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,IAAI,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-truncator.d.ts","sourceRoot":"","sources":["../../src/utils/html-truncator.ts"],"names":[],"mappings":"AAIA,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAajD"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { config } from '../config/index.js';
|
|
2
|
+
import { logWarn } from '../services/logger.js';
|
|
3
|
+
export function truncateHtml(html) {
|
|
4
|
+
const maxSize = config.constants.maxHtmlSize;
|
|
5
|
+
if (html.length <= maxSize) {
|
|
6
|
+
return html;
|
|
7
|
+
}
|
|
8
|
+
logWarn('HTML content exceeds maximum size, truncating', {
|
|
9
|
+
size: html.length,
|
|
10
|
+
maxSize,
|
|
11
|
+
});
|
|
12
|
+
return html.substring(0, maxSize);
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=html-truncator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-truncator.js","sourceRoot":"","sources":["../../src/utils/html-truncator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAEhD,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC;IAE7C,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,CAAC,+CAA+C,EAAE;QACvD,IAAI,EAAE,IAAI,CAAC,MAAM;QACjB,OAAO;KACR,CAAC,CAAC;IAEH,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACpC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"language-detector.d.ts","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"language-detector.d.ts","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAoCA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAE/D"}
|
|
@@ -1,21 +1,13 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Language detection patterns for code blocks
|
|
3
|
-
* Shared between parser and markdown transformer
|
|
4
|
-
*/
|
|
5
1
|
const LANGUAGE_PATTERNS = [
|
|
6
|
-
// JSX/TSX patterns
|
|
7
2
|
[
|
|
8
3
|
/^\s*import\s+.*\s+from\s+['"]react['"]|<[A-Z][a-zA-Z]*[\s/>]|jsx\s*:|className=/m,
|
|
9
4
|
'jsx',
|
|
10
5
|
],
|
|
11
|
-
// TypeScript patterns
|
|
12
6
|
[
|
|
13
7
|
/:\s*(string|number|boolean|void|any|unknown|never)\b|interface\s+\w+|type\s+\w+\s*=/m,
|
|
14
8
|
'typescript',
|
|
15
9
|
],
|
|
16
|
-
// Rust patterns
|
|
17
10
|
[/^\s*(fn|let\s+mut|impl|struct|enum|use\s+\w+::)/m, 'rust'],
|
|
18
|
-
// JavaScript patterns (generic)
|
|
19
11
|
[
|
|
20
12
|
/^\s*(export|const|let|var|function|class|async|await)\b|^\s*import\s+.*['"]/m,
|
|
21
13
|
'javascript',
|
|
@@ -41,9 +33,6 @@ const LANGUAGE_PATTERNS = [
|
|
|
41
33
|
// Go patterns
|
|
42
34
|
[/^\s*(func|package|import\s+")/m, 'go'],
|
|
43
35
|
];
|
|
44
|
-
/**
|
|
45
|
-
* Detect programming language from code content
|
|
46
|
-
*/
|
|
47
36
|
export function detectLanguage(code) {
|
|
48
37
|
return LANGUAGE_PATTERNS.find(([pattern]) => pattern.test(code))?.[1];
|
|
49
38
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"language-detector.js","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"language-detector.js","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAAA,MAAM,iBAAiB,GAAG;IACxB;QACE,kFAAkF;QAClF,KAAK;KACN;IACD;QACE,sFAAsF;QACtF,YAAY;KACb;IACD,CAAC,kDAAkD,EAAE,MAAM,CAAC;IAC5D;QACE,8EAA8E;QAC9E,YAAY;KACb;IACD,kBAAkB;IAClB,CAAC,kDAAkD,EAAE,QAAQ,CAAC;IAC9D,sBAAsB;IACtB;QACE,iFAAiF;QACjF,MAAM;KACP;IACD,CAAC,iEAAiE,EAAE,MAAM,CAAC;IAC3E,eAAe;IACf,CAAC,yDAAyD,EAAE,KAAK,CAAC;IAClE,gBAAgB;IAChB,CAAC,8DAA8D,EAAE,MAAM,CAAC;IACxE,gBAAgB;IAChB,CAAC,6CAA6C,EAAE,MAAM,CAAC;IACvD,gBAAgB;IAChB,CAAC,oBAAoB,EAAE,MAAM,CAAC;IAC9B,eAAe;IACf,CAAC,0DAA0D,EAAE,KAAK,CAAC;IACnE,cAAc;IACd,CAAC,gCAAgC,EAAE,IAAI,CAAC;CAChC,CAAC;AAEX,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACxE,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAIA,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,CAKpE;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAWpE"}
|
package/dist/utils/sanitizer.js
CHANGED
|
@@ -1,19 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
const
|
|
1
|
+
const CONSECUTIVE_WHITESPACE = /\s+/g;
|
|
2
|
+
const MIN_TRUNCATION_LENGTH = 4;
|
|
3
|
+
const TRUNCATION_SUFFIX = '...';
|
|
3
4
|
export function sanitizeText(text) {
|
|
4
5
|
if (text == null)
|
|
5
6
|
return '';
|
|
6
7
|
if (typeof text !== 'string')
|
|
7
8
|
return String(text);
|
|
8
|
-
return text.replace(
|
|
9
|
+
return text.replace(CONSECUTIVE_WHITESPACE, ' ').trim();
|
|
9
10
|
}
|
|
10
11
|
export function truncateText(text, maxLength) {
|
|
11
|
-
if (maxLength <
|
|
12
|
+
if (maxLength < MIN_TRUNCATION_LENGTH) {
|
|
12
13
|
return text.length > 0 ? text.charAt(0) : '';
|
|
13
14
|
}
|
|
14
15
|
if (text.length <= maxLength) {
|
|
15
16
|
return text;
|
|
16
17
|
}
|
|
17
|
-
|
|
18
|
+
const truncationPoint = maxLength - TRUNCATION_SUFFIX.length;
|
|
19
|
+
return `${text.substring(0, truncationPoint)}${TRUNCATION_SUFFIX}`;
|
|
18
20
|
}
|
|
19
21
|
//# sourceMappingURL=sanitizer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA,MAAM,sBAAsB,GAAG,MAAM,CAAC;AACtC,MAAM,qBAAqB,GAAG,CAAC,CAAC;AAChC,MAAM,iBAAiB,GAAG,KAAK,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,IAA+B;IAC1D,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,EAAE,CAAC;IAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IAElD,OAAO,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,SAAS,GAAG,qBAAqB,EAAE,CAAC;QACtC,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,eAAe,GAAG,SAAS,GAAG,iBAAiB,CAAC,MAAM,CAAC;IAC7D,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,eAAe,CAAC,GAAG,iBAAiB,EAAE,CAAC;AACrE,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAQ5D,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACX,iBAAiB,CAQnB;AAgBD,wBAAgB,eAAe,CAC7B,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,MAAM,EACX,eAAe,SAAqB,GACnC,iBAAiB,CAgBnB"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
const
|
|
1
|
+
import { FetchError } from '../errors/app-error.js';
|
|
2
|
+
const IS_DEVELOPMENT_WITH_STACK_TRACES = process.env.NODE_ENV === 'development' &&
|
|
3
|
+
process.env.EXPOSE_STACK_TRACES === 'true';
|
|
3
4
|
export function createToolErrorResponse(message, url, code) {
|
|
4
5
|
const structuredContent = { error: message, url, errorCode: code };
|
|
5
6
|
return {
|
|
@@ -8,48 +9,22 @@ export function createToolErrorResponse(message, url, code) {
|
|
|
8
9
|
isError: true,
|
|
9
10
|
};
|
|
10
11
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
: error.message;
|
|
16
|
-
return createToolErrorResponse(message, url, 'INVALID_URL');
|
|
17
|
-
}
|
|
18
|
-
if (error instanceof AbortError) {
|
|
19
|
-
const message = isDevelopment
|
|
20
|
-
? `Request aborted${error.reason ? `: ${error.reason}` : ''}\n${error.stack ?? ''}`
|
|
21
|
-
: `Request aborted${error.reason ? `: ${error.reason}` : ''}`;
|
|
22
|
-
return createToolErrorResponse(message, url, 'ABORTED');
|
|
23
|
-
}
|
|
24
|
-
if (error instanceof TimeoutError) {
|
|
25
|
-
const message = isDevelopment
|
|
26
|
-
? `Request timed out after ${error.timeoutMs}ms\n${error.stack ?? ''}`
|
|
27
|
-
: `Request timed out after ${error.timeoutMs}ms`;
|
|
28
|
-
return createToolErrorResponse(message, url, 'TIMEOUT');
|
|
29
|
-
}
|
|
30
|
-
if (error instanceof RateLimitError) {
|
|
31
|
-
const message = isDevelopment
|
|
32
|
-
? `Rate limited. Retry after ${error.retryAfter}s\n${error.stack ?? ''}`
|
|
33
|
-
: `Rate limited. Retry after ${error.retryAfter}s`;
|
|
34
|
-
return createToolErrorResponse(message, url, 'RATE_LIMITED');
|
|
12
|
+
function formatErrorMessage(baseMessage, error, fallback) {
|
|
13
|
+
const message = fallback ? `${fallback}: ${error.message}` : error.message;
|
|
14
|
+
if (IS_DEVELOPMENT_WITH_STACK_TRACES && error.stack) {
|
|
15
|
+
return `${message}\n${error.stack}`;
|
|
35
16
|
}
|
|
17
|
+
return message;
|
|
18
|
+
}
|
|
19
|
+
export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
|
|
36
20
|
if (error instanceof FetchError) {
|
|
37
|
-
const
|
|
38
|
-
const message = isDevelopment
|
|
39
|
-
? `${error.message}\n${error.stack ?? ''}`
|
|
40
|
-
: error.message;
|
|
41
|
-
return createToolErrorResponse(message, url, code);
|
|
42
|
-
}
|
|
43
|
-
if (error instanceof AppError) {
|
|
44
|
-
const message = isDevelopment
|
|
45
|
-
? `${error.message}\n${error.stack ?? ''}`
|
|
46
|
-
: error.message;
|
|
21
|
+
const message = formatErrorMessage(error.message, error);
|
|
47
22
|
return createToolErrorResponse(message, url, error.code);
|
|
48
23
|
}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
return createToolErrorResponse(
|
|
24
|
+
if (error instanceof Error) {
|
|
25
|
+
const message = formatErrorMessage(error.message, error, fallbackMessage);
|
|
26
|
+
return createToolErrorResponse(message, url, 'UNKNOWN_ERROR');
|
|
27
|
+
}
|
|
28
|
+
return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url, 'UNKNOWN_ERROR');
|
|
54
29
|
}
|
|
55
30
|
//# sourceMappingURL=tool-error-handler.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAEA,OAAO,
|
|
1
|
+
{"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,gCAAgC,GACpC,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa;IACtC,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,MAAM,CAAC;AAE7C,MAAM,UAAU,uBAAuB,CACrC,OAAe,EACf,GAAW,EACX,IAAY;IAEZ,MAAM,iBAAiB,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IAEnE,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAAE,CAAC;QACpE,iBAAiB;QACjB,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CACzB,WAAmB,EACnB,KAAY,EACZ,QAAiB;IAEjB,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAE3E,IAAI,gCAAgC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QACpD,OAAO,GAAG,OAAO,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;IACtC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAc,EACd,GAAW,EACX,eAAe,GAAG,kBAAkB;IAEpC,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QACzD,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC;IAED,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC;QAC1E,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,uBAAuB,CAC5B,GAAG,eAAe,iBAAiB,EACnC,GAAG,EACH,eAAe,CAChB,CAAC;AACJ,CAAC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
export declare function validateResolvedIps(hostname: string): Promise<void>;
|
|
1
2
|
export declare function validateAndNormalizeUrl(urlString: string): string;
|
|
2
3
|
export declare function isInternalUrl(url: string, baseUrl: string): boolean;
|
|
3
4
|
//# sourceMappingURL=url-validator.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAsCA,wBAAsB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA4BzE;AAED,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CA2DjE;AAED,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQnE"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
1
|
+
import dns from 'dns/promises';
|
|
2
|
+
import { config } from '../config/index.js';
|
|
3
3
|
const BLOCKED_HOSTS = new Set([
|
|
4
4
|
'localhost',
|
|
5
5
|
'127.0.0.1',
|
|
@@ -25,53 +25,72 @@ const BLOCKED_IP_PATTERNS = [
|
|
|
25
25
|
/^::ffff:172\.(1[6-9]|2\d|3[01])\./,
|
|
26
26
|
/^::ffff:192\.168\./,
|
|
27
27
|
];
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
/**
|
|
29
|
+
* Check if an IP address is in a blocked private range
|
|
30
|
+
*/
|
|
31
|
+
function isBlockedIp(ip) {
|
|
32
|
+
return BLOCKED_IP_PATTERNS.some((pattern) => pattern.test(ip));
|
|
33
|
+
}
|
|
34
|
+
export async function validateResolvedIps(hostname) {
|
|
35
|
+
if (/^[\d.]+$/.test(hostname) || hostname.includes(':')) {
|
|
36
|
+
return;
|
|
37
|
+
}
|
|
38
|
+
try {
|
|
39
|
+
const ipv4Addresses = await dns.resolve4(hostname).catch(() => []);
|
|
40
|
+
for (const ip of ipv4Addresses) {
|
|
41
|
+
if (isBlockedIp(ip) || BLOCKED_HOSTS.has(ip)) {
|
|
42
|
+
throw new Error(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
const ipv6Addresses = await dns.resolve6(hostname).catch(() => []);
|
|
46
|
+
for (const ip of ipv6Addresses) {
|
|
47
|
+
if (isBlockedIp(ip) || BLOCKED_HOSTS.has(ip)) {
|
|
48
|
+
throw new Error(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
catch (error) {
|
|
53
|
+
if (error instanceof Error && error.message.includes('DNS rebinding')) {
|
|
54
|
+
throw error;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
30
57
|
}
|
|
31
58
|
export function validateAndNormalizeUrl(urlString) {
|
|
32
|
-
// Check for empty or whitespace-only input
|
|
33
59
|
if (!urlString || typeof urlString !== 'string') {
|
|
34
|
-
throw new
|
|
60
|
+
throw new Error('URL is required');
|
|
35
61
|
}
|
|
36
62
|
const trimmedUrl = urlString.trim();
|
|
37
63
|
if (!trimmedUrl) {
|
|
38
|
-
throw new
|
|
64
|
+
throw new Error('URL cannot be empty');
|
|
39
65
|
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
throw new ValidationError(`URL exceeds maximum length of ${MAX_URL_LENGTH} characters`, { length: trimmedUrl.length, maxLength: MAX_URL_LENGTH });
|
|
66
|
+
if (trimmedUrl.length > config.constants.maxUrlLength) {
|
|
67
|
+
throw new Error(`URL exceeds maximum length of ${config.constants.maxUrlLength} characters`);
|
|
43
68
|
}
|
|
44
69
|
let url;
|
|
45
70
|
try {
|
|
46
71
|
url = new URL(trimmedUrl);
|
|
47
72
|
}
|
|
48
73
|
catch {
|
|
49
|
-
throw new
|
|
74
|
+
throw new Error('Invalid URL format');
|
|
50
75
|
}
|
|
51
|
-
// Only allow HTTP(S) protocols
|
|
52
76
|
if (url.protocol !== 'http:' && url.protocol !== 'https:') {
|
|
53
|
-
throw new
|
|
77
|
+
throw new Error(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
|
|
54
78
|
}
|
|
55
|
-
// Block URLs with credentials (user:pass@host)
|
|
56
79
|
if (url.username || url.password) {
|
|
57
|
-
throw new
|
|
80
|
+
throw new Error('URLs with embedded credentials are not allowed');
|
|
58
81
|
}
|
|
59
82
|
const hostname = url.hostname.toLowerCase();
|
|
60
|
-
// Block empty hostname
|
|
61
83
|
if (!hostname) {
|
|
62
|
-
throw new
|
|
84
|
+
throw new Error('URL must have a valid hostname');
|
|
63
85
|
}
|
|
64
|
-
// Block known internal/metadata hosts
|
|
65
86
|
if (BLOCKED_HOSTS.has(hostname)) {
|
|
66
|
-
throw new
|
|
87
|
+
throw new Error(`Blocked host: ${hostname}. Internal hosts are not allowed`);
|
|
67
88
|
}
|
|
68
|
-
// Block private IP ranges
|
|
69
89
|
if (isBlockedIp(hostname)) {
|
|
70
|
-
throw new
|
|
90
|
+
throw new Error(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
|
|
71
91
|
}
|
|
72
|
-
// Block hostnames that look like they might resolve to internal addresses
|
|
73
92
|
if (hostname.endsWith('.local') || hostname.endsWith('.internal')) {
|
|
74
|
-
throw new
|
|
93
|
+
throw new Error(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
|
|
75
94
|
}
|
|
76
95
|
return url.href;
|
|
77
96
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"url-validator.js","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"url-validator.js","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAAA,OAAO,GAAG,MAAM,cAAc,CAAC;AAE/B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC;IAC5B,WAAW;IACX,WAAW;IACX,SAAS;IACT,KAAK;IACL,iBAAiB;IACjB,0BAA0B;IAC1B,oBAAoB;IACpB,iBAAiB;IACjB,eAAe;CAChB,CAAC,CAAC;AAEH,MAAM,mBAAmB,GAAsB;IAC7C,OAAO;IACP,4BAA4B;IAC5B,aAAa;IACb,QAAQ;IACR,MAAM;IACN,aAAa;IACb,SAAS;IACT,SAAS;IACT,eAAe;IACf,cAAc;IACd,mCAAmC;IACnC,oBAAoB;CACrB,CAAC;AAEF;;GAEG;AACH,SAAS,WAAW,CAAC,EAAU;IAC7B,OAAO,mBAAmB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,QAAgB;IACxD,IAAI,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACxD,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,MAAM,aAAa,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QACnE,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;YAC/B,IAAI,WAAW,CAAC,EAAE,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7C,MAAM,IAAI,KAAK,CACb,2BAA2B,QAAQ,2BAA2B,EAAE,EAAE,CACnE,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,aAAa,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QACnE,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;YAC/B,IAAI,WAAW,CAAC,EAAE,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7C,MAAM,IAAI,KAAK,CACb,2BAA2B,QAAQ,2BAA2B,EAAE,EAAE,CACnE,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;YACtE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,SAAiB;IACvD,IAAI,CAAC,SAAS,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;IACrC,CAAC;IAED,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACpC,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,UAAU,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,YAAY,EAAE,CAAC;QACtD,MAAM,IAAI,KAAK,CACb,iCAAiC,MAAM,CAAC,SAAS,CAAC,YAAY,aAAa,CAC5E,CAAC;IACJ,CAAC;IAED,IAAI,GAAQ,CAAC;IAEb,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACb,qBAAqB,GAAG,CAAC,QAAQ,qCAAqC,CACvE,CAAC;IACJ,CAAC;IAED,IAAI,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAE5C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CACb,iBAAiB,QAAQ,kCAAkC,CAC5D,CAAC;IACJ,CAAC;IAED,IAAI,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CACb,qBAAqB,QAAQ,+BAA+B,CAC7D,CAAC;IACJ,CAAC;IAED,IAAI,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QAClE,MAAM,IAAI,KAAK,CACb,6BAA6B,QAAQ,4CAA4C,CAClF,CAAC;IACJ,CAAC;IAED,OAAO,GAAG,CAAC,IAAI,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,GAAW,EAAE,OAAe;IACxD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACrC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;QACpC,OAAO,MAAM,CAAC,QAAQ,KAAK,UAAU,CAAC,QAAQ,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
|