@j0hanz/fetch-url-mcp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +570 -0
  2. package/dist/AGENTS.md +115 -0
  3. package/dist/assets/logo.svg +24837 -0
  4. package/dist/cache.d.ts +47 -0
  5. package/dist/cache.js +316 -0
  6. package/dist/cli.d.ts +17 -0
  7. package/dist/cli.js +48 -0
  8. package/dist/config.d.ts +142 -0
  9. package/dist/config.js +480 -0
  10. package/dist/crypto.d.ts +3 -0
  11. package/dist/crypto.js +49 -0
  12. package/dist/dom-noise-removal.d.ts +1 -0
  13. package/dist/dom-noise-removal.js +488 -0
  14. package/dist/errors.d.ts +10 -0
  15. package/dist/errors.js +61 -0
  16. package/dist/fetch.d.ts +42 -0
  17. package/dist/fetch.js +1544 -0
  18. package/dist/host-normalization.d.ts +1 -0
  19. package/dist/host-normalization.js +77 -0
  20. package/dist/http-native.d.ts +5 -0
  21. package/dist/http-native.js +1313 -0
  22. package/dist/index.d.ts +2 -0
  23. package/dist/index.js +91 -0
  24. package/dist/instructions.md +57 -0
  25. package/dist/ip-blocklist.d.ts +8 -0
  26. package/dist/ip-blocklist.js +74 -0
  27. package/dist/json.d.ts +1 -0
  28. package/dist/json.js +34 -0
  29. package/dist/language-detection.d.ts +2 -0
  30. package/dist/language-detection.js +364 -0
  31. package/dist/markdown-cleanup.d.ts +6 -0
  32. package/dist/markdown-cleanup.js +474 -0
  33. package/dist/mcp-validator.d.ts +15 -0
  34. package/dist/mcp-validator.js +44 -0
  35. package/dist/mcp.d.ts +4 -0
  36. package/dist/mcp.js +421 -0
  37. package/dist/observability.d.ts +21 -0
  38. package/dist/observability.js +211 -0
  39. package/dist/prompts.d.ts +7 -0
  40. package/dist/prompts.js +28 -0
  41. package/dist/resources.d.ts +8 -0
  42. package/dist/resources.js +216 -0
  43. package/dist/server-tuning.d.ts +13 -0
  44. package/dist/server-tuning.js +47 -0
  45. package/dist/server.d.ts +4 -0
  46. package/dist/server.js +174 -0
  47. package/dist/session.d.ts +39 -0
  48. package/dist/session.js +218 -0
  49. package/dist/tasks.d.ts +63 -0
  50. package/dist/tasks.js +327 -0
  51. package/dist/timer-utils.d.ts +5 -0
  52. package/dist/timer-utils.js +20 -0
  53. package/dist/tools.d.ts +135 -0
  54. package/dist/tools.js +812 -0
  55. package/dist/transform-types.d.ts +126 -0
  56. package/dist/transform-types.js +5 -0
  57. package/dist/transform.d.ts +36 -0
  58. package/dist/transform.js +2341 -0
  59. package/dist/type-guards.d.ts +14 -0
  60. package/dist/type-guards.js +13 -0
  61. package/dist/workers/transform-child.d.ts +1 -0
  62. package/dist/workers/transform-child.js +136 -0
  63. package/dist/workers/transform-worker.d.ts +1 -0
  64. package/dist/workers/transform-worker.js +128 -0
  65. package/package.json +91 -0
@@ -0,0 +1,488 @@
1
+ import { parseHTML } from 'linkedom';
2
+ import { config } from './config.js';
3
+ import { logDebug } from './observability.js';
4
// --- Constants & Pre-compiled Regex ---

/** Number of characters sampled from each end of oversized input by `mayContainNoise`. */
const NOISE_SCAN_LIMIT = 50_000;

/** `serialize` only returns markup whose trimmed length exceeds this value. */
const MIN_BODY_CONTENT_LENGTH = 100;

/** Dialogs whose text is longer than this are preserved by `shouldPreserve`. */
const DIALOG_MIN_CHARS_FOR_PRESERVATION = 500;

/** `<nav>`/`<footer>` elements with at least this much trimmed text are preserved. */
const NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION = 500;

// Merged markers for fast rejection

/** Matches an opening doctype/html/head/body marker (full document). */
const HTML_DOCUMENT_MARKERS = /<\s*(?:!doctype|html|head|body)\b/i;

/** Matches an opening tag of a common block-level/structural element (fragment). */
const HTML_FRAGMENT_MARKERS = /<\s*(?:article|main|section|div|nav|footer|header|aside|table|ul|ol)\b/i;
12
+ // Split into smaller regexes to stay within sonarjs/regex-complexity limit
13
+ const NOISE_PATTERNS = [
14
+ /<\s*(?:script|style|noscript|iframe|nav|footer|header|form|button|input|select|textarea|svg|canvas)\b/i,
15
+ /[\s"']role\s*=\s*['"]?(?:navigation|banner|complementary|contentinfo|tree|menubar|menu)['"]?/i,
16
+ /[\s"'](?:aria-hidden\s*=\s*['"]?true['"]?|hidden)/i,
17
+ /[\s"'](?:banner|promo|announcement|cta|advert|newsletter|subscribe|cookie|consent|popup|modal|overlay|toast)\b/i,
18
+ /[\s"'](?:fixed|sticky|z-50|z-4|isolate|breadcrumb|pagination)\b/i,
19
+ ];
20
+ const HEADER_NOISE_PATTERN = /\b(site-header|masthead|topbar|navbar|nav(?:bar)?|menu|header-nav)\b/i;
21
+ const FIXED_OR_HIGH_Z_PATTERN = /\b(?:fixed|sticky|z-(?:4\d|50)|isolate)\b/;
22
/**
 * Lower-case prefixes of attribute values that `processUrlElement` leaves
 * untouched instead of resolving against the base URL.
 */
const SKIP_URL_PREFIXES = ['#', 'java' + 'script:', 'mailto:', 'tel:', 'data:', 'blob:'];
30
/** Tags always treated as structural noise; `getContext` may add `svg`/`canvas`. */
const BASE_STRUCTURAL_TAGS = new Set([
    'script', 'style', 'noscript', 'iframe',
    'form', 'button', 'input', 'select', 'textarea',
]);

/** Tags that earn the structural weight in `scoreNavFooter`. */
const ALWAYS_NOISE_TAGS = new Set(['nav', 'footer']);

/** ARIA roles scored as navigation/boilerplate by `scoreNavFooter`. */
const NAVIGATION_ROLES = new Set([
    'navigation', 'banner', 'complementary', 'contentinfo',
    'tree', 'menubar', 'menu',
    'dialog', 'alertdialog', 'search',
]);

/** ARIA roles that make `isInteractive` report an element as interactive. */
const INTERACTIVE_CONTENT_ROLES = new Set([
    'tabpanel', 'tab', 'tablist',
    'dialog', 'alertdialog',
    'menu', 'menuitem',
    'option', 'listbox', 'combobox',
    'tooltip', 'alert',
]);
68
/** Promo tokens that `getPromoMatchers` always puts in the base matcher. */
const PROMO_TOKENS_ALWAYS = [
    'banner', 'promo', 'announcement', 'cta',
    'advert', 'ads', 'sponsor', 'recommend',
    'breadcrumb', 'pagination', 'pager', 'taglist',
];

/** Extra tokens matched only when the configuration enables aggressive mode. */
const PROMO_TOKENS_AGGRESSIVE = ['ad', 'related', 'comment'];

/** Tokens added to the base matcher per enabled noise category. */
const PROMO_TOKENS_BY_CATEGORY = {
    'cookie-banners': ['cookie', 'consent', 'popup', 'modal', 'overlay', 'toast'],
    newsletters: ['newsletter', 'subscribe'],
    'social-share': ['share', 'social'],
};
88
/** CSS selector fragments that `getContext` joins into the base removal selector. */
const BASE_NOISE_SELECTORS = {
    navFooter: 'nav,footer,header[class*="site"],header[class*="nav"],header[class*="menu"],[role="banner"],[role="navigation"]',
    cookieBanners: '[role="dialog"]',
    hidden: '[style*="display: none"],[style*="display:none"],[hidden],[aria-hidden="true"]',
};

/** A regex that can never match; returned by `buildTokenRegex` for an empty token set. */
const NO_MATCH_REGEX = /a^/i;

// --- State Cache ---

/** Context last built by `getContext`; undefined until the first build. */
let cachedContext;

/** The `config.noiseRemoval` object that `cachedContext` was built from. */
let lastConfigRef;
97
+ // --- Helpers Inlined/Optimized ---
98
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded in a pattern and matched literally.
 *
 * @param value Text to escape.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegexLiteral(value) {
    return value.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
101
/**
 * Compiles a set of tokens into one case-insensitive regex that matches any
 * token delimited by start/end of string or a non-alphanumeric character.
 *
 * @param tokens Set of literal tokens; each one is regex-escaped.
 * @returns The compiled matcher, or `NO_MATCH_REGEX` when the set is empty.
 */
function buildTokenRegex(tokens) {
    if (tokens.size === 0) {
        return NO_MATCH_REGEX;
    }
    const alternatives = Array.from(tokens, (token) => escapeRegexLiteral(token)).join('|');
    return new RegExp(`(?:^|[^a-z0-9])(?:${alternatives})(?:$|[^a-z0-9])`, 'i');
}
106
/**
 * Builds the two promo matchers used while scoring elements.
 *
 * @param currentConfig Noise-removal configuration; reads `aggressiveMode` and `extraTokens`.
 * @param flags Enabled-category flags; reads `cookieBanners`, `newsletters`, `socialShare`.
 * @returns `base` (always-on tokens + enabled category tokens + configured extras)
 *          and `aggressive` (only populated in aggressive mode).
 */
function getPromoMatchers(currentConfig, flags) {
    const baseTokens = new Set(PROMO_TOKENS_ALWAYS);
    const aggressiveTokens = new Set(currentConfig.aggressiveMode ? PROMO_TOKENS_AGGRESSIVE : []);

    // Category tokens are appended in this fixed order when their flag is on.
    const categoryToggles = [
        [flags.cookieBanners, 'cookie-banners'],
        [flags.newsletters, 'newsletters'],
        [flags.socialShare, 'social-share'],
    ];
    for (const [isOn, category] of categoryToggles) {
        if (!isOn) {
            continue;
        }
        for (const token of PROMO_TOKENS_BY_CATEGORY[category]) {
            baseTokens.add(token);
        }
    }

    // User-supplied tokens are lower-cased and trimmed; blanks are dropped.
    for (const rawToken of currentConfig.extraTokens) {
        const normalized = rawToken.toLowerCase().trim();
        if (normalized) {
            baseTokens.add(normalized);
        }
    }

    return {
        base: buildTokenRegex(baseTokens),
        aggressive: buildTokenRegex(aggressiveTokens),
    };
}
132
/**
 * Returns the derived noise-removal context (flags, tag set, matchers and
 * pre-joined selectors) for the current `config.noiseRemoval` object.
 *
 * The result is memoised: it is rebuilt only when `config.noiseRemoval` is a
 * different object reference than the one used for the cached context.
 */
function getContext() {
    const currentConfig = config.noiseRemoval;
    const cacheIsFresh = cachedContext && lastConfigRef === currentConfig;
    if (cacheIsFresh) {
        return cachedContext;
    }

    // Normalise category names: lower-case, trim, then locale-lower-case when a locale is configured.
    const normalizeCategory = (category) => {
        const lowered = category.toLowerCase().trim();
        const { locale } = config.i18n;
        return locale ? lowered.toLocaleLowerCase(locale) : lowered;
    };
    const enabled = new Set(currentConfig.enabledCategories.map(normalizeCategory).filter(Boolean));

    const flags = {
        navFooter: enabled.has('nav-footer'),
        cookieBanners: enabled.has('cookie-banners'),
        newsletters: enabled.has('newsletters'),
        socialShare: enabled.has('social-share'),
    };

    // svg/canvas count as structural noise unless the config asks to keep them.
    const structuralTags = new Set(BASE_STRUCTURAL_TAGS);
    if (!currentConfig.preserveSvgCanvas) {
        structuralTags.add('svg');
        structuralTags.add('canvas');
    }

    const promoMatchers = getPromoMatchers(currentConfig, flags);

    const extraSelectors = currentConfig.extraSelectors
        .map((selector) => selector.trim())
        .filter((selector) => selector.length > 0);

    // Pre-build selectors
    const baseSelector = [
        BASE_NOISE_SELECTORS.hidden,
        ...(flags.navFooter ? [BASE_NOISE_SELECTORS.navFooter] : []),
        ...(flags.cookieBanners ? [BASE_NOISE_SELECTORS.cookieBanners] : []),
    ].join(',');

    const candidateParts = [...structuralTags, ...ALWAYS_NOISE_TAGS];
    candidateParts.push('aside', 'header', '[class]', '[id]', '[role]', '[style]');
    const candidateSelector = candidateParts.join(',');

    const promoEnabled = flags.cookieBanners || flags.newsletters || flags.socialShare;

    cachedContext = {
        flags,
        structuralTags,
        weights: currentConfig.weights,
        promoMatchers,
        promoEnabled,
        extraSelectors,
        baseSelector,
        candidateSelector,
    };
    lastConfigRef = currentConfig;
    return cachedContext;
}
190
+ // --- Hot Path Logic ---
191
/**
 * Reports whether an element looks like part of an interactive widget.
 *
 * @param element Element to inspect (read via `getAttribute`/`hasAttribute`).
 * @param role The element's `role` attribute value, or null.
 * @returns true when the role is in `INTERACTIVE_CONTENT_ROLES`, when
 *          `data-state` is `inactive`/`closed`, when `data-orientation` is
 *          `horizontal`/`vertical`, or when it carries `data-accordion-item`
 *          or `data-radix-collection-item`.
 */
function isInteractive(element, role) {
    if (role && INTERACTIVE_CONTENT_ROLES.has(role)) {
        return true;
    }
    const state = element.getAttribute('data-state');
    if (['inactive', 'closed'].includes(state)) {
        return true;
    }
    const orientation = element.getAttribute('data-orientation');
    if (['horizontal', 'vertical'].includes(orientation)) {
        return true;
    }
    if (element.hasAttribute('data-accordion-item')) {
        return true;
    }
    return element.hasAttribute('data-radix-collection-item');
}
203
+ function isWithinPrimaryContent(element) {
204
+ let current = element;
205
+ while (current) {
206
+ const tagName = current.tagName.toLowerCase();
207
+ if (tagName === 'article' || tagName === 'main')
208
+ return true;
209
+ if (current.getAttribute('role') === 'main')
210
+ return true;
211
+ current = current.parentElement;
212
+ }
213
+ return false;
214
+ }
215
/**
 * Decides whether an element that would otherwise be removed should be kept.
 *
 * - Dialogs (`role="dialog"`/`"alertdialog"`) are kept when inside primary
 *   content, when their text is longer than
 *   `DIALOG_MIN_CHARS_FOR_PRESERVATION`, or when they contain a heading.
 * - `<nav>`/`<footer>` are kept when they contain article/main/section
 *   content or at least `NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION` trimmed characters.
 *
 * @param element Element under consideration.
 * @param tagName Lower-cased tag name of `element`.
 * @returns true to keep the element, false otherwise.
 */
function shouldPreserve(element, tagName) {
    const role = element.getAttribute('role');
    const isDialog = role === 'dialog' || role === 'alertdialog';
    if (isDialog) {
        return (isWithinPrimaryContent(element) ||
            (element.textContent || '').length > DIALOG_MIN_CHARS_FOR_PRESERVATION ||
            element.querySelector('h1,h2,h3,h4,h5,h6') !== null);
    }

    if (tagName !== 'nav' && tagName !== 'footer') {
        return false;
    }
    if (element.querySelector('article,main,section,[role="main"]')) {
        return true;
    }
    const trimmedText = (element.textContent || '').trim();
    return trimmedText.length >= NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION;
}
235
/**
 * Removes every still-attached node in `nodes` unless `shouldPreserve` vetoes
 * it. The list is walked from last to first.
 *
 * @param nodes Array-like collection of elements.
 */
function removeNodes(nodes) {
    let index = nodes.length;
    while (index-- > 0) {
        const candidate = nodes[index];
        // Skip holes and nodes already detached by an earlier removal.
        if (!candidate?.parentNode) {
            continue;
        }
        if (shouldPreserve(candidate, candidate.tagName.toLowerCase())) {
            continue;
        }
        candidate.remove();
    }
}
243
/**
 * Computes the navigation/footer part of an element's noise score.
 *
 * `weights.structural` is added once for each rule that applies:
 *  1. the tag is in `ALWAYS_NOISE_TAGS`;
 *  2. the tag is `header` and it has a navigation role or a class/id matching
 *     `HEADER_NOISE_PATTERN`;
 *  3. the role is in `NAVIGATION_ROLES`, except for `<aside role="complementary">`.
 *
 * @returns The accumulated score (0 when no rule applies).
 */
function scoreNavFooter(tagName, role, className, id, weights) {
    const hasNavigationRole = Boolean(role && NAVIGATION_ROLES.has(role));
    let score = 0;

    if (ALWAYS_NOISE_TAGS.has(tagName)) {
        score += weights.structural;
    }

    // Header Boilerplate
    const isBoilerplateHeader = tagName === 'header' &&
        (hasNavigationRole || HEADER_NOISE_PATTERN.test(`${className} ${id}`));
    if (isBoilerplateHeader) {
        score += weights.structural;
    }

    // Role Noise
    const isComplementaryAside = tagName === 'aside' && role === 'complementary';
    if (hasNavigationRole && !isComplementaryAside) {
        score += weights.structural;
    }

    return score;
}
262
/**
 * Collects the attributes and derived flags that the scoring code needs.
 *
 * @param element Element to inspect.
 * @returns `{ tagName, className, id, role, style, isInteractive, isHidden }`
 *          where `className`/`id` default to `''`, `role`/`style` may be null,
 *          and `isHidden` is true for `hidden`, `aria-hidden="true"`, or an
 *          inline style with `display:none` / `visibility:hidden`.
 */
function extractElementMetadata(element) {
    const role = element.getAttribute('role');
    const style = element.getAttribute('style');
    const interactive = isInteractive(element, role);

    let isHidden = element.hasAttribute('hidden') ||
        element.getAttribute('aria-hidden') === 'true';
    if (!isHidden && style !== null) {
        isHidden = /\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b/i.test(style);
    }

    return {
        tagName: element.tagName.toLowerCase(),
        className: element.getAttribute('class') ?? '',
        id: element.getAttribute('id') ?? '',
        role,
        style,
        isInteractive: interactive,
        isHidden,
    };
}
283
/**
 * Scores an element against the configured weights and reports whether the
 * total reaches `weights.threshold`.
 *
 * Contributions: structural tag (non-interactive only), nav/footer rules
 * (when that category is enabled), hidden (non-interactive only),
 * fixed/sticky/high-z class names, and promo token matches (when any promo
 * category is enabled).
 *
 * @param element Element to score.
 * @param context Context object produced by `getContext`.
 * @returns true when the element should be treated as noise.
 */
function isNoiseElement(element, context) {
    const { weights, promoMatchers } = context;
    const meta = extractElementMetadata(element);
    let score = 0;

    // Structural
    const isStructuralTag = context.structuralTags.has(meta.tagName);
    if (isStructuralTag && !meta.isInteractive) {
        score += weights.structural;
    }

    // Nav/Footer Scoring
    if (context.flags.navFooter) {
        score += scoreNavFooter(meta.tagName, meta.role, meta.className, meta.id, weights);
    }

    // Hidden
    if (meta.isHidden && !meta.isInteractive) {
        score += weights.hidden;
    }

    // Sticky/Fixed
    if (FIXED_OR_HIGH_Z_PATTERN.test(meta.className)) {
        score += weights.stickyFixed;
    }

    // Promo
    if (context.promoEnabled) {
        const matchesClassOrId = (matcher) => matcher.test(meta.className) || matcher.test(meta.id);
        // An aggressive-token hit only counts outside primary content and
        // suppresses the base-token check entirely.
        const countsAsPromo = matchesClassOrId(promoMatchers.aggressive)
            ? !isWithinPrimaryContent(element)
            : matchesClassOrId(promoMatchers.base);
        if (countsAsPromo) {
            score += weights.promo;
        }
    }

    return score >= weights.threshold;
}
317
+ function cleanHeadingWrapperDivs(h) {
318
+ const divs = h.querySelectorAll('div');
319
+ for (let j = divs.length - 1; j >= 0; j--) {
320
+ const d = divs[j];
321
+ if (!d?.parentNode)
322
+ continue;
323
+ const cls = d.getAttribute('class') ?? '';
324
+ const stl = d.getAttribute('style') ?? '';
325
+ if (cls.includes('absolute') ||
326
+ stl.includes('position') ||
327
+ d.getAttribute('tabindex') === '-1') {
328
+ d.remove();
329
+ }
330
+ }
331
+ }
332
+ function cleanHeadingAnchors(h) {
333
+ const anchors = h.querySelectorAll('a');
334
+ for (let j = anchors.length - 1; j >= 0; j--) {
335
+ const a = anchors[j];
336
+ if (!a?.parentNode)
337
+ continue;
338
+ const href = a.getAttribute('href') ?? '';
339
+ const txt = (a.textContent || '').replace(/[\u200B\s]/g, '');
340
+ if (href.startsWith('#') && txt.length === 0) {
341
+ a.remove();
342
+ }
343
+ }
344
+ }
345
+ function cleanHeadingZeroWidth(h, document) {
346
+ const walker = document.createTreeWalker(h, 4); // SHOW_TEXT
347
+ let node;
348
+ while ((node = walker.nextNode())) {
349
+ if (node.textContent?.includes('\u200B')) {
350
+ node.textContent = node.textContent.replace(/\u200B/g, '');
351
+ }
352
+ }
353
+ }
354
/**
 * Tidies every attached `h1`–`h6` in the document: drops decorative wrapper
 * divs, removes empty fragment anchors, and strips zero-width spaces.
 *
 * @param document Document to clean in place.
 */
function cleanHeadings(document) {
    for (const heading of document.querySelectorAll('h1,h2,h3,h4,h5,h6')) {
        if (heading.parentNode) {
            cleanHeadingWrapperDivs(heading);
            cleanHeadingAnchors(heading);
            cleanHeadingZeroWidth(heading, document);
        }
    }
}
365
/**
 * Removes noise from a parsed document in place, in three passes after
 * heading cleanup:
 *  1. nodes matching the pre-built base selector,
 *  2. nodes matching any configured extra selector,
 *  3. scored candidates for which `isNoiseElement` returns true.
 * `shouldPreserve` can veto a removal in every pass.
 *
 * @param document Parsed document to mutate.
 * @param context Context object produced by `getContext`.
 */
function stripNoise(document, context) {
    cleanHeadings(document);

    // Base
    removeNodes(document.querySelectorAll(context.baseSelector));

    // Extra
    if (context.extraSelectors.length > 0) {
        removeNodes(document.querySelectorAll(context.extraSelectors.join(',')));
    }

    // Candidates (visited last to first)
    const candidates = document.querySelectorAll(context.candidateSelector);
    let cursor = candidates.length;
    while (cursor-- > 0) {
        const node = candidates[cursor];
        if (!node || !node.parentNode) {
            continue;
        }
        const keep = shouldPreserve(node, node.tagName.toLowerCase());
        if (!keep && isNoiseElement(node, context)) {
            node.remove();
        }
    }
}
393
+ function processUrlElement(el, attr, base, isSrcset) {
394
+ if (!el.parentNode)
395
+ return;
396
+ if (isSrcset) {
397
+ const val = el.getAttribute(attr);
398
+ if (val) {
399
+ const newVal = val
400
+ .split(',')
401
+ .map((entry) => {
402
+ const parts = entry.trim().split(/\s+/);
403
+ if (!parts[0])
404
+ return entry;
405
+ try {
406
+ parts[0] = new URL(parts[0], base).href;
407
+ }
408
+ catch {
409
+ /* ignore */
410
+ }
411
+ return parts.join(' ');
412
+ })
413
+ .join(', ');
414
+ el.setAttribute(attr, newVal);
415
+ }
416
+ return;
417
+ }
418
+ const val = el.getAttribute(attr);
419
+ if (val &&
420
+ !SKIP_URL_PREFIXES.some((p) => val.trim().toLowerCase().startsWith(p))) {
421
+ try {
422
+ el.setAttribute(attr, new URL(val, base).href);
423
+ }
424
+ catch {
425
+ /* ignore */
426
+ }
427
+ }
428
+ }
429
+ function resolveUrls(document, baseUrlStr) {
430
+ let base;
431
+ try {
432
+ base = new URL(baseUrlStr);
433
+ }
434
+ catch {
435
+ return;
436
+ }
437
+ const elements = document.querySelectorAll('a[href],img[src],source[srcset]');
438
+ for (const el of Array.from(elements)) {
439
+ const tag = el.tagName.toLowerCase();
440
+ if (tag === 'a')
441
+ processUrlElement(el, 'href', base, false);
442
+ else if (tag === 'img')
443
+ processUrlElement(el, 'src', base, false);
444
+ else if (tag === 'source')
445
+ processUrlElement(el, 'srcset', base, true);
446
+ }
447
+ }
448
/**
 * Serialises the cleaned document back to HTML.
 *
 * Prefers the body's inner HTML, then the whole document's outer HTML; each
 * is used only if its trimmed length exceeds `MIN_BODY_CONTENT_LENGTH`.
 *
 * @param document Parsed (and cleaned) document.
 * @param fallback Value returned when neither serialisation is long enough.
 * @returns The chosen markup string, or `fallback`.
 */
function serialize(document, fallback) {
    const isSubstantial = (markup) => markup.trim().length > MIN_BODY_CONTENT_LENGTH;

    const bodyMarkup = document.body.innerHTML;
    if (isSubstantial(bodyMarkup)) {
        return bodyMarkup;
    }
    const documentMarkup = document.documentElement.outerHTML;
    return isSubstantial(documentMarkup) ? documentMarkup : fallback;
}
457
/**
 * Reports whether the markup contains a doctype/html/head/body marker.
 *
 * @param html Raw HTML string.
 * @returns true when `HTML_DOCUMENT_MARKERS` matches anywhere in `html`.
 */
function isFullDocumentHtml(html) {
    const match = HTML_DOCUMENT_MARKERS.exec(html);
    return match !== null;
}
460
/**
 * Cheap pre-check: does the markup contain anything the noise patterns flag?
 *
 * Inputs longer than `NOISE_SCAN_LIMIT` are sampled: only the first and last
 * `NOISE_SCAN_LIMIT` characters (joined by a newline) are scanned.
 *
 * @param html Raw HTML string.
 * @returns true when any entry of `NOISE_PATTERNS` matches the sample.
 */
function mayContainNoise(html) {
    let sample = html;
    if (html.length > NOISE_SCAN_LIMIT) {
        const head = html.substring(0, NOISE_SCAN_LIMIT);
        const tail = html.substring(html.length - NOISE_SCAN_LIMIT);
        sample = `${head}\n${tail}`;
    }
    for (const pattern of NOISE_PATTERNS) {
        if (pattern.test(sample)) {
            return true;
        }
    }
    return false;
}
466
/**
 * Strips boilerplate/noise elements from HTML and optionally absolutises URLs.
 *
 * The input is returned unchanged when it shows no document markers, no
 * noise markers and no fragment markers, or when any step throws
 * (best-effort: a failure never propagates to the caller).
 *
 * @param html Raw HTML string; also the fallback return value.
 * @param document Optional pre-parsed document. When provided it is mutated
 *        in place; otherwise `html` is parsed with linkedom.
 * @param baseUrl Optional base URL; when truthy, link/image URLs are resolved against it.
 * @returns The cleaned markup, or `html` as described above.
 */
export function removeNoiseFromHtml(html, document, baseUrl) {
    const shouldParse = isFullDocumentHtml(html) ||
        mayContainNoise(html) ||
        HTML_FRAGMENT_MARKERS.test(html);
    if (!shouldParse)
        return html;
    try {
        const context = getContext();
        if (config.noiseRemoval.debug) {
            // Report every enabled category, not only nav-footer.
            const { flags } = context;
            const categories = [
                ...(flags.navFooter ? ['nav-footer'] : []),
                ...(flags.cookieBanners ? ['cookie-banners'] : []),
                ...(flags.newsletters ? ['newsletters'] : []),
                ...(flags.socialShare ? ['social-share'] : []),
            ];
            logDebug('Noise removal audit enabled', { categories });
        }
        const doc = document ?? parseHTML(html).document;
        stripNoise(doc, context);
        if (baseUrl)
            resolveUrls(doc, baseUrl);
        return serialize(doc, html);
    }
    catch (error) {
        // Still best-effort, but leave a trace so failures are diagnosable.
        logDebug('Noise removal failed; returning original HTML', {
            error: error instanceof Error ? error.message : String(error),
        });
        return html;
    }
}
@@ -0,0 +1,10 @@
1
/**
 * Error describing a failed fetch of a URL.
 */
export declare class FetchError extends Error {
    /** URL the failure relates to, as passed to the constructor. */
    readonly url: string;
    /** HTTP status supplied to the constructor, or 502 when none was given. */
    readonly statusCode: number;
    /** `HTTP_<status>` when a truthy status was supplied, otherwise `FETCH_ERROR`. */
    readonly code: string;
    /** Frozen object combining `url`, `httpStatus` and the caller-supplied details. */
    readonly details: Readonly<Record<string, unknown>>;
    /**
     * @param message Human-readable error message.
     * @param url URL the failure relates to.
     * @param httpStatus Optional HTTP status code.
     * @param details Optional extra key/value context merged into `details`.
     * @param options Standard `ErrorOptions` (e.g. `cause`) forwarded to `Error`.
     */
    constructor(
        message: string,
        url: string,
        httpStatus?: number,
        details?: Record<string, unknown>,
        options?: ErrorOptions,
    );
}
8
/**
 * Extracts a printable message from any thrown value: an Error's `message`,
 * a non-empty string as-is, a non-empty string `message` property of an
 * object, otherwise an inspected rendering (or `'Unknown error'`).
 */
export declare function getErrorMessage(error: unknown): string;

/**
 * Creates an `Error` with the given message/options and attaches `code` to it.
 */
export declare function createErrorWithCode(
    message: string,
    code: string,
    options?: ErrorOptions,
): NodeJS.ErrnoException;

/**
 * Type guard: true when `error` is an Error carrying a string `code` property.
 */
export declare function isSystemError(error: unknown): error is NodeJS.ErrnoException;
package/dist/errors.js ADDED
@@ -0,0 +1,61 @@
1
+ import { inspect } from 'node:util';
2
+ import { isError, isObject } from './type-guards.js';
3
/** Status code reported when a FetchError is created without an HTTP status. */
const DEFAULT_HTTP_STATUS = 502;

/**
 * Error describing a failed fetch of a URL.
 *
 * - `statusCode` is the supplied HTTP status, or `DEFAULT_HTTP_STATUS` when
 *   it is null/undefined.
 * - `code` is `HTTP_<status>` for a truthy status, otherwise `FETCH_ERROR`.
 * - `details` is a frozen object made of `url`, `httpStatus` and the caller's
 *   extra details (which may override the first two).
 */
export class FetchError extends Error {
    url;
    statusCode;
    code;
    details;
    /**
     * @param message Human-readable error message.
     * @param url URL the failure relates to.
     * @param httpStatus Optional HTTP status code.
     * @param details Extra context merged into `details` (defaults to `{}`).
     * @param options Options forwarded to the `Error` constructor.
     */
    constructor(message, url, httpStatus, details = {}, options) {
        super(message, options);
        this.url = url;
        this.name = 'FetchError';
        this.statusCode = httpStatus ?? DEFAULT_HTTP_STATUS;
        if (httpStatus) {
            this.code = `HTTP_${httpStatus}`;
        }
        else {
            this.code = 'FETCH_ERROR';
        }
        const mergedDetails = { url, httpStatus, ...details };
        this.details = Object.freeze(mergedDetails);
        Error.captureStackTrace(this, this.constructor);
    }
}
19
/**
 * Extracts a printable message from any thrown value.
 *
 * Resolution order: an Error's `message`; a non-empty string as-is; a
 * non-empty string `message` property on an object; otherwise the result of
 * `formatUnknownError`.
 *
 * @param error Any caught value.
 * @returns A message string.
 */
export function getErrorMessage(error) {
    if (isError(error)) {
        return error.message;
    }
    const isNonEmptyString = typeof error === 'string' && error.length > 0;
    if (isNonEmptyString) {
        return error;
    }
    return isErrorWithMessage(error) ? error.message : formatUnknownError(error);
}
28
/**
 * Reports whether `error` is an object with a non-empty string `message`.
 *
 * @param error Any value.
 * @returns true only for objects (per `isObject`) whose `message` is a non-empty string.
 */
function isErrorWithMessage(error) {
    if (isObject(error)) {
        const candidate = error.message;
        return typeof candidate === 'string' && candidate.length > 0;
    }
    return false;
}
34
/**
 * Renders an arbitrary thrown value as a single-line string.
 *
 * @param error Any value.
 * @returns `'Unknown error'` for null/undefined or when inspection throws;
 *          otherwise a compact, colourless `util.inspect` rendering limited
 *          to depth 2 and 200 characters per string.
 */
function formatUnknownError(error) {
    const FALLBACK = 'Unknown error';
    if (error == null) {
        return FALLBACK;
    }
    const inspectOptions = {
        depth: 2,
        maxStringLength: 200,
        breakLength: Infinity,
        compact: true,
        colors: false,
    };
    try {
        return inspect(error, inspectOptions);
    }
    catch {
        return FALLBACK;
    }
}
50
/**
 * Creates an `Error` and attaches a `code` property to it.
 *
 * @param message Error message.
 * @param code Value stored on the error's `code` property.
 * @param options Options forwarded to the `Error` constructor.
 * @returns The new error with `code` set.
 */
export function createErrorWithCode(message, code, options) {
    const extras = { code };
    return Object.assign(new Error(message, options), extras);
}
54
/**
 * Type guard for errors that carry a string `code`.
 *
 * @param error Any value.
 * @returns true when `error` passes `isError`, has a `code` property, and
 *          that property is a string.
 */
export function isSystemError(error) {
    return isError(error) && 'code' in error && typeof error.code === 'string';
}
@@ -0,0 +1,42 @@
1
/** Options accepted by `fetchNormalizedUrl` and `fetchNormalizedUrlBuffer`. */
export interface FetchOptions {
    /** Optional abort signal. NOTE(review): presumably cancels the in-flight request — confirm in fetch.js. */
    signal?: AbortSignal;
}

/** Result returned by `transformToRawUrl`. */
export interface TransformResult {
    /** Resulting URL. NOTE(review): presumably the input URL when `transformed` is false — confirm. */
    readonly url: string;
    /** Whether a transformation was applied. */
    readonly transformed: boolean;
    /** Optional platform identifier. NOTE(review): value set not visible in this declaration file. */
    readonly platform?: string;
}

/** Telemetry context returned by `startFetchTelemetry` and consumed by the `recordFetch*` functions. */
export interface FetchTelemetryContext {
    requestId: string;
    /** NOTE(review): time base/unit is not visible in this declaration file — confirm. */
    startTime: number;
    url: string;
    method: string;
    contextRequestId?: string;
    operationId?: string;
}
17
// NOTE(review): implementations live in fetch.js, which is not part of this
// view; the notes below restate the signatures only.

/** Returns whether the given IP address string is blocked. */
export declare function isBlockedIp(ip: string): boolean;

/** Normalises a URL string and returns it together with its hostname. */
export declare function normalizeUrl(urlString: string): {
    normalizedUrl: string;
    hostname: string;
};

/** Validates and normalises a URL string, returning the normalised form. */
export declare function validateAndNormalizeUrl(urlString: string): string;

/** Maps a URL to a `TransformResult`; `transformed` reports whether it was rewritten. */
export declare function transformToRawUrl(url: string): TransformResult;

/** Returns whether the URL is classified as raw text content. */
export declare function isRawTextContentUrl(url: string): boolean;

/** Creates the telemetry context for a fetch of `url` with the given HTTP method. */
export declare function startFetchTelemetry(url: string, method: string): FetchTelemetryContext;

/** Records a response (and optionally its content size) against a telemetry context. */
export declare function recordFetchResponse(
    context: FetchTelemetryContext,
    response: Response,
    contentSize?: number,
): void;

/** Records a failure (and optionally an HTTP status) against a telemetry context. */
export declare function recordFetchError(
    context: FetchTelemetryContext,
    error: unknown,
    status?: number,
): void;

/** Fetches `url` with a redirect limit; resolves with the response and a URL string. */
export declare function fetchWithRedirects(
    url: string,
    init: RequestInit,
    maxRedirects: number,
): Promise<{
    response: Response;
    url: string;
}>;

/** Reads a response body as text subject to `maxBytes`; resolves with the text and a size. */
export declare function readResponseText(
    response: Response,
    url: string,
    maxBytes: number,
    signal?: AbortSignal,
    encoding?: string,
): Promise<{
    text: string;
    size: number;
}>;

/** Fetches an already-normalised URL and resolves with a string. */
export declare function fetchNormalizedUrl(normalizedUrl: string, options?: FetchOptions): Promise<string>;

/** Fetches an already-normalised URL and resolves with the raw bytes plus metadata. */
export declare function fetchNormalizedUrlBuffer(
    normalizedUrl: string,
    options?: FetchOptions,
): Promise<{
    buffer: Uint8Array;
    encoding: string;
    truncated: boolean;
    finalUrl: string;
}>;