@j0hanz/fetch-url-mcp 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/AGENTS.md CHANGED
@@ -34,7 +34,7 @@
34
34
  - `tests/` — Unit/integration tests (46+ test files) using Node.js built-in test runner
35
35
  - `scripts/` — Build & test orchestration (`tasks.mjs`)
36
36
  - `assets/` — Server icon (`logo.svg`)
37
- - `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → test → build → publish to npm, MCP Registry, Docker)
37
+ - `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → type-check:tests → test → build → publish to npm, MCP Registry, Docker)
38
38
 
39
39
  > Ignore: `dist/`, `node_modules/`, `coverage/`, `.cache/`, `.tsbuildinfo`
40
40
 
@@ -49,6 +49,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
49
49
  - **Start:** `npm run start` → `node dist/index.js` (see `package.json`)
50
50
  - **Build:** `npm run build` → `node scripts/tasks.mjs build` — cleans `dist/`, compiles TS, validates `instructions.md`, copies assets, sets executable bit (see `scripts/tasks.mjs`, `package.json`)
51
51
  - **Type-check:** `npm run type-check` → `tsc -p tsconfig.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
52
+ - **Type-check (tests):** `npm run type-check:tests` → build output + `tsc -p tsconfig.tests.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
52
53
  - **Lint:** `npm run lint` → `eslint .` (see `package.json`, `.github/workflows/release.yml`)
53
54
  - **Lint (fix):** `npm run lint:fix` → `eslint . --fix` (see `package.json`)
54
55
  - **Format:** `npm run format` → `prettier --write .` (see `package.json`)
@@ -135,7 +136,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
135
136
  - Config values temporarily overridden per test with `try/finally` cleanup (observed in `tests/fetch-url-tool.test.ts`)
136
137
  - Worker pool shutdown in `after()` hooks for clean teardown (observed in `tests/fetch-url-tool.test.ts`)
137
138
  - No external services (DB/containers) required for tests
138
- - **CI validation order:** `lint` → `type-check` → `test` → `build` (see `.github/workflows/release.yml`)
139
+ - **CI validation order:** `lint` → `type-check` → `type-check:tests` → `test` → `build` (see `.github/workflows/release.yml`)
139
140
 
140
141
  ## 7) Common Pitfalls (Verified Only)
141
142
 
package/dist/cache.js CHANGED
@@ -93,13 +93,16 @@ class InMemoryCacheStore {
93
93
  isEnabled() {
94
94
  return config.cache.enabled;
95
95
  }
96
+ isExpired(entry, now = Date.now()) {
97
+ return entry.expiresAtMs <= now;
98
+ }
96
99
  keys() {
97
100
  if (!this.isEnabled())
98
101
  return [];
99
102
  const now = Date.now();
100
103
  const result = [];
101
104
  for (const [key, entry] of this.entries) {
102
- if (entry.expiresAtMs > now)
105
+ if (!this.isExpired(entry, now))
103
106
  result.push(key);
104
107
  }
105
108
  return result;
@@ -130,7 +133,7 @@ class InMemoryCacheStore {
130
133
  if (!entry)
131
134
  return undefined;
132
135
  const now = Date.now();
133
- if (entry.expiresAtMs <= now) {
136
+ if (this.isExpired(entry, now)) {
134
137
  this.delete(cacheKey);
135
138
  this.notify(cacheKey, true);
136
139
  return undefined;
package/dist/cli.js CHANGED
@@ -19,6 +19,17 @@ const optionSchema = {
19
19
  function toErrorMessage(error) {
20
20
  return error instanceof Error ? error.message : String(error);
21
21
  }
22
+ function toBoolean(value) {
23
+ return value === true;
24
+ }
25
+ function buildCliValues(values) {
26
+ const { stdio, help, version } = values;
27
+ return {
28
+ stdio: toBoolean(stdio),
29
+ help: toBoolean(help),
30
+ version: toBoolean(version),
31
+ };
32
+ }
22
33
  export function renderCliUsage() {
23
34
  return `${usageLines.join('\n')}\n`;
24
35
  }
@@ -32,11 +43,7 @@ export function parseCliArgs(args) {
32
43
  });
33
44
  return {
34
45
  ok: true,
35
- values: {
36
- stdio: values.stdio,
37
- help: values.help,
38
- version: values.version,
39
- },
46
+ values: buildCliValues(values),
40
47
  };
41
48
  }
42
49
  catch (error) {
package/dist/config.js CHANGED
@@ -233,24 +233,25 @@ const RESOLVED_TASKS_MAX_PER_OWNER = Math.min(DEFAULT_TASKS_MAX_PER_OWNER, DEFAU
233
233
  function resolveWorkerResourceLimits() {
234
234
  const limits = {};
235
235
  let hasAny = false;
236
- const maxOldGenerationSizeMb = parseOptionalInteger(env['TRANSFORM_WORKER_MAX_OLD_GENERATION_MB'], 1);
237
- const maxYoungGenerationSizeMb = parseOptionalInteger(env['TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB'], 1);
238
- const codeRangeSizeMb = parseOptionalInteger(env['TRANSFORM_WORKER_CODE_RANGE_MB'], 1);
239
- const stackSizeMb = parseOptionalInteger(env['TRANSFORM_WORKER_STACK_MB'], 1);
240
- if (maxOldGenerationSizeMb !== undefined) {
241
- limits.maxOldGenerationSizeMb = maxOldGenerationSizeMb;
242
- hasAny = true;
243
- }
244
- if (maxYoungGenerationSizeMb !== undefined) {
245
- limits.maxYoungGenerationSizeMb = maxYoungGenerationSizeMb;
246
- hasAny = true;
247
- }
248
- if (codeRangeSizeMb !== undefined) {
249
- limits.codeRangeSizeMb = codeRangeSizeMb;
250
- hasAny = true;
251
- }
252
- if (stackSizeMb !== undefined) {
253
- limits.stackSizeMb = stackSizeMb;
236
+ const entries = [
237
+ [
238
+ 'maxOldGenerationSizeMb',
239
+ parseOptionalInteger(env['TRANSFORM_WORKER_MAX_OLD_GENERATION_MB'], 1),
240
+ ],
241
+ [
242
+ 'maxYoungGenerationSizeMb',
243
+ parseOptionalInteger(env['TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB'], 1),
244
+ ],
245
+ [
246
+ 'codeRangeSizeMb',
247
+ parseOptionalInteger(env['TRANSFORM_WORKER_CODE_RANGE_MB'], 1),
248
+ ],
249
+ ['stackSizeMb', parseOptionalInteger(env['TRANSFORM_WORKER_STACK_MB'], 1)],
250
+ ];
251
+ for (const [key, value] of entries) {
252
+ if (value === undefined)
253
+ continue;
254
+ limits[key] = value;
254
255
  hasAny = true;
255
256
  }
256
257
  return hasAny ? limits : undefined;
package/dist/crypto.js CHANGED
@@ -18,6 +18,11 @@ function assertAllowedAlgorithm(algorithm) {
18
18
  throw new Error(`Hash algorithm not allowed: ${algorithm}`);
19
19
  }
20
20
  }
21
+ function padBuffer(buffer, length) {
22
+ const padded = Buffer.alloc(length);
23
+ buffer.copy(padded);
24
+ return padded;
25
+ }
21
26
  export function timingSafeEqualUtf8(a, b) {
22
27
  const aBuffer = Buffer.from(a, 'utf8');
23
28
  const bBuffer = Buffer.from(b, 'utf8');
@@ -26,10 +31,8 @@ export function timingSafeEqualUtf8(a, b) {
26
31
  }
27
32
  // Avoid early return timing differences on length mismatch.
28
33
  const maxLength = Math.max(aBuffer.length, bBuffer.length);
29
- const paddedA = Buffer.alloc(maxLength);
30
- const paddedB = Buffer.alloc(maxLength);
31
- aBuffer.copy(paddedA);
32
- bBuffer.copy(paddedB);
34
+ const paddedA = padBuffer(aBuffer, maxLength);
35
+ const paddedB = padBuffer(bBuffer, maxLength);
33
36
  return timingSafeEqual(paddedA, paddedB) && aBuffer.length === bBuffer.length;
34
37
  }
35
38
  function hashHex(algorithm, input) {
@@ -103,22 +103,25 @@ function buildTokenRegex(tokens) {
103
103
  return NO_MATCH_REGEX;
104
104
  return new RegExp(`(?:^|[^a-z0-9])(?:${[...tokens].map(escapeRegexLiteral).join('|')})(?:$|[^a-z0-9])`, 'i');
105
105
  }
106
+ function addTokens(target, tokens) {
107
+ for (const token of tokens)
108
+ target.add(token);
109
+ }
106
110
  function getPromoMatchers(currentConfig, flags) {
107
111
  const baseTokens = new Set(PROMO_TOKENS_ALWAYS);
108
112
  const aggressiveTokens = new Set();
109
113
  if (currentConfig.aggressiveMode) {
110
- for (const t of PROMO_TOKENS_AGGRESSIVE)
111
- aggressiveTokens.add(t);
114
+ addTokens(aggressiveTokens, PROMO_TOKENS_AGGRESSIVE);
115
+ }
116
+ if (flags.cookieBanners) {
117
+ addTokens(baseTokens, PROMO_TOKENS_BY_CATEGORY['cookie-banners']);
118
+ }
119
+ if (flags.newsletters) {
120
+ addTokens(baseTokens, PROMO_TOKENS_BY_CATEGORY['newsletters']);
121
+ }
122
+ if (flags.socialShare) {
123
+ addTokens(baseTokens, PROMO_TOKENS_BY_CATEGORY['social-share']);
112
124
  }
113
- if (flags.cookieBanners)
114
- for (const t of PROMO_TOKENS_BY_CATEGORY['cookie-banners'])
115
- baseTokens.add(t);
116
- if (flags.newsletters)
117
- for (const t of PROMO_TOKENS_BY_CATEGORY['newsletters'])
118
- baseTokens.add(t);
119
- if (flags.socialShare)
120
- for (const t of PROMO_TOKENS_BY_CATEGORY['social-share'])
121
- baseTokens.add(t);
122
125
  for (const t of currentConfig.extraTokens) {
123
126
  const n = t.toLowerCase().trim();
124
127
  if (n)
package/dist/errors.js CHANGED
@@ -19,17 +19,20 @@ export class FetchError extends Error {
19
19
  export function getErrorMessage(error) {
20
20
  if (isError(error))
21
21
  return error.message;
22
- if (typeof error === 'string' && error.length > 0)
22
+ if (isNonEmptyString(error))
23
23
  return error;
24
24
  if (isErrorWithMessage(error))
25
25
  return error.message;
26
26
  return formatUnknownError(error);
27
27
  }
28
+ function isNonEmptyString(value) {
29
+ return typeof value === 'string' && value.length > 0;
30
+ }
28
31
  function isErrorWithMessage(error) {
29
32
  if (!isObject(error))
30
33
  return false;
31
34
  const { message } = error;
32
- return typeof message === 'string' && message.length > 0;
35
+ return isNonEmptyString(message);
33
36
  }
34
37
  function formatUnknownError(error) {
35
38
  if (error === null || error === undefined)
@@ -0,0 +1,4 @@
1
+ export declare function getCharsetFromContentType(contentType: string | null): string | undefined;
2
+ export declare function decodeBuffer(buffer: Uint8Array, encoding: string): string;
3
+ export declare function resolveEncoding(declaredEncoding: string | undefined, sample: Uint8Array): string | undefined;
4
+ export declare function isBinaryContent(buffer: Uint8Array, encoding?: string): boolean;
@@ -0,0 +1,163 @@
1
+ import { Buffer } from 'node:buffer';
2
+ export function getCharsetFromContentType(contentType) {
3
+ if (!contentType)
4
+ return undefined;
5
+ const match = /charset=([^;]+)/i.exec(contentType);
6
+ const charsetGroup = match?.[1];
7
+ if (!charsetGroup)
8
+ return undefined;
9
+ let charset = charsetGroup.trim();
10
+ if (charset.startsWith('"') && charset.endsWith('"')) {
11
+ charset = charset.slice(1, -1);
12
+ }
13
+ return charset.trim();
14
+ }
15
+ function createDecoder(encoding) {
16
+ if (!encoding)
17
+ return new TextDecoder('utf-8');
18
+ try {
19
+ return new TextDecoder(encoding);
20
+ }
21
+ catch {
22
+ return new TextDecoder('utf-8');
23
+ }
24
+ }
25
+ export function decodeBuffer(buffer, encoding) {
26
+ return createDecoder(encoding).decode(buffer);
27
+ }
28
+ function normalizeEncodingLabel(encoding) {
29
+ return encoding?.trim().toLowerCase() ?? '';
30
+ }
31
+ function isUnicodeWideEncoding(encoding) {
32
+ const normalized = normalizeEncodingLabel(encoding);
33
+ return (normalized.startsWith('utf-16') ||
34
+ normalized.startsWith('utf-32') ||
35
+ normalized === 'ucs-2' ||
36
+ normalized === 'unicodefffe' ||
37
+ normalized === 'unicodefeff');
38
+ }
39
+ const BOM_SIGNATURES = [
40
+ // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
41
+ { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
42
+ { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
43
+ { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
44
+ { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
45
+ { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
46
+ ];
47
+ function startsWithBytes(buffer, signature) {
48
+ const sigLen = signature.length;
49
+ if (buffer.length < sigLen)
50
+ return false;
51
+ for (let i = 0; i < sigLen; i += 1) {
52
+ if (buffer[i] !== signature[i])
53
+ return false;
54
+ }
55
+ return true;
56
+ }
57
+ function detectBomEncoding(buffer) {
58
+ for (const { bytes, encoding } of BOM_SIGNATURES) {
59
+ if (startsWithBytes(buffer, bytes))
60
+ return encoding;
61
+ }
62
+ return undefined;
63
+ }
64
+ function readQuotedValue(input, startIndex) {
65
+ const first = input[startIndex];
66
+ if (!first)
67
+ return '';
68
+ const quoted = first === '"' || first === "'";
69
+ if (quoted) {
70
+ const end = input.indexOf(first, startIndex + 1);
71
+ return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
72
+ }
73
+ const tail = input.slice(startIndex);
74
+ const stop = tail.search(/[\s/>]/);
75
+ return (stop === -1 ? tail : tail.slice(0, stop)).trim();
76
+ }
77
+ function extractHtmlCharset(headSnippet) {
78
+ const lower = headSnippet.toLowerCase();
79
+ const charsetToken = 'charset=';
80
+ const charsetIdx = lower.indexOf(charsetToken);
81
+ if (charsetIdx === -1)
82
+ return undefined;
83
+ const valueStart = charsetIdx + charsetToken.length;
84
+ const charset = readQuotedValue(headSnippet, valueStart);
85
+ return charset ? charset.toLowerCase() : undefined;
86
+ }
87
+ function extractXmlEncoding(headSnippet) {
88
+ const lower = headSnippet.toLowerCase();
89
+ const xmlStart = lower.indexOf('<?xml');
90
+ if (xmlStart === -1)
91
+ return undefined;
92
+ const xmlEnd = lower.indexOf('?>', xmlStart);
93
+ const declaration = xmlEnd === -1
94
+ ? headSnippet.slice(xmlStart)
95
+ : headSnippet.slice(xmlStart, xmlEnd + 2);
96
+ const declarationLower = declaration.toLowerCase();
97
+ const encodingToken = 'encoding=';
98
+ const encodingIdx = declarationLower.indexOf(encodingToken);
99
+ if (encodingIdx === -1)
100
+ return undefined;
101
+ const valueStart = encodingIdx + encodingToken.length;
102
+ const encoding = readQuotedValue(declaration, valueStart);
103
+ return encoding ? encoding.toLowerCase() : undefined;
104
+ }
105
+ function detectHtmlDeclaredEncoding(buffer) {
106
+ const scanSize = Math.min(buffer.length, 8_192);
107
+ if (scanSize === 0)
108
+ return undefined;
109
+ const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
110
+ return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
111
+ }
112
+ export function resolveEncoding(declaredEncoding, sample) {
113
+ const bomEncoding = detectBomEncoding(sample);
114
+ if (bomEncoding)
115
+ return bomEncoding;
116
+ if (declaredEncoding)
117
+ return declaredEncoding;
118
+ return detectHtmlDeclaredEncoding(sample);
119
+ }
120
+ const BINARY_SIGNATURES = [
121
+ [0x25, 0x50, 0x44, 0x46],
122
+ [0x89, 0x50, 0x4e, 0x47],
123
+ [0x47, 0x49, 0x46, 0x38],
124
+ [0xff, 0xd8, 0xff],
125
+ [0x52, 0x49, 0x46, 0x46],
126
+ [0x42, 0x4d],
127
+ [0x49, 0x49, 0x2a, 0x00],
128
+ [0x4d, 0x4d, 0x00, 0x2a],
129
+ [0x00, 0x00, 0x01, 0x00],
130
+ [0x50, 0x4b, 0x03, 0x04],
131
+ [0x1f, 0x8b],
132
+ [0x42, 0x5a, 0x68],
133
+ [0x52, 0x61, 0x72, 0x21],
134
+ [0x37, 0x7a, 0xbc, 0xaf],
135
+ [0x7f, 0x45, 0x4c, 0x46],
136
+ [0x4d, 0x5a],
137
+ [0xcf, 0xfa, 0xed, 0xfe],
138
+ [0x00, 0x61, 0x73, 0x6d],
139
+ [0x1a, 0x45, 0xdf, 0xa3],
140
+ [0x66, 0x74, 0x79, 0x70],
141
+ [0x46, 0x4c, 0x56],
142
+ [0x49, 0x44, 0x33],
143
+ [0xff, 0xfb],
144
+ [0xff, 0xfa],
145
+ [0x4f, 0x67, 0x67, 0x53],
146
+ [0x66, 0x4c, 0x61, 0x43],
147
+ [0x4d, 0x54, 0x68, 0x64],
148
+ [0x77, 0x4f, 0x46, 0x46],
149
+ [0x00, 0x01, 0x00, 0x00],
150
+ [0x4f, 0x54, 0x54, 0x4f],
151
+ [0x53, 0x51, 0x4c, 0x69],
152
+ ];
153
+ function hasNullByte(buffer, limit) {
154
+ const checkLen = Math.min(buffer.length, limit);
155
+ return buffer.subarray(0, checkLen).includes(0x00);
156
+ }
157
+ export function isBinaryContent(buffer, encoding) {
158
+ for (const signature of BINARY_SIGNATURES) {
159
+ if (startsWithBytes(buffer, signature))
160
+ return true;
161
+ }
162
+ return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
163
+ }
@@ -0,0 +1,4 @@
1
+ import { Readable } from 'node:stream';
2
+ import type { ReadableStream as NodeReadableStream } from 'node:stream/web';
3
+ export declare function toNodeReadableStream(stream: ReadableStream<Uint8Array>, url: string, stage: string): NodeReadableStream<Uint8Array>;
4
+ export declare function toWebReadableStream(stream: Readable, url: string, stage: string): ReadableStream<Uint8Array>;
@@ -0,0 +1,28 @@
1
+ import { Readable } from 'node:stream';
2
+ import { FetchError } from './errors.js';
3
+ import { isObject } from './type-guards.js';
4
+ function isReadableStreamLike(value) {
5
+ if (!isObject(value))
6
+ return false;
7
+ return (typeof value['getReader'] === 'function' &&
8
+ typeof value['cancel'] === 'function' &&
9
+ typeof value['tee'] === 'function' &&
10
+ typeof value['locked'] === 'boolean');
11
+ }
12
+ function assertReadableStreamLike(stream, url, stage) {
13
+ if (isReadableStreamLike(stream))
14
+ return;
15
+ throw new FetchError('Invalid response stream', url, 500, {
16
+ reason: 'invalid_stream',
17
+ stage,
18
+ });
19
+ }
20
+ export function toNodeReadableStream(stream, url, stage) {
21
+ assertReadableStreamLike(stream, url, stage);
22
+ return stream;
23
+ }
24
+ export function toWebReadableStream(stream, url, stage) {
25
+ const converted = Readable.toWeb(stream);
26
+ assertReadableStreamLike(converted, url, stage);
27
+ return converted;
28
+ }
package/dist/fetch.js CHANGED
@@ -10,6 +10,8 @@ import { finished, pipeline } from 'node:stream/promises';
10
10
  import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
11
11
  import { config } from './config.js';
12
12
  import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
13
+ import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
14
+ import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
13
15
  import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
14
16
  import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
15
17
  import { isError, isObject } from './type-guards.js';
@@ -26,23 +28,6 @@ const defaultRedactor = {
26
28
  redact: redactUrl,
27
29
  };
28
30
  const defaultFetch = (input, init) => globalThis.fetch(input, init);
29
- function assertReadableStreamLike(stream, url, stage) {
30
- if (isObject(stream) && typeof stream['getReader'] === 'function')
31
- return;
32
- throw new FetchError('Invalid response stream', url, 500, {
33
- reason: 'invalid_stream',
34
- stage,
35
- });
36
- }
37
- function toNodeReadableStream(stream, url, stage) {
38
- assertReadableStreamLike(stream, url, stage);
39
- return stream;
40
- }
41
- function toWebReadableStream(stream, url, stage) {
42
- const converted = Readable.toWeb(stream);
43
- assertReadableStreamLike(converted, url, stage);
44
- return converted;
45
- }
46
31
  class IpBlocker {
47
32
  security;
48
33
  blockList = createDefaultBlockList();
@@ -569,8 +554,11 @@ function createTooManyRedirectsFetchError(url) {
569
554
  function createMissingRedirectLocationFetchError(url) {
570
555
  return new FetchError('Redirect response missing Location header', url);
571
556
  }
557
+ function buildNetworkErrorMessage(url) {
558
+ return `Network error: Could not reach ${url}`;
559
+ }
572
560
  function createNetworkFetchError(url, message) {
573
- return new FetchError(`Network error: Could not reach ${url}`, url, undefined, message ? { message } : {});
561
+ return new FetchError(buildNetworkErrorMessage(url), url, undefined, message ? { message } : {});
574
562
  }
575
563
  function createUnknownFetchError(url, message) {
576
564
  return new FetchError(message, url);
@@ -619,7 +607,7 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
619
607
  code === 'EINVAL') {
620
608
  return new FetchError(error.message, url, 400, { code });
621
609
  }
622
- return new FetchError(`Network error: Could not reach ${url}`, url, undefined, {
610
+ return new FetchError(buildNetworkErrorMessage(url), url, undefined, {
623
611
  code,
624
612
  message: error.message,
625
613
  });
@@ -868,168 +856,6 @@ class RedirectFollower {
868
856
  }
869
857
  }
870
858
  }
871
- function getCharsetFromContentType(contentType) {
872
- if (!contentType)
873
- return undefined;
874
- const match = /charset=([^;]+)/i.exec(contentType);
875
- const charsetGroup = match?.[1];
876
- if (!charsetGroup)
877
- return undefined;
878
- let charset = charsetGroup.trim();
879
- if (charset.startsWith('"') && charset.endsWith('"')) {
880
- charset = charset.slice(1, -1);
881
- }
882
- return charset.trim();
883
- }
884
- function createDecoder(encoding) {
885
- if (!encoding)
886
- return new TextDecoder('utf-8');
887
- try {
888
- return new TextDecoder(encoding);
889
- }
890
- catch {
891
- return new TextDecoder('utf-8');
892
- }
893
- }
894
- function decodeBuffer(buffer, encoding) {
895
- return createDecoder(encoding).decode(buffer);
896
- }
897
- function normalizeEncodingLabel(encoding) {
898
- return encoding?.trim().toLowerCase() ?? '';
899
- }
900
- function isUnicodeWideEncoding(encoding) {
901
- const normalized = normalizeEncodingLabel(encoding);
902
- return (normalized.startsWith('utf-16') ||
903
- normalized.startsWith('utf-32') ||
904
- normalized === 'ucs-2' ||
905
- normalized === 'unicodefffe' ||
906
- normalized === 'unicodefeff');
907
- }
908
- const BOM_SIGNATURES = [
909
- // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
910
- { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
911
- { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
912
- { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
913
- { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
914
- { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
915
- ];
916
- function detectBomEncoding(buffer) {
917
- for (const { bytes, encoding } of BOM_SIGNATURES) {
918
- if (startsWithBytes(buffer, bytes))
919
- return encoding;
920
- }
921
- return undefined;
922
- }
923
- function readQuotedValue(input, startIndex) {
924
- const first = input[startIndex];
925
- if (!first)
926
- return '';
927
- const quoted = first === '"' || first === "'";
928
- if (quoted) {
929
- const end = input.indexOf(first, startIndex + 1);
930
- return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
931
- }
932
- const tail = input.slice(startIndex);
933
- const stop = tail.search(/[\s/>]/);
934
- return (stop === -1 ? tail : tail.slice(0, stop)).trim();
935
- }
936
- function extractHtmlCharset(headSnippet) {
937
- const lower = headSnippet.toLowerCase();
938
- const charsetToken = 'charset=';
939
- const charsetIdx = lower.indexOf(charsetToken);
940
- if (charsetIdx === -1)
941
- return undefined;
942
- const valueStart = charsetIdx + charsetToken.length;
943
- const charset = readQuotedValue(headSnippet, valueStart);
944
- return charset ? charset.toLowerCase() : undefined;
945
- }
946
- function extractXmlEncoding(headSnippet) {
947
- const lower = headSnippet.toLowerCase();
948
- const xmlStart = lower.indexOf('<?xml');
949
- if (xmlStart === -1)
950
- return undefined;
951
- const xmlEnd = lower.indexOf('?>', xmlStart);
952
- const declaration = xmlEnd === -1
953
- ? headSnippet.slice(xmlStart)
954
- : headSnippet.slice(xmlStart, xmlEnd + 2);
955
- const declarationLower = declaration.toLowerCase();
956
- const encodingToken = 'encoding=';
957
- const encodingIdx = declarationLower.indexOf(encodingToken);
958
- if (encodingIdx === -1)
959
- return undefined;
960
- const valueStart = encodingIdx + encodingToken.length;
961
- const encoding = readQuotedValue(declaration, valueStart);
962
- return encoding ? encoding.toLowerCase() : undefined;
963
- }
964
- function detectHtmlDeclaredEncoding(buffer) {
965
- const scanSize = Math.min(buffer.length, 8_192);
966
- if (scanSize === 0)
967
- return undefined;
968
- const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
969
- return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
970
- }
971
- function resolveEncoding(declaredEncoding, sample) {
972
- const bomEncoding = detectBomEncoding(sample);
973
- if (bomEncoding)
974
- return bomEncoding;
975
- if (declaredEncoding)
976
- return declaredEncoding;
977
- return detectHtmlDeclaredEncoding(sample);
978
- }
979
- const BINARY_SIGNATURES = [
980
- [0x25, 0x50, 0x44, 0x46],
981
- [0x89, 0x50, 0x4e, 0x47],
982
- [0x47, 0x49, 0x46, 0x38],
983
- [0xff, 0xd8, 0xff],
984
- [0x52, 0x49, 0x46, 0x46],
985
- [0x42, 0x4d],
986
- [0x49, 0x49, 0x2a, 0x00],
987
- [0x4d, 0x4d, 0x00, 0x2a],
988
- [0x00, 0x00, 0x01, 0x00],
989
- [0x50, 0x4b, 0x03, 0x04],
990
- [0x1f, 0x8b],
991
- [0x42, 0x5a, 0x68],
992
- [0x52, 0x61, 0x72, 0x21],
993
- [0x37, 0x7a, 0xbc, 0xaf],
994
- [0x7f, 0x45, 0x4c, 0x46],
995
- [0x4d, 0x5a],
996
- [0xcf, 0xfa, 0xed, 0xfe],
997
- [0x00, 0x61, 0x73, 0x6d],
998
- [0x1a, 0x45, 0xdf, 0xa3],
999
- [0x66, 0x74, 0x79, 0x70],
1000
- [0x46, 0x4c, 0x56],
1001
- [0x49, 0x44, 0x33],
1002
- [0xff, 0xfb],
1003
- [0xff, 0xfa],
1004
- [0x4f, 0x67, 0x67, 0x53],
1005
- [0x66, 0x4c, 0x61, 0x43],
1006
- [0x4d, 0x54, 0x68, 0x64],
1007
- [0x77, 0x4f, 0x46, 0x46],
1008
- [0x00, 0x01, 0x00, 0x00],
1009
- [0x4f, 0x54, 0x54, 0x4f],
1010
- [0x53, 0x51, 0x4c, 0x69],
1011
- ];
1012
- function startsWithBytes(buffer, signature) {
1013
- const sigLen = signature.length;
1014
- if (buffer.length < sigLen)
1015
- return false;
1016
- for (let i = 0; i < sigLen; i += 1) {
1017
- if (buffer[i] !== signature[i])
1018
- return false;
1019
- }
1020
- return true;
1021
- }
1022
- function hasNullByte(buffer, limit) {
1023
- const checkLen = Math.min(buffer.length, limit);
1024
- return buffer.subarray(0, checkLen).includes(0x00);
1025
- }
1026
- function isBinaryContent(buffer, encoding) {
1027
- for (const signature of BINARY_SIGNATURES) {
1028
- if (startsWithBytes(buffer, signature))
1029
- return true;
1030
- }
1031
- return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
1032
- }
1033
859
  class ResponseTextReader {
1034
860
  async read(response, url, maxBytes, signal, encoding) {
1035
861
  const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);