@j0hanz/fetch-url-mcp 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/AGENTS.md +3 -2
- package/dist/fetch-content.d.ts +4 -0
- package/dist/fetch-content.js +163 -0
- package/dist/fetch-stream.d.ts +4 -0
- package/dist/fetch-stream.js +28 -0
- package/dist/fetch.js +2 -179
- package/package.json +2 -1
package/dist/AGENTS.md
CHANGED
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
- `tests/` — Unit/integration tests (46+ test files) using Node.js built-in test runner
|
|
35
35
|
- `scripts/` — Build & test orchestration (`tasks.mjs`)
|
|
36
36
|
- `assets/` — Server icon (`logo.svg`)
|
|
37
|
-
- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → test → build → publish to npm, MCP Registry, Docker)
|
|
37
|
+
- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → type-check:tests → test → build → publish to npm, MCP Registry, Docker)
|
|
38
38
|
|
|
39
39
|
> Ignore: `dist/`, `node_modules/`, `coverage/`, `.cache/`, `.tsbuildinfo`
|
|
40
40
|
|
|
@@ -49,6 +49,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
|
|
|
49
49
|
- **Start:** `npm run start` → `node dist/index.js` (see `package.json`)
|
|
50
50
|
- **Build:** `npm run build` → `node scripts/tasks.mjs build` — cleans `dist/`, compiles TS, validates `instructions.md`, copies assets, sets executable bit (see `scripts/tasks.mjs`, `package.json`)
|
|
51
51
|
- **Type-check:** `npm run type-check` → `tsc -p tsconfig.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
|
|
52
|
+
- **Type-check (tests):** `npm run type-check:tests` → build output + `tsc -p tsconfig.tests.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
|
|
52
53
|
- **Lint:** `npm run lint` → `eslint .` (see `package.json`, `.github/workflows/release.yml`)
|
|
53
54
|
- **Lint (fix):** `npm run lint:fix` → `eslint . --fix` (see `package.json`)
|
|
54
55
|
- **Format:** `npm run format` → `prettier --write .` (see `package.json`)
|
|
@@ -135,7 +136,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
|
|
|
135
136
|
- Config values temporarily overridden per test with `try/finally` cleanup (observed in `tests/fetch-url-tool.test.ts`)
|
|
136
137
|
- Worker pool shutdown in `after()` hooks for clean teardown (observed in `tests/fetch-url-tool.test.ts`)
|
|
137
138
|
- No external services (DB/containers) required for tests
|
|
138
|
-
- **CI validation order:** `lint` → `type-check` → `test` → `build` (see `.github/workflows/release.yml`)
|
|
139
|
+
- **CI validation order:** `lint` → `type-check` → `type-check:tests` → `test` → `build` (see `.github/workflows/release.yml`)
|
|
139
140
|
|
|
140
141
|
## 7) Common Pitfalls (Verified Only)
|
|
141
142
|
|
|
package/dist/fetch-content.d.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export declare function getCharsetFromContentType(contentType: string | null): string | undefined;
|
|
2
|
+
export declare function decodeBuffer(buffer: Uint8Array, encoding: string): string;
|
|
3
|
+
export declare function resolveEncoding(declaredEncoding: string | undefined, sample: Uint8Array): string | undefined;
|
|
4
|
+
export declare function isBinaryContent(buffer: Uint8Array, encoding?: string): boolean;
|
|
package/dist/fetch-content.js
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
export function getCharsetFromContentType(contentType) {
|
|
3
|
+
if (!contentType)
|
|
4
|
+
return undefined;
|
|
5
|
+
const match = /charset=([^;]+)/i.exec(contentType);
|
|
6
|
+
const charsetGroup = match?.[1];
|
|
7
|
+
if (!charsetGroup)
|
|
8
|
+
return undefined;
|
|
9
|
+
let charset = charsetGroup.trim();
|
|
10
|
+
if (charset.startsWith('"') && charset.endsWith('"')) {
|
|
11
|
+
charset = charset.slice(1, -1);
|
|
12
|
+
}
|
|
13
|
+
return charset.trim();
|
|
14
|
+
}
|
|
15
|
+
function createDecoder(encoding) {
|
|
16
|
+
if (!encoding)
|
|
17
|
+
return new TextDecoder('utf-8');
|
|
18
|
+
try {
|
|
19
|
+
return new TextDecoder(encoding);
|
|
20
|
+
}
|
|
21
|
+
catch {
|
|
22
|
+
return new TextDecoder('utf-8');
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
export function decodeBuffer(buffer, encoding) {
|
|
26
|
+
return createDecoder(encoding).decode(buffer);
|
|
27
|
+
}
|
|
28
|
+
function normalizeEncodingLabel(encoding) {
|
|
29
|
+
return encoding?.trim().toLowerCase() ?? '';
|
|
30
|
+
}
|
|
31
|
+
function isUnicodeWideEncoding(encoding) {
|
|
32
|
+
const normalized = normalizeEncodingLabel(encoding);
|
|
33
|
+
return (normalized.startsWith('utf-16') ||
|
|
34
|
+
normalized.startsWith('utf-32') ||
|
|
35
|
+
normalized === 'ucs-2' ||
|
|
36
|
+
normalized === 'unicodefffe' ||
|
|
37
|
+
normalized === 'unicodefeff');
|
|
38
|
+
}
|
|
39
|
+
const BOM_SIGNATURES = [
|
|
40
|
+
// 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
|
|
41
|
+
{ bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
|
|
42
|
+
{ bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
|
|
43
|
+
{ bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
|
|
44
|
+
{ bytes: [0xff, 0xfe], encoding: 'utf-16le' },
|
|
45
|
+
{ bytes: [0xfe, 0xff], encoding: 'utf-16be' },
|
|
46
|
+
];
|
|
47
|
+
function startsWithBytes(buffer, signature) {
|
|
48
|
+
const sigLen = signature.length;
|
|
49
|
+
if (buffer.length < sigLen)
|
|
50
|
+
return false;
|
|
51
|
+
for (let i = 0; i < sigLen; i += 1) {
|
|
52
|
+
if (buffer[i] !== signature[i])
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
return true;
|
|
56
|
+
}
|
|
57
|
+
function detectBomEncoding(buffer) {
|
|
58
|
+
for (const { bytes, encoding } of BOM_SIGNATURES) {
|
|
59
|
+
if (startsWithBytes(buffer, bytes))
|
|
60
|
+
return encoding;
|
|
61
|
+
}
|
|
62
|
+
return undefined;
|
|
63
|
+
}
|
|
64
|
+
function readQuotedValue(input, startIndex) {
|
|
65
|
+
const first = input[startIndex];
|
|
66
|
+
if (!first)
|
|
67
|
+
return '';
|
|
68
|
+
const quoted = first === '"' || first === "'";
|
|
69
|
+
if (quoted) {
|
|
70
|
+
const end = input.indexOf(first, startIndex + 1);
|
|
71
|
+
return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
|
|
72
|
+
}
|
|
73
|
+
const tail = input.slice(startIndex);
|
|
74
|
+
const stop = tail.search(/[\s/>]/);
|
|
75
|
+
return (stop === -1 ? tail : tail.slice(0, stop)).trim();
|
|
76
|
+
}
|
|
77
|
+
function extractHtmlCharset(headSnippet) {
|
|
78
|
+
const lower = headSnippet.toLowerCase();
|
|
79
|
+
const charsetToken = 'charset=';
|
|
80
|
+
const charsetIdx = lower.indexOf(charsetToken);
|
|
81
|
+
if (charsetIdx === -1)
|
|
82
|
+
return undefined;
|
|
83
|
+
const valueStart = charsetIdx + charsetToken.length;
|
|
84
|
+
const charset = readQuotedValue(headSnippet, valueStart);
|
|
85
|
+
return charset ? charset.toLowerCase() : undefined;
|
|
86
|
+
}
|
|
87
|
+
function extractXmlEncoding(headSnippet) {
|
|
88
|
+
const lower = headSnippet.toLowerCase();
|
|
89
|
+
const xmlStart = lower.indexOf('<?xml');
|
|
90
|
+
if (xmlStart === -1)
|
|
91
|
+
return undefined;
|
|
92
|
+
const xmlEnd = lower.indexOf('?>', xmlStart);
|
|
93
|
+
const declaration = xmlEnd === -1
|
|
94
|
+
? headSnippet.slice(xmlStart)
|
|
95
|
+
: headSnippet.slice(xmlStart, xmlEnd + 2);
|
|
96
|
+
const declarationLower = declaration.toLowerCase();
|
|
97
|
+
const encodingToken = 'encoding=';
|
|
98
|
+
const encodingIdx = declarationLower.indexOf(encodingToken);
|
|
99
|
+
if (encodingIdx === -1)
|
|
100
|
+
return undefined;
|
|
101
|
+
const valueStart = encodingIdx + encodingToken.length;
|
|
102
|
+
const encoding = readQuotedValue(declaration, valueStart);
|
|
103
|
+
return encoding ? encoding.toLowerCase() : undefined;
|
|
104
|
+
}
|
|
105
|
+
function detectHtmlDeclaredEncoding(buffer) {
|
|
106
|
+
const scanSize = Math.min(buffer.length, 8_192);
|
|
107
|
+
if (scanSize === 0)
|
|
108
|
+
return undefined;
|
|
109
|
+
const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
|
|
110
|
+
return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
|
|
111
|
+
}
|
|
112
|
+
export function resolveEncoding(declaredEncoding, sample) {
|
|
113
|
+
const bomEncoding = detectBomEncoding(sample);
|
|
114
|
+
if (bomEncoding)
|
|
115
|
+
return bomEncoding;
|
|
116
|
+
if (declaredEncoding)
|
|
117
|
+
return declaredEncoding;
|
|
118
|
+
return detectHtmlDeclaredEncoding(sample);
|
|
119
|
+
}
|
|
120
|
+
const BINARY_SIGNATURES = [
|
|
121
|
+
[0x25, 0x50, 0x44, 0x46],
|
|
122
|
+
[0x89, 0x50, 0x4e, 0x47],
|
|
123
|
+
[0x47, 0x49, 0x46, 0x38],
|
|
124
|
+
[0xff, 0xd8, 0xff],
|
|
125
|
+
[0x52, 0x49, 0x46, 0x46],
|
|
126
|
+
[0x42, 0x4d],
|
|
127
|
+
[0x49, 0x49, 0x2a, 0x00],
|
|
128
|
+
[0x4d, 0x4d, 0x00, 0x2a],
|
|
129
|
+
[0x00, 0x00, 0x01, 0x00],
|
|
130
|
+
[0x50, 0x4b, 0x03, 0x04],
|
|
131
|
+
[0x1f, 0x8b],
|
|
132
|
+
[0x42, 0x5a, 0x68],
|
|
133
|
+
[0x52, 0x61, 0x72, 0x21],
|
|
134
|
+
[0x37, 0x7a, 0xbc, 0xaf],
|
|
135
|
+
[0x7f, 0x45, 0x4c, 0x46],
|
|
136
|
+
[0x4d, 0x5a],
|
|
137
|
+
[0xcf, 0xfa, 0xed, 0xfe],
|
|
138
|
+
[0x00, 0x61, 0x73, 0x6d],
|
|
139
|
+
[0x1a, 0x45, 0xdf, 0xa3],
|
|
140
|
+
[0x66, 0x74, 0x79, 0x70],
|
|
141
|
+
[0x46, 0x4c, 0x56],
|
|
142
|
+
[0x49, 0x44, 0x33],
|
|
143
|
+
[0xff, 0xfb],
|
|
144
|
+
[0xff, 0xfa],
|
|
145
|
+
[0x4f, 0x67, 0x67, 0x53],
|
|
146
|
+
[0x66, 0x4c, 0x61, 0x43],
|
|
147
|
+
[0x4d, 0x54, 0x68, 0x64],
|
|
148
|
+
[0x77, 0x4f, 0x46, 0x46],
|
|
149
|
+
[0x00, 0x01, 0x00, 0x00],
|
|
150
|
+
[0x4f, 0x54, 0x54, 0x4f],
|
|
151
|
+
[0x53, 0x51, 0x4c, 0x69],
|
|
152
|
+
];
|
|
153
|
+
function hasNullByte(buffer, limit) {
|
|
154
|
+
const checkLen = Math.min(buffer.length, limit);
|
|
155
|
+
return buffer.subarray(0, checkLen).includes(0x00);
|
|
156
|
+
}
|
|
157
|
+
export function isBinaryContent(buffer, encoding) {
|
|
158
|
+
for (const signature of BINARY_SIGNATURES) {
|
|
159
|
+
if (startsWithBytes(buffer, signature))
|
|
160
|
+
return true;
|
|
161
|
+
}
|
|
162
|
+
return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
|
|
163
|
+
}
|
|
package/dist/fetch-stream.d.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import type { ReadableStream as NodeReadableStream } from 'node:stream/web';
|
|
3
|
+
export declare function toNodeReadableStream(stream: ReadableStream<Uint8Array>, url: string, stage: string): NodeReadableStream<Uint8Array>;
|
|
4
|
+
export declare function toWebReadableStream(stream: Readable, url: string, stage: string): ReadableStream<Uint8Array>;
|
|
package/dist/fetch-stream.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { FetchError } from './errors.js';
|
|
3
|
+
import { isObject } from './type-guards.js';
|
|
4
|
+
function isReadableStreamLike(value) {
|
|
5
|
+
if (!isObject(value))
|
|
6
|
+
return false;
|
|
7
|
+
return (typeof value['getReader'] === 'function' &&
|
|
8
|
+
typeof value['cancel'] === 'function' &&
|
|
9
|
+
typeof value['tee'] === 'function' &&
|
|
10
|
+
typeof value['locked'] === 'boolean');
|
|
11
|
+
}
|
|
12
|
+
function assertReadableStreamLike(stream, url, stage) {
|
|
13
|
+
if (isReadableStreamLike(stream))
|
|
14
|
+
return;
|
|
15
|
+
throw new FetchError('Invalid response stream', url, 500, {
|
|
16
|
+
reason: 'invalid_stream',
|
|
17
|
+
stage,
|
|
18
|
+
});
|
|
19
|
+
}
|
|
20
|
+
export function toNodeReadableStream(stream, url, stage) {
|
|
21
|
+
assertReadableStreamLike(stream, url, stage);
|
|
22
|
+
return stream;
|
|
23
|
+
}
|
|
24
|
+
export function toWebReadableStream(stream, url, stage) {
|
|
25
|
+
const converted = Readable.toWeb(stream);
|
|
26
|
+
assertReadableStreamLike(converted, url, stage);
|
|
27
|
+
return converted;
|
|
28
|
+
}
|
package/dist/fetch.js
CHANGED
|
@@ -10,6 +10,8 @@ import { finished, pipeline } from 'node:stream/promises';
|
|
|
10
10
|
import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
|
|
11
11
|
import { config } from './config.js';
|
|
12
12
|
import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
|
|
13
|
+
import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
|
|
14
|
+
import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
|
|
13
15
|
import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
|
|
14
16
|
import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
|
|
15
17
|
import { isError, isObject } from './type-guards.js';
|
|
@@ -26,23 +28,6 @@ const defaultRedactor = {
|
|
|
26
28
|
redact: redactUrl,
|
|
27
29
|
};
|
|
28
30
|
const defaultFetch = (input, init) => globalThis.fetch(input, init);
|
|
29
|
-
function assertReadableStreamLike(stream, url, stage) {
|
|
30
|
-
if (isObject(stream) && typeof stream['getReader'] === 'function')
|
|
31
|
-
return;
|
|
32
|
-
throw new FetchError('Invalid response stream', url, 500, {
|
|
33
|
-
reason: 'invalid_stream',
|
|
34
|
-
stage,
|
|
35
|
-
});
|
|
36
|
-
}
|
|
37
|
-
function toNodeReadableStream(stream, url, stage) {
|
|
38
|
-
assertReadableStreamLike(stream, url, stage);
|
|
39
|
-
return stream;
|
|
40
|
-
}
|
|
41
|
-
function toWebReadableStream(stream, url, stage) {
|
|
42
|
-
const converted = Readable.toWeb(stream);
|
|
43
|
-
assertReadableStreamLike(converted, url, stage);
|
|
44
|
-
return converted;
|
|
45
|
-
}
|
|
46
31
|
class IpBlocker {
|
|
47
32
|
security;
|
|
48
33
|
blockList = createDefaultBlockList();
|
|
@@ -871,168 +856,6 @@ class RedirectFollower {
|
|
|
871
856
|
}
|
|
872
857
|
}
|
|
873
858
|
}
|
|
874
|
-
function getCharsetFromContentType(contentType) {
|
|
875
|
-
if (!contentType)
|
|
876
|
-
return undefined;
|
|
877
|
-
const match = /charset=([^;]+)/i.exec(contentType);
|
|
878
|
-
const charsetGroup = match?.[1];
|
|
879
|
-
if (!charsetGroup)
|
|
880
|
-
return undefined;
|
|
881
|
-
let charset = charsetGroup.trim();
|
|
882
|
-
if (charset.startsWith('"') && charset.endsWith('"')) {
|
|
883
|
-
charset = charset.slice(1, -1);
|
|
884
|
-
}
|
|
885
|
-
return charset.trim();
|
|
886
|
-
}
|
|
887
|
-
function createDecoder(encoding) {
|
|
888
|
-
if (!encoding)
|
|
889
|
-
return new TextDecoder('utf-8');
|
|
890
|
-
try {
|
|
891
|
-
return new TextDecoder(encoding);
|
|
892
|
-
}
|
|
893
|
-
catch {
|
|
894
|
-
return new TextDecoder('utf-8');
|
|
895
|
-
}
|
|
896
|
-
}
|
|
897
|
-
function decodeBuffer(buffer, encoding) {
|
|
898
|
-
return createDecoder(encoding).decode(buffer);
|
|
899
|
-
}
|
|
900
|
-
function normalizeEncodingLabel(encoding) {
|
|
901
|
-
return encoding?.trim().toLowerCase() ?? '';
|
|
902
|
-
}
|
|
903
|
-
function isUnicodeWideEncoding(encoding) {
|
|
904
|
-
const normalized = normalizeEncodingLabel(encoding);
|
|
905
|
-
return (normalized.startsWith('utf-16') ||
|
|
906
|
-
normalized.startsWith('utf-32') ||
|
|
907
|
-
normalized === 'ucs-2' ||
|
|
908
|
-
normalized === 'unicodefffe' ||
|
|
909
|
-
normalized === 'unicodefeff');
|
|
910
|
-
}
|
|
911
|
-
const BOM_SIGNATURES = [
|
|
912
|
-
// 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
|
|
913
|
-
{ bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
|
|
914
|
-
{ bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
|
|
915
|
-
{ bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
|
|
916
|
-
{ bytes: [0xff, 0xfe], encoding: 'utf-16le' },
|
|
917
|
-
{ bytes: [0xfe, 0xff], encoding: 'utf-16be' },
|
|
918
|
-
];
|
|
919
|
-
function detectBomEncoding(buffer) {
|
|
920
|
-
for (const { bytes, encoding } of BOM_SIGNATURES) {
|
|
921
|
-
if (startsWithBytes(buffer, bytes))
|
|
922
|
-
return encoding;
|
|
923
|
-
}
|
|
924
|
-
return undefined;
|
|
925
|
-
}
|
|
926
|
-
function readQuotedValue(input, startIndex) {
|
|
927
|
-
const first = input[startIndex];
|
|
928
|
-
if (!first)
|
|
929
|
-
return '';
|
|
930
|
-
const quoted = first === '"' || first === "'";
|
|
931
|
-
if (quoted) {
|
|
932
|
-
const end = input.indexOf(first, startIndex + 1);
|
|
933
|
-
return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
|
|
934
|
-
}
|
|
935
|
-
const tail = input.slice(startIndex);
|
|
936
|
-
const stop = tail.search(/[\s/>]/);
|
|
937
|
-
return (stop === -1 ? tail : tail.slice(0, stop)).trim();
|
|
938
|
-
}
|
|
939
|
-
function extractHtmlCharset(headSnippet) {
|
|
940
|
-
const lower = headSnippet.toLowerCase();
|
|
941
|
-
const charsetToken = 'charset=';
|
|
942
|
-
const charsetIdx = lower.indexOf(charsetToken);
|
|
943
|
-
if (charsetIdx === -1)
|
|
944
|
-
return undefined;
|
|
945
|
-
const valueStart = charsetIdx + charsetToken.length;
|
|
946
|
-
const charset = readQuotedValue(headSnippet, valueStart);
|
|
947
|
-
return charset ? charset.toLowerCase() : undefined;
|
|
948
|
-
}
|
|
949
|
-
function extractXmlEncoding(headSnippet) {
|
|
950
|
-
const lower = headSnippet.toLowerCase();
|
|
951
|
-
const xmlStart = lower.indexOf('<?xml');
|
|
952
|
-
if (xmlStart === -1)
|
|
953
|
-
return undefined;
|
|
954
|
-
const xmlEnd = lower.indexOf('?>', xmlStart);
|
|
955
|
-
const declaration = xmlEnd === -1
|
|
956
|
-
? headSnippet.slice(xmlStart)
|
|
957
|
-
: headSnippet.slice(xmlStart, xmlEnd + 2);
|
|
958
|
-
const declarationLower = declaration.toLowerCase();
|
|
959
|
-
const encodingToken = 'encoding=';
|
|
960
|
-
const encodingIdx = declarationLower.indexOf(encodingToken);
|
|
961
|
-
if (encodingIdx === -1)
|
|
962
|
-
return undefined;
|
|
963
|
-
const valueStart = encodingIdx + encodingToken.length;
|
|
964
|
-
const encoding = readQuotedValue(declaration, valueStart);
|
|
965
|
-
return encoding ? encoding.toLowerCase() : undefined;
|
|
966
|
-
}
|
|
967
|
-
function detectHtmlDeclaredEncoding(buffer) {
|
|
968
|
-
const scanSize = Math.min(buffer.length, 8_192);
|
|
969
|
-
if (scanSize === 0)
|
|
970
|
-
return undefined;
|
|
971
|
-
const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
|
|
972
|
-
return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
|
|
973
|
-
}
|
|
974
|
-
function resolveEncoding(declaredEncoding, sample) {
|
|
975
|
-
const bomEncoding = detectBomEncoding(sample);
|
|
976
|
-
if (bomEncoding)
|
|
977
|
-
return bomEncoding;
|
|
978
|
-
if (declaredEncoding)
|
|
979
|
-
return declaredEncoding;
|
|
980
|
-
return detectHtmlDeclaredEncoding(sample);
|
|
981
|
-
}
|
|
982
|
-
const BINARY_SIGNATURES = [
|
|
983
|
-
[0x25, 0x50, 0x44, 0x46],
|
|
984
|
-
[0x89, 0x50, 0x4e, 0x47],
|
|
985
|
-
[0x47, 0x49, 0x46, 0x38],
|
|
986
|
-
[0xff, 0xd8, 0xff],
|
|
987
|
-
[0x52, 0x49, 0x46, 0x46],
|
|
988
|
-
[0x42, 0x4d],
|
|
989
|
-
[0x49, 0x49, 0x2a, 0x00],
|
|
990
|
-
[0x4d, 0x4d, 0x00, 0x2a],
|
|
991
|
-
[0x00, 0x00, 0x01, 0x00],
|
|
992
|
-
[0x50, 0x4b, 0x03, 0x04],
|
|
993
|
-
[0x1f, 0x8b],
|
|
994
|
-
[0x42, 0x5a, 0x68],
|
|
995
|
-
[0x52, 0x61, 0x72, 0x21],
|
|
996
|
-
[0x37, 0x7a, 0xbc, 0xaf],
|
|
997
|
-
[0x7f, 0x45, 0x4c, 0x46],
|
|
998
|
-
[0x4d, 0x5a],
|
|
999
|
-
[0xcf, 0xfa, 0xed, 0xfe],
|
|
1000
|
-
[0x00, 0x61, 0x73, 0x6d],
|
|
1001
|
-
[0x1a, 0x45, 0xdf, 0xa3],
|
|
1002
|
-
[0x66, 0x74, 0x79, 0x70],
|
|
1003
|
-
[0x46, 0x4c, 0x56],
|
|
1004
|
-
[0x49, 0x44, 0x33],
|
|
1005
|
-
[0xff, 0xfb],
|
|
1006
|
-
[0xff, 0xfa],
|
|
1007
|
-
[0x4f, 0x67, 0x67, 0x53],
|
|
1008
|
-
[0x66, 0x4c, 0x61, 0x43],
|
|
1009
|
-
[0x4d, 0x54, 0x68, 0x64],
|
|
1010
|
-
[0x77, 0x4f, 0x46, 0x46],
|
|
1011
|
-
[0x00, 0x01, 0x00, 0x00],
|
|
1012
|
-
[0x4f, 0x54, 0x54, 0x4f],
|
|
1013
|
-
[0x53, 0x51, 0x4c, 0x69],
|
|
1014
|
-
];
|
|
1015
|
-
function startsWithBytes(buffer, signature) {
|
|
1016
|
-
const sigLen = signature.length;
|
|
1017
|
-
if (buffer.length < sigLen)
|
|
1018
|
-
return false;
|
|
1019
|
-
for (let i = 0; i < sigLen; i += 1) {
|
|
1020
|
-
if (buffer[i] !== signature[i])
|
|
1021
|
-
return false;
|
|
1022
|
-
}
|
|
1023
|
-
return true;
|
|
1024
|
-
}
|
|
1025
|
-
function hasNullByte(buffer, limit) {
|
|
1026
|
-
const checkLen = Math.min(buffer.length, limit);
|
|
1027
|
-
return buffer.subarray(0, checkLen).includes(0x00);
|
|
1028
|
-
}
|
|
1029
|
-
function isBinaryContent(buffer, encoding) {
|
|
1030
|
-
for (const signature of BINARY_SIGNATURES) {
|
|
1031
|
-
if (startsWithBytes(buffer, signature))
|
|
1032
|
-
return true;
|
|
1033
|
-
}
|
|
1034
|
-
return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
|
|
1035
|
-
}
|
|
1036
859
|
class ResponseTextReader {
|
|
1037
860
|
async read(response, url, maxBytes, signal, encoding) {
|
|
1038
861
|
const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/fetch-url-mcp",
|
|
3
|
-
"version": "1.1.2",
|
|
3
|
+
"version": "1.1.3",
|
|
4
4
|
"mcpName": "io.github.j0hanz/fetch-url-mcp",
|
|
5
5
|
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
|
|
6
6
|
"type": "module",
|
|
@@ -52,6 +52,7 @@
|
|
|
52
52
|
"start": "node dist/index.js",
|
|
53
53
|
"format": "prettier --write .",
|
|
54
54
|
"type-check": "node scripts/tasks.mjs type-check",
|
|
55
|
+
"type-check:tests": "node scripts/tasks.mjs type-check:tests",
|
|
55
56
|
"type-check:diagnostics": "tsc --noEmit --extendedDiagnostics",
|
|
56
57
|
"type-check:trace": "node -e \"require('fs').rmSync('.ts-trace',{recursive:true,force:true})\" && tsc --noEmit --generateTrace .ts-trace",
|
|
57
58
|
"lint": "eslint .",
|