@j0hanz/fetch-url-mcp 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/README.md +24 -21
  2. package/dist/cli.d.ts +3 -3
  3. package/dist/cli.js +15 -8
  4. package/dist/http/auth.d.ts +6 -6
  5. package/dist/http/auth.js +78 -23
  6. package/dist/http/health.d.ts +1 -2
  7. package/dist/http/health.js +7 -18
  8. package/dist/http/helpers.d.ts +3 -11
  9. package/dist/http/helpers.js +28 -26
  10. package/dist/http/native.d.ts +0 -1
  11. package/dist/http/native.js +63 -41
  12. package/dist/http/rate-limit.d.ts +2 -2
  13. package/dist/http/rate-limit.js +11 -16
  14. package/dist/index.d.ts +0 -1
  15. package/dist/index.js +17 -20
  16. package/dist/{markdown-cleanup.d.ts → lib/content.d.ts} +4 -2
  17. package/dist/lib/content.js +1356 -0
  18. package/dist/lib/core.d.ts +253 -0
  19. package/dist/lib/core.js +1228 -0
  20. package/dist/{tool-pipeline.d.ts → lib/fetch-pipeline.d.ts} +1 -3
  21. package/dist/{tool-pipeline.js → lib/fetch-pipeline.js} +18 -44
  22. package/dist/{fetch.d.ts → lib/http.d.ts} +7 -9
  23. package/dist/{fetch.js → lib/http.js} +721 -1004
  24. package/dist/lib/mcp-tools.d.ts +28 -0
  25. package/dist/lib/mcp-tools.js +107 -0
  26. package/dist/{tool-progress.d.ts → lib/progress.d.ts} +0 -2
  27. package/dist/{tool-progress.js → lib/progress.js} +9 -14
  28. package/dist/lib/task-handlers.d.ts +5 -0
  29. package/dist/{mcp.js → lib/task-handlers.js} +95 -31
  30. package/dist/lib/url.d.ts +70 -0
  31. package/dist/lib/url.js +686 -0
  32. package/dist/lib/utils.d.ts +58 -0
  33. package/dist/lib/utils.js +304 -0
  34. package/dist/{prompts.d.ts → prompts/index.d.ts} +0 -1
  35. package/dist/{prompts.js → prompts/index.js} +1 -2
  36. package/dist/{resources.d.ts → resources/index.d.ts} +0 -1
  37. package/dist/{resources.js → resources/index.js} +87 -64
  38. package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -1
  39. package/dist/{instructions.js → resources/instructions.js} +5 -3
  40. package/dist/schemas/inputs.d.ts +7 -0
  41. package/dist/schemas/inputs.js +24 -0
  42. package/dist/schemas/outputs.d.ts +23 -0
  43. package/dist/schemas/outputs.js +77 -0
  44. package/dist/server.d.ts +0 -1
  45. package/dist/server.js +26 -25
  46. package/dist/tasks/execution.d.ts +0 -1
  47. package/dist/tasks/execution.js +106 -70
  48. package/dist/tasks/manager.d.ts +11 -3
  49. package/dist/tasks/manager.js +97 -73
  50. package/dist/tasks/owner.d.ts +3 -3
  51. package/dist/tasks/owner.js +2 -2
  52. package/dist/tasks/tool-registry.d.ts +11 -0
  53. package/dist/tasks/tool-registry.js +13 -0
  54. package/dist/tools/fetch-url.d.ts +28 -0
  55. package/dist/{tools.js → tools/fetch-url.js} +95 -147
  56. package/dist/tools/index.d.ts +2 -0
  57. package/dist/tools/index.js +4 -0
  58. package/dist/transform/html-translators.d.ts +1 -0
  59. package/dist/transform/html-translators.js +454 -0
  60. package/dist/transform/metadata.d.ts +4 -0
  61. package/dist/transform/metadata.js +183 -0
  62. package/dist/transform/transform.d.ts +0 -1
  63. package/dist/transform/transform.js +44 -679
  64. package/dist/transform/types.d.ts +9 -12
  65. package/dist/transform/types.js +0 -1
  66. package/dist/transform/worker-pool.d.ts +0 -1
  67. package/dist/transform/worker-pool.js +7 -16
  68. package/dist/transform/workers/shared.d.ts +7 -0
  69. package/dist/transform/workers/shared.js +130 -0
  70. package/dist/transform/workers/transform-child.d.ts +0 -1
  71. package/dist/transform/workers/transform-child.js +5 -135
  72. package/dist/transform/workers/transform-worker.d.ts +0 -1
  73. package/dist/transform/workers/transform-worker.js +7 -128
  74. package/package.json +11 -7
  75. package/dist/cache.d.ts +0 -54
  76. package/dist/cache.d.ts.map +0 -1
  77. package/dist/cache.js +0 -261
  78. package/dist/cache.js.map +0 -1
  79. package/dist/cli.d.ts.map +0 -1
  80. package/dist/cli.js.map +0 -1
  81. package/dist/config.d.ts +0 -141
  82. package/dist/config.d.ts.map +0 -1
  83. package/dist/config.js +0 -473
  84. package/dist/config.js.map +0 -1
  85. package/dist/crypto.d.ts +0 -4
  86. package/dist/crypto.d.ts.map +0 -1
  87. package/dist/crypto.js +0 -56
  88. package/dist/crypto.js.map +0 -1
  89. package/dist/dom-noise-removal.d.ts +0 -2
  90. package/dist/dom-noise-removal.d.ts.map +0 -1
  91. package/dist/dom-noise-removal.js +0 -494
  92. package/dist/dom-noise-removal.js.map +0 -1
  93. package/dist/download.d.ts +0 -4
  94. package/dist/download.d.ts.map +0 -1
  95. package/dist/download.js +0 -106
  96. package/dist/download.js.map +0 -1
  97. package/dist/errors.d.ts +0 -11
  98. package/dist/errors.d.ts.map +0 -1
  99. package/dist/errors.js +0 -65
  100. package/dist/errors.js.map +0 -1
  101. package/dist/examples/mcp-fetch-url-client.js +0 -329
  102. package/dist/examples/mcp-fetch-url-client.js.map +0 -1
  103. package/dist/fetch-content.d.ts +0 -5
  104. package/dist/fetch-content.d.ts.map +0 -1
  105. package/dist/fetch-content.js +0 -164
  106. package/dist/fetch-content.js.map +0 -1
  107. package/dist/fetch-stream.d.ts +0 -5
  108. package/dist/fetch-stream.d.ts.map +0 -1
  109. package/dist/fetch-stream.js +0 -29
  110. package/dist/fetch-stream.js.map +0 -1
  111. package/dist/fetch.d.ts.map +0 -1
  112. package/dist/fetch.js.map +0 -1
  113. package/dist/host-normalization.d.ts +0 -2
  114. package/dist/host-normalization.d.ts.map +0 -1
  115. package/dist/host-normalization.js +0 -91
  116. package/dist/host-normalization.js.map +0 -1
  117. package/dist/http/auth.d.ts.map +0 -1
  118. package/dist/http/auth.js.map +0 -1
  119. package/dist/http/health.d.ts.map +0 -1
  120. package/dist/http/health.js.map +0 -1
  121. package/dist/http/helpers.d.ts.map +0 -1
  122. package/dist/http/helpers.js.map +0 -1
  123. package/dist/http/native.d.ts.map +0 -1
  124. package/dist/http/native.js.map +0 -1
  125. package/dist/http/rate-limit.d.ts.map +0 -1
  126. package/dist/http/rate-limit.js.map +0 -1
  127. package/dist/index.d.ts.map +0 -1
  128. package/dist/index.js.map +0 -1
  129. package/dist/instructions.d.ts.map +0 -1
  130. package/dist/instructions.js.map +0 -1
  131. package/dist/ip-blocklist.d.ts +0 -9
  132. package/dist/ip-blocklist.d.ts.map +0 -1
  133. package/dist/ip-blocklist.js +0 -79
  134. package/dist/ip-blocklist.js.map +0 -1
  135. package/dist/json.d.ts +0 -2
  136. package/dist/json.d.ts.map +0 -1
  137. package/dist/json.js +0 -45
  138. package/dist/json.js.map +0 -1
  139. package/dist/language-detection.d.ts +0 -3
  140. package/dist/language-detection.d.ts.map +0 -1
  141. package/dist/language-detection.js +0 -355
  142. package/dist/language-detection.js.map +0 -1
  143. package/dist/markdown-cleanup.d.ts.map +0 -1
  144. package/dist/markdown-cleanup.js +0 -534
  145. package/dist/markdown-cleanup.js.map +0 -1
  146. package/dist/mcp-validator.d.ts +0 -17
  147. package/dist/mcp-validator.d.ts.map +0 -1
  148. package/dist/mcp-validator.js +0 -45
  149. package/dist/mcp-validator.js.map +0 -1
  150. package/dist/mcp.d.ts +0 -4
  151. package/dist/mcp.d.ts.map +0 -1
  152. package/dist/mcp.js.map +0 -1
  153. package/dist/observability.d.ts +0 -23
  154. package/dist/observability.d.ts.map +0 -1
  155. package/dist/observability.js +0 -238
  156. package/dist/observability.js.map +0 -1
  157. package/dist/prompts.d.ts.map +0 -1
  158. package/dist/prompts.js.map +0 -1
  159. package/dist/resources.d.ts.map +0 -1
  160. package/dist/resources.js.map +0 -1
  161. package/dist/server-tuning.d.ts +0 -15
  162. package/dist/server-tuning.d.ts.map +0 -1
  163. package/dist/server-tuning.js +0 -49
  164. package/dist/server-tuning.js.map +0 -1
  165. package/dist/server.d.ts.map +0 -1
  166. package/dist/server.js.map +0 -1
  167. package/dist/session.d.ts +0 -42
  168. package/dist/session.d.ts.map +0 -1
  169. package/dist/session.js +0 -255
  170. package/dist/session.js.map +0 -1
  171. package/dist/tasks/execution.d.ts.map +0 -1
  172. package/dist/tasks/execution.js.map +0 -1
  173. package/dist/tasks/manager.d.ts.map +0 -1
  174. package/dist/tasks/manager.js.map +0 -1
  175. package/dist/tasks/owner.d.ts.map +0 -1
  176. package/dist/tasks/owner.js.map +0 -1
  177. package/dist/timer-utils.d.ts +0 -6
  178. package/dist/timer-utils.d.ts.map +0 -1
  179. package/dist/timer-utils.js +0 -27
  180. package/dist/timer-utils.js.map +0 -1
  181. package/dist/tool-errors.d.ts +0 -12
  182. package/dist/tool-errors.d.ts.map +0 -1
  183. package/dist/tool-errors.js +0 -55
  184. package/dist/tool-errors.js.map +0 -1
  185. package/dist/tool-pipeline.d.ts.map +0 -1
  186. package/dist/tool-pipeline.js.map +0 -1
  187. package/dist/tool-progress.d.ts.map +0 -1
  188. package/dist/tool-progress.js.map +0 -1
  189. package/dist/tools.d.ts +0 -54
  190. package/dist/tools.d.ts.map +0 -1
  191. package/dist/tools.js.map +0 -1
  192. package/dist/transform/transform.d.ts.map +0 -1
  193. package/dist/transform/transform.js.map +0 -1
  194. package/dist/transform/types.d.ts.map +0 -1
  195. package/dist/transform/types.js.map +0 -1
  196. package/dist/transform/worker-pool.d.ts.map +0 -1
  197. package/dist/transform/worker-pool.js.map +0 -1
  198. package/dist/transform/workers/transform-child.d.ts.map +0 -1
  199. package/dist/transform/workers/transform-child.js.map +0 -1
  200. package/dist/transform/workers/transform-worker.d.ts.map +0 -1
  201. package/dist/transform/workers/transform-worker.js.map +0 -1
  202. package/dist/type-guards.d.ts +0 -16
  203. package/dist/type-guards.d.ts.map +0 -1
  204. package/dist/type-guards.js +0 -13
  205. package/dist/type-guards.js.map +0 -1
@@ -1,551 +1,277 @@
1
1
  import { Buffer } from 'node:buffer';
2
2
  import { randomUUID } from 'node:crypto';
3
3
  import diagnosticsChannel from 'node:diagnostics_channel';
4
- import dns from 'node:dns';
4
+ import {} from 'node:http';
5
5
  import { isIP } from 'node:net';
6
+ import { posix as pathPosix } from 'node:path';
6
7
  import { performance } from 'node:perf_hooks';
7
8
  import { PassThrough, Readable, Transform } from 'node:stream';
8
9
  import { buffer as consumeBuffer } from 'node:stream/consumers';
9
10
  import { finished, pipeline } from 'node:stream/promises';
11
+ import {} from 'node:stream/web';
12
+ import tls from 'node:tls';
10
13
  import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
11
14
  import { Agent } from 'undici';
12
- import { config } from './config.js';
13
- import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
14
- import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
15
- import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
16
- import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
17
- import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
18
- import { isError, isObject } from './type-guards.js';
19
- const defaultLogger = {
20
- debug: logDebug,
21
- warn: logWarn,
22
- error: logError,
15
+ import { z } from 'zod';
16
+ import { get as cacheGet, config, getOperationId, getRequestId, logDebug, logError, logWarn, parseCachedPayload, redactUrl, resolveCachedPayloadContent, } from './core.js';
17
+ import { BLOCKED_HOST_SUFFIXES, createDnsPreflight, IpBlocker, RawUrlTransformer, SafeDnsResolver, UrlNormalizer, VALIDATION_ERROR_CODE, } from './url.js';
18
+ import { createErrorWithCode, FetchError, isError, isObject, isSystemError, toError, } from './utils.js';
19
+ const FILENAME_RULES = {
20
+ MAX_LEN: 200,
21
+ UNSAFE_CHARS: /[<>:"/\\|?*\p{C}]/gu,
22
+ WHITESPACE: /\s+/g,
23
+ EXTENSIONS: /\.(html?|php|aspx?|jsp)$/i,
23
24
  };
24
- const defaultContext = {
25
- getRequestId,
26
- getOperationId,
27
- };
28
- const defaultRedactor = {
29
- redact: redactUrl,
30
- };
31
- const defaultFetch = (input, init) => globalThis.fetch(input, init);
32
- function isLocalFetchAllowed() {
33
- return process.env['ALLOW_LOCAL_FETCH'] === 'true';
34
- }
35
- class IpBlocker {
36
- security;
37
- blockList = createDefaultBlockList();
38
- constructor(security) {
39
- this.security = security;
40
- }
41
- isBlockedIp(candidate) {
42
- const normalized = candidate.trim().toLowerCase();
43
- if (isCloudMetadataHost(normalized))
44
- return true;
45
- if (isLocalFetchAllowed())
46
- return false;
47
- if (!normalized)
48
- return false;
49
- if (this.security.blockedHosts.has(normalized))
50
- return true;
51
- const normalizedIp = normalizeIpForBlockList(normalized);
52
- return normalizedIp
53
- ? this.blockList.check(normalizedIp.ip, normalizedIp.family)
54
- : false;
55
- }
56
- }
57
- const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
58
- function createValidationError(message) {
59
- return createErrorWithCode(message, VALIDATION_ERROR_CODE);
60
- }
61
- const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
62
- // This list is not exhaustive but covers the most common cloud metadata endpoints.
63
- const CLOUD_METADATA_HOSTS = new Set([
64
- '169.254.169.254', // AWS / GCP / Azure
65
- 'metadata.google.internal', // GCP
66
- '100.100.100.200', // Alibaba Cloud
67
- 'fd00:ec2::254', // AWS IPv6
68
- ]);
69
- function isCloudMetadataHost(hostname) {
70
- const lowered = hostname.toLowerCase();
71
- if (CLOUD_METADATA_HOSTS.has(lowered))
72
- return true;
73
- const normalized = normalizeIpForBlockList(lowered);
74
- return normalized !== null && CLOUD_METADATA_HOSTS.has(normalized.ip);
75
- }
76
- class UrlNormalizer {
77
- constants;
78
- security;
79
- ipBlocker;
80
- blockedHostSuffixes;
81
- constructor(constants, security, ipBlocker, blockedHostSuffixes) {
82
- this.constants = constants;
83
- this.security = security;
84
- this.ipBlocker = ipBlocker;
85
- this.blockedHostSuffixes = blockedHostSuffixes;
86
- }
87
- normalize(urlString) {
88
- const trimmedUrl = this.requireTrimmedUrl(urlString);
89
- if (trimmedUrl.length > this.constants.maxUrlLength) {
90
- throw createValidationError(`URL exceeds maximum length of ${this.constants.maxUrlLength} characters`);
91
- }
92
- let url;
93
- try {
94
- url = new URL(trimmedUrl);
95
- }
96
- catch {
97
- throw createValidationError('Invalid URL format');
98
- }
99
- if (url.protocol !== 'http:' && url.protocol !== 'https:') {
100
- throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
101
- }
102
- if (url.username || url.password) {
103
- throw createValidationError('URLs with embedded credentials are not allowed');
104
- }
105
- const hostname = this.normalizeHostname(url);
106
- this.assertHostnameAllowed(hostname);
107
- url.hostname = hostname;
108
- return { normalizedUrl: url.href, hostname };
109
- }
110
- validateAndNormalize(urlString) {
111
- return this.normalize(urlString).normalizedUrl;
112
- }
113
- requireTrimmedUrl(urlString) {
114
- if (!urlString || typeof urlString !== 'string') {
115
- throw createValidationError('URL is required');
116
- }
117
- const trimmed = urlString.trim();
118
- if (!trimmed)
119
- throw createValidationError('URL cannot be empty');
120
- return trimmed;
121
- }
122
- normalizeHostname(url) {
123
- const hostname = url.hostname.toLowerCase().replace(/\.+$/, '');
124
- if (!hostname) {
125
- throw createValidationError('URL must have a valid hostname');
126
- }
127
- return hostname;
128
- }
129
- assertHostnameAllowed(hostname) {
130
- this.assertNotBlockedHost(hostname);
131
- this.assertNotBlockedIp(hostname);
132
- this.assertNotBlockedHostnameSuffix(hostname);
133
- }
134
- assertNotBlockedHost(hostname) {
135
- if (isCloudMetadataHost(hostname)) {
136
- throw createValidationError(`Blocked host: ${hostname}. Cloud metadata endpoints are not allowed`);
137
- }
138
- if (isLocalFetchAllowed())
139
- return;
140
- if (!this.security.blockedHosts.has(hostname))
141
- return;
142
- throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
143
- }
144
- assertNotBlockedIp(hostname) {
145
- if (isCloudMetadataHost(hostname)) {
146
- throw createValidationError(`Blocked IP range: ${hostname}. Cloud metadata endpoints are not allowed`);
147
- }
148
- if (isLocalFetchAllowed())
149
- return;
150
- if (!this.ipBlocker.isBlockedIp(hostname))
151
- return;
152
- throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
153
- }
154
- assertNotBlockedHostnameSuffix(hostname) {
155
- const blocked = this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
156
- if (!blocked)
157
- return;
158
- throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
159
- }
25
+ function sanitizeString(input) {
26
+ return input
27
+ .toLowerCase()
28
+ .replace(FILENAME_RULES.UNSAFE_CHARS, '')
29
+ .replace(FILENAME_RULES.WHITESPACE, '-')
30
+ .replace(/-+/g, '-')
31
+ .replace(/(?:^-|-$)/g, '');
160
32
  }
161
- function getPatternGroup(groups, key) {
162
- const value = groups[key];
163
- if (value === undefined)
33
+ function resolveUrlFilenameCandidate(url) {
34
+ const parsed = new URL(url);
35
+ if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
164
36
  return null;
165
- if (value === '')
37
+ const basename = pathPosix.basename(parsed.pathname);
38
+ if (!basename || basename === 'index')
166
39
  return null;
167
- return value;
40
+ const cleaned = basename.replace(FILENAME_RULES.EXTENSIONS, '');
41
+ const sanitized = sanitizeString(cleaned);
42
+ if (sanitized === 'index')
43
+ return null;
44
+ return sanitized || null;
168
45
  }
169
- const GITHUB_BLOB_PATTERN = new URLPattern({
170
- protocol: 'http{s}?',
171
- hostname: '{:sub.}?github.com',
172
- pathname: '/:owner/:repo/blob/:branch/:path+',
173
- });
174
- const GITHUB_GIST_PATTERN = new URLPattern({
175
- protocol: 'http{s}?',
176
- hostname: 'gist.github.com',
177
- pathname: '/:user/:gistId',
178
- });
179
- const GITHUB_GIST_RAW_PATTERN = new URLPattern({
180
- protocol: 'http{s}?',
181
- hostname: 'gist.github.com',
182
- pathname: '/:user/:gistId/raw/:filePath+',
183
- });
184
- const GITLAB_BLOB_PATTERNS = [
185
- new URLPattern({
186
- protocol: 'http{s}?',
187
- hostname: 'gitlab.com',
188
- pathname: '/:base+/-/blob/:branch/:path+',
189
- }),
190
- new URLPattern({
191
- protocol: 'http{s}?',
192
- hostname: '*:sub.gitlab.com',
193
- pathname: '/:base+/-/blob/:branch/:path+',
194
- }),
195
- ];
196
- const BITBUCKET_SRC_PATTERN = new URLPattern({
197
- protocol: 'http{s}?',
198
- hostname: '{:sub.}?bitbucket.org',
199
- pathname: '/:owner/:repo/src/:branch/:path+',
200
- });
201
- const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
202
- const RAW_TEXT_EXTENSIONS = new Set([
203
- '.md',
204
- '.markdown',
205
- '.txt',
206
- '.json',
207
- '.yaml',
208
- '.yml',
209
- '.toml',
210
- '.xml',
211
- '.csv',
212
- '.rst',
213
- '.adoc',
214
- '.org',
215
- ]);
216
- class RawUrlTransformer {
217
- logger;
218
- constructor(logger) {
219
- this.logger = logger;
220
- }
221
- transformToRawUrl(url) {
222
- if (!url)
223
- return { url, transformed: false };
224
- if (this.isRawUrl(url))
225
- return { url, transformed: false };
226
- let base;
227
- let hash;
228
- let parsed;
229
- try {
230
- parsed = new URL(url);
231
- base = parsed.origin + parsed.pathname;
232
- ({ hash } = parsed);
233
- }
234
- catch {
235
- ({ base, hash } = this.splitParams(url));
236
- }
237
- const match = this.tryTransformWithUrl(base, hash, parsed);
238
- if (!match)
239
- return { url, transformed: false };
240
- this.logger.debug('URL transformed to raw content URL', {
241
- platform: match.platform,
242
- original: url.substring(0, 100),
243
- transformed: match.url.substring(0, 100),
244
- });
245
- return { url: match.url, transformed: true, platform: match.platform };
246
- }
247
- isRawTextContentUrl(urlString) {
248
- if (!urlString)
249
- return false;
250
- if (this.isRawUrl(urlString))
251
- return true;
252
- try {
253
- const url = new URL(urlString);
254
- const pathname = url.pathname.toLowerCase();
255
- const lastDot = pathname.lastIndexOf('.');
256
- if (lastDot === -1)
257
- return false;
258
- return RAW_TEXT_EXTENSIONS.has(pathname.slice(lastDot));
259
- }
260
- catch {
261
- const { base } = this.splitParams(urlString);
262
- const lowerBase = base.toLowerCase();
263
- const lastDot = lowerBase.lastIndexOf('.');
264
- if (lastDot === -1)
265
- return false;
266
- return RAW_TEXT_EXTENSIONS.has(lowerBase.slice(lastDot));
267
- }
268
- }
269
- isRawUrl(url) {
270
- const lower = url.toLowerCase();
271
- return (lower.includes('raw.githubusercontent.com') ||
272
- lower.includes('gist.githubusercontent.com') ||
273
- lower.includes('/-/raw/') ||
274
- BITBUCKET_RAW_RE.test(lower));
275
- }
276
- splitParams(urlString) {
277
- const hashIndex = urlString.indexOf('#');
278
- const queryIndex = urlString.indexOf('?');
279
- const endIndex = Math.min(queryIndex === -1 ? urlString.length : queryIndex, hashIndex === -1 ? urlString.length : hashIndex);
280
- const hash = hashIndex !== -1 ? urlString.slice(hashIndex) : '';
281
- return { base: urlString.slice(0, endIndex), hash };
282
- }
283
- tryTransformWithUrl(base, hash, preParsed) {
284
- let parsed = preParsed ?? null;
285
- if (!parsed) {
286
- try {
287
- parsed = new URL(base);
288
- }
289
- catch {
290
- // Ignore invalid URLs
291
- }
292
- }
293
- if (!parsed)
294
- return null;
295
- if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
296
- return null;
297
- const gist = this.transformGithubGist(base, hash);
298
- if (gist)
299
- return gist;
300
- const github = this.transformGithubBlob(base);
301
- if (github)
302
- return github;
303
- const gitlab = this.transformGitLab(base, parsed.origin);
304
- if (gitlab)
305
- return gitlab;
306
- const bitbucket = this.transformBitbucket(base, parsed.origin);
307
- if (bitbucket)
308
- return bitbucket;
46
+ function truncateFilenameBase(name, extension) {
47
+ const maxBase = FILENAME_RULES.MAX_LEN - extension.length;
48
+ return name.length > maxBase ? name.substring(0, maxBase) : name;
49
+ }
50
+ function resolveTitleFilenameCandidate(title) {
51
+ if (!title)
309
52
  return null;
53
+ return sanitizeString(title) || null;
54
+ }
55
+ function resolveFilenameBase(url, title, hashFallback) {
56
+ try {
57
+ const fromUrl = resolveUrlFilenameCandidate(url);
58
+ if (fromUrl)
59
+ return fromUrl;
310
60
  }
311
- transformGithubBlob(url) {
312
- const match = GITHUB_BLOB_PATTERN.exec(url);
313
- if (!match)
314
- return null;
315
- const groups = match.pathname.groups;
316
- const owner = getPatternGroup(groups, 'owner');
317
- const repo = getPatternGroup(groups, 'repo');
318
- const branch = getPatternGroup(groups, 'branch');
319
- const path = getPatternGroup(groups, 'path');
320
- if (!owner || !repo || !branch || !path)
321
- return null;
322
- return {
323
- url: `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`,
324
- platform: 'github',
325
- };
326
- }
327
- transformGithubGist(url, hash) {
328
- const rawMatch = GITHUB_GIST_RAW_PATTERN.exec(url);
329
- if (rawMatch) {
330
- const groups = rawMatch.pathname.groups;
331
- const user = getPatternGroup(groups, 'user');
332
- const gistId = getPatternGroup(groups, 'gistId');
333
- const filePath = getPatternGroup(groups, 'filePath');
334
- if (!user || !gistId)
335
- return null;
336
- const resolvedFilePath = filePath ? `/${filePath}` : '';
337
- return {
338
- url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${resolvedFilePath}`,
339
- platform: 'github-gist',
340
- };
341
- }
342
- const match = GITHUB_GIST_PATTERN.exec(url);
343
- if (!match)
344
- return null;
345
- const groups = match.pathname.groups;
346
- const user = getPatternGroup(groups, 'user');
347
- const gistId = getPatternGroup(groups, 'gistId');
348
- if (!user || !gistId)
349
- return null;
350
- let filePath = '';
351
- if (hash.startsWith('#file-')) {
352
- const filename = hash.slice('#file-'.length).replace(/-/g, '.');
353
- if (filename)
354
- filePath = `/${filename}`;
355
- }
356
- return {
357
- url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`,
358
- platform: 'github-gist',
359
- };
61
+ catch {
62
+ // Ignore URL parsing errors and continue fallbacks.
63
+ }
64
+ const fromTitle = resolveTitleFilenameCandidate(title);
65
+ if (fromTitle)
66
+ return fromTitle;
67
+ if (hashFallback)
68
+ return hashFallback.substring(0, 16);
69
+ return `download-${Date.now()}`;
70
+ }
71
+ export function generateSafeFilename(url, title, hashFallback, extension = '.md') {
72
+ const name = resolveFilenameBase(url, title, hashFallback);
73
+ return `${truncateFilenameBase(name, extension)}${extension}`;
74
+ }
75
+ const DownloadParamsSchema = z.strictObject({
76
+ namespace: z.literal('markdown'),
77
+ hash: z
78
+ .string()
79
+ .regex(/^[a-f0-9.]+$/i)
80
+ .min(8)
81
+ .max(64),
82
+ });
83
+ function writeJsonError(res, status, message, code) {
84
+ res.writeHead(status, { 'Content-Type': 'application/json' });
85
+ res.end(JSON.stringify({ error: message, code }));
86
+ }
87
+ export function handleDownload(res, namespace, hash) {
88
+ const parsed = DownloadParamsSchema.safeParse({ namespace, hash });
89
+ if (!parsed.success) {
90
+ writeJsonError(res, 400, 'Invalid namespace or hash', 'BAD_REQUEST');
91
+ return;
360
92
  }
361
- transformGitLab(url, origin) {
362
- for (const pattern of GITLAB_BLOB_PATTERNS) {
363
- const match = pattern.exec(url);
364
- if (!match)
365
- continue;
366
- const groups = match.pathname.groups;
367
- const base = getPatternGroup(groups, 'base');
368
- const branch = getPatternGroup(groups, 'branch');
369
- const path = getPatternGroup(groups, 'path');
370
- if (!base || !branch || !path)
371
- return null;
372
- return {
373
- url: `${origin}/${base}/-/raw/${branch}/${path}`,
374
- platform: 'gitlab',
375
- };
376
- }
377
- return null;
93
+ const cacheKey = `${parsed.data.namespace}:${parsed.data.hash}`;
94
+ const entry = cacheGet(cacheKey, { force: true });
95
+ if (!entry) {
96
+ writeJsonError(res, 404, 'Not found or expired', 'NOT_FOUND');
97
+ return;
378
98
  }
379
- transformBitbucket(url, origin) {
380
- const match = BITBUCKET_SRC_PATTERN.exec(url);
381
- if (!match)
382
- return null;
383
- const groups = match.pathname.groups;
384
- const owner = getPatternGroup(groups, 'owner');
385
- const repo = getPatternGroup(groups, 'repo');
386
- const branch = getPatternGroup(groups, 'branch');
387
- const path = getPatternGroup(groups, 'path');
388
- if (!owner || !repo || !branch || !path)
389
- return null;
390
- return {
391
- url: `${origin}/${owner}/${repo}/raw/${branch}/${path}`,
392
- platform: 'bitbucket',
393
- };
99
+ const payload = parseCachedPayload(entry.content);
100
+ const content = payload ? resolveCachedPayloadContent(payload) : null;
101
+ if (!content) {
102
+ writeJsonError(res, 404, 'Content missing', 'NOT_FOUND');
103
+ return;
394
104
  }
105
+ const fileName = generateSafeFilename(entry.url, payload?.title, parsed.data.hash);
106
+ // Safe header generation — RFC 5987 encoding for non-ASCII filenames
107
+ const encoded = encodeURIComponent(fileName).replace(/'/g, '%27');
108
+ res.setHeader('Content-Type', 'text/markdown; charset=utf-8');
109
+ res.setHeader('Content-Disposition', `attachment; filename="${fileName}"; filename*=UTF-8''${encoded}`);
110
+ res.setHeader('Cache-Control', `private, max-age=${config.cache.ttl}`);
111
+ res.setHeader('X-Content-Type-Options', 'nosniff');
112
+ res.end(content);
395
113
  }
396
- const DNS_LOOKUP_TIMEOUT_MS = 5000;
397
- const CNAME_LOOKUP_MAX_DEPTH = 5;
398
- function normalizeDnsName(value) {
399
- const normalized = value.trim().toLowerCase().replace(/\.+$/, '');
400
- return normalized;
401
- }
402
- function createSignalAbortRace(signal, isAbort, onTimeout, onAbort) {
403
- let abortListener = null;
404
- const abortPromise = new Promise((_, reject) => {
405
- abortListener = () => {
406
- reject(isAbort() ? onAbort() : onTimeout());
407
- };
408
- signal.addEventListener('abort', abortListener, { once: true });
409
- if (signal.aborted)
410
- abortListener();
411
- });
412
- const cleanup = () => {
413
- if (!abortListener)
414
- return;
415
- try {
416
- signal.removeEventListener('abort', abortListener);
417
- }
418
- catch {
419
- // Ignore listener cleanup failures; they are non-fatal by design.
420
- }
421
- abortListener = null;
422
- };
423
- return { abortPromise, cleanup };
424
- }
425
- async function withTimeout(promise, timeoutMs, onTimeout, signal, onAbort) {
426
- const timeoutSignal = timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined;
427
- const raceSignal = signal && timeoutSignal
428
- ? AbortSignal.any([signal, timeoutSignal])
429
- : (signal ?? timeoutSignal);
430
- if (!raceSignal)
431
- return promise;
432
- const abortRace = createSignalAbortRace(raceSignal, () => signal?.aborted === true, onTimeout, onAbort ?? (() => new Error('Request was canceled')));
114
+ const UTF8_ENCODING = 'utf-8';
115
+ function getCharsetFromContentType(contentType) {
116
+ if (!contentType)
117
+ return undefined;
118
+ const match = /charset=([^;]+)/i.exec(contentType);
119
+ const charsetGroup = match?.[1];
120
+ if (!charsetGroup)
121
+ return undefined;
122
+ let charset = charsetGroup.trim();
123
+ if (charset.startsWith('"') && charset.endsWith('"')) {
124
+ charset = charset.slice(1, -1);
125
+ }
126
+ return charset.trim();
127
+ }
128
+ function createDecoder(encoding) {
129
+ const fallback = () => new TextDecoder(UTF8_ENCODING);
130
+ if (!encoding)
131
+ return fallback();
433
132
  try {
434
- return await Promise.race([promise, abortRace.abortPromise]);
133
+ return new TextDecoder(encoding);
435
134
  }
436
- finally {
437
- abortRace.cleanup();
135
+ catch {
136
+ return fallback();
438
137
  }
439
138
  }
440
- function createAbortSignalError() {
441
- const err = new Error('Request was canceled');
442
- err.name = 'AbortError';
443
- return err;
444
- }
445
- class SafeDnsResolver {
446
- ipBlocker;
447
- security;
448
- blockedHostSuffixes;
449
- constructor(ipBlocker, security, blockedHostSuffixes) {
450
- this.ipBlocker = ipBlocker;
451
- this.security = security;
452
- this.blockedHostSuffixes = blockedHostSuffixes;
453
- }
454
- async resolveAndValidate(hostname, signal) {
455
- const normalizedHostname = normalizeDnsName(hostname.replace(/^\[|\]$/g, ''));
456
- if (!normalizedHostname) {
457
- throw createErrorWithCode('Invalid hostname provided', 'EINVAL');
458
- }
459
- if (signal?.aborted) {
460
- throw createAbortSignalError();
461
- }
462
- if (this.isBlockedHostname(normalizedHostname)) {
463
- throw createErrorWithCode(`Blocked host: ${normalizedHostname}. Internal hosts are not allowed`, 'EBLOCKED');
464
- }
465
- if (isIP(normalizedHostname)) {
466
- if (isCloudMetadataHost(normalizedHostname)) {
467
- throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Cloud metadata endpoints are not allowed`, 'EBLOCKED');
468
- }
469
- if (process.env['ALLOW_LOCAL_FETCH'] !== 'true' &&
470
- this.ipBlocker.isBlockedIp(normalizedHostname)) {
471
- throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
472
- }
473
- return normalizedHostname;
474
- }
475
- await this.assertNoBlockedCname(normalizedHostname, signal);
476
- const resultPromise = dns.promises.lookup(normalizedHostname, {
477
- all: true,
478
- order: 'verbatim',
479
- });
480
- const addresses = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS lookup timed out for ${normalizedHostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
481
- if (addresses.length === 0 || !addresses[0]) {
482
- throw createErrorWithCode(`No DNS results returned for ${normalizedHostname}`, 'ENODATA');
483
- }
484
- for (const addr of addresses) {
485
- if (addr.family !== 4 && addr.family !== 6) {
486
- throw createErrorWithCode(`Invalid address family returned for ${normalizedHostname}`, 'EINVAL');
487
- }
488
- if (isCloudMetadataHost(addr.address)) {
489
- throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
490
- }
491
- if (!isLocalFetchAllowed() && this.ipBlocker.isBlockedIp(addr.address)) {
492
- throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
493
- }
494
- }
495
- return addresses[0].address;
496
- }
497
- isBlockedHostname(hostname) {
498
- if (isCloudMetadataHost(hostname))
499
- return true;
500
- if (isLocalFetchAllowed())
139
+ function decodeBuffer(buffer, encoding) {
140
+ return createDecoder(encoding).decode(buffer);
141
+ }
142
+ function normalizeEncodingLabel(encoding) {
143
+ return encoding?.trim().toLowerCase() ?? '';
144
+ }
145
+ function isUnicodeWideEncoding(encoding) {
146
+ const normalized = normalizeEncodingLabel(encoding);
147
+ return (normalized.startsWith('utf-16') ||
148
+ normalized.startsWith('utf-32') ||
149
+ normalized === 'ucs-2' ||
150
+ normalized === 'unicodefffe' ||
151
+ normalized === 'unicodefeff');
152
+ }
153
+ const BOM_SIGNATURES = [
154
+ // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
155
+ { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
156
+ { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
157
+ { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
158
+ { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
159
+ { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
160
+ ];
161
+ function startsWithBytes(buffer, signature) {
162
+ const sigLen = signature.length;
163
+ if (buffer.length < sigLen)
164
+ return false;
165
+ for (let i = 0; i < sigLen; i += 1) {
166
+ if (buffer[i] !== signature[i])
501
167
  return false;
502
- if (this.security.blockedHosts.has(hostname))
503
- return true;
504
- return this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
505
168
  }
506
- async assertNoBlockedCname(hostname, signal) {
507
- let current = hostname;
508
- const seen = new Set();
509
- for (let depth = 0; depth < CNAME_LOOKUP_MAX_DEPTH; depth += 1) {
510
- if (!current || seen.has(current))
511
- return;
512
- seen.add(current);
513
- const cnames = await this.resolveCname(current, signal);
514
- if (cnames.length === 0)
515
- return;
516
- for (const cname of cnames) {
517
- if (this.isBlockedHostname(cname)) {
518
- throw createErrorWithCode(`Blocked DNS CNAME detected for ${hostname}: ${cname}`, 'EBLOCKED');
519
- }
520
- }
521
- current = cnames[0] ?? '';
522
- }
169
+ return true;
170
+ }
171
+ function detectBomEncoding(buffer) {
172
+ for (const { bytes, encoding } of BOM_SIGNATURES) {
173
+ if (startsWithBytes(buffer, bytes))
174
+ return encoding;
523
175
  }
524
- async resolveCname(hostname, signal) {
525
- try {
526
- const resultPromise = dns.promises.resolveCname(hostname);
527
- const cnames = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS CNAME lookup timed out for ${hostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
528
- return cnames
529
- .map((value) => normalizeDnsName(value))
530
- .filter((value) => value.length > 0);
531
- }
532
- catch (error) {
533
- if (isError(error) && error.name === 'AbortError') {
534
- throw error;
535
- }
536
- if (isSystemError(error) &&
537
- (error.code === 'ENODATA' ||
538
- error.code === 'ENOTFOUND' ||
539
- error.code === 'ENODOMAIN')) {
540
- return [];
541
- }
542
- logDebug('DNS CNAME lookup failed; continuing with address lookup', {
543
- hostname,
544
- ...(isSystemError(error) ? { code: error.code } : {}),
545
- });
546
- return [];
547
- }
176
+ return undefined;
177
+ }
178
+ function readQuotedValue(input, startIndex) {
179
+ const first = input[startIndex];
180
+ if (!first)
181
+ return '';
182
+ const quoted = first === '"' || first === "'";
183
+ if (quoted) {
184
+ const end = input.indexOf(first, startIndex + 1);
185
+ return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
186
+ }
187
+ const tail = input.slice(startIndex);
188
+ const stop = tail.search(/[\s/>]/);
189
+ return (stop === -1 ? tail : tail.slice(0, stop)).trim();
190
+ }
191
+ function findTokenValue(original, lower, token, fromIndex = 0) {
192
+ const tokenIndex = lower.indexOf(token, fromIndex);
193
+ if (tokenIndex === -1)
194
+ return undefined;
195
+ const valueStart = tokenIndex + token.length;
196
+ const value = readQuotedValue(original, valueStart);
197
+ return value || undefined;
198
+ }
199
+ function extractHtmlCharset(headSnippet) {
200
+ const lower = headSnippet.toLowerCase();
201
+ const charset = findTokenValue(headSnippet, lower, 'charset=');
202
+ return charset ? charset.toLowerCase() : undefined;
203
+ }
204
+ function extractXmlEncoding(headSnippet) {
205
+ const lower = headSnippet.toLowerCase();
206
+ const xmlStart = lower.indexOf('<?xml');
207
+ if (xmlStart === -1)
208
+ return undefined;
209
+ const xmlEnd = lower.indexOf('?>', xmlStart);
210
+ const declaration = xmlEnd === -1
211
+ ? headSnippet.slice(xmlStart)
212
+ : headSnippet.slice(xmlStart, xmlEnd + 2);
213
+ const declarationLower = declaration.toLowerCase();
214
+ const encoding = findTokenValue(declaration, declarationLower, 'encoding=');
215
+ return encoding ? encoding.toLowerCase() : undefined;
216
+ }
217
+ function detectHtmlDeclaredEncoding(buffer) {
218
+ const scanSize = Math.min(buffer.length, 8_192);
219
+ if (scanSize === 0)
220
+ return undefined;
221
+ const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
222
+ return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
223
+ }
224
+ function resolveEncoding(declaredEncoding, sample) {
225
+ const bomEncoding = detectBomEncoding(sample);
226
+ if (bomEncoding)
227
+ return bomEncoding;
228
+ if (declaredEncoding)
229
+ return declaredEncoding;
230
+ return detectHtmlDeclaredEncoding(sample);
231
+ }
232
+ const BINARY_SIGNATURES = [
233
+ [0x25, 0x50, 0x44, 0x46],
234
+ [0x89, 0x50, 0x4e, 0x47],
235
+ [0x47, 0x49, 0x46, 0x38],
236
+ [0xff, 0xd8, 0xff],
237
+ [0x52, 0x49, 0x46, 0x46],
238
+ [0x42, 0x4d],
239
+ [0x49, 0x49, 0x2a, 0x00],
240
+ [0x4d, 0x4d, 0x00, 0x2a],
241
+ [0x00, 0x00, 0x01, 0x00],
242
+ [0x50, 0x4b, 0x03, 0x04],
243
+ [0x1f, 0x8b],
244
+ [0x42, 0x5a, 0x68],
245
+ [0x52, 0x61, 0x72, 0x21],
246
+ [0x37, 0x7a, 0xbc, 0xaf],
247
+ [0x7f, 0x45, 0x4c, 0x46],
248
+ [0x4d, 0x5a],
249
+ [0xcf, 0xfa, 0xed, 0xfe],
250
+ [0x00, 0x61, 0x73, 0x6d],
251
+ [0x1a, 0x45, 0xdf, 0xa3],
252
+ [0x66, 0x74, 0x79, 0x70],
253
+ [0x46, 0x4c, 0x56],
254
+ [0x49, 0x44, 0x33],
255
+ [0xff, 0xfb],
256
+ [0xff, 0xfa],
257
+ [0x4f, 0x67, 0x67, 0x53],
258
+ [0x66, 0x4c, 0x61, 0x43],
259
+ [0x4d, 0x54, 0x68, 0x64],
260
+ [0x77, 0x4f, 0x46, 0x46],
261
+ [0x00, 0x01, 0x00, 0x00],
262
+ [0x4f, 0x54, 0x54, 0x4f],
263
+ [0x53, 0x51, 0x4c, 0x69],
264
+ ];
265
+ function hasNullByte(buffer, limit) {
266
+ const checkLen = Math.min(buffer.length, limit);
267
+ return buffer.subarray(0, checkLen).includes(0x00);
268
+ }
269
+ function isBinaryContent(buffer, encoding) {
270
+ for (const signature of BINARY_SIGNATURES) {
271
+ if (startsWithBytes(buffer, signature))
272
+ return true;
548
273
  }
274
+ return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
549
275
  }
550
276
  function parseRetryAfter(header) {
551
277
  if (!header)
@@ -563,43 +289,31 @@ function parseRetryAfter(header) {
563
289
  return 0;
564
290
  return Math.ceil(deltaMs / 1000);
565
291
  }
566
- function createCanceledFetchError(url) {
567
- return new FetchError('Request was canceled', url, 499, {
568
- reason: 'aborted',
569
- });
570
- }
571
- function createTimeoutFetchError(url, timeoutMs) {
572
- return new FetchError(`Request timeout after ${timeoutMs}ms`, url, 504, {
573
- timeout: timeoutMs,
574
- });
575
- }
576
- function createRateLimitedFetchError(url, retryAfterHeader) {
577
- return new FetchError('Too many requests', url, 429, {
578
- retryAfter: parseRetryAfter(retryAfterHeader),
579
- });
580
- }
581
- function createHttpFetchError(url, status, statusText) {
582
- return new FetchError(`HTTP ${status}: ${statusText}`, url, status);
583
- }
584
- function createTooManyRedirectsFetchError(url) {
585
- return new FetchError('Too many redirects', url);
586
- }
587
- function createMissingRedirectLocationFetchError(url) {
588
- return new FetchError('Redirect response missing Location header', url);
589
- }
590
- function buildNetworkErrorMessage(url) {
591
- return `Network error: Could not reach ${url}`;
592
- }
593
- function createNetworkFetchError(url, message) {
594
- return new FetchError(buildNetworkErrorMessage(url), url, undefined, message ? { message } : {});
595
- }
596
- function createUnknownFetchError(url, message) {
597
- return new FetchError(message, url);
598
- }
599
- function createAbortedFetchError(url) {
600
- return new FetchError('Request was aborted during response read', url, 499, {
601
- reason: 'aborted',
602
- });
292
+ function createFetchError(input, url) {
293
+ switch (input.kind) {
294
+ case 'canceled':
295
+ return new FetchError('Request was canceled', url, 499, {
296
+ reason: 'aborted',
297
+ });
298
+ case 'aborted':
299
+ return new FetchError('Request was aborted during response read', url, 499, { reason: 'aborted' });
300
+ case 'timeout':
301
+ return new FetchError(`Request timeout after ${input.timeout}ms`, url, 504, { timeout: input.timeout });
302
+ case 'rate-limited':
303
+ return new FetchError('Too many requests', url, 429, {
304
+ retryAfter: parseRetryAfter(input.retryAfter),
305
+ });
306
+ case 'http':
307
+ return new FetchError(`HTTP ${input.status}: ${input.statusText}`, url, input.status);
308
+ case 'too-many-redirects':
309
+ return new FetchError('Too many redirects', url);
310
+ case 'missing-redirect-location':
311
+ return new FetchError('Redirect response missing Location header', url);
312
+ case 'network':
313
+ return new FetchError(`Network error: Could not reach ${url}`, url, undefined, { message: input.message });
314
+ case 'unknown':
315
+ return new FetchError(input.message ?? 'Unexpected error', url);
316
+ }
603
317
  }
604
318
  function isAbortError(error) {
605
319
  return (isError(error) &&
@@ -622,15 +336,15 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
622
336
  const url = resolveErrorUrl(error, fallbackUrl);
623
337
  if (isAbortError(error)) {
624
338
  return isTimeoutError(error)
625
- ? createTimeoutFetchError(url, timeoutMs)
626
- : createCanceledFetchError(url);
339
+ ? createFetchError({ kind: 'timeout', timeout: timeoutMs }, url)
340
+ : createFetchError({ kind: 'canceled' }, url);
627
341
  }
628
342
  if (!isError(error))
629
- return createUnknownFetchError(url, 'Unexpected error');
343
+ return createFetchError({ kind: 'unknown', message: 'Unexpected error' }, url);
630
344
  if (!isSystemError(error)) {
631
345
  const err = error;
632
346
  const causeStr = err.cause instanceof Error ? err.cause.message : String(err.cause);
633
- return createNetworkFetchError(url, `${err.message}. Cause: ${causeStr}`);
347
+ return createFetchError({ kind: 'network', message: `${err.message}. Cause: ${causeStr}` }, url);
634
348
  }
635
349
  const { code } = error;
636
350
  if (code === 'ETIMEOUT') {
@@ -643,158 +357,7 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
643
357
  code === 'EINVAL') {
644
358
  return new FetchError(error.message, url, 400, { code });
645
359
  }
646
- return new FetchError(buildNetworkErrorMessage(url), url, undefined, {
647
- code,
648
- message: error.message,
649
- });
650
- }
651
- const fetchChannel = diagnosticsChannel.channel('fetch-url-mcp.fetch');
652
- const SLOW_REQUEST_THRESHOLD_MS = 5000;
653
- class FetchTelemetry {
654
- logger;
655
- context;
656
- redactor;
657
- constructor(logger, context, redactor) {
658
- this.logger = logger;
659
- this.context = context;
660
- this.redactor = redactor;
661
- }
662
- redact(url) {
663
- return this.redactor.redact(url);
664
- }
665
- start(url, method) {
666
- const safeUrl = this.redactor.redact(url);
667
- const contextRequestId = this.context.getRequestId();
668
- const operationId = this.context.getOperationId();
669
- const ctx = {
670
- requestId: randomUUID(),
671
- startTime: performance.now(),
672
- url: safeUrl,
673
- method: method.toUpperCase(),
674
- };
675
- if (contextRequestId)
676
- ctx.contextRequestId = contextRequestId;
677
- if (operationId)
678
- ctx.operationId = operationId;
679
- const event = {
680
- v: 1,
681
- type: 'start',
682
- requestId: ctx.requestId,
683
- method: ctx.method,
684
- url: ctx.url,
685
- };
686
- if (ctx.contextRequestId)
687
- event.contextRequestId = ctx.contextRequestId;
688
- if (ctx.operationId)
689
- event.operationId = ctx.operationId;
690
- this.publish(event);
691
- const logData = {
692
- requestId: ctx.requestId,
693
- method: ctx.method,
694
- url: ctx.url,
695
- };
696
- if (ctx.contextRequestId)
697
- logData['contextRequestId'] = ctx.contextRequestId;
698
- if (ctx.operationId)
699
- logData['operationId'] = ctx.operationId;
700
- this.logger.debug('HTTP Request', logData);
701
- return ctx;
702
- }
703
- recordResponse(context, response, contentSize) {
704
- const duration = performance.now() - context.startTime;
705
- const durationLabel = `${Math.round(duration)}ms`;
706
- const event = {
707
- v: 1,
708
- type: 'end',
709
- requestId: context.requestId,
710
- status: response.status,
711
- duration,
712
- };
713
- if (context.contextRequestId)
714
- event.contextRequestId = context.contextRequestId;
715
- if (context.operationId)
716
- event.operationId = context.operationId;
717
- this.publish(event);
718
- const contentType = response.headers.get('content-type') ?? undefined;
719
- const contentLengthHeader = response.headers.get('content-length');
720
- const size = contentLengthHeader ??
721
- (contentSize === undefined ? undefined : String(contentSize));
722
- const logData = {
723
- requestId: context.requestId,
724
- status: response.status,
725
- url: context.url,
726
- duration: durationLabel,
727
- };
728
- if (context.contextRequestId)
729
- logData['contextRequestId'] = context.contextRequestId;
730
- if (context.operationId)
731
- logData['operationId'] = context.operationId;
732
- if (contentType)
733
- logData['contentType'] = contentType;
734
- if (size)
735
- logData['size'] = size;
736
- this.logger.debug('HTTP Response', logData);
737
- if (duration > SLOW_REQUEST_THRESHOLD_MS) {
738
- const warnData = {
739
- requestId: context.requestId,
740
- url: context.url,
741
- duration: durationLabel,
742
- };
743
- if (context.contextRequestId)
744
- warnData['contextRequestId'] = context.contextRequestId;
745
- if (context.operationId)
746
- warnData['operationId'] = context.operationId;
747
- this.logger.warn('Slow HTTP request detected', warnData);
748
- }
749
- }
750
- recordError(context, error, status) {
751
- const duration = performance.now() - context.startTime;
752
- const err = isError(error) ? error : new Error(String(error));
753
- const code = isSystemError(err) ? err.code : undefined;
754
- const event = {
755
- v: 1,
756
- type: 'error',
757
- requestId: context.requestId,
758
- url: context.url,
759
- error: err.message,
760
- duration,
761
- };
762
- if (code !== undefined)
763
- event.code = code;
764
- if (status !== undefined)
765
- event.status = status;
766
- if (context.contextRequestId)
767
- event.contextRequestId = context.contextRequestId;
768
- if (context.operationId)
769
- event.operationId = context.operationId;
770
- this.publish(event);
771
- const logData = {
772
- requestId: context.requestId,
773
- url: context.url,
774
- status,
775
- code,
776
- error: err.message,
777
- };
778
- if (context.contextRequestId)
779
- logData['contextRequestId'] = context.contextRequestId;
780
- if (context.operationId)
781
- logData['operationId'] = context.operationId;
782
- if (status === 429) {
783
- this.logger.warn('HTTP Request Error', logData);
784
- return;
785
- }
786
- this.logger.error('HTTP Request Error', logData);
787
- }
788
- publish(event) {
789
- if (!fetchChannel.hasSubscribers)
790
- return;
791
- try {
792
- fetchChannel.publish(event);
793
- }
794
- catch {
795
- // Best-effort telemetry; never crash request path.
796
- }
797
- }
360
+ return createFetchError({ kind: 'network', message: error.message }, url);
798
361
  }
799
362
  const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
800
363
  function isRedirectStatus(status) {
@@ -835,7 +398,7 @@ class RedirectFollower {
835
398
  return { response, url: currentUrl };
836
399
  currentUrl = nextUrl;
837
400
  }
838
- throw createTooManyRedirectsFetchError(currentUrl);
401
+ throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
839
402
  }
840
403
  async performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress) {
841
404
  const fetchInit = {
@@ -843,6 +406,7 @@ class RedirectFollower {
843
406
  redirect: 'manual',
844
407
  };
845
408
  if (ipAddress) {
409
+ const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
846
410
  const agent = new Agent({
847
411
  connect: {
848
412
  lookup: (hostname, options, callback) => {
@@ -854,6 +418,8 @@ class RedirectFollower {
854
418
  callback(null, ipAddress, family);
855
419
  }
856
420
  },
421
+ timeout: config.fetcher.timeout,
422
+ ...(ca ? { ca } : {}),
857
423
  },
858
424
  pipelining: 1,
859
425
  connections: 1,
@@ -865,7 +431,10 @@ class RedirectFollower {
865
431
  const response = await this.fetchFn(currentUrl, fetchInit);
866
432
  if (!isRedirectStatus(response.status))
867
433
  return { response };
868
- this.assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount);
434
+ if (redirectCount >= redirectLimit) {
435
+ cancelResponseBody(response);
436
+ throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
437
+ }
869
438
  const location = this.getRedirectLocation(response, currentUrl);
870
439
  cancelResponseBody(response);
871
440
  const nextUrl = this.resolveRedirectTarget(currentUrl, location);
@@ -874,221 +443,57 @@ class RedirectFollower {
874
443
  parsedNextUrl.protocol !== 'https:') {
875
444
  throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
876
445
  }
877
- return {
878
- response,
879
- nextUrl,
880
- };
881
- }
882
- assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount) {
883
- if (redirectCount < redirectLimit)
884
- return;
885
- cancelResponseBody(response);
886
- throw createTooManyRedirectsFetchError(currentUrl);
887
- }
888
- getRedirectLocation(response, currentUrl) {
889
- const location = response.headers.get('location');
890
- if (location)
891
- return location;
892
- cancelResponseBody(response);
893
- throw createMissingRedirectLocationFetchError(currentUrl);
894
- }
895
- resolveRedirectTarget(baseUrl, location) {
896
- let resolved;
897
- try {
898
- resolved = new URL(location, baseUrl);
899
- }
900
- catch {
901
- throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
902
- }
903
- if (resolved.username || resolved.password) {
904
- throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
905
- }
906
- return this.normalizeUrl(resolved.href);
907
- }
908
- annotateRedirectError(error, url) {
909
- if (!isObject(error))
910
- return;
911
- error['requestUrl'] = url;
912
- }
913
- async withRedirectErrorContext(url, fn) {
914
- try {
915
- return await fn();
916
- }
917
- catch (error) {
918
- this.annotateRedirectError(error, url);
919
- throw error;
920
- }
921
- }
922
- }
923
- class ResponseTextReader {
924
- async read(response, url, maxBytes, signal, encoding) {
925
- const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
926
- const text = decodeBuffer(buffer, effectiveEncoding);
927
- return { text, size: buffer.byteLength, truncated };
928
- }
929
- async readBuffer(response, url, maxBytes, signal, encoding) {
930
- if (signal?.aborted) {
931
- cancelResponseBody(response);
932
- throw createAbortedFetchError(url);
933
- }
934
- if (!response.body) {
935
- return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
936
- }
937
- return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
938
- }
939
- async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
940
- if (signal?.aborted)
941
- throw createCanceledFetchError(url);
942
- const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
943
- let buffer;
944
- let truncated = false;
945
- try {
946
- // Try safe blob slicing if available (Node 18+) to avoid OOM
947
- const blob = await response.blob();
948
- if (Number.isFinite(limit) && blob.size > limit) {
949
- const sliced = blob.slice(0, limit);
950
- buffer = new Uint8Array(await sliced.arrayBuffer());
951
- truncated = true;
952
- }
953
- else {
954
- buffer = new Uint8Array(await blob.arrayBuffer());
955
- }
956
- }
957
- catch {
958
- // Fallback if blob() fails
959
- const arrayBuffer = await response.arrayBuffer();
960
- const length = Math.min(arrayBuffer.byteLength, limit);
961
- buffer = new Uint8Array(arrayBuffer, 0, length);
962
- truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
963
- }
964
- const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
965
- if (isBinaryContent(buffer, effectiveEncoding)) {
966
- throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
967
- }
968
- return {
969
- buffer,
970
- encoding: effectiveEncoding,
971
- size: buffer.byteLength,
972
- truncated,
973
- };
974
- }
975
- async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
976
- const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
977
- const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
978
- let effectiveEncoding = encoding ?? 'utf-8';
979
- let encodingResolved = false;
980
- let total = 0;
981
- const chunks = [];
982
- const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
983
- const guard = new Transform({
984
- transform(chunk, _encoding, callback) {
985
- try {
986
- const buf = Buffer.isBuffer(chunk)
987
- ? chunk
988
- : Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
989
- if (!encodingResolved) {
990
- encodingResolved = true;
991
- effectiveEncoding =
992
- resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
993
- }
994
- if (isBinaryContent(buf, effectiveEncoding)) {
995
- callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
996
- return;
997
- }
998
- const newTotal = total + buf.length;
999
- if (newTotal > byteLimit) {
1000
- const remaining = byteLimit - total;
1001
- if (remaining > 0) {
1002
- const slice = buf.subarray(0, remaining);
1003
- total += remaining;
1004
- if (captureChunks)
1005
- chunks.push(slice);
1006
- this.push(slice);
1007
- }
1008
- callback(new MaxBytesError());
1009
- return;
1010
- }
1011
- total = newTotal;
1012
- if (captureChunks)
1013
- chunks.push(buf);
1014
- callback(null, buf);
1015
- }
1016
- catch (error) {
1017
- callback(error instanceof Error ? error : new Error(String(error)));
1018
- }
1019
- },
1020
- });
1021
- const guarded = source.pipe(guard);
1022
- const abortHandler = () => {
1023
- source.destroy();
1024
- guard.destroy();
446
+ return {
447
+ response,
448
+ nextUrl,
1025
449
  };
1026
- if (signal) {
1027
- signal.addEventListener('abort', abortHandler, { once: true });
450
+ }
451
+ getRedirectLocation(response, currentUrl) {
452
+ const location = response.headers.get('location');
453
+ if (location)
454
+ return location;
455
+ cancelResponseBody(response);
456
+ throw createFetchError({ kind: 'missing-redirect-location' }, currentUrl);
457
+ }
458
+ resolveRedirectTarget(baseUrl, location) {
459
+ let resolved;
460
+ try {
461
+ resolved = new URL(location, baseUrl);
1028
462
  }
463
+ catch {
464
+ throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
465
+ }
466
+ if (resolved.username || resolved.password) {
467
+ throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
468
+ }
469
+ return this.normalizeUrl(resolved.href);
470
+ }
471
+ annotateRedirectError(error, url) {
472
+ if (!isObject(error))
473
+ return;
474
+ error['requestUrl'] = url;
475
+ }
476
+ async withRedirectErrorContext(url, fn) {
1029
477
  try {
1030
- const buffer = await consumeBuffer(guarded);
1031
- return {
1032
- buffer,
1033
- encoding: effectiveEncoding,
1034
- size: total,
1035
- truncated: false,
1036
- };
478
+ return await fn();
1037
479
  }
1038
480
  catch (error) {
1039
- if (signal?.aborted)
1040
- throw createAbortedFetchError(url);
1041
- if (error instanceof FetchError)
1042
- throw error;
1043
- if (error instanceof MaxBytesError) {
1044
- source.destroy();
1045
- guard.destroy();
1046
- return {
1047
- buffer: Buffer.concat(chunks, total),
1048
- encoding: effectiveEncoding,
1049
- size: total,
1050
- truncated: true,
1051
- };
1052
- }
481
+ this.annotateRedirectError(error, url);
1053
482
  throw error;
1054
483
  }
1055
- finally {
1056
- if (signal) {
1057
- signal.removeEventListener('abort', abortHandler);
1058
- }
1059
- }
1060
484
  }
1061
485
  }
1062
- const DEFAULT_HEADERS = {
1063
- 'User-Agent': config.fetcher.userAgent,
1064
- Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
1065
- 'Accept-Language': 'en-US,en;q=0.5',
1066
- 'Accept-Encoding': 'gzip, deflate, br',
1067
- Connection: 'keep-alive',
1068
- };
1069
- function buildHeaders() {
1070
- return DEFAULT_HEADERS;
1071
- }
1072
- function buildRequestSignal(timeoutMs, external) {
1073
- if (timeoutMs <= 0)
1074
- return external;
1075
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
1076
- return external ? AbortSignal.any([external, timeoutSignal]) : timeoutSignal;
1077
- }
1078
- function buildRequestInit(headers, signal) {
1079
- return {
1080
- method: 'GET',
1081
- headers,
1082
- ...(signal ? { signal } : {}),
1083
- };
1084
- }
1085
486
  function resolveResponseError(response, finalUrl) {
1086
487
  if (response.status === 429) {
1087
- return createRateLimitedFetchError(finalUrl, response.headers.get('retry-after'));
488
+ return createFetchError({ kind: 'rate-limited', retryAfter: response.headers.get('retry-after') }, finalUrl);
1088
489
  }
1089
490
  return response.ok
1090
491
  ? null
1091
- : createHttpFetchError(finalUrl, response.status, response.statusText);
492
+ : createFetchError({
493
+ kind: 'http',
494
+ status: response.status,
495
+ statusText: response.statusText,
496
+ }, finalUrl);
1092
497
  }
1093
498
  function resolveMediaType(contentType) {
1094
499
  if (!contentType)
@@ -1220,85 +625,224 @@ async function decodeResponseIfNeeded(response, url, signal) {
1220
625
  if (!isSupportedContentEncoding(encoding)) {
1221
626
  throw createUnsupportedContentEncodingError(url, encodingHeader ?? encoding);
1222
627
  }
1223
- }
1224
- if (!response.body)
1225
- return response;
1226
- const [decodeBranch, passthroughBranch] = response.body.tee();
1227
- const decodeOrder = encodings
1228
- .slice()
1229
- .reverse()
1230
- .filter(isSupportedContentEncoding);
1231
- const decompressors = decodeOrder.map((encoding) => createDecompressor(encoding));
1232
- const decodeSource = Readable.fromWeb(toNodeReadableStream(decodeBranch, url, 'response:decode-content-encoding'));
1233
- const decodedNodeStream = new PassThrough();
1234
- const decodedPipeline = pipeline([
1235
- decodeSource,
1236
- ...decompressors,
1237
- decodedNodeStream,
1238
- ]);
1239
- const headers = new Headers(response.headers);
1240
- headers.delete('content-encoding');
1241
- headers.delete('content-length');
1242
- const abortDecodePipeline = () => {
1243
- decodeSource.destroy();
1244
- for (const decompressor of decompressors) {
1245
- decompressor.destroy();
628
+ }
629
+ if (!response.body)
630
+ return response;
631
+ const [decodeBranch, passthroughBranch] = response.body.tee();
632
+ const decodeOrder = encodings
633
+ .slice()
634
+ .reverse()
635
+ .filter(isSupportedContentEncoding);
636
+ const decompressors = decodeOrder.map((encoding) => createDecompressor(encoding));
637
+ const decodeSource = Readable.fromWeb(toNodeReadableStream(decodeBranch, url, 'response:decode-content-encoding'));
638
+ const decodedNodeStream = new PassThrough();
639
+ const decodedPipeline = pipeline([
640
+ decodeSource,
641
+ ...decompressors,
642
+ decodedNodeStream,
643
+ ]);
644
+ const headers = new Headers(response.headers);
645
+ headers.delete('content-encoding');
646
+ headers.delete('content-length');
647
+ const abortDecodePipeline = () => {
648
+ decodeSource.destroy();
649
+ for (const decompressor of decompressors) {
650
+ decompressor.destroy();
651
+ }
652
+ decodedNodeStream.destroy();
653
+ };
654
+ if (signal) {
655
+ signal.addEventListener('abort', abortDecodePipeline, { once: true });
656
+ }
657
+ void decodedPipeline.catch((error) => {
658
+ decodedNodeStream.destroy(toError(error));
659
+ });
660
+ const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
661
+ const decodedReader = decodedBodyStream.getReader();
662
+ const clearAbortListener = () => {
663
+ if (!signal)
664
+ return;
665
+ signal.removeEventListener('abort', abortDecodePipeline);
666
+ };
667
+ try {
668
+ const first = await decodedReader.read();
669
+ if (first.done) {
670
+ clearAbortListener();
671
+ void passthroughBranch.cancel().catch(() => undefined);
672
+ return new Response(null, {
673
+ status: response.status,
674
+ statusText: response.statusText,
675
+ headers,
676
+ });
677
+ }
678
+ void passthroughBranch.cancel().catch(() => undefined);
679
+ const body = createPumpedStream(first.value, decodedReader);
680
+ if (signal) {
681
+ void finished(decodedNodeStream, { cleanup: true })
682
+ .catch(() => { })
683
+ .finally(() => {
684
+ clearAbortListener();
685
+ });
686
+ }
687
+ return new Response(body, {
688
+ status: response.status,
689
+ statusText: response.statusText,
690
+ headers,
691
+ });
692
+ }
693
+ catch (error) {
694
+ clearAbortListener();
695
+ abortDecodePipeline();
696
+ void decodedReader.cancel(error).catch(() => undefined);
697
+ logDebug('Content-Encoding decode failed; using passthrough body', {
698
+ url: redactUrl(url),
699
+ encoding: encodingHeader ?? encodings.join(','),
700
+ error: isError(error) ? error.message : String(error),
701
+ });
702
+ return new Response(passthroughBranch, {
703
+ status: response.status,
704
+ statusText: response.statusText,
705
+ headers,
706
+ });
707
+ }
708
+ }
709
+ class ResponseTextReader {
710
+ async read(response, url, maxBytes, signal, encoding) {
711
+ const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
712
+ const text = decodeBuffer(buffer, effectiveEncoding);
713
+ return { text, size: buffer.byteLength, truncated };
714
+ }
715
+ async readBuffer(response, url, maxBytes, signal, encoding) {
716
+ if (signal?.aborted) {
717
+ cancelResponseBody(response);
718
+ throw createFetchError({ kind: 'aborted' }, url);
719
+ }
720
+ if (!response.body) {
721
+ return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
722
+ }
723
+ return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
724
+ }
725
+ async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
726
+ if (signal?.aborted)
727
+ throw createFetchError({ kind: 'canceled' }, url);
728
+ const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
729
+ let buffer;
730
+ let truncated = false;
731
+ try {
732
+ // Try safe blob slicing if available (Node 18+) to avoid OOM
733
+ const blob = await response.blob();
734
+ if (Number.isFinite(limit) && blob.size > limit) {
735
+ const sliced = blob.slice(0, limit);
736
+ buffer = new Uint8Array(await sliced.arrayBuffer());
737
+ truncated = true;
738
+ }
739
+ else {
740
+ buffer = new Uint8Array(await blob.arrayBuffer());
741
+ }
742
+ }
743
+ catch {
744
+ // Fallback if blob() fails
745
+ const arrayBuffer = await response.arrayBuffer();
746
+ const length = Math.min(arrayBuffer.byteLength, limit);
747
+ buffer = new Uint8Array(arrayBuffer, 0, length);
748
+ truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
749
+ }
750
+ const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
751
+ if (isBinaryContent(buffer, effectiveEncoding)) {
752
+ throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
753
+ }
754
+ return {
755
+ buffer,
756
+ encoding: effectiveEncoding,
757
+ size: buffer.byteLength,
758
+ truncated,
759
+ };
760
+ }
761
+ async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
762
+ const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
763
+ const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
764
+ let effectiveEncoding = encoding ?? 'utf-8';
765
+ let encodingResolved = false;
766
+ let total = 0;
767
+ const chunks = [];
768
+ const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
769
+ const guard = new Transform({
770
+ transform(chunk, _encoding, callback) {
771
+ try {
772
+ const buf = Buffer.isBuffer(chunk)
773
+ ? chunk
774
+ : Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
775
+ if (!encodingResolved) {
776
+ encodingResolved = true;
777
+ effectiveEncoding =
778
+ resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
779
+ }
780
+ if (isBinaryContent(buf, effectiveEncoding)) {
781
+ callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
782
+ return;
783
+ }
784
+ const newTotal = total + buf.length;
785
+ if (newTotal > byteLimit) {
786
+ const remaining = byteLimit - total;
787
+ if (remaining > 0) {
788
+ const slice = buf.subarray(0, remaining);
789
+ total += remaining;
790
+ if (captureChunks)
791
+ chunks.push(slice);
792
+ this.push(slice);
793
+ }
794
+ callback(new MaxBytesError());
795
+ return;
796
+ }
797
+ total = newTotal;
798
+ if (captureChunks)
799
+ chunks.push(buf);
800
+ callback(null, buf);
801
+ }
802
+ catch (error) {
803
+ callback(toError(error));
804
+ }
805
+ },
806
+ });
807
+ const guarded = source.pipe(guard);
808
+ const abortHandler = () => {
809
+ source.destroy();
810
+ guard.destroy();
811
+ };
812
+ if (signal) {
813
+ signal.addEventListener('abort', abortHandler, { once: true });
814
+ }
815
+ try {
816
+ const buffer = await consumeBuffer(guarded);
817
+ return {
818
+ buffer,
819
+ encoding: effectiveEncoding,
820
+ size: total,
821
+ truncated: false,
822
+ };
1246
823
  }
1247
- decodedNodeStream.destroy();
1248
- };
1249
- if (signal) {
1250
- signal.addEventListener('abort', abortDecodePipeline, { once: true });
1251
- }
1252
- void decodedPipeline.catch((error) => {
1253
- decodedNodeStream.destroy(error instanceof Error ? error : new Error(String(error)));
1254
- });
1255
- const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
1256
- const decodedReader = decodedBodyStream.getReader();
1257
- const clearAbortListener = () => {
1258
- if (!signal)
1259
- return;
1260
- signal.removeEventListener('abort', abortDecodePipeline);
1261
- };
1262
- try {
1263
- const first = await decodedReader.read();
1264
- if (first.done) {
1265
- clearAbortListener();
1266
- void passthroughBranch.cancel().catch(() => undefined);
1267
- return new Response(null, {
1268
- status: response.status,
1269
- statusText: response.statusText,
1270
- headers,
1271
- });
824
+ catch (error) {
825
+ if (signal?.aborted)
826
+ throw createFetchError({ kind: 'aborted' }, url);
827
+ if (error instanceof FetchError)
828
+ throw error;
829
+ if (error instanceof MaxBytesError) {
830
+ source.destroy();
831
+ guard.destroy();
832
+ return {
833
+ buffer: Buffer.concat(chunks, total),
834
+ encoding: effectiveEncoding,
835
+ size: total,
836
+ truncated: true,
837
+ };
838
+ }
839
+ throw error;
1272
840
  }
1273
- void passthroughBranch.cancel().catch(() => undefined);
1274
- const body = createPumpedStream(first.value, decodedReader);
1275
- if (signal) {
1276
- void finished(decodedNodeStream, { cleanup: true })
1277
- .catch(() => { })
1278
- .finally(() => {
1279
- clearAbortListener();
1280
- });
841
+ finally {
842
+ if (signal) {
843
+ signal.removeEventListener('abort', abortHandler);
844
+ }
1281
845
  }
1282
- return new Response(body, {
1283
- status: response.status,
1284
- statusText: response.statusText,
1285
- headers,
1286
- });
1287
- }
1288
- catch (error) {
1289
- clearAbortListener();
1290
- abortDecodePipeline();
1291
- void decodedReader.cancel(error).catch(() => undefined);
1292
- logDebug('Content-Encoding decode failed; using passthrough body', {
1293
- url: redactUrl(url),
1294
- encoding: encodingHeader ?? encodings.join(','),
1295
- error: isError(error) ? error.message : String(error),
1296
- });
1297
- return new Response(passthroughBranch, {
1298
- status: response.status,
1299
- statusText: response.statusText,
1300
- headers,
1301
- });
1302
846
  }
1303
847
  }
1304
848
  async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
@@ -1320,20 +864,172 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
1320
864
  telemetry.recordResponse(ctx, decodedResponse, size);
1321
865
  return { kind: 'buffer', buffer, encoding, size, truncated };
1322
866
  }
1323
- function extractHostname(url) {
1324
- try {
1325
- return new URL(url).hostname;
867
+ function isReadableStreamLike(value) {
868
+ if (!isObject(value))
869
+ return false;
870
+ return (typeof value['getReader'] === 'function' &&
871
+ typeof value['cancel'] === 'function' &&
872
+ typeof value['tee'] === 'function' &&
873
+ typeof value['locked'] === 'boolean');
874
+ }
875
+ function assertReadableStreamLike(stream, url, stage) {
876
+ if (isReadableStreamLike(stream))
877
+ return;
878
+ throw new FetchError('Invalid response stream', url, 500, {
879
+ reason: 'invalid_stream',
880
+ stage,
881
+ });
882
+ }
883
+ function toNodeReadableStream(stream, url, stage) {
884
+ assertReadableStreamLike(stream, url, stage);
885
+ return stream;
886
+ }
887
+ function toWebReadableStream(stream, url, stage) {
888
+ const converted = Readable.toWeb(stream);
889
+ assertReadableStreamLike(converted, url, stage);
890
+ return converted;
891
+ }
892
+ const fetchChannel = diagnosticsChannel.channel('fetch-url-mcp.fetch');
893
+ const SLOW_REQUEST_THRESHOLD_MS = 5000;
894
+ class FetchTelemetry {
895
+ logger;
896
+ context;
897
+ redactor;
898
+ constructor(logger, context, redactor) {
899
+ this.logger = logger;
900
+ this.context = context;
901
+ this.redactor = redactor;
1326
902
  }
1327
- catch {
1328
- throw createErrorWithCode('Invalid URL', 'EINVAL');
903
+ redact(url) {
904
+ return this.redactor.redact(url);
905
+ }
906
+ contextFields(ctx) {
907
+ return {
908
+ ...(ctx.contextRequestId
909
+ ? { contextRequestId: ctx.contextRequestId }
910
+ : {}),
911
+ ...(ctx.operationId ? { operationId: ctx.operationId } : {}),
912
+ };
913
+ }
914
+ start(url, method) {
915
+ const safeUrl = this.redactor.redact(url);
916
+ const contextRequestId = this.context.getRequestId();
917
+ const operationId = this.context.getOperationId();
918
+ const ctx = {
919
+ requestId: randomUUID(),
920
+ startTime: performance.now(),
921
+ url: safeUrl,
922
+ method: method.toUpperCase(),
923
+ };
924
+ if (contextRequestId)
925
+ ctx.contextRequestId = contextRequestId;
926
+ if (operationId)
927
+ ctx.operationId = operationId;
928
+ const ctxFields = this.contextFields(ctx);
929
+ this.publish({
930
+ v: 1,
931
+ type: 'start',
932
+ requestId: ctx.requestId,
933
+ method: ctx.method,
934
+ url: ctx.url,
935
+ ...ctxFields,
936
+ });
937
+ this.logger.debug('HTTP Request', {
938
+ requestId: ctx.requestId,
939
+ method: ctx.method,
940
+ url: ctx.url,
941
+ ...ctxFields,
942
+ });
943
+ return ctx;
944
+ }
945
+ recordResponse(context, response, contentSize) {
946
+ const duration = performance.now() - context.startTime;
947
+ const durationLabel = `${Math.round(duration)}ms`;
948
+ const ctxFields = this.contextFields(context);
949
+ this.publish({
950
+ v: 1,
951
+ type: 'end',
952
+ requestId: context.requestId,
953
+ status: response.status,
954
+ duration,
955
+ ...ctxFields,
956
+ });
957
+ const contentType = response.headers.get('content-type') ?? undefined;
958
+ const contentLengthHeader = response.headers.get('content-length');
959
+ const size = contentLengthHeader ??
960
+ (contentSize === undefined ? undefined : String(contentSize));
961
+ this.logger.debug('HTTP Response', {
962
+ requestId: context.requestId,
963
+ status: response.status,
964
+ url: context.url,
965
+ duration: durationLabel,
966
+ ...ctxFields,
967
+ ...(contentType ? { contentType } : {}),
968
+ ...(size ? { size } : {}),
969
+ });
970
+ if (duration > SLOW_REQUEST_THRESHOLD_MS) {
971
+ this.logger.warn('Slow HTTP request detected', {
972
+ requestId: context.requestId,
973
+ url: context.url,
974
+ duration: durationLabel,
975
+ ...ctxFields,
976
+ });
977
+ }
978
+ }
979
+ recordError(context, error, status) {
980
+ const duration = performance.now() - context.startTime;
981
+ const err = toError(error);
982
+ const code = isSystemError(err) ? err.code : undefined;
983
+ const ctxFields = this.contextFields(context);
984
+ this.publish({
985
+ v: 1,
986
+ type: 'error',
987
+ requestId: context.requestId,
988
+ url: context.url,
989
+ error: err.message,
990
+ duration,
991
+ ...(code !== undefined ? { code } : {}),
992
+ ...(status !== undefined ? { status } : {}),
993
+ ...ctxFields,
994
+ });
995
+ const logData = {
996
+ requestId: context.requestId,
997
+ url: context.url,
998
+ status,
999
+ code,
1000
+ error: err.message,
1001
+ ...ctxFields,
1002
+ };
1003
+ if (status === 429) {
1004
+ this.logger.warn('HTTP Request Error', logData);
1005
+ return;
1006
+ }
1007
+ this.logger.error('HTTP Request Error', logData);
1008
+ }
1009
+ publish(event) {
1010
+ if (!fetchChannel.hasSubscribers)
1011
+ return;
1012
+ try {
1013
+ fetchChannel.publish(event);
1014
+ }
1015
+ catch {
1016
+ // Best-effort telemetry; never crash request path.
1017
+ }
1329
1018
  }
1330
1019
  }
1331
- function createDnsPreflight(dnsResolver) {
1332
- return async (url, signal) => {
1333
- const hostname = extractHostname(url);
1334
- return await dnsResolver.resolveAndValidate(hostname, signal);
1335
- };
1336
- }
1020
+ const defaultLogger = {
1021
+ debug: logDebug,
1022
+ warn: logWarn,
1023
+ error: logError,
1024
+ };
1025
+ const defaultContext = {
1026
+ getRequestId,
1027
+ getOperationId,
1028
+ };
1029
+ const defaultRedactor = {
1030
+ redact: redactUrl,
1031
+ };
1032
+ const defaultFetch = (input, init) => globalThis.fetch(input, init);
1337
1033
  class HttpFetcher {
1338
1034
  fetcherConfig;
1339
1035
  redirectFollower;
@@ -1387,6 +1083,29 @@ class HttpFetcher {
1387
1083
  }
1388
1084
  }
1389
1085
  }
1086
+ const DEFAULT_HEADERS = {
1087
+ 'User-Agent': config.fetcher.userAgent,
1088
+ Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
1089
+ 'Accept-Language': 'en-US,en;q=0.5',
1090
+ 'Accept-Encoding': 'gzip, deflate, br',
1091
+ Connection: 'keep-alive',
1092
+ };
1093
+ function buildHeaders() {
1094
+ return DEFAULT_HEADERS;
1095
+ }
1096
+ function buildRequestSignal(timeoutMs, external) {
1097
+ if (timeoutMs <= 0)
1098
+ return external;
1099
+ const timeoutSignal = AbortSignal.timeout(timeoutMs);
1100
+ return external ? AbortSignal.any([external, timeoutSignal]) : timeoutSignal;
1101
+ }
1102
+ function buildRequestInit(headers, signal) {
1103
+ return {
1104
+ method: 'GET',
1105
+ headers,
1106
+ ...(signal ? { signal } : {}),
1107
+ };
1108
+ }
1390
1109
  const ipBlocker = new IpBlocker(config.security);
1391
1110
  const urlNormalizer = new UrlNormalizer(config.constants, config.security, ipBlocker, BLOCKED_HOST_SUFFIXES);
1392
1111
  const rawUrlTransformer = new RawUrlTransformer(defaultLogger);
@@ -1394,7 +1113,6 @@ const dnsResolver = new SafeDnsResolver(ipBlocker, config.security, BLOCKED_HOST
1394
1113
  const telemetry = new FetchTelemetry(defaultLogger, defaultContext, defaultRedactor);
1395
1114
  const normalizeRedirectUrl = (url) => urlNormalizer.validateAndNormalize(url);
1396
1115
  const dnsPreflight = createDnsPreflight(dnsResolver);
1397
- // Redirect follower with per-hop DNS preflight.
1398
1116
  const secureRedirectFollower = new RedirectFollower(defaultFetch, normalizeRedirectUrl, dnsPreflight);
1399
1117
  const responseReader = new ResponseTextReader();
1400
1118
  const httpFetcher = new HttpFetcher(config.fetcher, secureRedirectFollower, responseReader, telemetry);
@@ -1436,4 +1154,3 @@ export async function fetchNormalizedUrl(normalizedUrl, options) {
1436
1154
  export async function fetchNormalizedUrlBuffer(normalizedUrl, options) {
1437
1155
  return httpFetcher.fetchNormalizedUrlBuffer(normalizedUrl, options);
1438
1156
  }
1439
- //# sourceMappingURL=fetch.js.map