@j0hanz/fetch-url-mcp 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/dist/cli.d.ts +2 -3
  2. package/dist/cli.js +1 -2
  3. package/dist/http/auth.d.ts +5 -3
  4. package/dist/http/auth.js +64 -15
  5. package/dist/http/health.d.ts +1 -2
  6. package/dist/http/health.js +7 -18
  7. package/dist/http/helpers.d.ts +3 -4
  8. package/dist/http/helpers.js +21 -21
  9. package/dist/http/native.d.ts +0 -1
  10. package/dist/http/native.js +34 -26
  11. package/dist/http/rate-limit.d.ts +0 -1
  12. package/dist/http/rate-limit.js +3 -4
  13. package/dist/index.d.ts +0 -1
  14. package/dist/index.js +17 -18
  15. package/dist/lib/{markdown-cleanup.d.ts → content.d.ts} +4 -2
  16. package/dist/lib/content.js +1356 -0
  17. package/dist/lib/core.d.ts +253 -0
  18. package/dist/lib/core.js +1228 -0
  19. package/dist/lib/{tool-pipeline.d.ts → fetch-pipeline.d.ts} +1 -2
  20. package/dist/lib/{tool-pipeline.js → fetch-pipeline.js} +10 -19
  21. package/dist/lib/{fetch.d.ts → http.d.ts} +7 -9
  22. package/dist/lib/{fetch.js → http.js} +706 -944
  23. package/dist/lib/mcp-tools.d.ts +28 -0
  24. package/dist/lib/mcp-tools.js +107 -0
  25. package/dist/lib/{tool-progress.d.ts → progress.d.ts} +0 -1
  26. package/dist/lib/{tool-progress.js → progress.js} +8 -13
  27. package/dist/lib/task-handlers.d.ts +5 -0
  28. package/dist/lib/{mcp.js → task-handlers.js} +56 -12
  29. package/dist/lib/url.d.ts +70 -0
  30. package/dist/lib/url.js +686 -0
  31. package/dist/lib/utils.d.ts +58 -0
  32. package/dist/lib/utils.js +304 -0
  33. package/dist/prompts/index.d.ts +0 -1
  34. package/dist/prompts/index.js +0 -1
  35. package/dist/resources/index.d.ts +0 -1
  36. package/dist/resources/index.js +74 -33
  37. package/dist/resources/instructions.d.ts +0 -1
  38. package/dist/resources/instructions.js +2 -2
  39. package/dist/schemas/inputs.d.ts +0 -1
  40. package/dist/schemas/inputs.js +2 -3
  41. package/dist/schemas/outputs.d.ts +0 -1
  42. package/dist/schemas/outputs.js +1 -2
  43. package/dist/server.d.ts +0 -1
  44. package/dist/server.js +16 -26
  45. package/dist/tasks/execution.d.ts +0 -1
  46. package/dist/tasks/execution.js +27 -24
  47. package/dist/tasks/manager.d.ts +7 -3
  48. package/dist/tasks/manager.js +53 -34
  49. package/dist/tasks/owner.d.ts +1 -2
  50. package/dist/tasks/owner.js +1 -2
  51. package/dist/tasks/tool-registry.d.ts +1 -2
  52. package/dist/tasks/tool-registry.js +0 -1
  53. package/dist/tools/fetch-url.d.ts +1 -2
  54. package/dist/tools/fetch-url.js +39 -31
  55. package/dist/tools/index.d.ts +0 -1
  56. package/dist/tools/index.js +0 -1
  57. package/dist/transform/html-translators.d.ts +1 -0
  58. package/dist/transform/html-translators.js +454 -0
  59. package/dist/transform/metadata.d.ts +4 -0
  60. package/dist/transform/metadata.js +183 -0
  61. package/dist/transform/transform.d.ts +0 -1
  62. package/dist/transform/transform.js +24 -641
  63. package/dist/transform/types.d.ts +9 -11
  64. package/dist/transform/types.js +0 -1
  65. package/dist/transform/worker-pool.d.ts +0 -1
  66. package/dist/transform/worker-pool.js +7 -16
  67. package/dist/transform/workers/shared.d.ts +0 -1
  68. package/dist/transform/workers/shared.js +1 -2
  69. package/dist/transform/workers/transform-child.d.ts +0 -1
  70. package/dist/transform/workers/transform-child.js +0 -1
  71. package/dist/transform/workers/transform-worker.d.ts +0 -1
  72. package/dist/transform/workers/transform-worker.js +0 -1
  73. package/package.json +6 -3
  74. package/dist/cli.d.ts.map +0 -1
  75. package/dist/cli.js.map +0 -1
  76. package/dist/http/auth.d.ts.map +0 -1
  77. package/dist/http/auth.js.map +0 -1
  78. package/dist/http/health.d.ts.map +0 -1
  79. package/dist/http/health.js.map +0 -1
  80. package/dist/http/helpers.d.ts.map +0 -1
  81. package/dist/http/helpers.js.map +0 -1
  82. package/dist/http/native.d.ts.map +0 -1
  83. package/dist/http/native.js.map +0 -1
  84. package/dist/http/rate-limit.d.ts.map +0 -1
  85. package/dist/http/rate-limit.js.map +0 -1
  86. package/dist/index.d.ts.map +0 -1
  87. package/dist/index.js.map +0 -1
  88. package/dist/lib/cache.d.ts +0 -54
  89. package/dist/lib/cache.d.ts.map +0 -1
  90. package/dist/lib/cache.js +0 -264
  91. package/dist/lib/cache.js.map +0 -1
  92. package/dist/lib/config.d.ts +0 -143
  93. package/dist/lib/config.d.ts.map +0 -1
  94. package/dist/lib/config.js +0 -476
  95. package/dist/lib/config.js.map +0 -1
  96. package/dist/lib/crypto.d.ts +0 -4
  97. package/dist/lib/crypto.d.ts.map +0 -1
  98. package/dist/lib/crypto.js +0 -56
  99. package/dist/lib/crypto.js.map +0 -1
  100. package/dist/lib/dom-noise-removal.d.ts +0 -2
  101. package/dist/lib/dom-noise-removal.d.ts.map +0 -1
  102. package/dist/lib/dom-noise-removal.js +0 -494
  103. package/dist/lib/dom-noise-removal.js.map +0 -1
  104. package/dist/lib/download.d.ts +0 -4
  105. package/dist/lib/download.d.ts.map +0 -1
  106. package/dist/lib/download.js +0 -106
  107. package/dist/lib/download.js.map +0 -1
  108. package/dist/lib/errors.d.ts +0 -14
  109. package/dist/lib/errors.d.ts.map +0 -1
  110. package/dist/lib/errors.js +0 -72
  111. package/dist/lib/errors.js.map +0 -1
  112. package/dist/lib/fetch-content.d.ts +0 -5
  113. package/dist/lib/fetch-content.d.ts.map +0 -1
  114. package/dist/lib/fetch-content.js +0 -164
  115. package/dist/lib/fetch-content.js.map +0 -1
  116. package/dist/lib/fetch-stream.d.ts +0 -5
  117. package/dist/lib/fetch-stream.d.ts.map +0 -1
  118. package/dist/lib/fetch-stream.js +0 -29
  119. package/dist/lib/fetch-stream.js.map +0 -1
  120. package/dist/lib/fetch.d.ts.map +0 -1
  121. package/dist/lib/fetch.js.map +0 -1
  122. package/dist/lib/host-normalization.d.ts +0 -2
  123. package/dist/lib/host-normalization.d.ts.map +0 -1
  124. package/dist/lib/host-normalization.js +0 -91
  125. package/dist/lib/host-normalization.js.map +0 -1
  126. package/dist/lib/ip-blocklist.d.ts +0 -9
  127. package/dist/lib/ip-blocklist.d.ts.map +0 -1
  128. package/dist/lib/ip-blocklist.js +0 -79
  129. package/dist/lib/ip-blocklist.js.map +0 -1
  130. package/dist/lib/json.d.ts +0 -2
  131. package/dist/lib/json.d.ts.map +0 -1
  132. package/dist/lib/json.js +0 -45
  133. package/dist/lib/json.js.map +0 -1
  134. package/dist/lib/language-detection.d.ts +0 -3
  135. package/dist/lib/language-detection.d.ts.map +0 -1
  136. package/dist/lib/language-detection.js +0 -355
  137. package/dist/lib/language-detection.js.map +0 -1
  138. package/dist/lib/markdown-cleanup.d.ts.map +0 -1
  139. package/dist/lib/markdown-cleanup.js +0 -532
  140. package/dist/lib/markdown-cleanup.js.map +0 -1
  141. package/dist/lib/mcp-lifecycle.d.ts +0 -5
  142. package/dist/lib/mcp-lifecycle.d.ts.map +0 -1
  143. package/dist/lib/mcp-lifecycle.js +0 -51
  144. package/dist/lib/mcp-lifecycle.js.map +0 -1
  145. package/dist/lib/mcp-validator.d.ts +0 -17
  146. package/dist/lib/mcp-validator.d.ts.map +0 -1
  147. package/dist/lib/mcp-validator.js +0 -45
  148. package/dist/lib/mcp-validator.js.map +0 -1
  149. package/dist/lib/mcp.d.ts +0 -4
  150. package/dist/lib/mcp.d.ts.map +0 -1
  151. package/dist/lib/mcp.js.map +0 -1
  152. package/dist/lib/observability.d.ts +0 -23
  153. package/dist/lib/observability.d.ts.map +0 -1
  154. package/dist/lib/observability.js +0 -238
  155. package/dist/lib/observability.js.map +0 -1
  156. package/dist/lib/server-tuning.d.ts +0 -15
  157. package/dist/lib/server-tuning.d.ts.map +0 -1
  158. package/dist/lib/server-tuning.js +0 -49
  159. package/dist/lib/server-tuning.js.map +0 -1
  160. package/dist/lib/session.d.ts +0 -45
  161. package/dist/lib/session.d.ts.map +0 -1
  162. package/dist/lib/session.js +0 -263
  163. package/dist/lib/session.js.map +0 -1
  164. package/dist/lib/timer-utils.d.ts +0 -13
  165. package/dist/lib/timer-utils.d.ts.map +0 -1
  166. package/dist/lib/timer-utils.js +0 -44
  167. package/dist/lib/timer-utils.js.map +0 -1
  168. package/dist/lib/tool-errors.d.ts +0 -12
  169. package/dist/lib/tool-errors.d.ts.map +0 -1
  170. package/dist/lib/tool-errors.js +0 -55
  171. package/dist/lib/tool-errors.js.map +0 -1
  172. package/dist/lib/tool-pipeline.d.ts.map +0 -1
  173. package/dist/lib/tool-pipeline.js.map +0 -1
  174. package/dist/lib/tool-progress.d.ts.map +0 -1
  175. package/dist/lib/tool-progress.js.map +0 -1
  176. package/dist/lib/type-guards.d.ts +0 -16
  177. package/dist/lib/type-guards.d.ts.map +0 -1
  178. package/dist/lib/type-guards.js +0 -13
  179. package/dist/lib/type-guards.js.map +0 -1
  180. package/dist/prompts/index.d.ts.map +0 -1
  181. package/dist/prompts/index.js.map +0 -1
  182. package/dist/resources/index.d.ts.map +0 -1
  183. package/dist/resources/index.js.map +0 -1
  184. package/dist/resources/instructions.d.ts.map +0 -1
  185. package/dist/resources/instructions.js.map +0 -1
  186. package/dist/schemas/inputs.d.ts.map +0 -1
  187. package/dist/schemas/inputs.js.map +0 -1
  188. package/dist/schemas/outputs.d.ts.map +0 -1
  189. package/dist/schemas/outputs.js.map +0 -1
  190. package/dist/server.d.ts.map +0 -1
  191. package/dist/server.js.map +0 -1
  192. package/dist/tasks/execution.d.ts.map +0 -1
  193. package/dist/tasks/execution.js.map +0 -1
  194. package/dist/tasks/manager.d.ts.map +0 -1
  195. package/dist/tasks/manager.js.map +0 -1
  196. package/dist/tasks/owner.d.ts.map +0 -1
  197. package/dist/tasks/owner.js.map +0 -1
  198. package/dist/tasks/tool-registry.d.ts.map +0 -1
  199. package/dist/tasks/tool-registry.js.map +0 -1
  200. package/dist/tools/fetch-url.d.ts.map +0 -1
  201. package/dist/tools/fetch-url.js.map +0 -1
  202. package/dist/tools/index.d.ts.map +0 -1
  203. package/dist/tools/index.js.map +0 -1
  204. package/dist/transform/transform.d.ts.map +0 -1
  205. package/dist/transform/transform.js.map +0 -1
  206. package/dist/transform/types.d.ts.map +0 -1
  207. package/dist/transform/types.js.map +0 -1
  208. package/dist/transform/worker-pool.d.ts.map +0 -1
  209. package/dist/transform/worker-pool.js.map +0 -1
  210. package/dist/transform/workers/shared.d.ts.map +0 -1
  211. package/dist/transform/workers/shared.js.map +0 -1
  212. package/dist/transform/workers/transform-child.d.ts.map +0 -1
  213. package/dist/transform/workers/transform-child.js.map +0 -1
  214. package/dist/transform/workers/transform-worker.d.ts.map +0 -1
  215. package/dist/transform/workers/transform-worker.js.map +0 -1
@@ -1,537 +1,277 @@
1
1
  import { Buffer } from 'node:buffer';
2
2
  import { randomUUID } from 'node:crypto';
3
3
  import diagnosticsChannel from 'node:diagnostics_channel';
4
- import dns from 'node:dns';
4
+ import {} from 'node:http';
5
5
  import { isIP } from 'node:net';
6
+ import { posix as pathPosix } from 'node:path';
6
7
  import { performance } from 'node:perf_hooks';
7
8
  import { PassThrough, Readable, Transform } from 'node:stream';
8
9
  import { buffer as consumeBuffer } from 'node:stream/consumers';
9
10
  import { finished, pipeline } from 'node:stream/promises';
11
+ import {} from 'node:stream/web';
10
12
  import tls from 'node:tls';
11
13
  import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
12
14
  import { Agent } from 'undici';
13
- import { config } from './config.js';
14
- import { createErrorWithCode, FetchError, isSystemError, toError, } from './errors.js';
15
- import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
16
- import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
17
- import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
18
- import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
19
- import { isError, isObject } from './type-guards.js';
20
- const defaultLogger = {
21
- debug: logDebug,
22
- warn: logWarn,
23
- error: logError,
24
- };
25
- const defaultContext = {
26
- getRequestId,
27
- getOperationId,
15
+ import { z } from 'zod';
16
+ import { get as cacheGet, config, getOperationId, getRequestId, logDebug, logError, logWarn, parseCachedPayload, redactUrl, resolveCachedPayloadContent, } from './core.js';
17
+ import { BLOCKED_HOST_SUFFIXES, createDnsPreflight, IpBlocker, RawUrlTransformer, SafeDnsResolver, UrlNormalizer, VALIDATION_ERROR_CODE, } from './url.js';
18
+ import { createErrorWithCode, FetchError, isError, isObject, isSystemError, toError, } from './utils.js';
19
+ const FILENAME_RULES = {
20
+ MAX_LEN: 200,
21
+ UNSAFE_CHARS: /[<>:"/\\|?*\p{C}]/gu,
22
+ WHITESPACE: /\s+/g,
23
+ EXTENSIONS: /\.(html?|php|aspx?|jsp)$/i,
28
24
  };
29
- const defaultRedactor = {
30
- redact: redactUrl,
31
- };
32
- const defaultFetch = (input, init) => globalThis.fetch(input, init);
33
- function isLocalFetchAllowed() {
34
- return process.env['ALLOW_LOCAL_FETCH'] === 'true';
35
- }
36
- class IpBlocker {
37
- security;
38
- blockList = createDefaultBlockList();
39
- constructor(security) {
40
- this.security = security;
41
- }
42
- isBlockedIp(candidate) {
43
- const normalized = candidate.trim().toLowerCase();
44
- if (isCloudMetadataHost(normalized))
45
- return true;
46
- if (isLocalFetchAllowed())
47
- return false;
48
- if (!normalized)
49
- return false;
50
- if (this.security.blockedHosts.has(normalized))
51
- return true;
52
- const normalizedIp = normalizeIpForBlockList(normalized);
53
- return normalizedIp
54
- ? this.blockList.check(normalizedIp.ip, normalizedIp.family)
55
- : false;
56
- }
57
- }
58
- const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
59
- function createValidationError(message) {
60
- return createErrorWithCode(message, VALIDATION_ERROR_CODE);
61
- }
62
- const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
63
- // This list is not exhaustive but covers the most common cloud metadata endpoints.
64
- const CLOUD_METADATA_HOSTS = new Set([
65
- '169.254.169.254', // AWS / GCP / Azure
66
- 'metadata.google.internal', // GCP
67
- '100.100.100.200', // Alibaba Cloud
68
- 'fd00:ec2::254', // AWS IPv6
69
- ]);
70
- function isCloudMetadataHost(hostname) {
71
- const lowered = hostname.toLowerCase();
72
- if (CLOUD_METADATA_HOSTS.has(lowered))
73
- return true;
74
- const normalized = normalizeIpForBlockList(lowered);
75
- return normalized !== null && CLOUD_METADATA_HOSTS.has(normalized.ip);
76
- }
77
- class UrlNormalizer {
78
- constants;
79
- security;
80
- ipBlocker;
81
- blockedHostSuffixes;
82
- constructor(constants, security, ipBlocker, blockedHostSuffixes) {
83
- this.constants = constants;
84
- this.security = security;
85
- this.ipBlocker = ipBlocker;
86
- this.blockedHostSuffixes = blockedHostSuffixes;
87
- }
88
- normalize(urlString) {
89
- const trimmedUrl = this.requireTrimmedUrl(urlString);
90
- if (trimmedUrl.length > this.constants.maxUrlLength) {
91
- throw createValidationError(`URL exceeds maximum length of ${this.constants.maxUrlLength} characters`);
92
- }
93
- let url;
94
- try {
95
- url = new URL(trimmedUrl);
96
- }
97
- catch {
98
- throw createValidationError('Invalid URL format');
99
- }
100
- if (url.protocol !== 'http:' && url.protocol !== 'https:') {
101
- throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
102
- }
103
- if (url.username || url.password) {
104
- throw createValidationError('URLs with embedded credentials are not allowed');
105
- }
106
- const hostname = this.normalizeHostname(url);
107
- this.assertHostnameAllowed(hostname);
108
- url.hostname = hostname;
109
- return { normalizedUrl: url.href, hostname };
110
- }
111
- validateAndNormalize(urlString) {
112
- return this.normalize(urlString).normalizedUrl;
113
- }
114
- requireTrimmedUrl(urlString) {
115
- if (!urlString || typeof urlString !== 'string') {
116
- throw createValidationError('URL is required');
117
- }
118
- const trimmed = urlString.trim();
119
- if (!trimmed)
120
- throw createValidationError('URL cannot be empty');
121
- return trimmed;
122
- }
123
- normalizeHostname(url) {
124
- const hostname = url.hostname.toLowerCase().replace(/\.+$/, '');
125
- if (!hostname) {
126
- throw createValidationError('URL must have a valid hostname');
127
- }
128
- return hostname;
129
- }
130
- assertHostnameAllowed(hostname) {
131
- if (isCloudMetadataHost(hostname)) {
132
- throw createValidationError(`Blocked host: ${hostname}. Cloud metadata endpoints are not allowed`);
133
- }
134
- if (!isLocalFetchAllowed()) {
135
- if (this.security.blockedHosts.has(hostname)) {
136
- throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
137
- }
138
- if (this.ipBlocker.isBlockedIp(hostname)) {
139
- throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
140
- }
141
- }
142
- if (this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix))) {
143
- throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
144
- }
145
- }
25
+ function sanitizeString(input) {
26
+ return input
27
+ .toLowerCase()
28
+ .replace(FILENAME_RULES.UNSAFE_CHARS, '')
29
+ .replace(FILENAME_RULES.WHITESPACE, '-')
30
+ .replace(/-+/g, '-')
31
+ .replace(/(?:^-|-$)/g, '');
146
32
  }
147
- function getPatternGroup(groups, key) {
148
- const value = groups[key];
149
- if (value === undefined)
33
+ function resolveUrlFilenameCandidate(url) {
34
+ const parsed = new URL(url);
35
+ if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
36
+ return null;
37
+ const basename = pathPosix.basename(parsed.pathname);
38
+ if (!basename || basename === 'index')
150
39
  return null;
151
- if (value === '')
40
+ const cleaned = basename.replace(FILENAME_RULES.EXTENSIONS, '');
41
+ const sanitized = sanitizeString(cleaned);
42
+ if (sanitized === 'index')
152
43
  return null;
153
- return value;
44
+ return sanitized || null;
154
45
  }
155
- const GITHUB_BLOB_PATTERN = new URLPattern({
156
- protocol: 'http{s}?',
157
- hostname: '{:sub.}?github.com',
158
- pathname: '/:owner/:repo/blob/:branch/:path+',
159
- });
160
- const GITHUB_GIST_PATTERN = new URLPattern({
161
- protocol: 'http{s}?',
162
- hostname: 'gist.github.com',
163
- pathname: '/:user/:gistId',
164
- });
165
- const GITHUB_GIST_RAW_PATTERN = new URLPattern({
166
- protocol: 'http{s}?',
167
- hostname: 'gist.github.com',
168
- pathname: '/:user/:gistId/raw/:filePath+',
169
- });
170
- const GITLAB_BLOB_PATTERNS = [
171
- new URLPattern({
172
- protocol: 'http{s}?',
173
- hostname: 'gitlab.com',
174
- pathname: '/:base+/-/blob/:branch/:path+',
175
- }),
176
- new URLPattern({
177
- protocol: 'http{s}?',
178
- hostname: '*:sub.gitlab.com',
179
- pathname: '/:base+/-/blob/:branch/:path+',
180
- }),
181
- ];
182
- const BITBUCKET_SRC_PATTERN = new URLPattern({
183
- protocol: 'http{s}?',
184
- hostname: '{:sub.}?bitbucket.org',
185
- pathname: '/:owner/:repo/src/:branch/:path+',
186
- });
187
- const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
188
- const RAW_TEXT_EXTENSIONS = new Set([
189
- '.md',
190
- '.markdown',
191
- '.txt',
192
- '.json',
193
- '.yaml',
194
- '.yml',
195
- '.toml',
196
- '.xml',
197
- '.csv',
198
- '.rst',
199
- '.adoc',
200
- '.org',
201
- ]);
202
- class RawUrlTransformer {
203
- logger;
204
- constructor(logger) {
205
- this.logger = logger;
206
- }
207
- transformToRawUrl(url) {
208
- if (!url)
209
- return { url, transformed: false };
210
- if (this.isRawUrl(url))
211
- return { url, transformed: false };
212
- let base;
213
- let hash;
214
- let parsed;
215
- try {
216
- parsed = new URL(url);
217
- base = parsed.origin + parsed.pathname;
218
- ({ hash } = parsed);
219
- }
220
- catch {
221
- ({ base, hash } = this.splitParams(url));
222
- }
223
- const match = this.tryTransformWithUrl(base, hash, parsed);
224
- if (!match)
225
- return { url, transformed: false };
226
- this.logger.debug('URL transformed to raw content URL', {
227
- platform: match.platform,
228
- original: url.substring(0, 100),
229
- transformed: match.url.substring(0, 100),
230
- });
231
- return { url: match.url, transformed: true, platform: match.platform };
232
- }
233
- isRawTextContentUrl(urlString) {
234
- if (!urlString)
235
- return false;
236
- if (this.isRawUrl(urlString))
237
- return true;
238
- try {
239
- const url = new URL(urlString);
240
- const pathname = url.pathname.toLowerCase();
241
- const lastDot = pathname.lastIndexOf('.');
242
- if (lastDot === -1)
243
- return false;
244
- return RAW_TEXT_EXTENSIONS.has(pathname.slice(lastDot));
245
- }
246
- catch {
247
- const { base } = this.splitParams(urlString);
248
- const lowerBase = base.toLowerCase();
249
- const lastDot = lowerBase.lastIndexOf('.');
250
- if (lastDot === -1)
251
- return false;
252
- return RAW_TEXT_EXTENSIONS.has(lowerBase.slice(lastDot));
253
- }
254
- }
255
- isRawUrl(url) {
256
- const lower = url.toLowerCase();
257
- return (lower.includes('raw.githubusercontent.com') ||
258
- lower.includes('gist.githubusercontent.com') ||
259
- lower.includes('/-/raw/') ||
260
- BITBUCKET_RAW_RE.test(lower));
261
- }
262
- splitParams(urlString) {
263
- const hashIndex = urlString.indexOf('#');
264
- const queryIndex = urlString.indexOf('?');
265
- const endIndex = Math.min(queryIndex === -1 ? urlString.length : queryIndex, hashIndex === -1 ? urlString.length : hashIndex);
266
- const hash = hashIndex !== -1 ? urlString.slice(hashIndex) : '';
267
- return { base: urlString.slice(0, endIndex), hash };
268
- }
269
- tryTransformWithUrl(base, hash, preParsed) {
270
- let parsed = preParsed ?? null;
271
- if (!parsed) {
272
- try {
273
- parsed = new URL(base);
274
- }
275
- catch {
276
- // Ignore invalid URLs
277
- }
278
- }
279
- if (!parsed)
280
- return null;
281
- if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
282
- return null;
283
- const gist = this.transformGithubGist(base, hash);
284
- if (gist)
285
- return gist;
286
- const github = this.transformGithubBlob(base);
287
- if (github)
288
- return github;
289
- const gitlab = this.transformGitLab(base, parsed.origin);
290
- if (gitlab)
291
- return gitlab;
292
- const bitbucket = this.transformBitbucket(base, parsed.origin);
293
- if (bitbucket)
294
- return bitbucket;
46
+ function truncateFilenameBase(name, extension) {
47
+ const maxBase = FILENAME_RULES.MAX_LEN - extension.length;
48
+ return name.length > maxBase ? name.substring(0, maxBase) : name;
49
+ }
50
+ function resolveTitleFilenameCandidate(title) {
51
+ if (!title)
295
52
  return null;
53
+ return sanitizeString(title) || null;
54
+ }
55
+ function resolveFilenameBase(url, title, hashFallback) {
56
+ try {
57
+ const fromUrl = resolveUrlFilenameCandidate(url);
58
+ if (fromUrl)
59
+ return fromUrl;
296
60
  }
297
- transformGithubBlob(url) {
298
- const match = GITHUB_BLOB_PATTERN.exec(url);
299
- if (!match)
300
- return null;
301
- const groups = match.pathname.groups;
302
- const owner = getPatternGroup(groups, 'owner');
303
- const repo = getPatternGroup(groups, 'repo');
304
- const branch = getPatternGroup(groups, 'branch');
305
- const path = getPatternGroup(groups, 'path');
306
- if (!owner || !repo || !branch || !path)
307
- return null;
308
- return {
309
- url: `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`,
310
- platform: 'github',
311
- };
312
- }
313
- transformGithubGist(url, hash) {
314
- const rawMatch = GITHUB_GIST_RAW_PATTERN.exec(url);
315
- if (rawMatch) {
316
- const groups = rawMatch.pathname.groups;
317
- const user = getPatternGroup(groups, 'user');
318
- const gistId = getPatternGroup(groups, 'gistId');
319
- const filePath = getPatternGroup(groups, 'filePath');
320
- if (!user || !gistId)
321
- return null;
322
- const resolvedFilePath = filePath ? `/${filePath}` : '';
323
- return {
324
- url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${resolvedFilePath}`,
325
- platform: 'github-gist',
326
- };
327
- }
328
- const match = GITHUB_GIST_PATTERN.exec(url);
329
- if (!match)
330
- return null;
331
- const groups = match.pathname.groups;
332
- const user = getPatternGroup(groups, 'user');
333
- const gistId = getPatternGroup(groups, 'gistId');
334
- if (!user || !gistId)
335
- return null;
336
- let filePath = '';
337
- if (hash.startsWith('#file-')) {
338
- const filename = hash.slice('#file-'.length).replace(/-/g, '.');
339
- if (filename)
340
- filePath = `/${filename}`;
341
- }
342
- return {
343
- url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`,
344
- platform: 'github-gist',
345
- };
61
+ catch {
62
+ // Ignore URL parsing errors and continue fallbacks.
63
+ }
64
+ const fromTitle = resolveTitleFilenameCandidate(title);
65
+ if (fromTitle)
66
+ return fromTitle;
67
+ if (hashFallback)
68
+ return hashFallback.substring(0, 16);
69
+ return `download-${Date.now()}`;
70
+ }
71
+ export function generateSafeFilename(url, title, hashFallback, extension = '.md') {
72
+ const name = resolveFilenameBase(url, title, hashFallback);
73
+ return `${truncateFilenameBase(name, extension)}${extension}`;
74
+ }
75
+ const DownloadParamsSchema = z.strictObject({
76
+ namespace: z.literal('markdown'),
77
+ hash: z
78
+ .string()
79
+ .regex(/^[a-f0-9.]+$/i)
80
+ .min(8)
81
+ .max(64),
82
+ });
83
+ function writeJsonError(res, status, message, code) {
84
+ res.writeHead(status, { 'Content-Type': 'application/json' });
85
+ res.end(JSON.stringify({ error: message, code }));
86
+ }
87
+ export function handleDownload(res, namespace, hash) {
88
+ const parsed = DownloadParamsSchema.safeParse({ namespace, hash });
89
+ if (!parsed.success) {
90
+ writeJsonError(res, 400, 'Invalid namespace or hash', 'BAD_REQUEST');
91
+ return;
346
92
  }
347
- transformGitLab(url, origin) {
348
- for (const pattern of GITLAB_BLOB_PATTERNS) {
349
- const match = pattern.exec(url);
350
- if (!match)
351
- continue;
352
- const groups = match.pathname.groups;
353
- const base = getPatternGroup(groups, 'base');
354
- const branch = getPatternGroup(groups, 'branch');
355
- const path = getPatternGroup(groups, 'path');
356
- if (!base || !branch || !path)
357
- return null;
358
- return {
359
- url: `${origin}/${base}/-/raw/${branch}/${path}`,
360
- platform: 'gitlab',
361
- };
362
- }
363
- return null;
93
+ const cacheKey = `${parsed.data.namespace}:${parsed.data.hash}`;
94
+ const entry = cacheGet(cacheKey, { force: true });
95
+ if (!entry) {
96
+ writeJsonError(res, 404, 'Not found or expired', 'NOT_FOUND');
97
+ return;
364
98
  }
365
- transformBitbucket(url, origin) {
366
- const match = BITBUCKET_SRC_PATTERN.exec(url);
367
- if (!match)
368
- return null;
369
- const groups = match.pathname.groups;
370
- const owner = getPatternGroup(groups, 'owner');
371
- const repo = getPatternGroup(groups, 'repo');
372
- const branch = getPatternGroup(groups, 'branch');
373
- const path = getPatternGroup(groups, 'path');
374
- if (!owner || !repo || !branch || !path)
375
- return null;
376
- return {
377
- url: `${origin}/${owner}/${repo}/raw/${branch}/${path}`,
378
- platform: 'bitbucket',
379
- };
99
+ const payload = parseCachedPayload(entry.content);
100
+ const content = payload ? resolveCachedPayloadContent(payload) : null;
101
+ if (!content) {
102
+ writeJsonError(res, 404, 'Content missing', 'NOT_FOUND');
103
+ return;
380
104
  }
105
+ const fileName = generateSafeFilename(entry.url, payload?.title, parsed.data.hash);
106
+ // Safe header generation — RFC 5987 encoding for non-ASCII filenames
107
+ const encoded = encodeURIComponent(fileName).replace(/'/g, '%27');
108
+ res.setHeader('Content-Type', 'text/markdown; charset=utf-8');
109
+ res.setHeader('Content-Disposition', `attachment; filename="${fileName}"; filename*=UTF-8''${encoded}`);
110
+ res.setHeader('Cache-Control', `private, max-age=${config.cache.ttl}`);
111
+ res.setHeader('X-Content-Type-Options', 'nosniff');
112
+ res.end(content);
381
113
  }
382
- const DNS_LOOKUP_TIMEOUT_MS = 5000;
383
- const CNAME_LOOKUP_MAX_DEPTH = 5;
384
- function normalizeDnsName(value) {
385
- const normalized = value.trim().toLowerCase().replace(/\.+$/, '');
386
- return normalized;
387
- }
388
- function createSignalAbortRace(signal, isAbort, onTimeout, onAbort) {
389
- let abortListener = null;
390
- const abortPromise = new Promise((_, reject) => {
391
- abortListener = () => {
392
- reject(isAbort() ? onAbort() : onTimeout());
393
- };
394
- signal.addEventListener('abort', abortListener, { once: true });
395
- if (signal.aborted)
396
- abortListener();
397
- });
398
- const cleanup = () => {
399
- if (!abortListener)
400
- return;
401
- try {
402
- signal.removeEventListener('abort', abortListener);
403
- }
404
- catch {
405
- // Ignore listener cleanup failures; they are non-fatal by design.
406
- }
407
- abortListener = null;
408
- };
409
- return { abortPromise, cleanup };
410
- }
411
- async function withTimeout(promise, timeoutMs, onTimeout, signal, onAbort) {
412
- const timeoutSignal = timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined;
413
- const raceSignal = signal && timeoutSignal
414
- ? AbortSignal.any([signal, timeoutSignal])
415
- : (signal ?? timeoutSignal);
416
- if (!raceSignal)
417
- return promise;
418
- const abortRace = createSignalAbortRace(raceSignal, () => signal?.aborted === true, onTimeout, onAbort ?? (() => new Error('Request was canceled')));
114
+ const UTF8_ENCODING = 'utf-8';
115
+ function getCharsetFromContentType(contentType) {
116
+ if (!contentType)
117
+ return undefined;
118
+ const match = /charset=([^;]+)/i.exec(contentType);
119
+ const charsetGroup = match?.[1];
120
+ if (!charsetGroup)
121
+ return undefined;
122
+ let charset = charsetGroup.trim();
123
+ if (charset.startsWith('"') && charset.endsWith('"')) {
124
+ charset = charset.slice(1, -1);
125
+ }
126
+ return charset.trim();
127
+ }
128
+ function createDecoder(encoding) {
129
+ const fallback = () => new TextDecoder(UTF8_ENCODING);
130
+ if (!encoding)
131
+ return fallback();
419
132
  try {
420
- return await Promise.race([promise, abortRace.abortPromise]);
133
+ return new TextDecoder(encoding);
421
134
  }
422
- finally {
423
- abortRace.cleanup();
135
+ catch {
136
+ return fallback();
424
137
  }
425
138
  }
426
- function createAbortSignalError() {
427
- const err = new Error('Request was canceled');
428
- err.name = 'AbortError';
429
- return err;
430
- }
431
- class SafeDnsResolver {
432
- ipBlocker;
433
- security;
434
- blockedHostSuffixes;
435
- constructor(ipBlocker, security, blockedHostSuffixes) {
436
- this.ipBlocker = ipBlocker;
437
- this.security = security;
438
- this.blockedHostSuffixes = blockedHostSuffixes;
439
- }
440
- async resolveAndValidate(hostname, signal) {
441
- const normalizedHostname = normalizeDnsName(hostname.replace(/^\[|\]$/g, ''));
442
- if (!normalizedHostname) {
443
- throw createErrorWithCode('Invalid hostname provided', 'EINVAL');
444
- }
445
- if (signal?.aborted) {
446
- throw createAbortSignalError();
447
- }
448
- if (this.isBlockedHostname(normalizedHostname)) {
449
- throw createErrorWithCode(`Blocked host: ${normalizedHostname}. Internal hosts are not allowed`, 'EBLOCKED');
450
- }
451
- if (isIP(normalizedHostname)) {
452
- if (isCloudMetadataHost(normalizedHostname)) {
453
- throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Cloud metadata endpoints are not allowed`, 'EBLOCKED');
454
- }
455
- if (process.env['ALLOW_LOCAL_FETCH'] !== 'true' &&
456
- this.ipBlocker.isBlockedIp(normalizedHostname)) {
457
- throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
458
- }
459
- return normalizedHostname;
460
- }
461
- await this.assertNoBlockedCname(normalizedHostname, signal);
462
- const resultPromise = dns.promises.lookup(normalizedHostname, {
463
- all: true,
464
- order: 'verbatim',
465
- });
466
- const addresses = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS lookup timed out for ${normalizedHostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
467
- if (addresses.length === 0 || !addresses[0]) {
468
- throw createErrorWithCode(`No DNS results returned for ${normalizedHostname}`, 'ENODATA');
469
- }
470
- for (const addr of addresses) {
471
- if (addr.family !== 4 && addr.family !== 6) {
472
- throw createErrorWithCode(`Invalid address family returned for ${normalizedHostname}`, 'EINVAL');
473
- }
474
- if (isCloudMetadataHost(addr.address)) {
475
- throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
476
- }
477
- if (!isLocalFetchAllowed() && this.ipBlocker.isBlockedIp(addr.address)) {
478
- throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
479
- }
480
- }
481
- return addresses[0].address;
482
- }
483
- isBlockedHostname(hostname) {
484
- if (isCloudMetadataHost(hostname))
485
- return true;
486
- if (isLocalFetchAllowed())
139
+ function decodeBuffer(buffer, encoding) {
140
+ return createDecoder(encoding).decode(buffer);
141
+ }
142
+ function normalizeEncodingLabel(encoding) {
143
+ return encoding?.trim().toLowerCase() ?? '';
144
+ }
145
+ function isUnicodeWideEncoding(encoding) {
146
+ const normalized = normalizeEncodingLabel(encoding);
147
+ return (normalized.startsWith('utf-16') ||
148
+ normalized.startsWith('utf-32') ||
149
+ normalized === 'ucs-2' ||
150
+ normalized === 'unicodefffe' ||
151
+ normalized === 'unicodefeff');
152
+ }
153
+ const BOM_SIGNATURES = [
154
+ // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
155
+ { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
156
+ { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
157
+ { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
158
+ { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
159
+ { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
160
+ ];
161
+ function startsWithBytes(buffer, signature) {
162
+ const sigLen = signature.length;
163
+ if (buffer.length < sigLen)
164
+ return false;
165
+ for (let i = 0; i < sigLen; i += 1) {
166
+ if (buffer[i] !== signature[i])
487
167
  return false;
488
- if (this.security.blockedHosts.has(hostname))
489
- return true;
490
- return this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
491
168
  }
492
- async assertNoBlockedCname(hostname, signal) {
493
- let current = hostname;
494
- const seen = new Set();
495
- for (let depth = 0; depth < CNAME_LOOKUP_MAX_DEPTH; depth += 1) {
496
- if (!current || seen.has(current))
497
- return;
498
- seen.add(current);
499
- const cnames = await this.resolveCname(current, signal);
500
- if (cnames.length === 0)
501
- return;
502
- for (const cname of cnames) {
503
- if (this.isBlockedHostname(cname)) {
504
- throw createErrorWithCode(`Blocked DNS CNAME detected for ${hostname}: ${cname}`, 'EBLOCKED');
505
- }
506
- }
507
- current = cnames[0] ?? '';
508
- }
169
+ return true;
170
+ }
171
+ function detectBomEncoding(buffer) {
172
+ for (const { bytes, encoding } of BOM_SIGNATURES) {
173
+ if (startsWithBytes(buffer, bytes))
174
+ return encoding;
509
175
  }
510
- async resolveCname(hostname, signal) {
511
- try {
512
- const resultPromise = dns.promises.resolveCname(hostname);
513
- const cnames = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS CNAME lookup timed out for ${hostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
514
- return cnames
515
- .map((value) => normalizeDnsName(value))
516
- .filter((value) => value.length > 0);
517
- }
518
- catch (error) {
519
- if (isError(error) && error.name === 'AbortError') {
520
- throw error;
521
- }
522
- if (isSystemError(error) &&
523
- (error.code === 'ENODATA' ||
524
- error.code === 'ENOTFOUND' ||
525
- error.code === 'ENODOMAIN')) {
526
- return [];
527
- }
528
- logDebug('DNS CNAME lookup failed; continuing with address lookup', {
529
- hostname,
530
- ...(isSystemError(error) ? { code: error.code } : {}),
531
- });
532
- return [];
533
- }
176
+ return undefined;
177
+ }
178
+ function readQuotedValue(input, startIndex) {
179
+ const first = input[startIndex];
180
+ if (!first)
181
+ return '';
182
+ const quoted = first === '"' || first === "'";
183
+ if (quoted) {
184
+ const end = input.indexOf(first, startIndex + 1);
185
+ return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
186
+ }
187
+ const tail = input.slice(startIndex);
188
+ const stop = tail.search(/[\s/>]/);
189
+ return (stop === -1 ? tail : tail.slice(0, stop)).trim();
190
+ }
191
+ function findTokenValue(original, lower, token, fromIndex = 0) {
192
+ const tokenIndex = lower.indexOf(token, fromIndex);
193
+ if (tokenIndex === -1)
194
+ return undefined;
195
+ const valueStart = tokenIndex + token.length;
196
+ const value = readQuotedValue(original, valueStart);
197
+ return value || undefined;
198
+ }
199
+ function extractHtmlCharset(headSnippet) {
200
+ const lower = headSnippet.toLowerCase();
201
+ const charset = findTokenValue(headSnippet, lower, 'charset=');
202
+ return charset ? charset.toLowerCase() : undefined;
203
+ }
204
+ function extractXmlEncoding(headSnippet) {
205
+ const lower = headSnippet.toLowerCase();
206
+ const xmlStart = lower.indexOf('<?xml');
207
+ if (xmlStart === -1)
208
+ return undefined;
209
+ const xmlEnd = lower.indexOf('?>', xmlStart);
210
+ const declaration = xmlEnd === -1
211
+ ? headSnippet.slice(xmlStart)
212
+ : headSnippet.slice(xmlStart, xmlEnd + 2);
213
+ const declarationLower = declaration.toLowerCase();
214
+ const encoding = findTokenValue(declaration, declarationLower, 'encoding=');
215
+ return encoding ? encoding.toLowerCase() : undefined;
216
+ }
217
+ function detectHtmlDeclaredEncoding(buffer) {
218
+ const scanSize = Math.min(buffer.length, 8_192);
219
+ if (scanSize === 0)
220
+ return undefined;
221
+ const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
222
+ return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
223
+ }
224
+ function resolveEncoding(declaredEncoding, sample) {
225
+ const bomEncoding = detectBomEncoding(sample);
226
+ if (bomEncoding)
227
+ return bomEncoding;
228
+ if (declaredEncoding)
229
+ return declaredEncoding;
230
+ return detectHtmlDeclaredEncoding(sample);
231
+ }
232
+ const BINARY_SIGNATURES = [
233
+ [0x25, 0x50, 0x44, 0x46],
234
+ [0x89, 0x50, 0x4e, 0x47],
235
+ [0x47, 0x49, 0x46, 0x38],
236
+ [0xff, 0xd8, 0xff],
237
+ [0x52, 0x49, 0x46, 0x46],
238
+ [0x42, 0x4d],
239
+ [0x49, 0x49, 0x2a, 0x00],
240
+ [0x4d, 0x4d, 0x00, 0x2a],
241
+ [0x00, 0x00, 0x01, 0x00],
242
+ [0x50, 0x4b, 0x03, 0x04],
243
+ [0x1f, 0x8b],
244
+ [0x42, 0x5a, 0x68],
245
+ [0x52, 0x61, 0x72, 0x21],
246
+ [0x37, 0x7a, 0xbc, 0xaf],
247
+ [0x7f, 0x45, 0x4c, 0x46],
248
+ [0x4d, 0x5a],
249
+ [0xcf, 0xfa, 0xed, 0xfe],
250
+ [0x00, 0x61, 0x73, 0x6d],
251
+ [0x1a, 0x45, 0xdf, 0xa3],
252
+ [0x66, 0x74, 0x79, 0x70],
253
+ [0x46, 0x4c, 0x56],
254
+ [0x49, 0x44, 0x33],
255
+ [0xff, 0xfb],
256
+ [0xff, 0xfa],
257
+ [0x4f, 0x67, 0x67, 0x53],
258
+ [0x66, 0x4c, 0x61, 0x43],
259
+ [0x4d, 0x54, 0x68, 0x64],
260
+ [0x77, 0x4f, 0x46, 0x46],
261
+ [0x00, 0x01, 0x00, 0x00],
262
+ [0x4f, 0x54, 0x54, 0x4f],
263
+ [0x53, 0x51, 0x4c, 0x69],
264
+ ];
265
+ function hasNullByte(buffer, limit) {
266
+ const checkLen = Math.min(buffer.length, limit);
267
+ return buffer.subarray(0, checkLen).includes(0x00);
268
+ }
269
+ function isBinaryContent(buffer, encoding) {
270
+ for (const signature of BINARY_SIGNATURES) {
271
+ if (startsWithBytes(buffer, signature))
272
+ return true;
534
273
  }
274
+ return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
535
275
  }
536
276
  function parseRetryAfter(header) {
537
277
  if (!header)
@@ -619,221 +359,93 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
619
359
  }
620
360
  return createFetchError({ kind: 'network', message: error.message }, url);
621
361
  }
622
- const fetchChannel = diagnosticsChannel.channel('fetch-url-mcp.fetch');
623
- const SLOW_REQUEST_THRESHOLD_MS = 5000;
624
- class FetchTelemetry {
625
- logger;
626
- context;
627
- redactor;
628
- constructor(logger, context, redactor) {
629
- this.logger = logger;
630
- this.context = context;
631
- this.redactor = redactor;
362
+ const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
363
+ function isRedirectStatus(status) {
364
+ return REDIRECT_STATUSES.has(status);
365
+ }
366
+ function cancelResponseBody(response) {
367
+ const cancelPromise = response.body?.cancel();
368
+ if (!cancelPromise)
369
+ return;
370
+ void cancelPromise.catch(() => undefined);
371
+ }
372
+ class MaxBytesError extends Error {
373
+ constructor() {
374
+ super('max-bytes-reached');
632
375
  }
633
- redact(url) {
634
- return this.redactor.redact(url);
376
+ }
377
+ class RedirectFollower {
378
+ fetchFn;
379
+ normalizeUrl;
380
+ preflight;
381
+ constructor(fetchFn, normalizeUrl, preflight) {
382
+ this.fetchFn = fetchFn;
383
+ this.normalizeUrl = normalizeUrl;
384
+ this.preflight = preflight;
635
385
  }
636
- contextFields(ctx) {
386
+ async fetchWithRedirects(url, init, maxRedirects) {
387
+ let currentUrl = url;
388
+ const redirectLimit = Math.max(0, maxRedirects);
389
+ for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
390
+ const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
391
+ let ipAddress;
392
+ if (this.preflight) {
393
+ ipAddress = await this.preflight(currentUrl, init.signal ?? undefined);
394
+ }
395
+ return this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress);
396
+ });
397
+ if (!nextUrl)
398
+ return { response, url: currentUrl };
399
+ currentUrl = nextUrl;
400
+ }
401
+ throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
402
+ }
403
+ async performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress) {
404
+ const fetchInit = {
405
+ ...init,
406
+ redirect: 'manual',
407
+ };
408
+ if (ipAddress) {
409
+ const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
410
+ const agent = new Agent({
411
+ connect: {
412
+ lookup: (hostname, options, callback) => {
413
+ const family = isIP(ipAddress) === 6 ? 6 : 4;
414
+ if (options.all) {
415
+ callback(null, [{ address: ipAddress, family }]);
416
+ }
417
+ else {
418
+ callback(null, ipAddress, family);
419
+ }
420
+ },
421
+ timeout: config.fetcher.timeout,
422
+ ...(ca ? { ca } : {}),
423
+ },
424
+ pipelining: 1,
425
+ connections: 1,
426
+ keepAliveTimeout: 1000,
427
+ keepAliveMaxTimeout: 1000,
428
+ });
429
+ fetchInit.dispatcher = agent;
430
+ }
431
+ const response = await this.fetchFn(currentUrl, fetchInit);
432
+ if (!isRedirectStatus(response.status))
433
+ return { response };
434
+ if (redirectCount >= redirectLimit) {
435
+ cancelResponseBody(response);
436
+ throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
437
+ }
438
+ const location = this.getRedirectLocation(response, currentUrl);
439
+ cancelResponseBody(response);
440
+ const nextUrl = this.resolveRedirectTarget(currentUrl, location);
441
+ const parsedNextUrl = new URL(nextUrl);
442
+ if (parsedNextUrl.protocol !== 'http:' &&
443
+ parsedNextUrl.protocol !== 'https:') {
444
+ throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
445
+ }
637
446
  return {
638
- ...(ctx.contextRequestId
639
- ? { contextRequestId: ctx.contextRequestId }
640
- : {}),
641
- ...(ctx.operationId ? { operationId: ctx.operationId } : {}),
642
- };
643
- }
644
- start(url, method) {
645
- const safeUrl = this.redactor.redact(url);
646
- const contextRequestId = this.context.getRequestId();
647
- const operationId = this.context.getOperationId();
648
- const ctx = {
649
- requestId: randomUUID(),
650
- startTime: performance.now(),
651
- url: safeUrl,
652
- method: method.toUpperCase(),
653
- };
654
- if (contextRequestId)
655
- ctx.contextRequestId = contextRequestId;
656
- if (operationId)
657
- ctx.operationId = operationId;
658
- const ctxFields = this.contextFields(ctx);
659
- this.publish({
660
- v: 1,
661
- type: 'start',
662
- requestId: ctx.requestId,
663
- method: ctx.method,
664
- url: ctx.url,
665
- ...ctxFields,
666
- });
667
- this.logger.debug('HTTP Request', {
668
- requestId: ctx.requestId,
669
- method: ctx.method,
670
- url: ctx.url,
671
- ...ctxFields,
672
- });
673
- return ctx;
674
- }
675
- recordResponse(context, response, contentSize) {
676
- const duration = performance.now() - context.startTime;
677
- const durationLabel = `${Math.round(duration)}ms`;
678
- const ctxFields = this.contextFields(context);
679
- this.publish({
680
- v: 1,
681
- type: 'end',
682
- requestId: context.requestId,
683
- status: response.status,
684
- duration,
685
- ...ctxFields,
686
- });
687
- const contentType = response.headers.get('content-type') ?? undefined;
688
- const contentLengthHeader = response.headers.get('content-length');
689
- const size = contentLengthHeader ??
690
- (contentSize === undefined ? undefined : String(contentSize));
691
- this.logger.debug('HTTP Response', {
692
- requestId: context.requestId,
693
- status: response.status,
694
- url: context.url,
695
- duration: durationLabel,
696
- ...ctxFields,
697
- ...(contentType ? { contentType } : {}),
698
- ...(size ? { size } : {}),
699
- });
700
- if (duration > SLOW_REQUEST_THRESHOLD_MS) {
701
- this.logger.warn('Slow HTTP request detected', {
702
- requestId: context.requestId,
703
- url: context.url,
704
- duration: durationLabel,
705
- ...ctxFields,
706
- });
707
- }
708
- }
709
- recordError(context, error, status) {
710
- const duration = performance.now() - context.startTime;
711
- const err = toError(error);
712
- const code = isSystemError(err) ? err.code : undefined;
713
- const ctxFields = this.contextFields(context);
714
- this.publish({
715
- v: 1,
716
- type: 'error',
717
- requestId: context.requestId,
718
- url: context.url,
719
- error: err.message,
720
- duration,
721
- ...(code !== undefined ? { code } : {}),
722
- ...(status !== undefined ? { status } : {}),
723
- ...ctxFields,
724
- });
725
- const logData = {
726
- requestId: context.requestId,
727
- url: context.url,
728
- status,
729
- code,
730
- error: err.message,
731
- ...ctxFields,
732
- };
733
- if (status === 429) {
734
- this.logger.warn('HTTP Request Error', logData);
735
- return;
736
- }
737
- this.logger.error('HTTP Request Error', logData);
738
- }
739
- publish(event) {
740
- if (!fetchChannel.hasSubscribers)
741
- return;
742
- try {
743
- fetchChannel.publish(event);
744
- }
745
- catch {
746
- // Best-effort telemetry; never crash request path.
747
- }
748
- }
749
- }
750
- const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
751
- function isRedirectStatus(status) {
752
- return REDIRECT_STATUSES.has(status);
753
- }
754
- function cancelResponseBody(response) {
755
- const cancelPromise = response.body?.cancel();
756
- if (!cancelPromise)
757
- return;
758
- void cancelPromise.catch(() => undefined);
759
- }
760
- class MaxBytesError extends Error {
761
- constructor() {
762
- super('max-bytes-reached');
763
- }
764
- }
765
- class RedirectFollower {
766
- fetchFn;
767
- normalizeUrl;
768
- preflight;
769
- constructor(fetchFn, normalizeUrl, preflight) {
770
- this.fetchFn = fetchFn;
771
- this.normalizeUrl = normalizeUrl;
772
- this.preflight = preflight;
773
- }
774
- async fetchWithRedirects(url, init, maxRedirects) {
775
- let currentUrl = url;
776
- const redirectLimit = Math.max(0, maxRedirects);
777
- for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
778
- const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
779
- let ipAddress;
780
- if (this.preflight) {
781
- ipAddress = await this.preflight(currentUrl, init.signal ?? undefined);
782
- }
783
- return this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress);
784
- });
785
- if (!nextUrl)
786
- return { response, url: currentUrl };
787
- currentUrl = nextUrl;
788
- }
789
- throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
790
- }
791
- async performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress) {
792
- const fetchInit = {
793
- ...init,
794
- redirect: 'manual',
795
- };
796
- if (ipAddress) {
797
- const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
798
- const agent = new Agent({
799
- connect: {
800
- lookup: (hostname, options, callback) => {
801
- const family = isIP(ipAddress) === 6 ? 6 : 4;
802
- if (options.all) {
803
- callback(null, [{ address: ipAddress, family }]);
804
- }
805
- else {
806
- callback(null, ipAddress, family);
807
- }
808
- },
809
- timeout: config.fetcher.timeout,
810
- ...(ca ? { ca } : {}),
811
- },
812
- pipelining: 1,
813
- connections: 1,
814
- keepAliveTimeout: 1000,
815
- keepAliveMaxTimeout: 1000,
816
- });
817
- fetchInit.dispatcher = agent;
818
- }
819
- const response = await this.fetchFn(currentUrl, fetchInit);
820
- if (!isRedirectStatus(response.status))
821
- return { response };
822
- if (redirectCount >= redirectLimit) {
823
- cancelResponseBody(response);
824
- throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
825
- }
826
- const location = this.getRedirectLocation(response, currentUrl);
827
- cancelResponseBody(response);
828
- const nextUrl = this.resolveRedirectTarget(currentUrl, location);
829
- const parsedNextUrl = new URL(nextUrl);
830
- if (parsedNextUrl.protocol !== 'http:' &&
831
- parsedNextUrl.protocol !== 'https:') {
832
- throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
833
- }
834
- return {
835
- response,
836
- nextUrl,
447
+ response,
448
+ nextUrl,
837
449
  };
838
450
  }
839
451
  getRedirectLocation(response, currentUrl) {
@@ -871,168 +483,6 @@ class RedirectFollower {
871
483
  }
872
484
  }
873
485
  }
874
- class ResponseTextReader {
875
- async read(response, url, maxBytes, signal, encoding) {
876
- const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
877
- const text = decodeBuffer(buffer, effectiveEncoding);
878
- return { text, size: buffer.byteLength, truncated };
879
- }
880
- async readBuffer(response, url, maxBytes, signal, encoding) {
881
- if (signal?.aborted) {
882
- cancelResponseBody(response);
883
- throw createFetchError({ kind: 'aborted' }, url);
884
- }
885
- if (!response.body) {
886
- return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
887
- }
888
- return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
889
- }
890
- async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
891
- if (signal?.aborted)
892
- throw createFetchError({ kind: 'canceled' }, url);
893
- const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
894
- let buffer;
895
- let truncated = false;
896
- try {
897
- // Try safe blob slicing if available (Node 18+) to avoid OOM
898
- const blob = await response.blob();
899
- if (Number.isFinite(limit) && blob.size > limit) {
900
- const sliced = blob.slice(0, limit);
901
- buffer = new Uint8Array(await sliced.arrayBuffer());
902
- truncated = true;
903
- }
904
- else {
905
- buffer = new Uint8Array(await blob.arrayBuffer());
906
- }
907
- }
908
- catch {
909
- // Fallback if blob() fails
910
- const arrayBuffer = await response.arrayBuffer();
911
- const length = Math.min(arrayBuffer.byteLength, limit);
912
- buffer = new Uint8Array(arrayBuffer, 0, length);
913
- truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
914
- }
915
- const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
916
- if (isBinaryContent(buffer, effectiveEncoding)) {
917
- throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
918
- }
919
- return {
920
- buffer,
921
- encoding: effectiveEncoding,
922
- size: buffer.byteLength,
923
- truncated,
924
- };
925
- }
926
- async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
927
- const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
928
- const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
929
- let effectiveEncoding = encoding ?? 'utf-8';
930
- let encodingResolved = false;
931
- let total = 0;
932
- const chunks = [];
933
- const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
934
- const guard = new Transform({
935
- transform(chunk, _encoding, callback) {
936
- try {
937
- const buf = Buffer.isBuffer(chunk)
938
- ? chunk
939
- : Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
940
- if (!encodingResolved) {
941
- encodingResolved = true;
942
- effectiveEncoding =
943
- resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
944
- }
945
- if (isBinaryContent(buf, effectiveEncoding)) {
946
- callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
947
- return;
948
- }
949
- const newTotal = total + buf.length;
950
- if (newTotal > byteLimit) {
951
- const remaining = byteLimit - total;
952
- if (remaining > 0) {
953
- const slice = buf.subarray(0, remaining);
954
- total += remaining;
955
- if (captureChunks)
956
- chunks.push(slice);
957
- this.push(slice);
958
- }
959
- callback(new MaxBytesError());
960
- return;
961
- }
962
- total = newTotal;
963
- if (captureChunks)
964
- chunks.push(buf);
965
- callback(null, buf);
966
- }
967
- catch (error) {
968
- callback(toError(error));
969
- }
970
- },
971
- });
972
- const guarded = source.pipe(guard);
973
- const abortHandler = () => {
974
- source.destroy();
975
- guard.destroy();
976
- };
977
- if (signal) {
978
- signal.addEventListener('abort', abortHandler, { once: true });
979
- }
980
- try {
981
- const buffer = await consumeBuffer(guarded);
982
- return {
983
- buffer,
984
- encoding: effectiveEncoding,
985
- size: total,
986
- truncated: false,
987
- };
988
- }
989
- catch (error) {
990
- if (signal?.aborted)
991
- throw createFetchError({ kind: 'aborted' }, url);
992
- if (error instanceof FetchError)
993
- throw error;
994
- if (error instanceof MaxBytesError) {
995
- source.destroy();
996
- guard.destroy();
997
- return {
998
- buffer: Buffer.concat(chunks, total),
999
- encoding: effectiveEncoding,
1000
- size: total,
1001
- truncated: true,
1002
- };
1003
- }
1004
- throw error;
1005
- }
1006
- finally {
1007
- if (signal) {
1008
- signal.removeEventListener('abort', abortHandler);
1009
- }
1010
- }
1011
- }
1012
- }
1013
- const DEFAULT_HEADERS = {
1014
- 'User-Agent': config.fetcher.userAgent,
1015
- Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
1016
- 'Accept-Language': 'en-US,en;q=0.5',
1017
- 'Accept-Encoding': 'gzip, deflate, br',
1018
- Connection: 'keep-alive',
1019
- };
1020
- function buildHeaders() {
1021
- return DEFAULT_HEADERS;
1022
- }
1023
- function buildRequestSignal(timeoutMs, external) {
1024
- if (timeoutMs <= 0)
1025
- return external;
1026
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
1027
- return external ? AbortSignal.any([external, timeoutSignal]) : timeoutSignal;
1028
- }
1029
- function buildRequestInit(headers, signal) {
1030
- return {
1031
- method: 'GET',
1032
- headers,
1033
- ...(signal ? { signal } : {}),
1034
- };
1035
- }
1036
486
  function resolveResponseError(response, finalUrl) {
1037
487
  if (response.status === 429) {
1038
488
  return createFetchError({ kind: 'rate-limited', retryAfter: response.headers.get('retry-after') }, finalUrl);
@@ -1199,61 +649,200 @@ async function decodeResponseIfNeeded(response, url, signal) {
1199
649
  for (const decompressor of decompressors) {
1200
650
  decompressor.destroy();
1201
651
  }
1202
- decodedNodeStream.destroy();
1203
- };
1204
- if (signal) {
1205
- signal.addEventListener('abort', abortDecodePipeline, { once: true });
1206
- }
1207
- void decodedPipeline.catch((error) => {
1208
- decodedNodeStream.destroy(toError(error));
1209
- });
1210
- const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
1211
- const decodedReader = decodedBodyStream.getReader();
1212
- const clearAbortListener = () => {
1213
- if (!signal)
1214
- return;
1215
- signal.removeEventListener('abort', abortDecodePipeline);
1216
- };
1217
- try {
1218
- const first = await decodedReader.read();
1219
- if (first.done) {
1220
- clearAbortListener();
1221
- void passthroughBranch.cancel().catch(() => undefined);
1222
- return new Response(null, {
1223
- status: response.status,
1224
- statusText: response.statusText,
1225
- headers,
1226
- });
652
+ decodedNodeStream.destroy();
653
+ };
654
+ if (signal) {
655
+ signal.addEventListener('abort', abortDecodePipeline, { once: true });
656
+ }
657
+ void decodedPipeline.catch((error) => {
658
+ decodedNodeStream.destroy(toError(error));
659
+ });
660
+ const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
661
+ const decodedReader = decodedBodyStream.getReader();
662
+ const clearAbortListener = () => {
663
+ if (!signal)
664
+ return;
665
+ signal.removeEventListener('abort', abortDecodePipeline);
666
+ };
667
+ try {
668
+ const first = await decodedReader.read();
669
+ if (first.done) {
670
+ clearAbortListener();
671
+ void passthroughBranch.cancel().catch(() => undefined);
672
+ return new Response(null, {
673
+ status: response.status,
674
+ statusText: response.statusText,
675
+ headers,
676
+ });
677
+ }
678
+ void passthroughBranch.cancel().catch(() => undefined);
679
+ const body = createPumpedStream(first.value, decodedReader);
680
+ if (signal) {
681
+ void finished(decodedNodeStream, { cleanup: true })
682
+ .catch(() => { })
683
+ .finally(() => {
684
+ clearAbortListener();
685
+ });
686
+ }
687
+ return new Response(body, {
688
+ status: response.status,
689
+ statusText: response.statusText,
690
+ headers,
691
+ });
692
+ }
693
+ catch (error) {
694
+ clearAbortListener();
695
+ abortDecodePipeline();
696
+ void decodedReader.cancel(error).catch(() => undefined);
697
+ logDebug('Content-Encoding decode failed; using passthrough body', {
698
+ url: redactUrl(url),
699
+ encoding: encodingHeader ?? encodings.join(','),
700
+ error: isError(error) ? error.message : String(error),
701
+ });
702
+ return new Response(passthroughBranch, {
703
+ status: response.status,
704
+ statusText: response.statusText,
705
+ headers,
706
+ });
707
+ }
708
+ }
709
+ class ResponseTextReader {
710
+ async read(response, url, maxBytes, signal, encoding) {
711
+ const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
712
+ const text = decodeBuffer(buffer, effectiveEncoding);
713
+ return { text, size: buffer.byteLength, truncated };
714
+ }
715
+ async readBuffer(response, url, maxBytes, signal, encoding) {
716
+ if (signal?.aborted) {
717
+ cancelResponseBody(response);
718
+ throw createFetchError({ kind: 'aborted' }, url);
719
+ }
720
+ if (!response.body) {
721
+ return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
722
+ }
723
+ return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
724
+ }
725
+ async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
726
+ if (signal?.aborted)
727
+ throw createFetchError({ kind: 'canceled' }, url);
728
+ const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
729
+ let buffer;
730
+ let truncated = false;
731
+ try {
732
+ // Try safe blob slicing if available (Node 18+) to avoid OOM
733
+ const blob = await response.blob();
734
+ if (Number.isFinite(limit) && blob.size > limit) {
735
+ const sliced = blob.slice(0, limit);
736
+ buffer = new Uint8Array(await sliced.arrayBuffer());
737
+ truncated = true;
738
+ }
739
+ else {
740
+ buffer = new Uint8Array(await blob.arrayBuffer());
741
+ }
742
+ }
743
+ catch {
744
+ // Fallback if blob() fails
745
+ const arrayBuffer = await response.arrayBuffer();
746
+ const length = Math.min(arrayBuffer.byteLength, limit);
747
+ buffer = new Uint8Array(arrayBuffer, 0, length);
748
+ truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
749
+ }
750
+ const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
751
+ if (isBinaryContent(buffer, effectiveEncoding)) {
752
+ throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
753
+ }
754
+ return {
755
+ buffer,
756
+ encoding: effectiveEncoding,
757
+ size: buffer.byteLength,
758
+ truncated,
759
+ };
760
+ }
761
+ async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
762
+ const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
763
+ const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
764
+ let effectiveEncoding = encoding ?? 'utf-8';
765
+ let encodingResolved = false;
766
+ let total = 0;
767
+ const chunks = [];
768
+ const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
769
+ const guard = new Transform({
770
+ transform(chunk, _encoding, callback) {
771
+ try {
772
+ const buf = Buffer.isBuffer(chunk)
773
+ ? chunk
774
+ : Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
775
+ if (!encodingResolved) {
776
+ encodingResolved = true;
777
+ effectiveEncoding =
778
+ resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
779
+ }
780
+ if (isBinaryContent(buf, effectiveEncoding)) {
781
+ callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
782
+ return;
783
+ }
784
+ const newTotal = total + buf.length;
785
+ if (newTotal > byteLimit) {
786
+ const remaining = byteLimit - total;
787
+ if (remaining > 0) {
788
+ const slice = buf.subarray(0, remaining);
789
+ total += remaining;
790
+ if (captureChunks)
791
+ chunks.push(slice);
792
+ this.push(slice);
793
+ }
794
+ callback(new MaxBytesError());
795
+ return;
796
+ }
797
+ total = newTotal;
798
+ if (captureChunks)
799
+ chunks.push(buf);
800
+ callback(null, buf);
801
+ }
802
+ catch (error) {
803
+ callback(toError(error));
804
+ }
805
+ },
806
+ });
807
+ const guarded = source.pipe(guard);
808
+ const abortHandler = () => {
809
+ source.destroy();
810
+ guard.destroy();
811
+ };
812
+ if (signal) {
813
+ signal.addEventListener('abort', abortHandler, { once: true });
814
+ }
815
+ try {
816
+ const buffer = await consumeBuffer(guarded);
817
+ return {
818
+ buffer,
819
+ encoding: effectiveEncoding,
820
+ size: total,
821
+ truncated: false,
822
+ };
1227
823
  }
1228
- void passthroughBranch.cancel().catch(() => undefined);
1229
- const body = createPumpedStream(first.value, decodedReader);
1230
- if (signal) {
1231
- void finished(decodedNodeStream, { cleanup: true })
1232
- .catch(() => { })
1233
- .finally(() => {
1234
- clearAbortListener();
1235
- });
824
+ catch (error) {
825
+ if (signal?.aborted)
826
+ throw createFetchError({ kind: 'aborted' }, url);
827
+ if (error instanceof FetchError)
828
+ throw error;
829
+ if (error instanceof MaxBytesError) {
830
+ source.destroy();
831
+ guard.destroy();
832
+ return {
833
+ buffer: Buffer.concat(chunks, total),
834
+ encoding: effectiveEncoding,
835
+ size: total,
836
+ truncated: true,
837
+ };
838
+ }
839
+ throw error;
840
+ }
841
+ finally {
842
+ if (signal) {
843
+ signal.removeEventListener('abort', abortHandler);
844
+ }
1236
845
  }
1237
- return new Response(body, {
1238
- status: response.status,
1239
- statusText: response.statusText,
1240
- headers,
1241
- });
1242
- }
1243
- catch (error) {
1244
- clearAbortListener();
1245
- abortDecodePipeline();
1246
- void decodedReader.cancel(error).catch(() => undefined);
1247
- logDebug('Content-Encoding decode failed; using passthrough body', {
1248
- url: redactUrl(url),
1249
- encoding: encodingHeader ?? encodings.join(','),
1250
- error: isError(error) ? error.message : String(error),
1251
- });
1252
- return new Response(passthroughBranch, {
1253
- status: response.status,
1254
- statusText: response.statusText,
1255
- headers,
1256
- });
1257
846
  }
1258
847
  }
1259
848
  async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
@@ -1275,20 +864,172 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
1275
864
  telemetry.recordResponse(ctx, decodedResponse, size);
1276
865
  return { kind: 'buffer', buffer, encoding, size, truncated };
1277
866
  }
1278
- function extractHostname(url) {
1279
- try {
1280
- return new URL(url).hostname;
867
/**
 * Duck-type check for a web-style ReadableStream.
 *
 * A value qualifies when it is an object exposing callable `getReader`,
 * `cancel` and `tee` members together with a boolean `locked` property.
 *
 * @param {unknown} value - Candidate to inspect.
 * @returns {boolean} `true` when every expected member is present with the expected type.
 */
function isReadableStreamLike(value) {
    if (!isObject(value)) {
        return false;
    }
    // Checked in the same order as the member list; stops at the first miss.
    const hasStreamMethods = ['getReader', 'cancel', 'tee'].every((member) => typeof value[member] === 'function');
    return hasStreamMethods && typeof value['locked'] === 'boolean';
}
875
/**
 * Throws unless `stream` passes the readable-stream duck-type check.
 *
 * @param {unknown} stream - Value expected to look like a ReadableStream.
 * @param {string} url - URL attached to the error raised on failure.
 * @param {string} stage - Pipeline stage label recorded in the error details.
 * @throws {FetchError} With status 500 and reason `invalid_stream` when the check fails.
 */
function assertReadableStreamLike(stream, url, stage) {
    if (!isReadableStreamLike(stream)) {
        const details = { reason: 'invalid_stream', stage };
        throw new FetchError('Invalid response stream', url, 500, details);
    }
}
883
/**
 * Validates `stream` and hands it back unchanged.
 *
 * No conversion happens here: the function only asserts that the value
 * passes the readable-stream duck-type check and then returns the very
 * same object.
 *
 * @param {unknown} stream - Stream to validate.
 * @param {string} url - URL attached to the error raised on failure.
 * @param {string} stage - Pipeline stage label recorded in the error details.
 * @returns {unknown} The `stream` argument itself.
 * @throws {FetchError} When the value does not look like a readable stream.
 */
function toNodeReadableStream(stream, url, stage) {
    assertReadableStreamLike(stream, url, stage);
    const validated = stream;
    return validated;
}
887
/**
 * Converts a Node.js readable into a web stream via `Readable.toWeb` and
 * verifies that the result passes the readable-stream duck-type check.
 *
 * @param {import('node:stream').Readable} stream - Node.js readable to convert.
 * @param {string} url - URL attached to the error raised on failure.
 * @param {string} stage - Pipeline stage label recorded in the error details.
 * @returns {unknown} The converted stream.
 * @throws {FetchError} When the converted value does not look like a readable stream.
 */
function toWebReadableStream(stream, url, stage) {
    const webStream = Readable.toWeb(stream);
    assertReadableStreamLike(webStream, url, stage);
    return webStream;
}
892
// Diagnostics channel on which request lifecycle events are published.
const fetchChannel = diagnosticsChannel.channel('fetch-url-mcp.fetch');
// Responses slower than this many milliseconds trigger an extra warning log.
const SLOW_REQUEST_THRESHOLD_MS = 5000;
/**
 * Records the lifecycle of outbound HTTP requests.
 *
 * Each lifecycle step (start, response, error) is published on the
 * `fetch-url-mcp.fetch` diagnostics channel when it has subscribers, and is
 * also written to the injected logger.
 */
class FetchTelemetry {
    logger;
    context;
    redactor;
    /**
     * @param {{debug: Function, warn: Function, error: Function}} logger - Log sink.
     * @param {{getRequestId: Function, getOperationId: Function}} context - Supplies correlation ids.
     * @param {{redact: Function}} redactor - Produces the URL form that is logged and published.
     */
    constructor(logger, context, redactor) {
        this.logger = logger;
        this.context = context;
        this.redactor = redactor;
    }
    /**
     * @param {string} url - URL to pass through the redactor.
     * @returns {string} Whatever the injected redactor returns for `url`.
     */
    redact(url) {
        return this.redactor.redact(url);
    }
    /**
     * Picks the correlation ids that are set (truthy) on a request context.
     *
     * @param {object} ctx - Request context created by `start`.
     * @returns {object} Object holding `contextRequestId` and/or `operationId` when present.
     */
    contextFields(ctx) {
        const fields = {};
        if (ctx.contextRequestId) {
            fields.contextRequestId = ctx.contextRequestId;
        }
        if (ctx.operationId) {
            fields.operationId = ctx.operationId;
        }
        return fields;
    }
    /**
     * Opens a telemetry context for a request and announces it.
     *
     * @param {string} url - Request URL; only its redacted form is stored.
     * @param {string} method - HTTP method, upper-cased before being stored.
     * @returns {object} Context with `requestId`, `startTime`, `url`, `method` and any correlation ids.
     */
    start(url, method) {
        const safeUrl = this.redactor.redact(url);
        const contextRequestId = this.context.getRequestId();
        const operationId = this.context.getOperationId();
        const ctx = {
            requestId: randomUUID(),
            startTime: performance.now(),
            url: safeUrl,
            method: method.toUpperCase(),
            ...(contextRequestId ? { contextRequestId } : {}),
            ...(operationId ? { operationId } : {}),
        };
        const summary = {
            requestId: ctx.requestId,
            method: ctx.method,
            url: ctx.url,
            ...this.contextFields(ctx),
        };
        // The published event is a separate object from the logged one.
        this.publish({ v: 1, type: 'start', ...summary });
        this.logger.debug('HTTP Request', summary);
        return ctx;
    }
    /**
     * Reports a completed response and warns when it was slow.
     *
     * @param {object} context - Context returned by `start`.
     * @param {{status: number, headers: {get: Function}}} response - Response whose status and headers are read.
     * @param {number} [contentSize] - Size used for logging when no `content-length` header exists.
     */
    recordResponse(context, response, contentSize) {
        const duration = performance.now() - context.startTime;
        const durationLabel = `${Math.round(duration)}ms`;
        const correlation = this.contextFields(context);
        const { requestId, url } = context;
        const { status } = response;
        this.publish({
            v: 1,
            type: 'end',
            requestId,
            status,
            duration,
            ...correlation,
        });
        const contentType = response.headers.get('content-type') ?? undefined;
        // The header value wins; the measured size is only a fallback.
        let size = response.headers.get('content-length');
        if (size === null || size === undefined) {
            size = contentSize === undefined ? undefined : String(contentSize);
        }
        const details = {
            requestId,
            status,
            url,
            duration: durationLabel,
            ...correlation,
        };
        if (contentType) {
            details.contentType = contentType;
        }
        if (size) {
            details.size = size;
        }
        this.logger.debug('HTTP Response', details);
        if (duration > SLOW_REQUEST_THRESHOLD_MS) {
            this.logger.warn('Slow HTTP request detected', {
                requestId,
                url,
                duration: durationLabel,
                ...correlation,
            });
        }
    }
    /**
     * Reports a failed request. Status 429 is logged as a warning, anything
     * else as an error.
     *
     * @param {object} context - Context returned by `start`.
     * @param {unknown} error - Failure; normalised with `toError`.
     * @param {number} [status] - HTTP status associated with the failure, if any.
     */
    recordError(context, error, status) {
        const duration = performance.now() - context.startTime;
        const err = toError(error);
        const code = isSystemError(err) ? err.code : undefined;
        const correlation = this.contextFields(context);
        const event = {
            v: 1,
            type: 'error',
            requestId: context.requestId,
            url: context.url,
            error: err.message,
            duration,
        };
        // Optional fields are omitted from the event rather than set to undefined.
        if (code !== undefined) {
            event.code = code;
        }
        if (status !== undefined) {
            event.status = status;
        }
        Object.assign(event, correlation);
        this.publish(event);
        const logData = {
            requestId: context.requestId,
            url: context.url,
            status,
            code,
            error: err.message,
            ...correlation,
        };
        const level = status === 429 ? 'warn' : 'error';
        this.logger[level]('HTTP Request Error', logData);
    }
    /**
     * Publishes `event` on the diagnostics channel when anyone is listening.
     *
     * @param {object} event - Event payload.
     */
    publish(event) {
        if (fetchChannel.hasSubscribers) {
            try {
                fetchChannel.publish(event);
            }
            catch {
                // Best-effort telemetry; never crash request path.
            }
        }
    }
}
1286
- function createDnsPreflight(dnsResolver) {
1287
- return async (url, signal) => {
1288
- const hostname = extractHostname(url);
1289
- return await dnsResolver.resolveAndValidate(hostname, signal);
1290
- };
1291
- }
1020
// Default collaborators wired into the module-level fetch components.

// Logger backed by the module's logging helpers.
const defaultLogger = { debug: logDebug, warn: logWarn, error: logError };
// Source of the correlation ids attached to telemetry.
const defaultContext = { getRequestId: getRequestId, getOperationId: getOperationId };
// Redactor applied to URLs before they are logged or published.
const defaultRedactor = { redact: redactUrl };
// Looks up `globalThis.fetch` on every call instead of capturing it once.
const defaultFetch = (input, init) => {
    return globalThis.fetch(input, init);
};
1292
1033
  class HttpFetcher {
1293
1034
  fetcherConfig;
1294
1035
  redirectFollower;
@@ -1342,6 +1083,29 @@ class HttpFetcher {
1342
1083
  }
1343
1084
  }
1344
1085
  }
1086
// Request headers sent with every fetch; the User-Agent comes from configuration.
const DEFAULT_HEADERS = {
    'User-Agent': config.fetcher.userAgent,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
};
/**
 * Returns the header set used for outbound requests.
 *
 * @returns {Record<string, string>} The shared `DEFAULT_HEADERS` object itself (not a copy).
 */
function buildHeaders() {
    const headers = DEFAULT_HEADERS;
    return headers;
}
1096
/**
 * Produces the abort signal for a request from an optional timeout and an
 * optional caller-supplied signal.
 *
 * @param {number} timeoutMs - Timeout in milliseconds; zero or negative disables it.
 * @param {AbortSignal} [external] - Caller-supplied signal, if any.
 * @returns {AbortSignal | undefined} `external` unchanged when the timeout is
 *   disabled, a timeout signal when there is no external signal, otherwise a
 *   signal that aborts when either of the two does.
 */
function buildRequestSignal(timeoutMs, external) {
    const timeoutDisabled = timeoutMs <= 0;
    if (timeoutDisabled) {
        return external;
    }
    const deadline = AbortSignal.timeout(timeoutMs);
    if (!external) {
        return deadline;
    }
    return AbortSignal.any([external, deadline]);
}
1102
/**
 * Assembles the `fetch` init object for a GET request.
 *
 * @param {object} headers - Headers to send; stored by reference.
 * @param {AbortSignal} [signal] - Abort signal; the `signal` key is left out when falsy.
 * @returns {{method: string, headers: object, signal?: AbortSignal}} Request init.
 */
function buildRequestInit(headers, signal) {
    const init = { method: 'GET', headers };
    if (signal) {
        init.signal = signal;
    }
    return init;
}
1345
1109
  const ipBlocker = new IpBlocker(config.security);
1346
1110
  const urlNormalizer = new UrlNormalizer(config.constants, config.security, ipBlocker, BLOCKED_HOST_SUFFIXES);
1347
1111
  const rawUrlTransformer = new RawUrlTransformer(defaultLogger);
@@ -1349,7 +1113,6 @@ const dnsResolver = new SafeDnsResolver(ipBlocker, config.security, BLOCKED_HOST
1349
1113
  const telemetry = new FetchTelemetry(defaultLogger, defaultContext, defaultRedactor);
1350
1114
  const normalizeRedirectUrl = (url) => urlNormalizer.validateAndNormalize(url);
1351
1115
  const dnsPreflight = createDnsPreflight(dnsResolver);
1352
- // Redirect follower with per-hop DNS preflight.
1353
1116
  const secureRedirectFollower = new RedirectFollower(defaultFetch, normalizeRedirectUrl, dnsPreflight);
1354
1117
  const responseReader = new ResponseTextReader();
1355
1118
  const httpFetcher = new HttpFetcher(config.fetcher, secureRedirectFollower, responseReader, telemetry);
@@ -1391,4 +1154,3 @@ export async function fetchNormalizedUrl(normalizedUrl, options) {
1391
1154
/**
 * Public entry point that delegates to the module-level `httpFetcher`.
 *
 * @param {string} normalizedUrl - URL forwarded as-is to the fetcher.
 * @param {object} [options] - Options forwarded as-is to the fetcher.
 * @returns {Promise<unknown>} Whatever `httpFetcher.fetchNormalizedUrlBuffer` resolves to.
 */
export async function fetchNormalizedUrlBuffer(normalizedUrl, options) {
    const pending = httpFetcher.fetchNormalizedUrlBuffer(normalizedUrl, options);
    return pending;
}
1394
- //# sourceMappingURL=fetch.js.map