@j0hanz/fetch-url-mcp 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. package/README.md +24 -21
  2. package/dist/cli.d.ts +3 -3
  3. package/dist/cli.js +15 -8
  4. package/dist/http/auth.d.ts +6 -6
  5. package/dist/http/auth.js +78 -23
  6. package/dist/http/health.d.ts +1 -2
  7. package/dist/http/health.js +7 -18
  8. package/dist/http/helpers.d.ts +3 -11
  9. package/dist/http/helpers.js +28 -26
  10. package/dist/http/native.d.ts +0 -1
  11. package/dist/http/native.js +63 -41
  12. package/dist/http/rate-limit.d.ts +2 -2
  13. package/dist/http/rate-limit.js +11 -16
  14. package/dist/index.d.ts +0 -1
  15. package/dist/index.js +17 -20
  16. package/dist/{markdown-cleanup.d.ts → lib/content.d.ts} +4 -2
  17. package/dist/lib/content.js +1356 -0
  18. package/dist/lib/core.d.ts +253 -0
  19. package/dist/lib/core.js +1228 -0
  20. package/dist/{tool-pipeline.d.ts → lib/fetch-pipeline.d.ts} +1 -3
  21. package/dist/{tool-pipeline.js → lib/fetch-pipeline.js} +18 -44
  22. package/dist/{fetch.d.ts → lib/http.d.ts} +7 -9
  23. package/dist/{fetch.js → lib/http.js} +721 -1004
  24. package/dist/lib/mcp-tools.d.ts +28 -0
  25. package/dist/lib/mcp-tools.js +107 -0
  26. package/dist/{tool-progress.d.ts → lib/progress.d.ts} +0 -2
  27. package/dist/{tool-progress.js → lib/progress.js} +9 -14
  28. package/dist/lib/task-handlers.d.ts +5 -0
  29. package/dist/{mcp.js → lib/task-handlers.js} +95 -31
  30. package/dist/lib/url.d.ts +70 -0
  31. package/dist/lib/url.js +686 -0
  32. package/dist/lib/utils.d.ts +58 -0
  33. package/dist/lib/utils.js +304 -0
  34. package/dist/{prompts.d.ts → prompts/index.d.ts} +0 -1
  35. package/dist/{prompts.js → prompts/index.js} +1 -2
  36. package/dist/{resources.d.ts → resources/index.d.ts} +0 -1
  37. package/dist/{resources.js → resources/index.js} +87 -64
  38. package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -1
  39. package/dist/{instructions.js → resources/instructions.js} +5 -3
  40. package/dist/schemas/inputs.d.ts +7 -0
  41. package/dist/schemas/inputs.js +24 -0
  42. package/dist/schemas/outputs.d.ts +23 -0
  43. package/dist/schemas/outputs.js +77 -0
  44. package/dist/server.d.ts +0 -1
  45. package/dist/server.js +26 -25
  46. package/dist/tasks/execution.d.ts +0 -1
  47. package/dist/tasks/execution.js +106 -70
  48. package/dist/tasks/manager.d.ts +11 -3
  49. package/dist/tasks/manager.js +97 -73
  50. package/dist/tasks/owner.d.ts +3 -3
  51. package/dist/tasks/owner.js +2 -2
  52. package/dist/tasks/tool-registry.d.ts +11 -0
  53. package/dist/tasks/tool-registry.js +13 -0
  54. package/dist/tools/fetch-url.d.ts +28 -0
  55. package/dist/{tools.js → tools/fetch-url.js} +95 -147
  56. package/dist/tools/index.d.ts +2 -0
  57. package/dist/tools/index.js +4 -0
  58. package/dist/transform/html-translators.d.ts +1 -0
  59. package/dist/transform/html-translators.js +454 -0
  60. package/dist/transform/metadata.d.ts +4 -0
  61. package/dist/transform/metadata.js +183 -0
  62. package/dist/transform/transform.d.ts +0 -1
  63. package/dist/transform/transform.js +44 -679
  64. package/dist/transform/types.d.ts +9 -12
  65. package/dist/transform/types.js +0 -1
  66. package/dist/transform/worker-pool.d.ts +0 -1
  67. package/dist/transform/worker-pool.js +7 -16
  68. package/dist/transform/workers/shared.d.ts +7 -0
  69. package/dist/transform/workers/shared.js +130 -0
  70. package/dist/transform/workers/transform-child.d.ts +0 -1
  71. package/dist/transform/workers/transform-child.js +5 -135
  72. package/dist/transform/workers/transform-worker.d.ts +0 -1
  73. package/dist/transform/workers/transform-worker.js +7 -128
  74. package/package.json +11 -7
  75. package/dist/cache.d.ts +0 -54
  76. package/dist/cache.d.ts.map +0 -1
  77. package/dist/cache.js +0 -261
  78. package/dist/cache.js.map +0 -1
  79. package/dist/cli.d.ts.map +0 -1
  80. package/dist/cli.js.map +0 -1
  81. package/dist/config.d.ts +0 -141
  82. package/dist/config.d.ts.map +0 -1
  83. package/dist/config.js +0 -473
  84. package/dist/config.js.map +0 -1
  85. package/dist/crypto.d.ts +0 -4
  86. package/dist/crypto.d.ts.map +0 -1
  87. package/dist/crypto.js +0 -56
  88. package/dist/crypto.js.map +0 -1
  89. package/dist/dom-noise-removal.d.ts +0 -2
  90. package/dist/dom-noise-removal.d.ts.map +0 -1
  91. package/dist/dom-noise-removal.js +0 -494
  92. package/dist/dom-noise-removal.js.map +0 -1
  93. package/dist/download.d.ts +0 -4
  94. package/dist/download.d.ts.map +0 -1
  95. package/dist/download.js +0 -106
  96. package/dist/download.js.map +0 -1
  97. package/dist/errors.d.ts +0 -11
  98. package/dist/errors.d.ts.map +0 -1
  99. package/dist/errors.js +0 -65
  100. package/dist/errors.js.map +0 -1
  101. package/dist/examples/mcp-fetch-url-client.js +0 -329
  102. package/dist/examples/mcp-fetch-url-client.js.map +0 -1
  103. package/dist/fetch-content.d.ts +0 -5
  104. package/dist/fetch-content.d.ts.map +0 -1
  105. package/dist/fetch-content.js +0 -164
  106. package/dist/fetch-content.js.map +0 -1
  107. package/dist/fetch-stream.d.ts +0 -5
  108. package/dist/fetch-stream.d.ts.map +0 -1
  109. package/dist/fetch-stream.js +0 -29
  110. package/dist/fetch-stream.js.map +0 -1
  111. package/dist/fetch.d.ts.map +0 -1
  112. package/dist/fetch.js.map +0 -1
  113. package/dist/host-normalization.d.ts +0 -2
  114. package/dist/host-normalization.d.ts.map +0 -1
  115. package/dist/host-normalization.js +0 -91
  116. package/dist/host-normalization.js.map +0 -1
  117. package/dist/http/auth.d.ts.map +0 -1
  118. package/dist/http/auth.js.map +0 -1
  119. package/dist/http/health.d.ts.map +0 -1
  120. package/dist/http/health.js.map +0 -1
  121. package/dist/http/helpers.d.ts.map +0 -1
  122. package/dist/http/helpers.js.map +0 -1
  123. package/dist/http/native.d.ts.map +0 -1
  124. package/dist/http/native.js.map +0 -1
  125. package/dist/http/rate-limit.d.ts.map +0 -1
  126. package/dist/http/rate-limit.js.map +0 -1
  127. package/dist/index.d.ts.map +0 -1
  128. package/dist/index.js.map +0 -1
  129. package/dist/instructions.d.ts.map +0 -1
  130. package/dist/instructions.js.map +0 -1
  131. package/dist/ip-blocklist.d.ts +0 -9
  132. package/dist/ip-blocklist.d.ts.map +0 -1
  133. package/dist/ip-blocklist.js +0 -79
  134. package/dist/ip-blocklist.js.map +0 -1
  135. package/dist/json.d.ts +0 -2
  136. package/dist/json.d.ts.map +0 -1
  137. package/dist/json.js +0 -45
  138. package/dist/json.js.map +0 -1
  139. package/dist/language-detection.d.ts +0 -3
  140. package/dist/language-detection.d.ts.map +0 -1
  141. package/dist/language-detection.js +0 -355
  142. package/dist/language-detection.js.map +0 -1
  143. package/dist/markdown-cleanup.d.ts.map +0 -1
  144. package/dist/markdown-cleanup.js +0 -534
  145. package/dist/markdown-cleanup.js.map +0 -1
  146. package/dist/mcp-validator.d.ts +0 -17
  147. package/dist/mcp-validator.d.ts.map +0 -1
  148. package/dist/mcp-validator.js +0 -45
  149. package/dist/mcp-validator.js.map +0 -1
  150. package/dist/mcp.d.ts +0 -4
  151. package/dist/mcp.d.ts.map +0 -1
  152. package/dist/mcp.js.map +0 -1
  153. package/dist/observability.d.ts +0 -23
  154. package/dist/observability.d.ts.map +0 -1
  155. package/dist/observability.js +0 -238
  156. package/dist/observability.js.map +0 -1
  157. package/dist/prompts.d.ts.map +0 -1
  158. package/dist/prompts.js.map +0 -1
  159. package/dist/resources.d.ts.map +0 -1
  160. package/dist/resources.js.map +0 -1
  161. package/dist/server-tuning.d.ts +0 -15
  162. package/dist/server-tuning.d.ts.map +0 -1
  163. package/dist/server-tuning.js +0 -49
  164. package/dist/server-tuning.js.map +0 -1
  165. package/dist/server.d.ts.map +0 -1
  166. package/dist/server.js.map +0 -1
  167. package/dist/session.d.ts +0 -42
  168. package/dist/session.d.ts.map +0 -1
  169. package/dist/session.js +0 -255
  170. package/dist/session.js.map +0 -1
  171. package/dist/tasks/execution.d.ts.map +0 -1
  172. package/dist/tasks/execution.js.map +0 -1
  173. package/dist/tasks/manager.d.ts.map +0 -1
  174. package/dist/tasks/manager.js.map +0 -1
  175. package/dist/tasks/owner.d.ts.map +0 -1
  176. package/dist/tasks/owner.js.map +0 -1
  177. package/dist/timer-utils.d.ts +0 -6
  178. package/dist/timer-utils.d.ts.map +0 -1
  179. package/dist/timer-utils.js +0 -27
  180. package/dist/timer-utils.js.map +0 -1
  181. package/dist/tool-errors.d.ts +0 -12
  182. package/dist/tool-errors.d.ts.map +0 -1
  183. package/dist/tool-errors.js +0 -55
  184. package/dist/tool-errors.js.map +0 -1
  185. package/dist/tool-pipeline.d.ts.map +0 -1
  186. package/dist/tool-pipeline.js.map +0 -1
  187. package/dist/tool-progress.d.ts.map +0 -1
  188. package/dist/tool-progress.js.map +0 -1
  189. package/dist/tools.d.ts +0 -54
  190. package/dist/tools.d.ts.map +0 -1
  191. package/dist/tools.js.map +0 -1
  192. package/dist/transform/transform.d.ts.map +0 -1
  193. package/dist/transform/transform.js.map +0 -1
  194. package/dist/transform/types.d.ts.map +0 -1
  195. package/dist/transform/types.js.map +0 -1
  196. package/dist/transform/worker-pool.d.ts.map +0 -1
  197. package/dist/transform/worker-pool.js.map +0 -1
  198. package/dist/transform/workers/transform-child.d.ts.map +0 -1
  199. package/dist/transform/workers/transform-child.js.map +0 -1
  200. package/dist/transform/workers/transform-worker.d.ts.map +0 -1
  201. package/dist/transform/workers/transform-worker.js.map +0 -1
  202. package/dist/type-guards.d.ts +0 -16
  203. package/dist/type-guards.d.ts.map +0 -1
  204. package/dist/type-guards.js +0 -13
  205. package/dist/type-guards.js.map +0 -1
@@ -1,6 +1,5 @@
1
- import type { MarkdownTransformResult } from './transform/types.js';
1
+ import { type MarkdownTransformResult } from '../transform/types.js';
2
2
  type JsonRecord = Record<string, unknown>;
3
- export declare function readUnknown(obj: unknown, key: string): unknown;
4
3
  export declare function readString(obj: unknown, key: string): string | undefined;
5
4
  export declare function readNestedRecord(obj: unknown, keys: readonly string[]): JsonRecord | undefined;
6
5
  export declare function withSignal(signal?: AbortSignal): {
@@ -69,4 +68,3 @@ export declare function performSharedFetch(options: SharedFetchOptions, deps?: S
69
68
  inlineResult: InlineContentResult;
70
69
  }>;
71
70
  export {};
72
- //# sourceMappingURL=tool-pipeline.d.ts.map
@@ -1,15 +1,13 @@
1
1
  import { z } from 'zod';
2
- import * as cache from './cache.js';
3
- import { config } from './config.js';
4
- import { getErrorMessage } from './errors.js';
5
- import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './fetch.js';
6
- import { logDebug, logWarn } from './observability.js';
7
- import { transformBufferToMarkdown } from './transform/transform.js';
8
- import { isObject } from './type-guards.js';
2
+ import { transformBufferToMarkdown } from '../transform/transform.js';
3
+ import {} from '../transform/types.js';
4
+ import { config, createCacheKey, get, isEnabled, logDebug, logWarn, set, } from './core.js';
5
+ import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './http.js';
6
+ import { getErrorMessage, isObject } from './utils.js';
9
7
  function asRecord(value) {
10
8
  return isObject(value) ? value : undefined;
11
9
  }
12
- export function readUnknown(obj, key) {
10
+ function readUnknown(obj, key) {
13
11
  const record = asRecord(obj);
14
12
  return record ? record[key] : undefined;
15
13
  }
@@ -38,7 +36,7 @@ export function withSignal(signal) {
38
36
  return signal === undefined ? {} : { signal };
39
37
  }
40
38
  /* -------------------------------------------------------------------------------------------------
41
- * Inline content limiting
39
+ * Inline content truncation
42
40
  * ------------------------------------------------------------------------------------------------- */
43
41
  export const TRUNCATION_MARKER = '...[truncated]';
44
42
  function getOpenCodeFence(content) {
@@ -147,29 +145,22 @@ const inlineLimiter = new InlineContentLimiter();
147
145
  function applyInlineContentLimit(content, inlineLimitOverride) {
148
146
  return inlineLimiter.apply(content, inlineLimitOverride);
149
147
  }
150
- function createUrlResolution(params) {
151
- return {
152
- normalizedUrl: params.normalizedUrl,
153
- originalUrl: params.originalUrl,
154
- transformed: params.transformed,
155
- };
156
- }
157
148
  function resolveNormalizedUrl(url) {
158
149
  const { normalizedUrl: validatedUrl } = normalizeUrl(url);
159
150
  const transformedResult = transformToRawUrl(validatedUrl);
160
151
  if (!transformedResult.transformed) {
161
- return createUrlResolution({
152
+ return {
162
153
  normalizedUrl: validatedUrl,
163
154
  originalUrl: validatedUrl,
164
155
  transformed: false,
165
- });
156
+ };
166
157
  }
167
158
  const { normalizedUrl: transformedUrl } = normalizeUrl(transformedResult.url);
168
- return createUrlResolution({
159
+ return {
169
160
  normalizedUrl: transformedUrl,
170
161
  originalUrl: validatedUrl,
171
162
  transformed: true,
172
- });
163
+ };
173
164
  }
174
165
  function logRawUrlTransformation(resolvedUrl) {
175
166
  if (!resolvedUrl.transformed)
@@ -178,9 +169,6 @@ function logRawUrlTransformation(resolvedUrl) {
178
169
  original: resolvedUrl.originalUrl,
179
170
  });
180
171
  }
181
- /* -------------------------------------------------------------------------------------------------
182
- * Cache helpers
183
- * ------------------------------------------------------------------------------------------------- */
184
172
  function extractTitle(value) {
185
173
  return readString(value, 'title');
186
174
  }
@@ -207,7 +195,7 @@ function attemptCacheRetrieval(params) {
207
195
  const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
208
196
  if (!cacheKey)
209
197
  return null;
210
- const cached = cache.get(cacheKey);
198
+ const cached = get(cacheKey);
211
199
  if (!cached)
212
200
  return null;
213
201
  if (!deserialize) {
@@ -246,7 +234,7 @@ function persistCache(params) {
246
234
  ...(title === undefined ? {} : { title }),
247
235
  };
248
236
  try {
249
- cache.set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
237
+ set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
250
238
  }
251
239
  catch (error) {
252
240
  logWarn('Failed to persist cache entry', {
@@ -256,13 +244,10 @@ function persistCache(params) {
256
244
  });
257
245
  }
258
246
  }
259
- /* -------------------------------------------------------------------------------------------------
260
- * Pipeline executor
261
- * ------------------------------------------------------------------------------------------------- */
262
247
  export async function executeFetchPipeline(options) {
263
248
  const resolvedUrl = resolveNormalizedUrl(options.url);
264
249
  logRawUrlTransformation(resolvedUrl);
265
- const cacheKey = cache.createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
250
+ const cacheKey = createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
266
251
  if (!options.forceRefresh) {
267
252
  const cachedResult = attemptCacheRetrieval({
268
253
  cacheKey,
@@ -279,7 +264,7 @@ export async function executeFetchPipeline(options) {
279
264
  const resolvedFinalUrl = finalUrl || resolvedUrl.normalizedUrl;
280
265
  const transformUrl = resolvedFinalUrl;
281
266
  const data = await options.transform({ buffer, encoding, ...(truncated ? { truncated: true } : {}) }, transformUrl);
282
- if (cache.isEnabled()) {
267
+ if (isEnabled()) {
283
268
  persistCache({
284
269
  cacheKey,
285
270
  data,
@@ -288,7 +273,7 @@ export async function executeFetchPipeline(options) {
288
273
  cacheNamespace: options.cacheNamespace,
289
274
  });
290
275
  if (finalUrl && finalUrl !== resolvedUrl.normalizedUrl) {
291
- const finalCacheKey = cache.createCacheKey(options.cacheNamespace, finalUrl, options.cacheVary);
276
+ const finalCacheKey = createCacheKey(options.cacheNamespace, finalUrl, options.cacheVary);
292
277
  if (finalCacheKey && finalCacheKey !== cacheKey) {
293
278
  persistCache({
294
279
  cacheKey: finalCacheKey,
@@ -313,18 +298,8 @@ export async function executeFetchPipeline(options) {
313
298
  function normalizeExtractedMetadata(metadata) {
314
299
  if (!metadata)
315
300
  return undefined;
316
- const normalized = {
317
- ...(metadata.title ? { title: metadata.title } : {}),
318
- ...(metadata.description ? { description: metadata.description } : {}),
319
- ...(metadata.author ? { author: metadata.author } : {}),
320
- ...(metadata.image ? { image: metadata.image } : {}),
321
- ...(metadata.favicon ? { favicon: metadata.favicon } : {}),
322
- ...(metadata.publishedAt ? { publishedAt: metadata.publishedAt } : {}),
323
- ...(metadata.modifiedAt ? { modifiedAt: metadata.modifiedAt } : {}),
324
- };
325
- if (Object.keys(normalized).length === 0)
326
- return undefined;
327
- return normalized;
301
+ const normalized = Object.fromEntries(Object.entries(metadata).filter(([, v]) => Boolean(v)));
302
+ return Object.keys(normalized).length > 0 ? normalized : undefined;
328
303
  }
329
304
  const cachedMarkdownSchema = z
330
305
  .object({
@@ -405,4 +380,3 @@ export async function performSharedFetch(options, deps = {}) {
405
380
  const inlineResult = applyInlineContentLimit(pipeline.data.content, options.maxInlineChars);
406
381
  return { pipeline, inlineResult };
407
382
  }
408
- //# sourceMappingURL=tool-pipeline.js.map
@@ -1,11 +1,7 @@
1
- interface FetchOptions {
2
- signal?: AbortSignal;
3
- }
4
- interface TransformResult {
5
- readonly url: string;
6
- readonly transformed: boolean;
7
- readonly platform?: string;
8
- }
1
+ import { type ServerResponse } from 'node:http';
2
+ import { type TransformResult } from './url.js';
3
+ export declare function generateSafeFilename(url: string, title?: string, hashFallback?: string, extension?: string): string;
4
+ export declare function handleDownload(res: ServerResponse, namespace: string, hash: string): void;
9
5
  interface FetchTelemetryContext {
10
6
  requestId: string;
11
7
  startTime: number;
@@ -14,6 +10,9 @@ interface FetchTelemetryContext {
14
10
  contextRequestId?: string;
15
11
  operationId?: string;
16
12
  }
13
+ interface FetchOptions {
14
+ signal?: AbortSignal;
15
+ }
17
16
  export declare function isBlockedIp(ip: string): boolean;
18
17
  export declare function normalizeUrl(urlString: string): {
19
18
  normalizedUrl: string;
@@ -41,4 +40,3 @@ export declare function fetchNormalizedUrlBuffer(normalizedUrl: string, options?
41
40
  finalUrl: string;
42
41
  }>;
43
42
  export {};
44
- //# sourceMappingURL=fetch.d.ts.map