@j0hanz/fetch-url-mcp 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -21
- package/dist/cli.d.ts +3 -3
- package/dist/cli.js +15 -8
- package/dist/http/auth.d.ts +6 -6
- package/dist/http/auth.js +78 -23
- package/dist/http/health.d.ts +1 -2
- package/dist/http/health.js +7 -18
- package/dist/http/helpers.d.ts +3 -11
- package/dist/http/helpers.js +28 -26
- package/dist/http/native.d.ts +0 -1
- package/dist/http/native.js +63 -41
- package/dist/http/rate-limit.d.ts +2 -2
- package/dist/http/rate-limit.js +11 -16
- package/dist/index.d.ts +0 -1
- package/dist/index.js +17 -20
- package/dist/{markdown-cleanup.d.ts → lib/content.d.ts} +4 -2
- package/dist/lib/content.js +1356 -0
- package/dist/lib/core.d.ts +253 -0
- package/dist/lib/core.js +1228 -0
- package/dist/{tool-pipeline.d.ts → lib/fetch-pipeline.d.ts} +1 -3
- package/dist/{tool-pipeline.js → lib/fetch-pipeline.js} +18 -44
- package/dist/{fetch.d.ts → lib/http.d.ts} +7 -9
- package/dist/{fetch.js → lib/http.js} +721 -1004
- package/dist/lib/mcp-tools.d.ts +28 -0
- package/dist/lib/mcp-tools.js +107 -0
- package/dist/{tool-progress.d.ts → lib/progress.d.ts} +0 -2
- package/dist/{tool-progress.js → lib/progress.js} +9 -14
- package/dist/lib/task-handlers.d.ts +5 -0
- package/dist/{mcp.js → lib/task-handlers.js} +95 -31
- package/dist/lib/url.d.ts +70 -0
- package/dist/lib/url.js +686 -0
- package/dist/lib/utils.d.ts +58 -0
- package/dist/lib/utils.js +304 -0
- package/dist/{prompts.d.ts → prompts/index.d.ts} +0 -1
- package/dist/{prompts.js → prompts/index.js} +1 -2
- package/dist/{resources.d.ts → resources/index.d.ts} +0 -1
- package/dist/{resources.js → resources/index.js} +87 -64
- package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -1
- package/dist/{instructions.js → resources/instructions.js} +5 -3
- package/dist/schemas/inputs.d.ts +7 -0
- package/dist/schemas/inputs.js +24 -0
- package/dist/schemas/outputs.d.ts +23 -0
- package/dist/schemas/outputs.js +77 -0
- package/dist/server.d.ts +0 -1
- package/dist/server.js +26 -25
- package/dist/tasks/execution.d.ts +0 -1
- package/dist/tasks/execution.js +106 -70
- package/dist/tasks/manager.d.ts +11 -3
- package/dist/tasks/manager.js +97 -73
- package/dist/tasks/owner.d.ts +3 -3
- package/dist/tasks/owner.js +2 -2
- package/dist/tasks/tool-registry.d.ts +11 -0
- package/dist/tasks/tool-registry.js +13 -0
- package/dist/tools/fetch-url.d.ts +28 -0
- package/dist/{tools.js → tools/fetch-url.js} +95 -147
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +4 -0
- package/dist/transform/html-translators.d.ts +1 -0
- package/dist/transform/html-translators.js +454 -0
- package/dist/transform/metadata.d.ts +4 -0
- package/dist/transform/metadata.js +183 -0
- package/dist/transform/transform.d.ts +0 -1
- package/dist/transform/transform.js +44 -679
- package/dist/transform/types.d.ts +9 -12
- package/dist/transform/types.js +0 -1
- package/dist/transform/worker-pool.d.ts +0 -1
- package/dist/transform/worker-pool.js +7 -16
- package/dist/transform/workers/shared.d.ts +7 -0
- package/dist/transform/workers/shared.js +130 -0
- package/dist/transform/workers/transform-child.d.ts +0 -1
- package/dist/transform/workers/transform-child.js +5 -135
- package/dist/transform/workers/transform-worker.d.ts +0 -1
- package/dist/transform/workers/transform-worker.js +7 -128
- package/package.json +11 -7
- package/dist/cache.d.ts +0 -54
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js +0 -261
- package/dist/cache.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts +0 -141
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -473
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts +0 -4
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js +0 -56
- package/dist/crypto.js.map +0 -1
- package/dist/dom-noise-removal.d.ts +0 -2
- package/dist/dom-noise-removal.d.ts.map +0 -1
- package/dist/dom-noise-removal.js +0 -494
- package/dist/dom-noise-removal.js.map +0 -1
- package/dist/download.d.ts +0 -4
- package/dist/download.d.ts.map +0 -1
- package/dist/download.js +0 -106
- package/dist/download.js.map +0 -1
- package/dist/errors.d.ts +0 -11
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js +0 -65
- package/dist/errors.js.map +0 -1
- package/dist/examples/mcp-fetch-url-client.js +0 -329
- package/dist/examples/mcp-fetch-url-client.js.map +0 -1
- package/dist/fetch-content.d.ts +0 -5
- package/dist/fetch-content.d.ts.map +0 -1
- package/dist/fetch-content.js +0 -164
- package/dist/fetch-content.js.map +0 -1
- package/dist/fetch-stream.d.ts +0 -5
- package/dist/fetch-stream.d.ts.map +0 -1
- package/dist/fetch-stream.js +0 -29
- package/dist/fetch-stream.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/host-normalization.d.ts +0 -2
- package/dist/host-normalization.d.ts.map +0 -1
- package/dist/host-normalization.js +0 -91
- package/dist/host-normalization.js.map +0 -1
- package/dist/http/auth.d.ts.map +0 -1
- package/dist/http/auth.js.map +0 -1
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js.map +0 -1
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js.map +0 -1
- package/dist/http/native.d.ts.map +0 -1
- package/dist/http/native.js.map +0 -1
- package/dist/http/rate-limit.d.ts.map +0 -1
- package/dist/http/rate-limit.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/instructions.d.ts.map +0 -1
- package/dist/instructions.js.map +0 -1
- package/dist/ip-blocklist.d.ts +0 -9
- package/dist/ip-blocklist.d.ts.map +0 -1
- package/dist/ip-blocklist.js +0 -79
- package/dist/ip-blocklist.js.map +0 -1
- package/dist/json.d.ts +0 -2
- package/dist/json.d.ts.map +0 -1
- package/dist/json.js +0 -45
- package/dist/json.js.map +0 -1
- package/dist/language-detection.d.ts +0 -3
- package/dist/language-detection.d.ts.map +0 -1
- package/dist/language-detection.js +0 -355
- package/dist/language-detection.js.map +0 -1
- package/dist/markdown-cleanup.d.ts.map +0 -1
- package/dist/markdown-cleanup.js +0 -534
- package/dist/markdown-cleanup.js.map +0 -1
- package/dist/mcp-validator.d.ts +0 -17
- package/dist/mcp-validator.d.ts.map +0 -1
- package/dist/mcp-validator.js +0 -45
- package/dist/mcp-validator.js.map +0 -1
- package/dist/mcp.d.ts +0 -4
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts +0 -23
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js +0 -238
- package/dist/observability.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/resources.d.ts.map +0 -1
- package/dist/resources.js.map +0 -1
- package/dist/server-tuning.d.ts +0 -15
- package/dist/server-tuning.d.ts.map +0 -1
- package/dist/server-tuning.js +0 -49
- package/dist/server-tuning.js.map +0 -1
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js.map +0 -1
- package/dist/session.d.ts +0 -42
- package/dist/session.d.ts.map +0 -1
- package/dist/session.js +0 -255
- package/dist/session.js.map +0 -1
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js.map +0 -1
- package/dist/tasks/manager.d.ts.map +0 -1
- package/dist/tasks/manager.js.map +0 -1
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js.map +0 -1
- package/dist/timer-utils.d.ts +0 -6
- package/dist/timer-utils.d.ts.map +0 -1
- package/dist/timer-utils.js +0 -27
- package/dist/timer-utils.js.map +0 -1
- package/dist/tool-errors.d.ts +0 -12
- package/dist/tool-errors.d.ts.map +0 -1
- package/dist/tool-errors.js +0 -55
- package/dist/tool-errors.js.map +0 -1
- package/dist/tool-pipeline.d.ts.map +0 -1
- package/dist/tool-pipeline.js.map +0 -1
- package/dist/tool-progress.d.ts.map +0 -1
- package/dist/tool-progress.js.map +0 -1
- package/dist/tools.d.ts +0 -54
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js.map +0 -1
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js.map +0 -1
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js.map +0 -1
- package/dist/transform/workers/transform-child.d.ts.map +0 -1
- package/dist/transform/workers/transform-child.js.map +0 -1
- package/dist/transform/workers/transform-worker.d.ts.map +0 -1
- package/dist/transform/workers/transform-worker.js.map +0 -1
- package/dist/type-guards.d.ts +0 -16
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js +0 -13
- package/dist/type-guards.js.map +0 -1
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type MarkdownTransformResult } from '../transform/types.js';
|
|
2
2
|
type JsonRecord = Record<string, unknown>;
|
|
3
|
-
export declare function readUnknown(obj: unknown, key: string): unknown;
|
|
4
3
|
export declare function readString(obj: unknown, key: string): string | undefined;
|
|
5
4
|
export declare function readNestedRecord(obj: unknown, keys: readonly string[]): JsonRecord | undefined;
|
|
6
5
|
export declare function withSignal(signal?: AbortSignal): {
|
|
@@ -69,4 +68,3 @@ export declare function performSharedFetch(options: SharedFetchOptions, deps?: S
|
|
|
69
68
|
inlineResult: InlineContentResult;
|
|
70
69
|
}>;
|
|
71
70
|
export {};
|
|
72
|
-
//# sourceMappingURL=tool-pipeline.d.ts.map
|
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './
|
|
6
|
-
import {
|
|
7
|
-
import { transformBufferToMarkdown } from './transform/transform.js';
|
|
8
|
-
import { isObject } from './type-guards.js';
|
|
2
|
+
import { transformBufferToMarkdown } from '../transform/transform.js';
|
|
3
|
+
import {} from '../transform/types.js';
|
|
4
|
+
import { config, createCacheKey, get, isEnabled, logDebug, logWarn, set, } from './core.js';
|
|
5
|
+
import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './http.js';
|
|
6
|
+
import { getErrorMessage, isObject } from './utils.js';
|
|
9
7
|
function asRecord(value) {
|
|
10
8
|
return isObject(value) ? value : undefined;
|
|
11
9
|
}
|
|
12
|
-
|
|
10
|
+
function readUnknown(obj, key) {
|
|
13
11
|
const record = asRecord(obj);
|
|
14
12
|
return record ? record[key] : undefined;
|
|
15
13
|
}
|
|
@@ -38,7 +36,7 @@ export function withSignal(signal) {
|
|
|
38
36
|
return signal === undefined ? {} : { signal };
|
|
39
37
|
}
|
|
40
38
|
/* -------------------------------------------------------------------------------------------------
|
|
41
|
-
* Inline content
|
|
39
|
+
* Inline content truncation
|
|
42
40
|
* ------------------------------------------------------------------------------------------------- */
|
|
43
41
|
export const TRUNCATION_MARKER = '...[truncated]';
|
|
44
42
|
function getOpenCodeFence(content) {
|
|
@@ -147,29 +145,22 @@ const inlineLimiter = new InlineContentLimiter();
|
|
|
147
145
|
function applyInlineContentLimit(content, inlineLimitOverride) {
|
|
148
146
|
return inlineLimiter.apply(content, inlineLimitOverride);
|
|
149
147
|
}
|
|
150
|
-
function createUrlResolution(params) {
|
|
151
|
-
return {
|
|
152
|
-
normalizedUrl: params.normalizedUrl,
|
|
153
|
-
originalUrl: params.originalUrl,
|
|
154
|
-
transformed: params.transformed,
|
|
155
|
-
};
|
|
156
|
-
}
|
|
157
148
|
function resolveNormalizedUrl(url) {
|
|
158
149
|
const { normalizedUrl: validatedUrl } = normalizeUrl(url);
|
|
159
150
|
const transformedResult = transformToRawUrl(validatedUrl);
|
|
160
151
|
if (!transformedResult.transformed) {
|
|
161
|
-
return
|
|
152
|
+
return {
|
|
162
153
|
normalizedUrl: validatedUrl,
|
|
163
154
|
originalUrl: validatedUrl,
|
|
164
155
|
transformed: false,
|
|
165
|
-
}
|
|
156
|
+
};
|
|
166
157
|
}
|
|
167
158
|
const { normalizedUrl: transformedUrl } = normalizeUrl(transformedResult.url);
|
|
168
|
-
return
|
|
159
|
+
return {
|
|
169
160
|
normalizedUrl: transformedUrl,
|
|
170
161
|
originalUrl: validatedUrl,
|
|
171
162
|
transformed: true,
|
|
172
|
-
}
|
|
163
|
+
};
|
|
173
164
|
}
|
|
174
165
|
function logRawUrlTransformation(resolvedUrl) {
|
|
175
166
|
if (!resolvedUrl.transformed)
|
|
@@ -178,9 +169,6 @@ function logRawUrlTransformation(resolvedUrl) {
|
|
|
178
169
|
original: resolvedUrl.originalUrl,
|
|
179
170
|
});
|
|
180
171
|
}
|
|
181
|
-
/* -------------------------------------------------------------------------------------------------
|
|
182
|
-
* Cache helpers
|
|
183
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
184
172
|
function extractTitle(value) {
|
|
185
173
|
return readString(value, 'title');
|
|
186
174
|
}
|
|
@@ -207,7 +195,7 @@ function attemptCacheRetrieval(params) {
|
|
|
207
195
|
const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
|
|
208
196
|
if (!cacheKey)
|
|
209
197
|
return null;
|
|
210
|
-
const cached =
|
|
198
|
+
const cached = get(cacheKey);
|
|
211
199
|
if (!cached)
|
|
212
200
|
return null;
|
|
213
201
|
if (!deserialize) {
|
|
@@ -246,7 +234,7 @@ function persistCache(params) {
|
|
|
246
234
|
...(title === undefined ? {} : { title }),
|
|
247
235
|
};
|
|
248
236
|
try {
|
|
249
|
-
|
|
237
|
+
set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
|
|
250
238
|
}
|
|
251
239
|
catch (error) {
|
|
252
240
|
logWarn('Failed to persist cache entry', {
|
|
@@ -256,13 +244,10 @@ function persistCache(params) {
|
|
|
256
244
|
});
|
|
257
245
|
}
|
|
258
246
|
}
|
|
259
|
-
/* -------------------------------------------------------------------------------------------------
|
|
260
|
-
* Pipeline executor
|
|
261
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
262
247
|
export async function executeFetchPipeline(options) {
|
|
263
248
|
const resolvedUrl = resolveNormalizedUrl(options.url);
|
|
264
249
|
logRawUrlTransformation(resolvedUrl);
|
|
265
|
-
const cacheKey =
|
|
250
|
+
const cacheKey = createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
|
|
266
251
|
if (!options.forceRefresh) {
|
|
267
252
|
const cachedResult = attemptCacheRetrieval({
|
|
268
253
|
cacheKey,
|
|
@@ -279,7 +264,7 @@ export async function executeFetchPipeline(options) {
|
|
|
279
264
|
const resolvedFinalUrl = finalUrl || resolvedUrl.normalizedUrl;
|
|
280
265
|
const transformUrl = resolvedFinalUrl;
|
|
281
266
|
const data = await options.transform({ buffer, encoding, ...(truncated ? { truncated: true } : {}) }, transformUrl);
|
|
282
|
-
if (
|
|
267
|
+
if (isEnabled()) {
|
|
283
268
|
persistCache({
|
|
284
269
|
cacheKey,
|
|
285
270
|
data,
|
|
@@ -288,7 +273,7 @@ export async function executeFetchPipeline(options) {
|
|
|
288
273
|
cacheNamespace: options.cacheNamespace,
|
|
289
274
|
});
|
|
290
275
|
if (finalUrl && finalUrl !== resolvedUrl.normalizedUrl) {
|
|
291
|
-
const finalCacheKey =
|
|
276
|
+
const finalCacheKey = createCacheKey(options.cacheNamespace, finalUrl, options.cacheVary);
|
|
292
277
|
if (finalCacheKey && finalCacheKey !== cacheKey) {
|
|
293
278
|
persistCache({
|
|
294
279
|
cacheKey: finalCacheKey,
|
|
@@ -313,18 +298,8 @@ export async function executeFetchPipeline(options) {
|
|
|
313
298
|
function normalizeExtractedMetadata(metadata) {
|
|
314
299
|
if (!metadata)
|
|
315
300
|
return undefined;
|
|
316
|
-
const normalized =
|
|
317
|
-
|
|
318
|
-
...(metadata.description ? { description: metadata.description } : {}),
|
|
319
|
-
...(metadata.author ? { author: metadata.author } : {}),
|
|
320
|
-
...(metadata.image ? { image: metadata.image } : {}),
|
|
321
|
-
...(metadata.favicon ? { favicon: metadata.favicon } : {}),
|
|
322
|
-
...(metadata.publishedAt ? { publishedAt: metadata.publishedAt } : {}),
|
|
323
|
-
...(metadata.modifiedAt ? { modifiedAt: metadata.modifiedAt } : {}),
|
|
324
|
-
};
|
|
325
|
-
if (Object.keys(normalized).length === 0)
|
|
326
|
-
return undefined;
|
|
327
|
-
return normalized;
|
|
301
|
+
const normalized = Object.fromEntries(Object.entries(metadata).filter(([, v]) => Boolean(v)));
|
|
302
|
+
return Object.keys(normalized).length > 0 ? normalized : undefined;
|
|
328
303
|
}
|
|
329
304
|
const cachedMarkdownSchema = z
|
|
330
305
|
.object({
|
|
@@ -405,4 +380,3 @@ export async function performSharedFetch(options, deps = {}) {
|
|
|
405
380
|
const inlineResult = applyInlineContentLimit(pipeline.data.content, options.maxInlineChars);
|
|
406
381
|
return { pipeline, inlineResult };
|
|
407
382
|
}
|
|
408
|
-
//# sourceMappingURL=tool-pipeline.js.map
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
readonly url: string;
|
|
6
|
-
readonly transformed: boolean;
|
|
7
|
-
readonly platform?: string;
|
|
8
|
-
}
|
|
1
|
+
import { type ServerResponse } from 'node:http';
|
|
2
|
+
import { type TransformResult } from './url.js';
|
|
3
|
+
export declare function generateSafeFilename(url: string, title?: string, hashFallback?: string, extension?: string): string;
|
|
4
|
+
export declare function handleDownload(res: ServerResponse, namespace: string, hash: string): void;
|
|
9
5
|
interface FetchTelemetryContext {
|
|
10
6
|
requestId: string;
|
|
11
7
|
startTime: number;
|
|
@@ -14,6 +10,9 @@ interface FetchTelemetryContext {
|
|
|
14
10
|
contextRequestId?: string;
|
|
15
11
|
operationId?: string;
|
|
16
12
|
}
|
|
13
|
+
interface FetchOptions {
|
|
14
|
+
signal?: AbortSignal;
|
|
15
|
+
}
|
|
17
16
|
export declare function isBlockedIp(ip: string): boolean;
|
|
18
17
|
export declare function normalizeUrl(urlString: string): {
|
|
19
18
|
normalizedUrl: string;
|
|
@@ -41,4 +40,3 @@ export declare function fetchNormalizedUrlBuffer(normalizedUrl: string, options?
|
|
|
41
40
|
finalUrl: string;
|
|
42
41
|
}>;
|
|
43
42
|
export {};
|
|
44
|
-
//# sourceMappingURL=fetch.d.ts.map
|