@j0hanz/superfetch 1.0.2 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +345 -57
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +256 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +6 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +7 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +75 -62
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +4 -12
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +22 -47
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +2 -10
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/cached-content.d.ts +5 -0
- package/dist/resources/cached-content.d.ts.map +1 -0
- package/dist/resources/cached-content.js +93 -0
- package/dist/resources/cached-content.js.map +1 -0
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +40 -5
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +20 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +128 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +194 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +12 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +60 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +13 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +143 -54
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +57 -27
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +6 -18
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +6 -10
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +83 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +6 -12
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +12 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +184 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +78 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +99 -5
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +6 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +38 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +238 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +4 -12
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +34 -6
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +81 -79
|
@@ -1,111 +1,69 @@
|
|
|
1
|
-
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
2
|
-
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
1
|
import { extractContent } from '../../services/extractor.js';
|
|
2
|
+
import { logDebug, logError } from '../../services/logger.js';
|
|
4
3
|
import { parseHtml } from '../../services/parser.js';
|
|
5
|
-
import { toJsonl } from '../../transformers/jsonl.transformer.js';
|
|
6
|
-
import * as cache from '../../services/cache.js';
|
|
7
|
-
import { config } from '../../config/index.js';
|
|
8
|
-
import { logError } from '../../services/logger.js';
|
|
9
4
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
5
|
+
import { buildMetadata, shouldUseArticle, truncateContent, } from '../utils/common.js';
|
|
6
|
+
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
7
|
+
import { toJsonl } from '../../transformers/jsonl.transformer.js';
|
|
10
8
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
11
|
-
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks';
|
|
12
|
-
function
|
|
13
|
-
//
|
|
14
|
-
const { article, metadata: extractedMeta } = extractContent(html, url
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
: undefined;
|
|
28
|
-
return { contentBlocks, metadata, title: article.title };
|
|
29
|
-
}
|
|
30
|
-
// Fallback: use parsed HTML directly
|
|
31
|
-
const contentBlocks = parseHtml(html);
|
|
32
|
-
const metadata = options.includeMetadata && config.extraction.includeMetadata
|
|
33
|
-
? {
|
|
34
|
-
type: 'metadata',
|
|
35
|
-
title: extractedMeta.title,
|
|
36
|
-
description: extractedMeta.description,
|
|
37
|
-
author: extractedMeta.author,
|
|
38
|
-
url,
|
|
39
|
-
fetchedAt: new Date().toISOString(),
|
|
40
|
-
}
|
|
41
|
-
: undefined;
|
|
42
|
-
return { contentBlocks, metadata, title: extractedMeta.title };
|
|
9
|
+
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.';
|
|
10
|
+
/**
 * Turns a fetched HTML document into the JSONL payload returned by the
 * fetch-url tool.
 *
 * @param {string} html - Raw HTML of the fetched page.
 * @param {string} url - URL the HTML was fetched from.
 * @param {{ extractMainContent: boolean, includeMetadata: boolean }} options
 *   Flags forwarded to the extraction and metadata helpers.
 * @returns {{ content: string, contentBlocks: number, title: (string|undefined) }}
 *   The serialized JSONL, the number of parsed blocks, and the chosen title.
 */
function transformToJsonl(html, url, options) {
    const { extractMainContent, includeMetadata } = options;
    // Article extraction is only requested when main-content mode is on; the
    // original note describes this as a way to avoid invoking JSDOM needlessly.
    const extraction = extractContent(html, url, { extractArticle: extractMainContent });
    const { article, metadata: pageMeta } = extraction;
    const preferArticle = shouldUseArticle(extractMainContent, article);

    // Parse either the extracted article body or the untouched page.
    let htmlToParse;
    if (preferArticle) {
        htmlToParse = article.content;
    }
    else {
        htmlToParse = html;
    }
    const blocks = parseHtml(htmlToParse);
    const metadataBlock = buildMetadata(url, article, pageMeta, preferArticle, includeMetadata);

    // The title comes from the same source that supplied the parsed HTML.
    let title;
    if (preferArticle) {
        title = article.title;
    }
    else {
        title = pageMeta.title;
    }
    return { content: toJsonl(blocks, metadataBlock), contentBlocks: blocks.length, title };
}
|
|
44
26
|
export async function fetchUrlToolHandler(input) {
|
|
27
|
+
if (!input.url) {
|
|
28
|
+
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
29
|
+
}
|
|
45
30
|
try {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
content: [
|
|
66
|
-
{
|
|
67
|
-
type: 'text',
|
|
68
|
-
text: JSON.stringify(structuredContent),
|
|
69
|
-
},
|
|
70
|
-
],
|
|
71
|
-
structuredContent,
|
|
72
|
-
};
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
const html = await fetchUrlWithRetry(url, input.customHeaders);
|
|
76
|
-
// Validate HTML content was received
|
|
77
|
-
if (!html) {
|
|
78
|
-
return createToolErrorResponse('No content received from URL', url, 'EMPTY_CONTENT');
|
|
79
|
-
}
|
|
80
|
-
const { contentBlocks, metadata, title } = extractContentFromHtml(html, url, {
|
|
81
|
-
extractMainContent: input.extractMainContent ?? true,
|
|
82
|
-
includeMetadata: input.includeMetadata ?? true,
|
|
31
|
+
const extractMainContent = input.extractMainContent ?? true;
|
|
32
|
+
const includeMetadata = input.includeMetadata ?? true;
|
|
33
|
+
logDebug('Fetching URL', {
|
|
34
|
+
url: input.url,
|
|
35
|
+
extractMainContent,
|
|
36
|
+
includeMetadata,
|
|
37
|
+
});
|
|
38
|
+
const result = await executeFetchPipeline({
|
|
39
|
+
url: input.url,
|
|
40
|
+
cacheNamespace: 'url',
|
|
41
|
+
customHeaders: input.customHeaders,
|
|
42
|
+
retries: input.retries,
|
|
43
|
+
transform: (html, url) => transformToJsonl(html, url, { extractMainContent, includeMetadata }),
|
|
44
|
+
serialize: (data) => data.content,
|
|
45
|
+
deserialize: (cached) => ({
|
|
46
|
+
content: cached,
|
|
47
|
+
contentBlocks: 0,
|
|
48
|
+
title: undefined,
|
|
49
|
+
}),
|
|
83
50
|
});
|
|
84
|
-
|
|
85
|
-
if (input.maxContentLength &&
|
|
86
|
-
input.maxContentLength > 0 &&
|
|
87
|
-
jsonlContent.length > input.maxContentLength) {
|
|
88
|
-
jsonlContent =
|
|
89
|
-
jsonlContent.substring(0, input.maxContentLength) + '\n...[truncated]';
|
|
90
|
-
}
|
|
91
|
-
// Cache the result
|
|
92
|
-
if (cacheKey) {
|
|
93
|
-
cache.set(cacheKey, jsonlContent);
|
|
94
|
-
}
|
|
51
|
+
const { content, truncated } = truncateContent(result.data.content, input.maxContentLength);
|
|
95
52
|
const structuredContent = {
|
|
96
|
-
url,
|
|
97
|
-
title,
|
|
98
|
-
contentBlocks: contentBlocks
|
|
99
|
-
fetchedAt:
|
|
53
|
+
url: result.url,
|
|
54
|
+
title: result.data.title,
|
|
55
|
+
contentBlocks: result.data.contentBlocks,
|
|
56
|
+
fetchedAt: result.fetchedAt,
|
|
100
57
|
format: 'jsonl',
|
|
101
|
-
content
|
|
102
|
-
cached:
|
|
58
|
+
content,
|
|
59
|
+
cached: result.fromCache,
|
|
60
|
+
...(truncated && { truncated }),
|
|
103
61
|
};
|
|
104
62
|
return {
|
|
105
63
|
content: [
|
|
106
64
|
{
|
|
107
65
|
type: 'text',
|
|
108
|
-
text: JSON.stringify(structuredContent, null, 2),
|
|
66
|
+
text: JSON.stringify(structuredContent, result.fromCache ? undefined : null, result.fromCache ? undefined : 2),
|
|
109
67
|
},
|
|
110
68
|
],
|
|
111
69
|
structuredContent,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-url.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-url.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAElE,MAAM,CAAC,MAAM,mBAAmB,GAAG,WAAW,CAAC;AAC/C,MAAM,CAAC,MAAM,0BAA0B,GACrC,0JAA0J,CAAC;AAE7J,SAAS,gBAAgB,CACvB,IAAY,EACZ,GAAW,EACX,OAAkE;IAElE,gFAAgF;IAChF,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IACH,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;IACzE,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACvD,MAAM,aAAa,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;IAC5C,MAAM,QAAQ,GAAG,aAAa,CAC5B,GAAG,EACH,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE/D,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC;QACzC,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,KAAK;KACN,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,KAAoB;IAK5D,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC;QACH,MAAM,kBAAkB,GAAG,KAAK,CAAC,kBAAkB,IAAI,IAAI,CAAC;QAC5D,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;QAEtD,QAAQ,CAAC,cAAc,EAAE;YACvB,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,kBAAkB;YAClB,eAAe;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAuB;YAC9D,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,KAAK;YACrB,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CACvB,gBAAgB,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,kBAAkB,EAAE,eAAe,EAAE,CAAC;YACtE,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO;YACjC,WAAW,EAAE,CAAC,MAAM,EAAE,E
AAE,CAAC,CAAC;gBACxB,OAAO,EAAE,MAAM;gBACf,aAAa,EAAE,CAAC;gBAChB,KAAK,EAAE,SAAS;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,MAAM,CAAC,IAAI,CAAC,OAAO,EACnB,KAAK,CAAC,gBAAgB,CACvB,CAAC;QAEF,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,aAAa,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa;YACxC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,MAAM,EAAE,OAAgB;YACxB,OAAO;YACP,MAAM,EAAE,MAAM,CAAC,SAAS;YACxB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;SAChC,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB,iBAAiB,EACjB,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EACnC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CACjC;iBACF;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sBAAsB,EACtB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,qBAAqB,CAAC,CAAC;IAClE,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { FetchUrlsInput } from '../../config/types.js';
|
|
2
|
+
export declare const FETCH_URLS_TOOL_NAME = "fetch-urls";
|
|
3
|
+
export declare const FETCH_URLS_TOOL_DESCRIPTION = "Fetches multiple URLs in parallel and converts them to AI-readable format (JSONL or Markdown). Supports concurrency control and continues on individual failures.";
|
|
4
|
+
export declare function fetchUrlsToolHandler(input: FetchUrlsInput): Promise<{
|
|
5
|
+
content: {
|
|
6
|
+
type: 'text';
|
|
7
|
+
text: string;
|
|
8
|
+
}[];
|
|
9
|
+
structuredContent?: Record<string, unknown>;
|
|
10
|
+
isError?: boolean;
|
|
11
|
+
}>;
|
|
12
|
+
//# sourceMappingURL=fetch-urls.tool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-urls.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAEV,cAAc,EAGf,MAAM,uBAAuB,CAAC;AAwB/B,eAAO,MAAM,oBAAoB,eAAe,CAAC;AACjD,eAAO,MAAM,2BAA2B,sKAC6H,CAAC;AA6HtK,wBAAsB,oBAAoB,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC;IACzE,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC1C,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC,CAsHD"}
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import * as cache from '../../services/cache.js';
|
|
3
|
+
import { extractContent, extractMetadataWithCheerio, } from '../../services/extractor.js';
|
|
4
|
+
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
5
|
+
import { logDebug, logError, logWarn } from '../../services/logger.js';
|
|
6
|
+
import { parseHtml } from '../../services/parser.js';
|
|
7
|
+
import { runWithConcurrency } from '../../utils/concurrency.js';
|
|
8
|
+
import { createToolErrorResponse } from '../../utils/tool-error-handler.js';
|
|
9
|
+
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
10
|
+
import { buildMetadata, shouldUseArticle, truncateContent, } from '../utils/common.js';
|
|
11
|
+
import { createBatchResponse } from '../utils/response-builder.js';
|
|
12
|
+
import { toJsonl } from '../../transformers/jsonl.transformer.js';
|
|
13
|
+
import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
|
|
14
|
+
export const FETCH_URLS_TOOL_NAME = 'fetch-urls';
|
|
15
|
+
export const FETCH_URLS_TOOL_DESCRIPTION = 'Fetches multiple URLs in parallel and converts them to AI-readable format (JSONL or Markdown). Supports concurrency control and continues on individual failures.';
|
|
16
|
+
const MAX_URLS = 10;
|
|
17
|
+
const DEFAULT_CONCURRENCY = 3;
|
|
18
|
+
/**
 * Fetches one URL for the batch tool and renders it as JSONL or Markdown.
 *
 * This function never rejects: every failure is caught, logged as a warning
 * and returned as a `success: false` result, so one bad URL cannot abort the
 * surrounding batch.
 *
 * Caching: the cache key is built from the namespace and the normalized URL
 * only, so the value stored must not depend on `maxContentLength`. The full
 * rendered output is cached, and the caller's length limit is applied when the
 * result is returned (on both fresh fetches and cache hits).
 *
 * @param {string} url - URL exactly as supplied by the caller.
 * @param {object} options
 * @param {boolean} options.extractMainContent - Try article extraction first.
 * @param {boolean} options.includeMetadata - Attach a metadata block.
 * @param {number} [options.maxContentLength] - Optional output length limit.
 * @param {string} options.format - `'markdown'` selects Markdown; anything
 *   else produces JSONL.
 * @returns {Promise<object>} On success `{ url, success: true, content, cached }`
 *   (plus `title` and `contentBlocks` for fresh fetches); on failure
 *   `{ url, success: false, cached: false, error, errorCode }`.
 */
async function processSingleUrl(url, options) {
    try {
        const normalizedUrl = validateAndNormalizeUrl(url);
        const cacheNamespace = options.format === 'markdown' ? 'markdown' : 'url';
        const cacheKey = cache.createCacheKey(cacheNamespace, normalizedUrl);
        if (cacheKey) {
            const cached = cache.get(cacheKey);
            if (cached) {
                logDebug('Batch cache hit', { url: normalizedUrl });
                // NOTE(review): assumes cache entries expose the stored string as
                // `.content` — confirm against services/cache.js. Non-string
                // values are passed through untouched, as before.
                const cachedContent = cached.content;
                const content = typeof cachedContent === 'string'
                    ? truncateContent(cachedContent, options.maxContentLength).content
                    : cachedContent;
                return {
                    url: normalizedUrl,
                    success: true,
                    content,
                    cached: true,
                };
            }
        }
        const fetchResult = await fetchUrlWithRetry(normalizedUrl);
        let sourceHtml;
        let title;
        let metadata;
        if (!options.extractMainContent) {
            // Cheerio-only path: convert the raw page and read metadata with
            // cheerio (the original notes this skips JSDOM entirely).
            sourceHtml = fetchResult.html;
            const $ = cheerio.load(fetchResult.html);
            const extractedMeta = extractMetadataWithCheerio($);
            ({ title } = extractedMeta);
            if (options.includeMetadata) {
                metadata = {
                    type: 'metadata',
                    url: normalizedUrl,
                    fetchedAt: new Date().toISOString(),
                    title: extractedMeta.title,
                    description: extractedMeta.description,
                    author: extractedMeta.author,
                };
            }
        }
        else {
            // Article path: fall back to the raw page when no usable article
            // was extracted.
            const { article, metadata: extractedMeta } = extractContent(fetchResult.html, normalizedUrl, {
                extractArticle: true,
            });
            const useArticle = shouldUseArticle(true, article);
            metadata = buildMetadata(normalizedUrl, article, extractedMeta, useArticle, options.includeMetadata);
            sourceHtml = useArticle ? article.content : fetchResult.html;
            title = useArticle ? article.title : extractedMeta.title;
        }
        let fullContent;
        let contentBlocks;
        if (options.format === 'markdown') {
            fullContent = htmlToMarkdown(sourceHtml, metadata);
        }
        else {
            const blocks = parseHtml(sourceHtml);
            contentBlocks = blocks.length;
            fullContent = toJsonl(blocks, metadata);
        }
        // Cache BEFORE truncating: the key ignores maxContentLength, so storing
        // a truncated value would hand clipped content to later callers.
        if (cacheKey) {
            cache.set(cacheKey, fullContent);
        }
        const { content } = truncateContent(fullContent, options.maxContentLength);
        return {
            url: normalizedUrl,
            success: true,
            title,
            content,
            contentBlocks,
            cached: false,
        };
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        // Prefer a string `code` carried by the error; otherwise use a generic code.
        const errorCode = error instanceof Error &&
            'code' in error &&
            typeof error.code === 'string'
            ? error.code
            : 'FETCH_ERROR';
        logWarn('Batch URL processing failed', { url, error: errorMessage });
        return {
            url,
            success: false,
            cached: false,
            error: errorMessage,
            errorCode,
        };
    }
}
|
|
106
|
+
/**
 * Handler for the `fetch-urls` tool: processes a batch of URLs with bounded
 * concurrency and returns one combined response.
 *
 * Validation failures (missing/empty `urls`, more than `MAX_URLS` entries, or
 * no non-blank string entries) return a `VALIDATION_ERROR` tool error.
 * When `continueOnError` is `false`, the whole batch still runs to completion;
 * the first failed result is then reported as a tool error instead of the
 * batch response.
 *
 * @param {object} input
 * @param {string[]} input.urls - URLs to fetch; blank entries are dropped.
 * @param {number} [input.concurrency] - Clamped to the range 1..5; defaults to
 *   `DEFAULT_CONCURRENCY`.
 * @param {boolean} [input.continueOnError=true]
 * @param {string} [input.format='jsonl']
 * @param {boolean} [input.extractMainContent=true]
 * @param {boolean} [input.includeMetadata=true]
 * @param {number} [input.maxContentLength]
 * @returns {Promise<object>} The value produced by `createBatchResponse` or
 *   `createToolErrorResponse`.
 */
export async function fetchUrlsToolHandler(input) {
    // Upper bound applied to the caller-supplied concurrency.
    const MAX_CONCURRENCY = 5;
    try {
        const requestedUrls = input?.urls;
        // The original note says the Zod schema guarantees an array; guard anyway
        // so a direct caller gets a validation error rather than a TypeError
        // message reported as BATCH_ERROR.
        if (!Array.isArray(requestedUrls) || requestedUrls.length === 0) {
            return createToolErrorResponse('At least one URL is required', '', 'VALIDATION_ERROR');
        }
        if (requestedUrls.length > MAX_URLS) {
            return createToolErrorResponse(`Maximum ${MAX_URLS} URLs allowed per batch`, '', 'VALIDATION_ERROR');
        }
        // Drop non-string and whitespace-only entries.
        const validUrls = requestedUrls.filter((url) => typeof url === 'string' && url.trim().length > 0);
        if (validUrls.length === 0) {
            return createToolErrorResponse('No valid URLs provided', '', 'VALIDATION_ERROR');
        }
        const concurrency = Math.min(Math.max(1, input.concurrency ?? DEFAULT_CONCURRENCY), MAX_CONCURRENCY);
        const continueOnError = input.continueOnError ?? true;
        const format = input.format ?? 'jsonl';
        logDebug('Starting batch URL fetch', {
            urlCount: validUrls.length,
            concurrency,
            format,
        });
        // Per-URL options are the same for every task; each task gets its own copy.
        const urlOptions = {
            extractMainContent: input.extractMainContent ?? true,
            includeMetadata: input.includeMetadata ?? true,
            maxContentLength: input.maxContentLength,
            format,
        };
        const tasks = validUrls.map((url) => async () => processSingleUrl(url, { ...urlOptions }));
        const settledResults = await runWithConcurrency(concurrency, tasks, {
            onProgress: (completed, total) => {
                logDebug('Batch progress', {
                    completed,
                    total,
                    percentage: Math.round((completed / total) * 100),
                });
            },
        });
        // Results are read as settled-promise records (`status` with `value` or
        // `reason`), index-aligned with `validUrls`.
        const results = settledResults.map((result, index) => {
            if (result.status === 'fulfilled') {
                return result.value;
            }
            // Not expected in practice: processSingleUrl catches its own errors.
            const { reason } = result;
            return {
                url: validUrls[index] ?? 'unknown',
                success: false,
                cached: false,
                error: reason instanceof Error ? reason.message : String(reason),
                errorCode: 'PROMISE_REJECTED',
            };
        });
        if (!continueOnError) {
            const firstError = results.find((r) => !r.success);
            if (firstError) {
                const errorMsg = firstError.error ?? 'Unknown error';
                return createToolErrorResponse(`Batch failed: ${errorMsg}`, firstError.url, firstError.errorCode ?? 'BATCH_ERROR');
            }
        }
        return createBatchResponse(results);
    }
    catch (error) {
        logError('fetch-urls tool error', error instanceof Error ? error : undefined);
        return createToolErrorResponse(error instanceof Error ? error.message : 'Failed to fetch URLs', '', 'BATCH_ERROR');
    }
}
|
|
184
|
+
//# sourceMappingURL=fetch-urls.tool.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-urls.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AASnC,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EACL,cAAc,EACd,0BAA0B,GAC3B,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AACvE,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mCAAmC,CAAC;AAC5E,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAEnE,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,MAAM,CAAC,MAAM,oBAAoB,GAAG,YAAY,CAAC;AACjD,MAAM,CAAC,MAAM,2BAA2B,GACtC,mKAAmK,CAAC;AAEtK,MAAM,QAAQ,GAAG,EAAE,CAAC;AACpB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAS9B,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,OAAuB;IAEvB,IAAI,CAAC;QACH,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;QACnD,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1E,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;QAErE,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,QAAQ,CAAC,iBAAiB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;gBACpD,OAAO;oBACL,GAAG,EAAE,aAAa;oBAClB,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,MAAM,EAAE,IAAI;iBACb,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,aAAa,CAAC,CAAC;QAE3D,IAAI,UAAkB,CAAC;QACvB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAmC,CAAC;QAExC,kEAAkE;QAClE,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,CAAC;YAChC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC;YAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YACzC,MAAM,aAAa,GAAG,0BAA0B,CAAC,CAAC,CAAC,CAAC;YACpD,CAAC,EAAE,KAAK,EAAE,GAAG,aAAa,CAAC,CAAC;YAE5B,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,QAAQ,GAAG;oBACT,IAAI,EAAE,UAAmB;oBACzB,GAAG,EAAE,aAAa;oBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;oBACn
C,KAAK,EAAE,aAAa,CAAC,KAAK;oBAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;oBACtC,MAAM,EAAE,aAAa,CAAC,MAAM;iBAC7B,CAAC;YACJ,CAAC;QACH,CAAC;aAAM,CAAC;YACN,8DAA8D;YAC9D,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CACzD,WAAW,CAAC,IAAI,EAChB,aAAa,EACb;gBACE,cAAc,EAAE,IAAI;aACrB,CACF,CAAC;YACF,MAAM,UAAU,GAAG,gBAAgB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACnD,QAAQ,GAAG,aAAa,CACtB,aAAa,EACb,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;YACF,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC;YAC7D,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;QAC3D,CAAC;QAED,IAAI,OAAe,CAAC;QACpB,IAAI,aAAiC,CAAC;QAEtC,IAAI,OAAO,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAClC,OAAO,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;YACrC,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC;YAC9B,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QACtC,CAAC;QAED,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,GAAG,eAAe,CACnD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;QACF,OAAO,GAAG,gBAAgB,CAAC;QAC3B,IAAI,QAAQ;YAAE,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE3C,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,IAAI;YACb,KAAK;YACL,OAAO;YACP,aAAa;YACb,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QAC3D,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;YACtB,MAAM,IAAI,KAAK;YACf,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ;YAC5B,CAAC,CAAC,KAAK,CAAC,IAAI;YACZ,CAAC,CAAC,aAAa,CAAC;QAEpB,OAAO,CAAC,6BAA6B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC;QACrE,OAAO;YACL,GAAG;YACH,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,YAAY;YACnB,SAAS;SACV,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAqB;IAK9D,IAAI,CAAC;QACH,8EAA8E;QAC9E,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,uBAAuB,CAC5B,8BAA8B,EAC9B,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,yBAAyB;QACzB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;YACjC,OAAO,uBAAuB,CAC5B,WAAW,QAAQ,yBAAyB,EAC5C,EAAE,EAC
F,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CACjC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAC1D,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,uBAAuB,CAC5B,wBAAwB,EACxB,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,WAAW,IAAI,mBAAmB,CAAC,EACrD,CAAC,CACF,CAAC;QACF,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;QACtD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC;QAEvC,QAAQ,CAAC,0BAA0B,EAAE;YACnC,QAAQ,EAAE,SAAS,CAAC,MAAM;YAC1B,WAAW;YACX,MAAM;SACP,CAAC,CAAC;QAEH,4BAA4B;QAC5B,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CACzB,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE,CAClB,gBAAgB,CAAC,GAAG,EAAE;YACpB,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;YACxC,MAAM;SACP,CAAC,CACL,CAAC;QAEF,mCAAmC;QACnC,MAAM,cAAc,GAAG,MAAM,kBAAkB,CAAC,WAAW,EAAE,KAAK,EAAE;YAClE,UAAU,EAAE,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;gBAC/B,QAAQ,CAAC,gBAAgB,EAAE;oBACzB,SAAS;oBACT,KAAK;oBACL,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;iBAClD,CAAC,CAAC;YACL,CAAC;SACF,CAAC,CAAC;QAEH,+DAA+D;QAC/D,MAAM,eAAe,GAAG,CAAC,EAAE,MAAM,EAAyB,EAAU,EAAE;YACpE,MAAM,WAAW,GAAY,MAAM,CAAC;YACpC,OAAO,WAAW,YAAY,KAAK;gBACjC,CAAC,CAAC,WAAW,CAAC,OAAO;gBACrB,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAC1B,CAAC,CAAC;QAEF,kBAAkB;QAClB,MAAM,OAAO,GAAqB,cAAc,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YACrE,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClC,OAAO,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC;iBAAM,CAAC;gBACN,0EAA0E;gBAC1E,OAAO;oBACL,GAAG,EAAE,SAAS,CAAC,KAAK,CAAC,IAAI,SAAS;oBAClC,OAAO,EAAE,KAAc;oBACvB,MAAM,EAAE,KAAc;oBACtB,KAAK,EAAE,eAAe,CAAC,MAAM,CAAC;oBAC9B,SAAS,EAAE,kBAAkB;iBAC9B,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,yCAAyC;QACzC,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACnD,IAAI,UAAU,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;gB
ACtC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;gBACrD,OAAO,uBAAuB,CAC5B,iBAAiB,QAAQ,EAAE,EAC3B,UAAU,CAAC,GAAG,EACd,UAAU,CAAC,SAAS,IAAI,aAAa,CACtC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,mBAAmB,CAAC,OAAO,CAAC,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,uBAAuB,EACvB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QAEF,OAAO,uBAAuB,CAC5B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB,EAC/D,EAAE,EACF,aAAa,CACd,CAAC;IACJ,CAAC;AACH,CAAC"}
|
package/dist/tools/index.d.ts
CHANGED
|
@@ -1,7 +1,3 @@
|
|
|
1
1
|
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
-
/**
|
|
3
|
-
* Registers all tools with the MCP server using the modern McpServer API
|
|
4
|
-
* Tools are registered with Zod schemas for automatic validation
|
|
5
|
-
*/
|
|
6
2
|
export declare function registerTools(server: McpServer): void;
|
|
7
3
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAsQzE,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CA4CrD"}
|
package/dist/tools/index.js
CHANGED
|
@@ -1,8 +1,28 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
2
|
+
import { FETCH_LINKS_TOOL_DESCRIPTION, FETCH_LINKS_TOOL_NAME, fetchLinksToolHandler, } from './handlers/fetch-links.tool.js';
|
|
3
|
+
import { FETCH_MARKDOWN_TOOL_DESCRIPTION, FETCH_MARKDOWN_TOOL_NAME, fetchMarkdownToolHandler, } from './handlers/fetch-markdown.tool.js';
|
|
4
|
+
import { FETCH_URL_TOOL_DESCRIPTION, FETCH_URL_TOOL_NAME, fetchUrlToolHandler, } from './handlers/fetch-url.tool.js';
|
|
5
|
+
import { FETCH_URLS_TOOL_DESCRIPTION, FETCH_URLS_TOOL_NAME, fetchUrlsToolHandler, } from './handlers/fetch-urls.tool.js';
|
|
5
6
|
// Zod schemas for runtime validation - single source of truth
|
|
7
|
+
// Common request options shared across tools
|
|
8
|
+
const RequestOptionsSchema = {
|
|
9
|
+
customHeaders: z
|
|
10
|
+
.record(z.string())
|
|
11
|
+
.optional()
|
|
12
|
+
.describe('Custom HTTP headers for the request'),
|
|
13
|
+
timeout: z
|
|
14
|
+
.number()
|
|
15
|
+
.min(1000)
|
|
16
|
+
.max(60000)
|
|
17
|
+
.optional()
|
|
18
|
+
.describe('Request timeout in milliseconds (1000-60000)'),
|
|
19
|
+
retries: z
|
|
20
|
+
.number()
|
|
21
|
+
.min(1)
|
|
22
|
+
.max(10)
|
|
23
|
+
.optional()
|
|
24
|
+
.describe('Number of retry attempts (1-10)'),
|
|
25
|
+
};
|
|
6
26
|
// Input schemas
|
|
7
27
|
const FetchUrlInputSchema = {
|
|
8
28
|
url: z.string().min(1).describe('The URL to fetch'),
|
|
@@ -26,10 +46,7 @@ const FetchUrlInputSchema = {
|
|
|
26
46
|
.optional()
|
|
27
47
|
.default('jsonl')
|
|
28
48
|
.describe('Output format'),
|
|
29
|
-
|
|
30
|
-
.record(z.string())
|
|
31
|
-
.optional()
|
|
32
|
-
.describe('Custom HTTP headers for the request'),
|
|
49
|
+
...RequestOptionsSchema,
|
|
33
50
|
};
|
|
34
51
|
const FetchLinksInputSchema = {
|
|
35
52
|
url: z.string().min(1).describe('The URL to extract links from'),
|
|
@@ -43,6 +60,22 @@ const FetchLinksInputSchema = {
|
|
|
43
60
|
.optional()
|
|
44
61
|
.default(true)
|
|
45
62
|
.describe('Include internal links'),
|
|
63
|
+
maxLinks: z
|
|
64
|
+
.number()
|
|
65
|
+
.positive()
|
|
66
|
+
.max(1000)
|
|
67
|
+
.optional()
|
|
68
|
+
.describe('Maximum number of links to return (1-1000)'),
|
|
69
|
+
filterPattern: z
|
|
70
|
+
.string()
|
|
71
|
+
.optional()
|
|
72
|
+
.describe('Regex pattern to filter links (matches against href)'),
|
|
73
|
+
includeImages: z
|
|
74
|
+
.boolean()
|
|
75
|
+
.optional()
|
|
76
|
+
.default(false)
|
|
77
|
+
.describe('Include image links (img src attributes)'),
|
|
78
|
+
...RequestOptionsSchema,
|
|
46
79
|
};
|
|
47
80
|
const FetchMarkdownInputSchema = {
|
|
48
81
|
url: z.string().min(1).describe('The URL to fetch'),
|
|
@@ -56,6 +89,57 @@ const FetchMarkdownInputSchema = {
|
|
|
56
89
|
.optional()
|
|
57
90
|
.default(true)
|
|
58
91
|
.describe('Include YAML frontmatter metadata'),
|
|
92
|
+
maxContentLength: z
|
|
93
|
+
.number()
|
|
94
|
+
.positive()
|
|
95
|
+
.optional()
|
|
96
|
+
.describe('Maximum content length in characters'),
|
|
97
|
+
generateToc: z
|
|
98
|
+
.boolean()
|
|
99
|
+
.optional()
|
|
100
|
+
.default(false)
|
|
101
|
+
.describe('Generate table of contents from headings'),
|
|
102
|
+
...RequestOptionsSchema,
|
|
103
|
+
};
|
|
104
|
+
const FetchUrlsInputSchema = {
|
|
105
|
+
urls: z
|
|
106
|
+
.array(z.string().min(1))
|
|
107
|
+
.min(1)
|
|
108
|
+
.max(10)
|
|
109
|
+
.describe('Array of URLs to fetch (1-10 URLs)'),
|
|
110
|
+
extractMainContent: z
|
|
111
|
+
.boolean()
|
|
112
|
+
.optional()
|
|
113
|
+
.default(true)
|
|
114
|
+
.describe('Use Readability to extract main article content'),
|
|
115
|
+
includeMetadata: z
|
|
116
|
+
.boolean()
|
|
117
|
+
.optional()
|
|
118
|
+
.default(true)
|
|
119
|
+
.describe('Include page metadata (title, description, etc.)'),
|
|
120
|
+
maxContentLength: z
|
|
121
|
+
.number()
|
|
122
|
+
.positive()
|
|
123
|
+
.optional()
|
|
124
|
+
.describe('Maximum content length per URL in characters'),
|
|
125
|
+
format: z
|
|
126
|
+
.enum(['jsonl', 'markdown'])
|
|
127
|
+
.optional()
|
|
128
|
+
.default('jsonl')
|
|
129
|
+
.describe('Output format for all URLs'),
|
|
130
|
+
concurrency: z
|
|
131
|
+
.number()
|
|
132
|
+
.min(1)
|
|
133
|
+
.max(5)
|
|
134
|
+
.optional()
|
|
135
|
+
.default(3)
|
|
136
|
+
.describe('Maximum concurrent requests (1-5)'),
|
|
137
|
+
continueOnError: z
|
|
138
|
+
.boolean()
|
|
139
|
+
.optional()
|
|
140
|
+
.default(true)
|
|
141
|
+
.describe('Continue processing if some URLs fail'),
|
|
142
|
+
...RequestOptionsSchema,
|
|
59
143
|
};
|
|
60
144
|
// Output schemas for structured content validation
|
|
61
145
|
const FetchUrlOutputSchema = {
|
|
@@ -78,9 +162,17 @@ const FetchLinksOutputSchema = {
|
|
|
78
162
|
.array(z.object({
|
|
79
163
|
href: z.string().describe('The link URL'),
|
|
80
164
|
text: z.string().describe('The link anchor text'),
|
|
81
|
-
type: z.enum(['internal', 'external']).describe('Link type'),
|
|
165
|
+
type: z.enum(['internal', 'external', 'image']).describe('Link type'),
|
|
82
166
|
}))
|
|
83
167
|
.describe('Array of extracted links'),
|
|
168
|
+
filtered: z
|
|
169
|
+
.number()
|
|
170
|
+
.optional()
|
|
171
|
+
.describe('Number of links filtered out by pattern'),
|
|
172
|
+
truncated: z
|
|
173
|
+
.boolean()
|
|
174
|
+
.optional()
|
|
175
|
+
.describe('Whether results were truncated by maxLinks'),
|
|
84
176
|
error: z.string().optional().describe('Error message if the request failed'),
|
|
85
177
|
errorCode: z.string().optional().describe('Error code if the request failed'),
|
|
86
178
|
};
|
|
@@ -91,35 +183,73 @@ const FetchMarkdownOutputSchema = {
|
|
|
91
183
|
.string()
|
|
92
184
|
.describe('ISO timestamp of when the content was fetched'),
|
|
93
185
|
markdown: z.string().describe('The extracted content in Markdown format'),
|
|
186
|
+
toc: z
|
|
187
|
+
.array(z.object({
|
|
188
|
+
level: z.number().describe('Heading level (1-6)'),
|
|
189
|
+
text: z.string().describe('Heading text'),
|
|
190
|
+
slug: z.string().describe('URL-friendly anchor slug'),
|
|
191
|
+
}))
|
|
192
|
+
.optional()
|
|
193
|
+
.describe('Table of contents (if generateToc is true)'),
|
|
94
194
|
cached: z.boolean().describe('Whether the result was served from cache'),
|
|
195
|
+
truncated: z
|
|
196
|
+
.boolean()
|
|
197
|
+
.optional()
|
|
198
|
+
.describe('Whether content was truncated by maxContentLength'),
|
|
95
199
|
error: z.string().optional().describe('Error message if the request failed'),
|
|
96
200
|
errorCode: z.string().optional().describe('Error code if the request failed'),
|
|
97
201
|
};
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
202
|
+
const FetchUrlsOutputSchema = {
|
|
203
|
+
results: z
|
|
204
|
+
.array(z.object({
|
|
205
|
+
url: z.string().describe('The fetched URL'),
|
|
206
|
+
success: z.boolean().describe('Whether the fetch was successful'),
|
|
207
|
+
title: z.string().optional().describe('Page title'),
|
|
208
|
+
content: z.string().optional().describe('The extracted content'),
|
|
209
|
+
contentBlocks: z
|
|
210
|
+
.number()
|
|
211
|
+
.optional()
|
|
212
|
+
.describe('Number of content blocks (JSONL only)'),
|
|
213
|
+
cached: z.boolean().optional().describe('Whether served from cache'),
|
|
214
|
+
error: z.string().optional().describe('Error message if failed'),
|
|
215
|
+
errorCode: z.string().optional().describe('Error code if failed'),
|
|
216
|
+
}))
|
|
217
|
+
.describe('Array of results for each URL'),
|
|
218
|
+
summary: z
|
|
219
|
+
.object({
|
|
220
|
+
total: z.number().describe('Total URLs processed'),
|
|
221
|
+
successful: z.number().describe('Number of successful fetches'),
|
|
222
|
+
failed: z.number().describe('Number of failed fetches'),
|
|
223
|
+
cached: z.number().describe('Number served from cache'),
|
|
224
|
+
totalContentBlocks: z.number().describe('Total content blocks extracted'),
|
|
225
|
+
})
|
|
226
|
+
.describe('Summary statistics'),
|
|
227
|
+
fetchedAt: z.string().describe('ISO timestamp of batch completion'),
|
|
228
|
+
};
|
|
102
229
|
export function registerTools(server) {
|
|
103
|
-
// Register fetch-url tool
|
|
104
230
|
server.registerTool(FETCH_URL_TOOL_NAME, {
|
|
105
231
|
title: 'Fetch URL',
|
|
106
232
|
description: FETCH_URL_TOOL_DESCRIPTION,
|
|
107
233
|
inputSchema: FetchUrlInputSchema,
|
|
108
234
|
outputSchema: FetchUrlOutputSchema,
|
|
109
235
|
}, async (args) => fetchUrlToolHandler(args));
|
|
110
|
-
// Register fetch-links tool
|
|
111
236
|
server.registerTool(FETCH_LINKS_TOOL_NAME, {
|
|
112
237
|
title: 'Fetch Links',
|
|
113
238
|
description: FETCH_LINKS_TOOL_DESCRIPTION,
|
|
114
239
|
inputSchema: FetchLinksInputSchema,
|
|
115
240
|
outputSchema: FetchLinksOutputSchema,
|
|
116
241
|
}, async (args) => fetchLinksToolHandler(args));
|
|
117
|
-
// Register fetch-markdown tool
|
|
118
242
|
server.registerTool(FETCH_MARKDOWN_TOOL_NAME, {
|
|
119
243
|
title: 'Fetch Markdown',
|
|
120
244
|
description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
|
|
121
245
|
inputSchema: FetchMarkdownInputSchema,
|
|
122
246
|
outputSchema: FetchMarkdownOutputSchema,
|
|
123
247
|
}, async (args) => fetchMarkdownToolHandler(args));
|
|
248
|
+
server.registerTool(FETCH_URLS_TOOL_NAME, {
|
|
249
|
+
title: 'Fetch URLs (Batch)',
|
|
250
|
+
description: FETCH_URLS_TOOL_DESCRIPTION,
|
|
251
|
+
inputSchema: FetchUrlsInputSchema,
|
|
252
|
+
outputSchema: FetchUrlsOutputSchema,
|
|
253
|
+
}, async (args) => fetchUrlsToolHandler(args));
|
|
124
254
|
}
|
|
125
255
|
//# sourceMappingURL=index.js.map
|