@j0hanz/superfetch 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +590 -327
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +251 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +2 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +0 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +13 -47
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +1 -11
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +11 -44
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +0 -3
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +1 -4
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +2 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +9 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +71 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +187 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +6 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +53 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +4 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +30 -36
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +64 -47
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +5 -12
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +7 -4
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +84 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +8 -6
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +5 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +147 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +37 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +116 -2
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +2 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +25 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +240 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +3 -11
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +1 -1
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +4 -3
|
@@ -1,103 +1,103 @@
|
|
|
1
|
-
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
2
|
-
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
1
|
import { extractContent } from '../../services/extractor.js';
|
|
4
|
-
import {
|
|
5
|
-
import
|
|
6
|
-
import { config } from '../../config/index.js';
|
|
7
|
-
import { logError } from '../../services/logger.js';
|
|
2
|
+
import { logDebug, logError } from '../../services/logger.js';
|
|
3
|
+
import { stripMarkdownLinks } from '../../utils/content-cleaner.js';
|
|
8
4
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
5
|
+
import { buildMetadata, shouldUseArticle } from '../utils/common.js';
|
|
6
|
+
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
7
|
+
import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
|
|
9
8
|
export const FETCH_MARKDOWN_TOOL_NAME = 'fetch-markdown';
|
|
10
|
-
export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter';
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
9
|
+
export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter, table of contents, and content length limits';
|
|
10
|
+
/**
|
|
11
|
+
* Generate URL-friendly slug from text
|
|
12
|
+
* Strips markdown link syntax before slugifying
|
|
13
|
+
*/
|
|
14
|
+
function slugify(text) {
|
|
15
|
+
// First strip markdown links: [Text](#anchor) -> Text
|
|
16
|
+
const cleanText = stripMarkdownLinks(text);
|
|
17
|
+
return cleanText
|
|
18
|
+
.toLowerCase()
|
|
19
|
+
.replace(/[^\w\s-]/g, '')
|
|
20
|
+
.replace(/\s+/g, '-')
|
|
21
|
+
.replace(/--+/g, '-')
|
|
22
|
+
.trim();
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Extract table of contents from markdown headings
|
|
26
|
+
* Returns clean text without markdown link syntax
|
|
27
|
+
*/
|
|
28
|
+
function extractToc(markdown) {
|
|
29
|
+
const headingRegex = /^(#{1,6})\s+(.+)$/gm;
|
|
30
|
+
const toc = [];
|
|
31
|
+
let match;
|
|
32
|
+
while ((match = headingRegex.exec(markdown)) !== null) {
|
|
33
|
+
if (!match[1] || !match[2])
|
|
34
|
+
continue;
|
|
35
|
+
const rawText = match[2].trim();
|
|
36
|
+
// Clean markdown links from TOC text: [Usage](#usage) -> Usage
|
|
37
|
+
const text = stripMarkdownLinks(rawText);
|
|
38
|
+
toc.push({ level: match[1].length, text, slug: slugify(rawText) });
|
|
30
39
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
return toc;
|
|
41
|
+
}
|
|
42
|
+
function transformToMarkdown(html, url, options) {
|
|
43
|
+
// Only invoke JSDOM when extractMainContent is true (lazy loading optimization)
|
|
44
|
+
const { article, metadata: extractedMeta } = extractContent(html, url, {
|
|
45
|
+
extractArticle: options.extractMainContent,
|
|
46
|
+
});
|
|
47
|
+
const useArticle = shouldUseArticle(options.extractMainContent, article);
|
|
48
|
+
const metadata = buildMetadata(url, article, extractedMeta, useArticle, options.includeMetadata);
|
|
49
|
+
const sourceHtml = useArticle ? article.content : html;
|
|
50
|
+
const title = useArticle ? article.title : extractedMeta.title;
|
|
51
|
+
let markdown = htmlToMarkdown(sourceHtml, metadata);
|
|
52
|
+
const toc = options.generateToc ? extractToc(markdown) : undefined;
|
|
53
|
+
let truncated = false;
|
|
54
|
+
if (options.maxContentLength && markdown.length > options.maxContentLength) {
|
|
55
|
+
markdown =
|
|
56
|
+
markdown.substring(0, options.maxContentLength) + '\n\n...[truncated]';
|
|
57
|
+
truncated = true;
|
|
58
|
+
}
|
|
59
|
+
return { markdown, title, toc, truncated };
|
|
46
60
|
}
|
|
47
61
|
export async function fetchMarkdownToolHandler(input) {
|
|
62
|
+
if (!input.url) {
|
|
63
|
+
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
64
|
+
}
|
|
48
65
|
try {
|
|
49
|
-
|
|
50
|
-
if (!input.url) {
|
|
51
|
-
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
52
|
-
}
|
|
53
|
-
const url = validateAndNormalizeUrl(input.url);
|
|
54
|
-
const cacheKey = cache.createCacheKey('markdown', url);
|
|
55
|
-
// Check cache first
|
|
56
|
-
if (cacheKey) {
|
|
57
|
-
const cached = cache.get(cacheKey);
|
|
58
|
-
if (cached) {
|
|
59
|
-
const structuredContent = {
|
|
60
|
-
url,
|
|
61
|
-
cached: true,
|
|
62
|
-
fetchedAt: cached.fetchedAt,
|
|
63
|
-
markdown: cached.content,
|
|
64
|
-
};
|
|
65
|
-
return {
|
|
66
|
-
content: [
|
|
67
|
-
{
|
|
68
|
-
type: 'text',
|
|
69
|
-
text: JSON.stringify(structuredContent),
|
|
70
|
-
},
|
|
71
|
-
],
|
|
72
|
-
structuredContent,
|
|
73
|
-
};
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
const html = await fetchUrlWithRetry(url);
|
|
77
|
-
// Validate HTML content was received
|
|
78
|
-
if (!html) {
|
|
79
|
-
return createToolErrorResponse('No content received from URL', url, 'EMPTY_CONTENT');
|
|
80
|
-
}
|
|
81
|
-
const { markdown, title } = extractAndConvertToMarkdown(html, url, {
|
|
66
|
+
const options = {
|
|
82
67
|
extractMainContent: input.extractMainContent ?? true,
|
|
83
68
|
includeMetadata: input.includeMetadata ?? true,
|
|
69
|
+
generateToc: input.generateToc ?? false,
|
|
70
|
+
maxContentLength: input.maxContentLength,
|
|
71
|
+
};
|
|
72
|
+
logDebug('Fetching markdown', { url: input.url, ...options });
|
|
73
|
+
const result = await executeFetchPipeline({
|
|
74
|
+
url: input.url,
|
|
75
|
+
cacheNamespace: 'markdown',
|
|
76
|
+
customHeaders: input.customHeaders,
|
|
77
|
+
retries: input.retries,
|
|
78
|
+
transform: (html, url) => transformToMarkdown(html, url, options),
|
|
79
|
+
serialize: (data) => data.markdown,
|
|
80
|
+
deserialize: (cached) => ({
|
|
81
|
+
markdown: cached,
|
|
82
|
+
title: undefined,
|
|
83
|
+
toc: undefined,
|
|
84
|
+
truncated: false,
|
|
85
|
+
}),
|
|
84
86
|
});
|
|
85
|
-
// Cache the result
|
|
86
|
-
if (cacheKey) {
|
|
87
|
-
cache.set(cacheKey, markdown);
|
|
88
|
-
}
|
|
89
87
|
const structuredContent = {
|
|
90
|
-
url,
|
|
91
|
-
title,
|
|
92
|
-
fetchedAt:
|
|
93
|
-
markdown,
|
|
94
|
-
|
|
88
|
+
url: result.url,
|
|
89
|
+
title: result.data.title,
|
|
90
|
+
fetchedAt: result.fetchedAt,
|
|
91
|
+
markdown: result.data.markdown,
|
|
92
|
+
...(result.data.toc && { toc: result.data.toc }),
|
|
93
|
+
cached: result.fromCache,
|
|
94
|
+
...(result.data.truncated && { truncated: result.data.truncated }),
|
|
95
95
|
};
|
|
96
96
|
return {
|
|
97
97
|
content: [
|
|
98
98
|
{
|
|
99
99
|
type: 'text',
|
|
100
|
-
text: JSON.stringify(structuredContent, null, 2),
|
|
100
|
+
text: JSON.stringify(structuredContent, result.fromCache ? undefined : null, result.fromCache ? undefined : 2),
|
|
101
101
|
},
|
|
102
102
|
],
|
|
103
103
|
structuredContent,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-markdown.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-markdown.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAE9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,MAAM,CAAC,MAAM,wBAAwB,GAAG,gBAAgB,CAAC;AACzD,MAAM,CAAC,MAAM,+BAA+B,GAC1C,oIAAoI,CAAC;AAEvI;;;GAGG;AACH,SAAS,OAAO,CAAC,IAAY;IAC3B,sDAAsD;IACtD,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAE3C,OAAO,SAAS;SACb,WAAW,EAAE;SACb,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC;SACxB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,QAAgB;IAClC,MAAM,YAAY,GAAG,qBAAqB,CAAC;IAC3C,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;YAAE,SAAS;QACrC,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,+DAA+D;QAC/D,MAAM,IAAI,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAC1B,IAAY,EACZ,GAAW,EACX,OAAyB;IAEzB,gFAAgF;IAChF,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IACH,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;IACzE,MAAM,QAAQ,GAAG,aAAa,CAC5B,GAAG,EACH,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACvD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE/D,IAAI,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IACpD,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAEnE,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,IAAI,OAAO,CAAC,gBAAgB,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;QAC3E,QAAQ;YACN,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC,GAAG,oBAAoB,CAAC;QACzE,SAAS,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;AAC7C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,KAAyB;IACtE,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAqB;YAChC,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,KAAK;YACvC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;SACzC,CAAC;QAEF,QAAQ,CAAC,mBAAmB,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;QAE9D,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAA0B;YACjE,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,UAAU;YAC1B,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC;YACjE,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ;YAClC,WAAW,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACxB,QAAQ,EAAE,MAAM;gBAChB,KAAK,EAAE,SAAS;gBAChB,GAAG,EAAE,SAAS;gBACd,SAAS,EAAE,KAAK;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ;YAC9B,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;YAChD,MAAM,EAAE,MAAM,CAAC,SAAS;YACxB,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;SACnE,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB,iBAAiB,EACjB,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EACnC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CACjC;iBACF;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,0BAA0B,CAAC,CAAC;IACvE,CAAC;AACH,CAAC"}
|
|
@@ -1,18 +1,20 @@
|
|
|
1
|
-
import type { FetchUrlInput } from '../../types
|
|
1
|
+
import type { FetchUrlInput } from '../../config/types.js';
|
|
2
2
|
export declare const FETCH_URL_TOOL_NAME = "fetch-url";
|
|
3
|
-
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks";
|
|
4
|
-
export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<import("../../
|
|
3
|
+
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.";
|
|
4
|
+
export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<import("../../config/types.js").ToolErrorResponse | {
|
|
5
5
|
content: {
|
|
6
6
|
type: "text";
|
|
7
7
|
text: string;
|
|
8
8
|
}[];
|
|
9
9
|
structuredContent: {
|
|
10
|
+
truncated?: true;
|
|
10
11
|
url: string;
|
|
11
|
-
|
|
12
|
+
title: string | undefined;
|
|
13
|
+
contentBlocks: number;
|
|
12
14
|
fetchedAt: string;
|
|
13
|
-
content: string;
|
|
14
15
|
format: "jsonl";
|
|
15
|
-
|
|
16
|
+
content: string;
|
|
17
|
+
cached: boolean;
|
|
16
18
|
};
|
|
17
19
|
}>;
|
|
18
20
|
//# sourceMappingURL=fetch-url.tool.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-url.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-url.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,aAAa,EAEd,MAAM,uBAAuB,CAAC;AAmB/B,eAAO,MAAM,mBAAmB,cAAc,CAAC;AAC/C,eAAO,MAAM,0BAA0B,6JACqH,CAAC;AA8B7J,wBAAsB,mBAAmB,CAAC,KAAK,EAAE,aAAa;;;;;;;;;;;;;;;GAkE7D"}
|
|
@@ -1,111 +1,69 @@
|
|
|
1
|
-
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
2
|
-
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
1
|
import { extractContent } from '../../services/extractor.js';
|
|
2
|
+
import { logDebug, logError } from '../../services/logger.js';
|
|
4
3
|
import { parseHtml } from '../../services/parser.js';
|
|
5
|
-
import { toJsonl } from '../../transformers/jsonl.transformer.js';
|
|
6
|
-
import * as cache from '../../services/cache.js';
|
|
7
|
-
import { config } from '../../config/index.js';
|
|
8
|
-
import { logError } from '../../services/logger.js';
|
|
9
4
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
5
|
+
import { buildMetadata, shouldUseArticle, truncateContent, } from '../utils/common.js';
|
|
6
|
+
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
7
|
+
import { toJsonl } from '../../transformers/jsonl.transformer.js';
|
|
10
8
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
11
|
-
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks';
|
|
12
|
-
function
|
|
13
|
-
//
|
|
14
|
-
const { article, metadata: extractedMeta } = extractContent(html, url
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
: undefined;
|
|
28
|
-
return { contentBlocks, metadata, title: article.title };
|
|
29
|
-
}
|
|
30
|
-
// Fallback: use parsed HTML directly
|
|
31
|
-
const contentBlocks = parseHtml(html);
|
|
32
|
-
const metadata = options.includeMetadata && config.extraction.includeMetadata
|
|
33
|
-
? {
|
|
34
|
-
type: 'metadata',
|
|
35
|
-
title: extractedMeta.title,
|
|
36
|
-
description: extractedMeta.description,
|
|
37
|
-
author: extractedMeta.author,
|
|
38
|
-
url,
|
|
39
|
-
fetchedAt: new Date().toISOString(),
|
|
40
|
-
}
|
|
41
|
-
: undefined;
|
|
42
|
-
return { contentBlocks, metadata, title: extractedMeta.title };
|
|
9
|
+
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.';
|
|
10
|
+
function transformToJsonl(html, url, options) {
|
|
11
|
+
// Only invoke JSDOM when extractMainContent is true (lazy loading optimization)
|
|
12
|
+
const { article, metadata: extractedMeta } = extractContent(html, url, {
|
|
13
|
+
extractArticle: options.extractMainContent,
|
|
14
|
+
});
|
|
15
|
+
const useArticle = shouldUseArticle(options.extractMainContent, article);
|
|
16
|
+
const sourceHtml = useArticle ? article.content : html;
|
|
17
|
+
const contentBlocks = parseHtml(sourceHtml);
|
|
18
|
+
const metadata = buildMetadata(url, article, extractedMeta, useArticle, options.includeMetadata);
|
|
19
|
+
const title = useArticle ? article.title : extractedMeta.title;
|
|
20
|
+
return {
|
|
21
|
+
content: toJsonl(contentBlocks, metadata),
|
|
22
|
+
contentBlocks: contentBlocks.length,
|
|
23
|
+
title,
|
|
24
|
+
};
|
|
43
25
|
}
|
|
44
26
|
export async function fetchUrlToolHandler(input) {
|
|
27
|
+
if (!input.url) {
|
|
28
|
+
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
29
|
+
}
|
|
45
30
|
try {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
content: [
|
|
66
|
-
{
|
|
67
|
-
type: 'text',
|
|
68
|
-
text: JSON.stringify(structuredContent),
|
|
69
|
-
},
|
|
70
|
-
],
|
|
71
|
-
structuredContent,
|
|
72
|
-
};
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
const html = await fetchUrlWithRetry(url, input.customHeaders);
|
|
76
|
-
// Validate HTML content was received
|
|
77
|
-
if (!html) {
|
|
78
|
-
return createToolErrorResponse('No content received from URL', url, 'EMPTY_CONTENT');
|
|
79
|
-
}
|
|
80
|
-
const { contentBlocks, metadata, title } = extractContentFromHtml(html, url, {
|
|
81
|
-
extractMainContent: input.extractMainContent ?? true,
|
|
82
|
-
includeMetadata: input.includeMetadata ?? true,
|
|
31
|
+
const extractMainContent = input.extractMainContent ?? true;
|
|
32
|
+
const includeMetadata = input.includeMetadata ?? true;
|
|
33
|
+
logDebug('Fetching URL', {
|
|
34
|
+
url: input.url,
|
|
35
|
+
extractMainContent,
|
|
36
|
+
includeMetadata,
|
|
37
|
+
});
|
|
38
|
+
const result = await executeFetchPipeline({
|
|
39
|
+
url: input.url,
|
|
40
|
+
cacheNamespace: 'url',
|
|
41
|
+
customHeaders: input.customHeaders,
|
|
42
|
+
retries: input.retries,
|
|
43
|
+
transform: (html, url) => transformToJsonl(html, url, { extractMainContent, includeMetadata }),
|
|
44
|
+
serialize: (data) => data.content,
|
|
45
|
+
deserialize: (cached) => ({
|
|
46
|
+
content: cached,
|
|
47
|
+
contentBlocks: 0,
|
|
48
|
+
title: undefined,
|
|
49
|
+
}),
|
|
83
50
|
});
|
|
84
|
-
|
|
85
|
-
if (input.maxContentLength &&
|
|
86
|
-
input.maxContentLength > 0 &&
|
|
87
|
-
jsonlContent.length > input.maxContentLength) {
|
|
88
|
-
jsonlContent =
|
|
89
|
-
jsonlContent.substring(0, input.maxContentLength) + '\n...[truncated]';
|
|
90
|
-
}
|
|
91
|
-
// Cache the result
|
|
92
|
-
if (cacheKey) {
|
|
93
|
-
cache.set(cacheKey, jsonlContent);
|
|
94
|
-
}
|
|
51
|
+
const { content, truncated } = truncateContent(result.data.content, input.maxContentLength);
|
|
95
52
|
const structuredContent = {
|
|
96
|
-
url,
|
|
97
|
-
title,
|
|
98
|
-
contentBlocks: contentBlocks
|
|
99
|
-
fetchedAt:
|
|
53
|
+
url: result.url,
|
|
54
|
+
title: result.data.title,
|
|
55
|
+
contentBlocks: result.data.contentBlocks,
|
|
56
|
+
fetchedAt: result.fetchedAt,
|
|
100
57
|
format: 'jsonl',
|
|
101
|
-
content
|
|
102
|
-
cached:
|
|
58
|
+
content,
|
|
59
|
+
cached: result.fromCache,
|
|
60
|
+
...(truncated && { truncated }),
|
|
103
61
|
};
|
|
104
62
|
return {
|
|
105
63
|
content: [
|
|
106
64
|
{
|
|
107
65
|
type: 'text',
|
|
108
|
-
text: JSON.stringify(structuredContent, null, 2),
|
|
66
|
+
text: JSON.stringify(structuredContent, result.fromCache ? undefined : null, result.fromCache ? undefined : 2),
|
|
109
67
|
},
|
|
110
68
|
],
|
|
111
69
|
structuredContent,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-url.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-url.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAElE,MAAM,CAAC,MAAM,mBAAmB,GAAG,WAAW,CAAC;AAC/C,MAAM,CAAC,MAAM,0BAA0B,GACrC,0JAA0J,CAAC;AAE7J,SAAS,gBAAgB,CACvB,IAAY,EACZ,GAAW,EACX,OAAkE;IAElE,gFAAgF;IAChF,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IACH,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;IACzE,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACvD,MAAM,aAAa,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;IAC5C,MAAM,QAAQ,GAAG,aAAa,CAC5B,GAAG,EACH,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE/D,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC;QACzC,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,KAAK;KACN,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,KAAoB;IAC5D,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC;QACH,MAAM,kBAAkB,GAAG,KAAK,CAAC,kBAAkB,IAAI,IAAI,CAAC;QAC5D,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;QAEtD,QAAQ,CAAC,cAAc,EAAE;YACvB,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,kBAAkB;YAClB,eAAe;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAuB;YAC9D,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,KAAK;YACrB,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CACvB,gBAAgB,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,kBAAkB,EAAE,eAAe,EAAE,CAAC;YACtE,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO;YACjC,WAAW,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACxB,OAAO,EAAE,MAAM;gBACf,aAAa,EAAE,CAAC;gBAChB,KAAK,EAAE,SAAS;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,MAAM,CAAC,IAAI,CAAC,OAAO,EACnB,KAAK,CAAC,gBAAgB,CACvB,CAAC;QAEF,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,aAAa,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa;YACxC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,MAAM,EAAE,OAAgB;YACxB,OAAO;YACP,MAAM,EAAE,MAAM,CAAC,SAAS;YACxB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;SAChC,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB,iBAAiB,EACjB,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EACnC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CACjC;iBACF;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sBAAsB,EACtB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,qBAAqB,CAAC,CAAC;IAClE,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { FetchUrlsInput } from '../../config/types.js';
|
|
2
|
+
export declare const FETCH_URLS_TOOL_NAME = "fetch-urls";
|
|
3
|
+
export declare const FETCH_URLS_TOOL_DESCRIPTION = "Fetches multiple URLs in parallel and converts them to AI-readable format (JSONL or Markdown). Supports concurrency control and continues on individual failures.";
|
|
4
|
+
export declare function fetchUrlsToolHandler(input: FetchUrlsInput): Promise<import("../../config/types.js").ToolErrorResponse | import("../../config/types.js").ToolResponse<import("../../config/types.js").BatchResponseContent>>;
|
|
5
|
+
//# sourceMappingURL=fetch-urls.tool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-urls.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,cAAc,EAEf,MAAM,uBAAuB,CAAC;AAqB/B,eAAO,MAAM,oBAAoB,eAAe,CAAC;AACjD,eAAO,MAAM,2BAA2B,sKAC6H,CAAC;AAsGtK,wBAAsB,oBAAoB,CAAC,KAAK,EAAE,cAAc,mKAwG/D"}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import * as cache from '../../services/cache.js';
|
|
2
|
+
import { extractContent } from '../../services/extractor.js';
|
|
3
|
+
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
4
|
+
import { logDebug, logError, logWarn } from '../../services/logger.js';
|
|
5
|
+
import { parseHtml } from '../../services/parser.js';
|
|
6
|
+
import { runWithConcurrency } from '../../utils/concurrency.js';
|
|
7
|
+
import { createToolErrorResponse } from '../../utils/tool-error-handler.js';
|
|
8
|
+
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
9
|
+
import { buildMetadata, shouldUseArticle, truncateContent, } from '../utils/common.js';
|
|
10
|
+
import { createBatchResponse } from '../utils/response-builder.js';
|
|
11
|
+
import { toJsonl } from '../../transformers/jsonl.transformer.js';
|
|
12
|
+
import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
|
|
13
|
+
export const FETCH_URLS_TOOL_NAME = 'fetch-urls';
|
|
14
|
+
export const FETCH_URLS_TOOL_DESCRIPTION = 'Fetches multiple URLs in parallel and converts them to AI-readable format (JSONL or Markdown). Supports concurrency control and continues on individual failures.';
|
|
15
|
+
const MAX_URLS = 10;
|
|
16
|
+
const DEFAULT_CONCURRENCY = 3;
|
|
17
|
+
async function processSingleUrl(url, options) {
|
|
18
|
+
try {
|
|
19
|
+
const normalizedUrl = validateAndNormalizeUrl(url);
|
|
20
|
+
const cacheNamespace = options.format === 'markdown' ? 'markdown' : 'url';
|
|
21
|
+
const cacheKey = cache.createCacheKey(cacheNamespace, normalizedUrl);
|
|
22
|
+
if (cacheKey) {
|
|
23
|
+
const cached = cache.get(cacheKey);
|
|
24
|
+
if (cached) {
|
|
25
|
+
logDebug('Batch cache hit', { url: normalizedUrl });
|
|
26
|
+
return {
|
|
27
|
+
url: normalizedUrl,
|
|
28
|
+
success: true,
|
|
29
|
+
content: cached.content,
|
|
30
|
+
cached: true,
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
const fetchResult = await fetchUrlWithRetry(normalizedUrl);
|
|
35
|
+
// Only invoke JSDOM when extractMainContent is true (lazy loading optimization)
|
|
36
|
+
const { article, metadata: extractedMeta } = extractContent(fetchResult.html, normalizedUrl, {
|
|
37
|
+
extractArticle: options.extractMainContent,
|
|
38
|
+
});
|
|
39
|
+
const useArticle = shouldUseArticle(options.extractMainContent, article);
|
|
40
|
+
const metadata = buildMetadata(normalizedUrl, article, extractedMeta, useArticle, options.includeMetadata);
|
|
41
|
+
const sourceHtml = useArticle ? article.content : fetchResult.html;
|
|
42
|
+
const title = useArticle ? article.title : extractedMeta.title;
|
|
43
|
+
let content;
|
|
44
|
+
let contentBlocks;
|
|
45
|
+
if (options.format === 'markdown') {
|
|
46
|
+
content = htmlToMarkdown(sourceHtml, metadata);
|
|
47
|
+
}
|
|
48
|
+
else {
|
|
49
|
+
const blocks = parseHtml(sourceHtml);
|
|
50
|
+
contentBlocks = blocks.length;
|
|
51
|
+
content = toJsonl(blocks, metadata);
|
|
52
|
+
}
|
|
53
|
+
const { content: truncatedContent } = truncateContent(content, options.maxContentLength);
|
|
54
|
+
content = truncatedContent;
|
|
55
|
+
if (cacheKey)
|
|
56
|
+
cache.set(cacheKey, content);
|
|
57
|
+
return {
|
|
58
|
+
url: normalizedUrl,
|
|
59
|
+
success: true,
|
|
60
|
+
title,
|
|
61
|
+
content,
|
|
62
|
+
contentBlocks,
|
|
63
|
+
cached: false,
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
catch (error) {
|
|
67
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
68
|
+
const errorCode = error instanceof Error &&
|
|
69
|
+
'code' in error &&
|
|
70
|
+
typeof error.code === 'string'
|
|
71
|
+
? error.code
|
|
72
|
+
: 'FETCH_ERROR';
|
|
73
|
+
logWarn('Batch URL processing failed', { url, error: errorMessage });
|
|
74
|
+
return {
|
|
75
|
+
url,
|
|
76
|
+
success: false,
|
|
77
|
+
cached: false,
|
|
78
|
+
error: errorMessage,
|
|
79
|
+
errorCode,
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
export async function fetchUrlsToolHandler(input) {
|
|
84
|
+
try {
|
|
85
|
+
// Validate input - urls array is guaranteed by Zod schema but check for empty
|
|
86
|
+
if (input.urls.length === 0) {
|
|
87
|
+
return createToolErrorResponse('At least one URL is required', '', 'VALIDATION_ERROR');
|
|
88
|
+
}
|
|
89
|
+
// Enforce max URLs limit
|
|
90
|
+
if (input.urls.length > MAX_URLS) {
|
|
91
|
+
return createToolErrorResponse(`Maximum ${MAX_URLS} URLs allowed per batch`, '', 'VALIDATION_ERROR');
|
|
92
|
+
}
|
|
93
|
+
// Filter out empty URLs
|
|
94
|
+
const validUrls = input.urls.filter((url) => typeof url === 'string' && url.trim().length > 0);
|
|
95
|
+
if (validUrls.length === 0) {
|
|
96
|
+
return createToolErrorResponse('No valid URLs provided', '', 'VALIDATION_ERROR');
|
|
97
|
+
}
|
|
98
|
+
const concurrency = Math.min(Math.max(1, input.concurrency ?? DEFAULT_CONCURRENCY), 5);
|
|
99
|
+
const continueOnError = input.continueOnError ?? true;
|
|
100
|
+
const format = input.format ?? 'jsonl';
|
|
101
|
+
logDebug('Starting batch URL fetch', {
|
|
102
|
+
urlCount: validUrls.length,
|
|
103
|
+
concurrency,
|
|
104
|
+
format,
|
|
105
|
+
});
|
|
106
|
+
// Create tasks for each URL
|
|
107
|
+
const tasks = validUrls.map((url) => async () => processSingleUrl(url, {
|
|
108
|
+
extractMainContent: input.extractMainContent ?? true,
|
|
109
|
+
includeMetadata: input.includeMetadata ?? true,
|
|
110
|
+
maxContentLength: input.maxContentLength,
|
|
111
|
+
format,
|
|
112
|
+
}));
|
|
113
|
+
// Execute with concurrency control
|
|
114
|
+
const settledResults = await runWithConcurrency(concurrency, tasks);
|
|
115
|
+
// Process results
|
|
116
|
+
const results = settledResults.map((result, index) => {
|
|
117
|
+
if (result.status === 'fulfilled') {
|
|
118
|
+
return result.value;
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
// Promise rejection (shouldn't happen as processSingleUrl catches errors)
|
|
122
|
+
const reason = result.reason;
|
|
123
|
+
const errorMessage = reason instanceof Error ? reason.message : String(reason);
|
|
124
|
+
return {
|
|
125
|
+
url: validUrls[index] ?? 'unknown',
|
|
126
|
+
success: false,
|
|
127
|
+
cached: false,
|
|
128
|
+
error: errorMessage,
|
|
129
|
+
errorCode: 'PROMISE_REJECTED',
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
});
|
|
133
|
+
// Check if we should fail fast on errors
|
|
134
|
+
if (!continueOnError) {
|
|
135
|
+
const firstError = results.find((r) => !r.success);
|
|
136
|
+
if (firstError) {
|
|
137
|
+
return createToolErrorResponse(`Batch failed: ${firstError.error}`, firstError.url, firstError.errorCode ?? 'BATCH_ERROR');
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
return createBatchResponse(results);
|
|
141
|
+
}
|
|
142
|
+
catch (error) {
|
|
143
|
+
logError('fetch-urls tool error', error instanceof Error ? error : undefined);
|
|
144
|
+
return createToolErrorResponse(error instanceof Error ? error.message : 'Failed to fetch URLs', '', 'BATCH_ERROR');
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
//# sourceMappingURL=fetch-urls.tool.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-urls.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AACvE,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mCAAmC,CAAC;AAC5E,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAEnE,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,MAAM,CAAC,MAAM,oBAAoB,GAAG,YAAY,CAAC;AACjD,MAAM,CAAC,MAAM,2BAA2B,GACtC,mKAAmK,CAAC;AAEtK,MAAM,QAAQ,GAAG,EAAE,CAAC;AACpB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAS9B,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,OAAuB;IAEvB,IAAI,CAAC;QACH,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;QACnD,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1E,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;QAErE,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,QAAQ,CAAC,iBAAiB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;gBACpD,OAAO;oBACL,GAAG,EAAE,aAAa;oBAClB,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,MAAM,EAAE,IAAI;iBACb,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,aAAa,CAAC,CAAC;QAE3D,gFAAgF;QAChF,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CACzD,WAAW,CAAC,IAAI,EAChB,aAAa,EACb;YACE,cAAc,EAAE,OAAO,CAAC,kBAAkB;SAC3C,CACF,CAAC;QACF,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;QACzE,MAAM,QAAQ,GAAG,aAAa,CAC5B,aAAa,EACb,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;QACF,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC;QACnE,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;QAE/D,IAAI,OAAe,CAAC;QACpB,IAAI,aAAiC,CAAC;QAEtC,IAAI,OAAO,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAClC,OAAO,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;YACrC,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC;YAC9B,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QACtC,CAAC;QAED,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,GAAG,eAAe,CACnD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;QACF,OAAO,GAAG,gBAAgB,CAAC;QAC3B,IAAI,QAAQ;YAAE,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE3C,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,IAAI;YACb,KAAK;YACL,OAAO;YACP,aAAa;YACb,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QAC3D,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;YACtB,MAAM,IAAI,KAAK;YACf,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ;YAC5B,CAAC,CAAC,KAAK,CAAC,IAAI;YACZ,CAAC,CAAC,aAAa,CAAC;QAEpB,OAAO,CAAC,6BAA6B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC;QACrE,OAAO;YACL,GAAG;YACH,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,YAAY;YACnB,SAAS;SACV,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAqB;IAC9D,IAAI,CAAC;QACH,8EAA8E;QAC9E,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,uBAAuB,CAC5B,8BAA8B,EAC9B,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,yBAAyB;QACzB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;YACjC,OAAO,uBAAuB,CAC5B,WAAW,QAAQ,yBAAyB,EAC5C,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CACjC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAC1D,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,uBAAuB,CAC5B,wBAAwB,EACxB,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,WAAW,IAAI,mBAAmB,CAAC,EACrD,CAAC,CACF,CAAC;QACF,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;QACtD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC;QAEvC,QAAQ,CAAC,0BAA0B,EAAE;YACnC,QAAQ,EAAE,SAAS,CAAC,MAAM;YAC1B,WAAW;YACX,MAAM;SACP,CAAC,CAAC;QAEH,4BAA4B;QAC5B,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CACzB,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE,CAClB,gBAAgB,CAAC,GAAG,EAAE;YACpB,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;YACxC,MAAM;SACP,CAAC,CACL,CAAC;QAEF,mCAAmC;QACnC,MAAM,cAAc,GAAG,MAAM,kBAAkB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;QAEpE,kBAAkB;QAClB,MAAM,OAAO,GAAqB,cAAc,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YACrE,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClC,OAAO,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC;iBAAM,CAAC;gBACN,0EAA0E;gBAC1E,MAAM,MAAM,GAAY,MAAM,CAAC,MAAM,CAAC;gBACtC,MAAM,YAAY,GAChB,MAAM,YAAY,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC5D,OAAO;oBACL,GAAG,EAAE,SAAS,CAAC,KAAK,CAAC,IAAI,SAAS;oBAClC,OAAO,EAAE,KAAc;oBACvB,MAAM,EAAE,KAAc;oBACtB,KAAK,EAAE,YAAY;oBACnB,SAAS,EAAE,kBAAkB;iBAC9B,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,yCAAyC;QACzC,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACnD,IAAI,UAAU,EAAE,CAAC;gBACf,OAAO,uBAAuB,CAC5B,iBAAiB,UAAU,CAAC,KAAK,EAAE,EACnC,UAAU,CAAC,GAAG,EACd,UAAU,CAAC,SAAS,IAAI,aAAa,CACtC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,mBAAmB,CAAC,OAAO,CAAC,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,uBAAuB,EACvB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QAEF,OAAO,uBAAuB,CAC5B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB,EAC/D,EAAE,EACF,aAAa,CACd,CAAC;IACJ,CAAC;AACH,CAAC"}
|
package/dist/tools/index.d.ts
CHANGED
|
@@ -1,7 +1,3 @@
|
|
|
1
1
|
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
-
/**
|
|
3
|
-
* Registers all tools with the MCP server using the modern McpServer API
|
|
4
|
-
* Tools are registered with Zod schemas for automatic validation
|
|
5
|
-
*/
|
|
6
2
|
export declare function registerTools(server: McpServer): void;
|
|
7
3
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAsQzE,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CA4CrD"}
|