@j0hanz/superfetch 1.0.2 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +345 -57
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +256 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +6 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +7 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +75 -62
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +4 -12
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +22 -47
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +2 -10
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/cached-content.d.ts +5 -0
- package/dist/resources/cached-content.d.ts.map +1 -0
- package/dist/resources/cached-content.js +93 -0
- package/dist/resources/cached-content.js.map +1 -0
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +40 -5
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +20 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +128 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +194 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +12 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +60 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +13 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +143 -54
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +57 -27
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +6 -18
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +6 -10
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +83 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +6 -12
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +12 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +184 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +78 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +99 -5
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +6 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +38 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +238 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +4 -12
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +34 -6
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +81 -79
|
@@ -1,104 +1,129 @@
|
|
|
1
|
-
import { validateAndNormalizeUrl, isInternalUrl, } from '../../utils/url-validator.js';
|
|
2
|
-
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
|
-
import * as cache from '../../services/cache.js';
|
|
4
1
|
import * as cheerio from 'cheerio';
|
|
5
|
-
import { logError } from '../../services/logger.js';
|
|
2
|
+
import { logDebug, logError } from '../../services/logger.js';
|
|
6
3
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
4
|
+
import { isInternalUrl } from '../../utils/url-validator.js';
|
|
5
|
+
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
7
6
|
export const FETCH_LINKS_TOOL_NAME = 'fetch-links';
|
|
8
|
-
export const FETCH_LINKS_TOOL_DESCRIPTION = 'Extracts all hyperlinks from a webpage with anchor text and type classification';
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
export const FETCH_LINKS_TOOL_DESCRIPTION = 'Extracts all hyperlinks from a webpage with anchor text and type classification. Supports filtering, image links, and link limits.';
|
|
8
|
+
function tryResolveUrl(href, baseUrl) {
|
|
9
|
+
try {
|
|
10
|
+
return new URL(href, baseUrl).href;
|
|
11
|
+
}
|
|
12
|
+
catch {
|
|
13
|
+
return null;
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
function shouldIncludeLink(type, url, options, seen) {
|
|
17
|
+
if (seen.has(url))
|
|
18
|
+
return false;
|
|
19
|
+
if (options.filterPattern && !options.filterPattern.test(url))
|
|
20
|
+
return false;
|
|
21
|
+
if (type === 'internal' && !options.includeInternal)
|
|
22
|
+
return false;
|
|
23
|
+
if (type === 'external' && !options.includeExternal)
|
|
24
|
+
return false;
|
|
25
|
+
return true;
|
|
26
|
+
}
|
|
27
|
+
function extractLinks(html, baseUrl, options) {
|
|
13
28
|
const $ = cheerio.load(html);
|
|
14
29
|
const links = [];
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
|
|
30
|
+
const seen = new Set();
|
|
31
|
+
let filtered = 0;
|
|
32
|
+
$('a[href]').each((_, el) => {
|
|
33
|
+
const href = $(el).attr('href');
|
|
34
|
+
if (!href || href.startsWith('#') || href.startsWith('javascript:'))
|
|
35
|
+
return;
|
|
36
|
+
const url = tryResolveUrl(href, baseUrl);
|
|
37
|
+
if (!url)
|
|
38
|
+
return;
|
|
39
|
+
const type = isInternalUrl(url, baseUrl)
|
|
40
|
+
? 'internal'
|
|
41
|
+
: 'external';
|
|
42
|
+
if (!shouldIncludeLink(type, url, options, seen)) {
|
|
43
|
+
if (!seen.has(url))
|
|
44
|
+
filtered++;
|
|
21
45
|
return;
|
|
22
46
|
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
47
|
+
seen.add(url);
|
|
48
|
+
links.push({ href: url, text: $(el).text().trim() || url, type });
|
|
49
|
+
});
|
|
50
|
+
if (options.includeImages) {
|
|
51
|
+
$('img[src]').each((_, el) => {
|
|
52
|
+
const src = $(el).attr('src');
|
|
53
|
+
if (!src || src.startsWith('data:'))
|
|
27
54
|
return;
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const type = isInternalUrl(absoluteUrl, baseUrl)
|
|
31
|
-
? 'internal'
|
|
32
|
-
: 'external';
|
|
33
|
-
// Filter based on options
|
|
34
|
-
if (type === 'internal' && !options.includeInternal)
|
|
55
|
+
const url = tryResolveUrl(src, baseUrl);
|
|
56
|
+
if (!url)
|
|
35
57
|
return;
|
|
36
|
-
if (
|
|
58
|
+
if (!shouldIncludeLink('image', url, options, seen)) {
|
|
59
|
+
if (!seen.has(url))
|
|
60
|
+
filtered++;
|
|
37
61
|
return;
|
|
62
|
+
}
|
|
63
|
+
seen.add(url);
|
|
38
64
|
links.push({
|
|
39
|
-
href:
|
|
40
|
-
text:
|
|
41
|
-
type,
|
|
65
|
+
href: url,
|
|
66
|
+
text: $(el).attr('alt')?.trim() ?? url,
|
|
67
|
+
type: 'image',
|
|
42
68
|
});
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
const truncated = options.maxLinks ? links.length > options.maxLinks : false;
|
|
72
|
+
const resultLinks = truncated ? links.slice(0, options.maxLinks) : links;
|
|
73
|
+
return {
|
|
74
|
+
links: resultLinks,
|
|
75
|
+
linkCount: resultLinks.length,
|
|
76
|
+
filtered,
|
|
77
|
+
truncated,
|
|
78
|
+
};
|
|
49
79
|
}
|
|
50
|
-
/**
|
|
51
|
-
* Tool handler for extracting links from a URL
|
|
52
|
-
*/
|
|
53
80
|
export async function fetchLinksToolHandler(input) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
// Check cache first
|
|
62
|
-
if (cacheKey) {
|
|
63
|
-
const cached = cache.get(cacheKey);
|
|
64
|
-
if (cached) {
|
|
65
|
-
// Parse the cached content to return as structuredContent
|
|
66
|
-
try {
|
|
67
|
-
const structuredContent = JSON.parse(cached.content);
|
|
68
|
-
return {
|
|
69
|
-
content: [{ type: 'text', text: cached.content }],
|
|
70
|
-
structuredContent,
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
catch {
|
|
74
|
-
return {
|
|
75
|
-
content: [{ type: 'text', text: cached.content }],
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
}
|
|
81
|
+
if (!input.url) {
|
|
82
|
+
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
83
|
+
}
|
|
84
|
+
let filterPattern;
|
|
85
|
+
if (input.filterPattern) {
|
|
86
|
+
try {
|
|
87
|
+
filterPattern = new RegExp(input.filterPattern, 'i');
|
|
79
88
|
}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if (!html) {
|
|
83
|
-
return createToolErrorResponse('No content received from URL', url, 'EMPTY_CONTENT');
|
|
89
|
+
catch {
|
|
90
|
+
return createToolErrorResponse(`Invalid filter pattern: ${input.filterPattern}`, input.url, 'VALIDATION_ERROR');
|
|
84
91
|
}
|
|
85
|
-
|
|
86
|
-
|
|
92
|
+
}
|
|
93
|
+
try {
|
|
94
|
+
const options = {
|
|
87
95
|
includeInternal: input.includeInternal ?? true,
|
|
88
96
|
includeExternal: input.includeExternal ?? true,
|
|
97
|
+
includeImages: input.includeImages ?? false,
|
|
98
|
+
maxLinks: input.maxLinks,
|
|
99
|
+
filterPattern,
|
|
100
|
+
};
|
|
101
|
+
logDebug('Extracting links', {
|
|
102
|
+
url: input.url,
|
|
103
|
+
...options,
|
|
104
|
+
filterPattern: input.filterPattern,
|
|
105
|
+
});
|
|
106
|
+
const result = await executeFetchPipeline({
|
|
107
|
+
url: input.url,
|
|
108
|
+
cacheNamespace: 'links',
|
|
109
|
+
customHeaders: input.customHeaders,
|
|
110
|
+
retries: input.retries,
|
|
111
|
+
transform: (html, url) => extractLinks(html, url, options),
|
|
89
112
|
});
|
|
90
113
|
const structuredContent = {
|
|
91
|
-
url,
|
|
92
|
-
linkCount:
|
|
93
|
-
links,
|
|
114
|
+
url: result.url,
|
|
115
|
+
linkCount: result.data.linkCount,
|
|
116
|
+
links: result.data.links,
|
|
117
|
+
...(result.data.filtered > 0 && { filtered: result.data.filtered }),
|
|
118
|
+
...(result.data.truncated && { truncated: result.data.truncated }),
|
|
94
119
|
};
|
|
95
|
-
const outputText = JSON.stringify(structuredContent, null, 2);
|
|
96
|
-
// Cache the result
|
|
97
|
-
if (cacheKey) {
|
|
98
|
-
cache.set(cacheKey, outputText);
|
|
99
|
-
}
|
|
100
120
|
return {
|
|
101
|
-
content: [
|
|
121
|
+
content: [
|
|
122
|
+
{
|
|
123
|
+
type: 'text',
|
|
124
|
+
text: JSON.stringify(structuredContent, null, 2),
|
|
125
|
+
},
|
|
126
|
+
],
|
|
102
127
|
structuredContent,
|
|
103
128
|
};
|
|
104
129
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,
|
|
1
|
+
{"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AASnC,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAE9D,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,MAAM,CAAC,MAAM,qBAAqB,GAAG,aAAa,CAAC;AACnD,MAAM,CAAC,MAAM,4BAA4B,GACvC,oIAAoI,CAAC;AAIvI,SAAS,aAAa,CAAC,IAAY,EAAE,OAAe;IAClD,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CACxB,IAAc,EACd,GAAW,EACX,OAA4B,EAC5B,IAAiB;IAEjB,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,OAAO,CAAC,aAAa,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAC5E,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;QAAE,OAAO,KAAK,CAAC;IAClE,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;QAAE,OAAO,KAAK,CAAC;IAClE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,YAAY,CACnB,IAAY,EACZ,OAAe,EACf,OAA4B;IAE5B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAAE,OAAO;QAE5E,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACzC,IAAI,CAAC,GAAG;YAAE,OAAO;QAEjB,MAAM,IAAI,GAAa,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC;YAChD,CAAC,CAAC,UAAU;YACZ,CAAC,CAAC,UAAU,CAAC;QACf,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,QAAQ,EAAE,CAAC;YAC/B,OAAO;QACT,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC9B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,OAAO;YAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACxC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;gBACpD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,QAAQ,EAAE,CAAC;gBAC/B,OAAO;YACT,CAAC;YAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,KAAK,CAAC,IAAI,CAAC;gBACT,IAAI,EAAE,GAAG;gBACT,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG;gBACtC,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAC7E,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEzE,OAAO;QACL,KAAK,EAAE,WAAW;QAClB,SAAS,EAAE,WAAW,CAAC,MAAM;QAC7B,QAAQ;QACR,SAAS;KACV,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAsB;IAKhE,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,aAAiC,CAAC;IACtC,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC;QACxB,IAAI,CAAC;YACH,aAAa,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;QACvD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,uBAAuB,CAC5B,2BAA2B,KAAK,CAAC,aAAa,EAAE,EAChD,KAAK,CAAC,GAAG,EACT,kBAAkB,CACnB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAwB;YACnC,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,KAAK;YAC3C,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,aAAa;SACd,CAAC;QAEF,QAAQ,CAAC,kBAAkB,EAAE;YAC3B,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,GAAG,OAAO;YACV,aAAa,EAAE,KAAK,CAAC,aAAa;SACnC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAuB;YAC9D,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,OAAO;YACvB,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC;SAC3D,CAAC,CAAC;QAEH,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS;YAChC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnE,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;SACnE,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;iBACjD;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,wBAAwB,EACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,yBAAyB,CAAC,CAAC;IACtE,CAAC;AACH,CAAC"}
|
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
import type { FetchMarkdownInput } from '../../types
|
|
1
|
+
import type { FetchMarkdownInput } from '../../config/types.js';
|
|
2
2
|
export declare const FETCH_MARKDOWN_TOOL_NAME = "fetch-markdown";
|
|
3
|
-
export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter";
|
|
4
|
-
export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<
|
|
3
|
+
export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter, table of contents, and content length limits";
|
|
4
|
+
export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<{
|
|
5
5
|
content: {
|
|
6
|
-
type:
|
|
6
|
+
type: 'text';
|
|
7
7
|
text: string;
|
|
8
8
|
}[];
|
|
9
|
-
structuredContent
|
|
10
|
-
|
|
11
|
-
cached: boolean;
|
|
12
|
-
fetchedAt: string;
|
|
13
|
-
markdown: string;
|
|
14
|
-
};
|
|
9
|
+
structuredContent?: Record<string, unknown>;
|
|
10
|
+
isError?: boolean;
|
|
15
11
|
}>;
|
|
16
12
|
//# sourceMappingURL=fetch-markdown.tool.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAInB,MAAM,uBAAuB,CAAC;AAe/B,eAAO,MAAM,wBAAwB,mBAAmB,CAAC;AACzD,eAAO,MAAM,+BAA+B,uIAC0F,CAAC;AAsEvI,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,kBAAkB,GACxB,OAAO,CAAC;IACT,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC1C,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC,CA4DD"}
|
|
@@ -1,103 +1,102 @@
|
|
|
1
|
-
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
2
|
-
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
1
|
import { extractContent } from '../../services/extractor.js';
|
|
4
|
-
import {
|
|
5
|
-
import
|
|
6
|
-
import { config } from '../../config/index.js';
|
|
7
|
-
import { logError } from '../../services/logger.js';
|
|
2
|
+
import { logDebug, logError } from '../../services/logger.js';
|
|
3
|
+
import { stripMarkdownLinks } from '../../utils/content-cleaner.js';
|
|
8
4
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
5
|
+
import { buildMetadata, shouldUseArticle } from '../utils/common.js';
|
|
6
|
+
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
7
|
+
import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
|
|
9
8
|
export const FETCH_MARKDOWN_TOOL_NAME = 'fetch-markdown';
|
|
10
|
-
export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter';
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
9
|
+
export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter, table of contents, and content length limits';
|
|
10
|
+
/**
|
|
11
|
+
* Generate URL-friendly slug from text
|
|
12
|
+
* Strips markdown link syntax before slugifying
|
|
13
|
+
*/
|
|
14
|
+
function slugify(text) {
|
|
15
|
+
// First strip markdown links: [Text](#anchor) -> Text
|
|
16
|
+
const cleanText = stripMarkdownLinks(text);
|
|
17
|
+
return cleanText
|
|
18
|
+
.toLowerCase()
|
|
19
|
+
.replace(/[^\w\s-]/g, '')
|
|
20
|
+
.replace(/\s+/g, '-')
|
|
21
|
+
.replace(/--+/g, '-')
|
|
22
|
+
.trim();
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Extract table of contents from markdown headings
|
|
26
|
+
* Returns clean text without markdown link syntax
|
|
27
|
+
*/
|
|
28
|
+
function extractToc(markdown) {
|
|
29
|
+
const headingRegex = /^(#{1,6})\s+(.+)$/gm;
|
|
30
|
+
const toc = [];
|
|
31
|
+
let match;
|
|
32
|
+
while ((match = headingRegex.exec(markdown)) !== null) {
|
|
33
|
+
if (!match[1] || !match[2])
|
|
34
|
+
continue;
|
|
35
|
+
const rawText = match[2].trim();
|
|
36
|
+
// Clean markdown links from TOC text: [Usage](#usage) -> Usage
|
|
37
|
+
const text = stripMarkdownLinks(rawText);
|
|
38
|
+
toc.push({ level: match[1].length, text, slug: slugify(rawText) });
|
|
30
39
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
return toc;
|
|
41
|
+
}
|
|
42
|
+
function transformToMarkdown(html, url, options) {
|
|
43
|
+
// Only invoke JSDOM when extractMainContent is true (lazy loading optimization)
|
|
44
|
+
const { article, metadata: extractedMeta } = extractContent(html, url, {
|
|
45
|
+
extractArticle: options.extractMainContent,
|
|
46
|
+
});
|
|
47
|
+
const useArticle = shouldUseArticle(options.extractMainContent, article);
|
|
48
|
+
const metadata = buildMetadata(url, article, extractedMeta, useArticle, options.includeMetadata);
|
|
49
|
+
const sourceHtml = useArticle ? article.content : html;
|
|
50
|
+
const title = useArticle ? article.title : extractedMeta.title;
|
|
51
|
+
let markdown = htmlToMarkdown(sourceHtml, metadata);
|
|
52
|
+
const toc = options.generateToc ? extractToc(markdown) : undefined;
|
|
53
|
+
let truncated = false;
|
|
54
|
+
if (options.maxContentLength && markdown.length > options.maxContentLength) {
|
|
55
|
+
markdown = `${markdown.substring(0, options.maxContentLength)}\n\n...[truncated]`;
|
|
56
|
+
truncated = true;
|
|
57
|
+
}
|
|
58
|
+
return { markdown, title, toc, truncated };
|
|
46
59
|
}
|
|
47
60
|
export async function fetchMarkdownToolHandler(input) {
|
|
61
|
+
if (!input.url) {
|
|
62
|
+
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
63
|
+
}
|
|
48
64
|
try {
|
|
49
|
-
|
|
50
|
-
if (!input.url) {
|
|
51
|
-
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
52
|
-
}
|
|
53
|
-
const url = validateAndNormalizeUrl(input.url);
|
|
54
|
-
const cacheKey = cache.createCacheKey('markdown', url);
|
|
55
|
-
// Check cache first
|
|
56
|
-
if (cacheKey) {
|
|
57
|
-
const cached = cache.get(cacheKey);
|
|
58
|
-
if (cached) {
|
|
59
|
-
const structuredContent = {
|
|
60
|
-
url,
|
|
61
|
-
cached: true,
|
|
62
|
-
fetchedAt: cached.fetchedAt,
|
|
63
|
-
markdown: cached.content,
|
|
64
|
-
};
|
|
65
|
-
return {
|
|
66
|
-
content: [
|
|
67
|
-
{
|
|
68
|
-
type: 'text',
|
|
69
|
-
text: JSON.stringify(structuredContent),
|
|
70
|
-
},
|
|
71
|
-
],
|
|
72
|
-
structuredContent,
|
|
73
|
-
};
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
const html = await fetchUrlWithRetry(url);
|
|
77
|
-
// Validate HTML content was received
|
|
78
|
-
if (!html) {
|
|
79
|
-
return createToolErrorResponse('No content received from URL', url, 'EMPTY_CONTENT');
|
|
80
|
-
}
|
|
81
|
-
const { markdown, title } = extractAndConvertToMarkdown(html, url, {
|
|
65
|
+
const options = {
|
|
82
66
|
extractMainContent: input.extractMainContent ?? true,
|
|
83
67
|
includeMetadata: input.includeMetadata ?? true,
|
|
68
|
+
generateToc: input.generateToc ?? false,
|
|
69
|
+
maxContentLength: input.maxContentLength,
|
|
70
|
+
};
|
|
71
|
+
logDebug('Fetching markdown', { url: input.url, ...options });
|
|
72
|
+
const result = await executeFetchPipeline({
|
|
73
|
+
url: input.url,
|
|
74
|
+
cacheNamespace: 'markdown',
|
|
75
|
+
customHeaders: input.customHeaders,
|
|
76
|
+
retries: input.retries,
|
|
77
|
+
transform: (html, url) => transformToMarkdown(html, url, options),
|
|
78
|
+
serialize: (data) => data.markdown,
|
|
79
|
+
deserialize: (cached) => ({
|
|
80
|
+
markdown: cached,
|
|
81
|
+
title: undefined,
|
|
82
|
+
toc: undefined,
|
|
83
|
+
truncated: false,
|
|
84
|
+
}),
|
|
84
85
|
});
|
|
85
|
-
// Cache the result
|
|
86
|
-
if (cacheKey) {
|
|
87
|
-
cache.set(cacheKey, markdown);
|
|
88
|
-
}
|
|
89
86
|
const structuredContent = {
|
|
90
|
-
url,
|
|
91
|
-
title,
|
|
92
|
-
fetchedAt:
|
|
93
|
-
markdown,
|
|
94
|
-
|
|
87
|
+
url: result.url,
|
|
88
|
+
title: result.data.title,
|
|
89
|
+
fetchedAt: result.fetchedAt,
|
|
90
|
+
markdown: result.data.markdown,
|
|
91
|
+
...(result.data.toc && { toc: result.data.toc }),
|
|
92
|
+
cached: result.fromCache,
|
|
93
|
+
...(result.data.truncated && { truncated: result.data.truncated }),
|
|
95
94
|
};
|
|
96
95
|
return {
|
|
97
96
|
content: [
|
|
98
97
|
{
|
|
99
98
|
type: 'text',
|
|
100
|
-
text: JSON.stringify(structuredContent, null, 2),
|
|
99
|
+
text: JSON.stringify(structuredContent, result.fromCache ? undefined : null, result.fromCache ? undefined : 2),
|
|
101
100
|
},
|
|
102
101
|
],
|
|
103
102
|
structuredContent,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-markdown.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-markdown.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAE9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,MAAM,CAAC,MAAM,wBAAwB,GAAG,gBAAgB,CAAC;AACzD,MAAM,CAAC,MAAM,+BAA+B,GAC1C,oIAAoI,CAAC;AAEvI;;;GAGG;AACH,SAAS,OAAO,CAAC,IAAY;IAC3B,sDAAsD;IACtD,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAE3C,OAAO,SAAS;SACb,WAAW,EAAE;SACb,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC;SACxB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,QAAgB;IAClC,MAAM,YAAY,GAAG,qBAAqB,CAAC;IAC3C,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;YAAE,SAAS;QACrC,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,+DAA+D;QAC/D,MAAM,IAAI,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAC1B,IAAY,EACZ,GAAW,EACX,OAAyB;IAEzB,gFAAgF;IAChF,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IACH,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;IACzE,MAAM,QAAQ,GAAG,aAAa,CAC5B,GAAG,EACH,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACvD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE/D,IAAI,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IACpD,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAEnE,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,IAAI,OAAO,CAAC,gBAAgB,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;QAC3E,QAAQ,GAAG,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC,oBAAoB,CAAC;QAClF,SAAS,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;AAC7C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,KAAyB;IAMzB,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAqB;YAChC,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,KAAK;YACvC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;SACzC,CAAC;QAEF,QAAQ,CAAC,mBAAmB,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;QAE9D,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAA0B;YACjE,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,UAAU;YAC1B,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC;YACjE,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ;YAClC,WAAW,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACxB,QAAQ,EAAE,MAAM;gBAChB,KAAK,EAAE,SAAS;gBAChB,GAAG,EAAE,SAAS;gBACd,SAAS,EAAE,KAAK;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ;YAC9B,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;YAChD,MAAM,EAAE,MAAM,CAAC,SAAS;YACxB,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;SACnE,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB,iBAAiB,EACjB,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EACnC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CACjC;iBACF;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,0BAA0B,CAAC,CAAC;IACvE,CAAC;AACH,CAAC"}
|
|
@@ -1,18 +1,12 @@
|
|
|
1
|
-
import type { FetchUrlInput } from '../../types
|
|
1
|
+
import type { FetchUrlInput } from '../../config/types.js';
|
|
2
2
|
export declare const FETCH_URL_TOOL_NAME = "fetch-url";
|
|
3
|
-
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks";
|
|
4
|
-
export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<
|
|
3
|
+
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.";
|
|
4
|
+
export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<{
|
|
5
5
|
content: {
|
|
6
|
-
type:
|
|
6
|
+
type: 'text';
|
|
7
7
|
text: string;
|
|
8
8
|
}[];
|
|
9
|
-
structuredContent
|
|
10
|
-
|
|
11
|
-
cached: boolean;
|
|
12
|
-
fetchedAt: string;
|
|
13
|
-
content: string;
|
|
14
|
-
format: "jsonl";
|
|
15
|
-
contentBlocks: number;
|
|
16
|
-
};
|
|
9
|
+
structuredContent?: Record<string, unknown>;
|
|
10
|
+
isError?: boolean;
|
|
17
11
|
}>;
|
|
18
12
|
//# sourceMappingURL=fetch-url.tool.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-url.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-url.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,aAAa,EAEd,MAAM,uBAAuB,CAAC;AAmB/B,eAAO,MAAM,mBAAmB,cAAc,CAAC;AAC/C,eAAO,MAAM,0BAA0B,6JACqH,CAAC;AA8B7J,wBAAsB,mBAAmB,CAAC,KAAK,EAAE,aAAa,GAAG,OAAO,CAAC;IACvE,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC1C,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC,CAkED"}
|