@j0hanz/superfetch 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -30
- package/dist/config/formatting.d.ts +9 -0
- package/dist/config/formatting.d.ts.map +1 -0
- package/dist/config/formatting.js +11 -0
- package/dist/config/formatting.js.map +1 -0
- package/dist/config/index.d.ts +11 -2
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +35 -13
- package/dist/config/index.js.map +1 -1
- package/dist/config/types/content.d.ts +107 -0
- package/dist/config/types/content.d.ts.map +1 -0
- package/dist/config/types/content.js +2 -0
- package/dist/config/types/content.js.map +1 -0
- package/dist/config/types/runtime.d.ts +78 -0
- package/dist/config/types/runtime.d.ts.map +1 -0
- package/dist/config/types/runtime.js +2 -0
- package/dist/config/types/runtime.js.map +1 -0
- package/dist/config/types/tools.d.ts +99 -0
- package/dist/config/types/tools.d.ts.map +1 -0
- package/dist/config/types/tools.js +2 -0
- package/dist/config/types/tools.js.map +1 -0
- package/dist/config/types.d.ts +3 -297
- package/dist/config/types.d.ts.map +1 -1
- package/dist/http/auth.d.ts +3 -0
- package/dist/http/auth.d.ts.map +1 -0
- package/dist/http/auth.js +34 -0
- package/dist/http/auth.js.map +1 -0
- package/dist/http/cors.d.ts +8 -0
- package/dist/http/cors.d.ts.map +1 -0
- package/dist/http/cors.js +47 -0
- package/dist/http/cors.js.map +1 -0
- package/dist/http/mcp-routes.d.ts +5 -0
- package/dist/http/mcp-routes.d.ts.map +1 -0
- package/dist/http/mcp-routes.js +110 -0
- package/dist/http/mcp-routes.js.map +1 -0
- package/dist/http/mcp-session.d.ts +12 -0
- package/dist/http/mcp-session.d.ts.map +1 -0
- package/dist/http/mcp-session.js +209 -0
- package/dist/http/mcp-session.js.map +1 -0
- package/dist/http/mcp-validation.d.ts +3 -0
- package/dist/http/mcp-validation.d.ts.map +1 -0
- package/dist/http/mcp-validation.js +34 -0
- package/dist/http/mcp-validation.js.map +1 -0
- package/dist/http/rate-limit.d.ts +13 -0
- package/dist/http/rate-limit.d.ts.map +1 -0
- package/dist/http/rate-limit.js +91 -0
- package/dist/http/rate-limit.js.map +1 -0
- package/dist/http/server.d.ts +4 -0
- package/dist/http/server.d.ts.map +1 -0
- package/dist/http/server.js +183 -0
- package/dist/http/server.js.map +1 -0
- package/dist/http/sessions.d.ts +15 -0
- package/dist/http/sessions.d.ts.map +1 -0
- package/dist/http/sessions.js +64 -0
- package/dist/http/sessions.js.map +1 -0
- package/dist/index.js +26 -341
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +2 -2
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +46 -15
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/resources/cached-content.d.ts.map +1 -1
- package/dist/resources/cached-content.js +104 -44
- package/dist/resources/cached-content.js.map +1 -1
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +77 -69
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +9 -3
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +13 -1
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +89 -16
- package/dist/services/cache.js.map +1 -1
- package/dist/services/context.d.ts +1 -1
- package/dist/services/context.d.ts.map +1 -1
- package/dist/services/context.js +1 -1
- package/dist/services/context.js.map +1 -1
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +122 -87
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher/agents.d.ts +4 -0
- package/dist/services/fetcher/agents.d.ts.map +1 -0
- package/dist/services/fetcher/agents.js +111 -0
- package/dist/services/fetcher/agents.js.map +1 -0
- package/dist/services/fetcher/errors.d.ts +5 -0
- package/dist/services/fetcher/errors.d.ts.map +1 -0
- package/dist/services/fetcher/errors.js +71 -0
- package/dist/services/fetcher/errors.js.map +1 -0
- package/dist/services/fetcher/headers.d.ts +2 -0
- package/dist/services/fetcher/headers.d.ts.map +1 -0
- package/dist/services/fetcher/headers.js +28 -0
- package/dist/services/fetcher/headers.js.map +1 -0
- package/dist/services/fetcher/interceptors.d.ts +10 -0
- package/dist/services/fetcher/interceptors.d.ts.map +1 -0
- package/dist/services/fetcher/interceptors.js +82 -0
- package/dist/services/fetcher/interceptors.js.map +1 -0
- package/dist/services/fetcher/redirects.d.ts +6 -0
- package/dist/services/fetcher/redirects.d.ts.map +1 -0
- package/dist/services/fetcher/redirects.js +67 -0
- package/dist/services/fetcher/redirects.js.map +1 -0
- package/dist/services/fetcher/response.d.ts +5 -0
- package/dist/services/fetcher/response.d.ts.map +1 -0
- package/dist/services/fetcher/response.js +39 -0
- package/dist/services/fetcher/response.js.map +1 -0
- package/dist/services/fetcher/retry-policy.d.ts +28 -0
- package/dist/services/fetcher/retry-policy.d.ts.map +1 -0
- package/dist/services/fetcher/retry-policy.js +138 -0
- package/dist/services/fetcher/retry-policy.js.map +1 -0
- package/dist/services/fetcher.d.ts +2 -1
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +62 -315
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.js +4 -4
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -0
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +55 -35
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts +4 -0
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-links/link-extractor.js +163 -0
- package/dist/tools/handlers/fetch-links/link-extractor.js.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +78 -116
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -13
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +74 -83
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-single.shared.d.ts +26 -0
- package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-single.shared.js +49 -0
- package/dist/tools/handlers/fetch-single.shared.js.map +1 -0
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +82 -54
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls/processor.d.ts +13 -0
- package/dist/tools/handlers/fetch-urls/processor.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls/processor.js +153 -0
- package/dist/tools/handlers/fetch-urls/processor.js.map +1 -0
- package/dist/tools/handlers/fetch-urls/response.d.ts +3 -0
- package/dist/tools/handlers/fetch-urls/response.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls/response.js +58 -0
- package/dist/tools/handlers/fetch-urls/response.js.map +1 -0
- package/dist/tools/handlers/fetch-urls/validation.d.ts +6 -0
- package/dist/tools/handlers/fetch-urls/validation.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls/validation.js +18 -0
- package/dist/tools/handlers/fetch-urls/validation.js.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.js +104 -202
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +36 -237
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/schemas.d.ts +357 -0
- package/dist/tools/schemas.d.ts.map +1 -0
- package/dist/tools/schemas.js +272 -0
- package/dist/tools/schemas.js.map +1 -0
- package/dist/tools/utils/cache-vary.d.ts +3 -0
- package/dist/tools/utils/cache-vary.d.ts.map +1 -0
- package/dist/tools/utils/cache-vary.js +44 -0
- package/dist/tools/utils/cache-vary.js.map +1 -0
- package/dist/tools/utils/common.d.ts +2 -2
- package/dist/tools/utils/common.d.ts.map +1 -1
- package/dist/tools/utils/common.js +5 -1
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/content-transform.d.ts +16 -0
- package/dist/tools/utils/content-transform.d.ts.map +1 -0
- package/dist/tools/utils/content-transform.js +49 -0
- package/dist/tools/utils/content-transform.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +32 -18
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/tools/utils/inline-content.d.ts +11 -0
- package/dist/tools/utils/inline-content.d.ts.map +1 -0
- package/dist/tools/utils/inline-content.js +39 -0
- package/dist/tools/utils/inline-content.js.map +1 -0
- package/dist/tools/utils/markdown-toc.d.ts +3 -0
- package/dist/tools/utils/markdown-toc.d.ts.map +1 -0
- package/dist/tools/utils/markdown-toc.js +35 -0
- package/dist/tools/utils/markdown-toc.js.map +1 -0
- package/dist/tools/utils/tool-response.d.ts +9 -0
- package/dist/tools/utils/tool-response.d.ts.map +1 -0
- package/dist/tools/utils/tool-response.js +19 -0
- package/dist/tools/utils/tool-response.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +51 -28
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +82 -111
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/header-normalizer.d.ts +5 -0
- package/dist/utils/header-normalizer.d.ts.map +1 -0
- package/dist/utils/header-normalizer.js +25 -0
- package/dist/utils/header-normalizer.js.map +1 -0
- package/dist/utils/tool-error-handler.d.ts +1 -0
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +29 -1
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -3
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +98 -18
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +11 -6
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import safeRegex from 'safe-regex';
|
|
3
|
+
import { createToolErrorResponse } from '../../../utils/tool-error-handler.js';
|
|
4
|
+
import { isInternalUrl } from '../../../utils/url-validator.js';
|
|
5
|
+
/**
 * Decides whether a link of the given category should be kept.
 *
 * @param {string} type - Link category: 'internal', 'external' or 'image'.
 * @param {{includeInternal?: boolean, includeExternal?: boolean, includeImages?: boolean}} options
 *   Inclusion flags, one per category.
 * @returns {boolean} `true` only when `type` is a known category whose flag is truthy.
 */
function isLinkAllowed(type, options) {
    const allowed = {
        internal: options.includeInternal,
        external: options.includeExternal,
        image: options.includeImages,
    };
    // Own-property check: a bare `allowed[type]` would also resolve inherited
    // keys such as 'constructor' or 'toString' to (truthy) functions.
    return Object.hasOwn(allowed, type) && Boolean(allowed[type]);
}
|
|
13
|
+
/**
 * Tests a URL against the optional filter pattern.
 *
 * @param {string} url - Absolute URL to test.
 * @param {RegExp | undefined} filterPattern - Compiled filter; when absent every URL matches.
 * @returns {boolean} `true` when there is no filter or the URL matches it.
 */
function matchesFilter(url, filterPattern) {
    if (!filterPattern)
        return true;
    // RegExp#test is stateful for global (/g) and sticky (/y) patterns: it
    // resumes from `lastIndex`, so reusing one pattern across many URLs would
    // give alternating results. Always start matching from the beginning.
    filterPattern.lastIndex = 0;
    return filterPattern.test(url);
}
|
|
18
|
+
/**
 * Classifies a candidate link as accepted, filtered out, or a silent duplicate.
 *
 * @param {{href: string, type: string}} link - Candidate link.
 * @param {object} options - Extraction options (filter pattern and inclusion flags).
 * @param {Set<string>} seen - Hrefs that have already been accepted.
 * @returns {{accepted: boolean, filtered: boolean}} Duplicates are neither
 *   accepted nor counted as filtered; links rejected by the pattern or by the
 *   inclusion flags are reported as filtered.
 */
function evaluateLink(link, options, seen) {
    const isDuplicate = seen.has(link.href);
    if (isDuplicate) {
        return { accepted: false, filtered: false };
    }
    // The pattern is checked first; the category flag is only consulted when it passes.
    const passes = matchesFilter(link.href, options.filterPattern) &&
        isLinkAllowed(link.type, options);
    return passes
        ? { accepted: true, filtered: false }
        : { accepted: false, filtered: true };
}
|
|
30
|
+
/**
 * Turns the user-supplied filter string into a validated RegExp.
 *
 * @param {string | undefined} pattern - Raw filter pattern from the tool input.
 * @param {string} url - URL being processed; forwarded to the validators for error responses.
 * @returns {RegExp | object | undefined} `undefined` when no pattern was given,
 *   the compiled pattern when every check passes, otherwise the error response
 *   produced by the first failing check (length, syntax, then safety).
 */
export function resolveFilterPattern(pattern, url) {
    if (!pattern)
        return undefined;
    const tooLong = validatePatternLength(pattern, url);
    if (tooLong)
        return tooLong;
    const compiled = buildFilterRegex(pattern, url);
    // buildFilterRegex hands back an error response instead of throwing.
    if (isToolResponseBase(compiled))
        return compiled;
    return validatePatternSafety(compiled, url) || compiled;
}
|
|
44
|
+
/**
 * Rejects filter patterns longer than 200 characters.
 *
 * @param {string} pattern - Raw filter pattern.
 * @param {string} url - URL passed through to the error response.
 * @returns {object | null} A validation error response, or `null` when the length is acceptable.
 */
function validatePatternLength(pattern, url) {
    const exceedsLimit = pattern.length > 200;
    return exceedsLimit
        ? createToolErrorResponse('Filter pattern too long (max 200 characters)', url, 'VALIDATION_ERROR')
        : null;
}
|
|
49
|
+
/**
 * Compiles the filter pattern into a case-insensitive RegExp.
 *
 * @param {string} pattern - Raw filter pattern.
 * @param {string} url - URL passed through to the error response.
 * @returns {RegExp | object} The compiled pattern, or a validation error
 *   response when the pattern is not valid regular-expression syntax.
 */
function buildFilterRegex(pattern, url) {
    let compiled;
    try {
        compiled = new RegExp(pattern, 'i');
    }
    catch {
        // A syntax error becomes a tool response rather than an exception.
        compiled = createToolErrorResponse(`Invalid filter pattern: ${pattern}`, url, 'VALIDATION_ERROR');
    }
    return compiled;
}
|
|
57
|
+
/**
 * Checks a compiled pattern with `safeRegex` before it is run against page links.
 *
 * @param {RegExp} pattern - Compiled filter pattern.
 * @param {string} url - URL passed through to the error response.
 * @returns {object | null} A validation error response when `safeRegex`
 *   rejects the pattern, otherwise `null`.
 */
function validatePatternSafety(pattern, url) {
    const isSafe = safeRegex(pattern);
    return isSafe
        ? null
        : createToolErrorResponse('Filter pattern is unsafe (potential catastrophic backtracking)', url, 'VALIDATION_ERROR');
}
|
|
62
|
+
/**
 * Structural check for a tool response: a non-null object carrying an array `content`.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` when `value` has the shape of a tool response.
 */
function isToolResponseBase(value) {
    if (value === null || typeof value !== 'object')
        return false;
    if (!('content' in value))
        return false;
    return Array.isArray(value.content);
}
|
|
68
|
+
/**
 * Resolves `href` against `baseUrl` without throwing.
 *
 * @param {string} href - Possibly relative URL taken from the page.
 * @param {string} baseUrl - URL of the page the link was found on.
 * @returns {string | null} The absolute URL string, or `null` when the pair cannot be parsed.
 */
function tryResolveUrl(href, baseUrl) {
    const parseable = URL.canParse(href, baseUrl);
    return parseable ? new URL(href, baseUrl).href : null;
}
|
|
74
|
+
/**
 * Labels a resolved link as 'internal' or 'external' relative to the page URL.
 *
 * @param {string} url - Absolute link URL.
 * @param {string} baseUrl - URL of the page the link was found on.
 * @returns {'internal' | 'external'} Category decided by `isInternalUrl`.
 */
function buildLinkType(url, baseUrl) {
    if (isInternalUrl(url, baseUrl)) {
        return 'internal';
    }
    return 'external';
}
|
|
77
|
+
/**
 * Tells whether an anchor href should be ignored: same-page fragments and
 * `javascript:` pseudo-links.
 *
 * The check is done on a normalised copy because URL resolution ignores ASCII
 * tab/newline characters, leading control characters and spaces, and scheme
 * case. Without normalising, values such as `JavaScript:void(0)` or
 * ` javascript:...` would slip through and be reported as links.
 *
 * @param {string} href - Raw `href` attribute value.
 * @returns {boolean} `true` when the href must not be reported as a link.
 */
function isSkippableHref(href) {
    const normalized = href
        .replace(/[\t\n\r]/g, '')
        .replace(/^[\u0000-\u0020]+/, '')
        .toLowerCase();
    return normalized.startsWith('#') || normalized.startsWith('javascript:');
}
|
|
80
|
+
/**
 * Reads the `href` of an anchor element, discarding values that are not real links.
 *
 * @param {Function} $ - Cheerio instance for the loaded document.
 * @param {object} el - Anchor element.
 * @returns {string | null} The raw href, or `null` when it is missing, empty or skippable.
 */
function getAnchorHref($, el) {
    const rawHref = $(el).attr('href');
    if (!rawHref || isSkippableHref(rawHref))
        return null;
    return rawHref;
}
|
|
88
|
+
/**
 * Builds a link record from an anchor element.
 *
 * @param {Function} $ - Cheerio instance for the loaded document.
 * @param {object} el - Anchor element.
 * @param {string} baseUrl - URL of the page, used to resolve relative hrefs.
 * @returns {{href: string, text: string, type: string} | null} The link
 *   record, or `null` when the href is unusable or cannot be resolved.
 */
function resolveAnchorLink($, el, baseUrl) {
    const rawHref = getAnchorHref($, el);
    const resolved = rawHref ? tryResolveUrl(rawHref, baseUrl) : null;
    if (!resolved)
        return null;
    // Anchors without visible text fall back to the URL itself as the label.
    const label = $(el).text().trim();
    return {
        href: resolved,
        text: label || resolved,
        type: buildLinkType(resolved, baseUrl),
    };
}
|
|
101
|
+
/**
 * Builds a link record from an image element.
 *
 * @param {Function} $ - Cheerio instance for the loaded document.
 * @param {object} el - Image element.
 * @param {string} baseUrl - URL of the page, used to resolve relative sources.
 * @returns {{href: string, text: string, type: 'image'} | null} The link
 *   record, or `null` when the source is missing, an inline `data:` URI, or
 *   cannot be resolved.
 */
function resolveImageLink($, el, baseUrl) {
    const src = $(el).attr('src');
    // Scheme matching ignores case and leading whitespace, so ' DATA:...' is
    // an inline payload as well.
    if (!src || src.trimStart().toLowerCase().startsWith('data:'))
        return null;
    const url = tryResolveUrl(src, baseUrl);
    if (!url)
        return null;
    return {
        href: url,
        // `||` rather than `??`: an empty or whitespace-only alt trims to ''
        // and must fall back to the URL, matching how anchor text is handled.
        text: $(el).attr('alt')?.trim() || url,
        type: 'image',
    };
}
|
|
114
|
+
/**
 * Walks every `a[href]` element and appends the accepted links to `links`.
 *
 * @param {Function} $ - Cheerio instance for the loaded document.
 * @param {string} baseUrl - URL of the page.
 * @param {object} options - Extraction options.
 * @param {Set<string>} seen - Accepted hrefs; updated in place.
 * @param {Array<object>} links - Output list; updated in place.
 * @returns {number} How many anchors were rejected by the filter or inclusion flags.
 */
function collectAnchorLinks($, baseUrl, options, seen, links) {
    let rejectedCount = 0;
    const visit = (_index, element) => {
        const candidate = resolveAnchorLink($, element, baseUrl);
        if (!candidate)
            return;
        const { accepted, filtered } = evaluateLink(candidate, options, seen);
        if (filtered)
            rejectedCount += 1;
        if (accepted) {
            seen.add(candidate.href);
            links.push(candidate);
        }
    };
    $('a[href]').each(visit);
    return rejectedCount;
}
|
|
130
|
+
/**
 * Walks every `img[src]` element and appends the accepted image links to
 * `links`. Does nothing when image links were not requested.
 *
 * @param {Function} $ - Cheerio instance for the loaded document.
 * @param {string} baseUrl - URL of the page.
 * @param {object} options - Extraction options; `includeImages` gates the whole pass.
 * @param {Set<string>} seen - Accepted hrefs; updated in place.
 * @param {Array<object>} links - Output list; updated in place.
 * @returns {number} How many images were rejected by the filter or inclusion flags.
 */
function collectImageLinks($, baseUrl, options, seen, links) {
    if (!options.includeImages)
        return 0;
    let rejectedCount = 0;
    const visit = (_index, element) => {
        const candidate = resolveImageLink($, element, baseUrl);
        if (!candidate)
            return;
        const { accepted, filtered } = evaluateLink(candidate, options, seen);
        if (filtered)
            rejectedCount += 1;
        if (accepted) {
            seen.add(candidate.href);
            links.push(candidate);
        }
    };
    $('img[src]').each(visit);
    return rejectedCount;
}
|
|
148
|
+
/**
 * Extracts anchor links, and image links when requested, from an HTML document.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL of the page, used to resolve and classify links.
 * @param {object} options - Inclusion flags, optional `filterPattern` and optional `maxLinks`.
 * @returns {{links: Array<object>, linkCount: number, filtered: number, truncated: boolean}}
 *   The links (cut to `maxLinks` when set and exceeded), the number returned,
 *   the number rejected by filters, and whether the list was cut.
 */
export function extractLinks(html, baseUrl, options) {
    const $ = cheerio.load(html);
    const seen = new Set();
    const links = [];
    // Anchors are collected before images; both passes share `seen` for de-duplication.
    const anchorRejected = collectAnchorLinks($, baseUrl, options, seen, links);
    const imageRejected = collectImageLinks($, baseUrl, options, seen, links);
    const { maxLinks } = options;
    const truncated = Boolean(maxLinks) && links.length > maxLinks;
    const resultLinks = truncated ? links.slice(0, maxLinks) : links;
    return {
        links: resultLinks,
        linkCount: resultLinks.length,
        filtered: anchorRejected + imageRejected,
        truncated,
    };
}
|
|
163
|
+
//# sourceMappingURL=link-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"link-extractor.js","sourceRoot":"","sources":["../../../../src/tools/handlers/fetch-links/link-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,SAAS,MAAM,YAAY,CAAC;AAUnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,sCAAsC,CAAC;AAC/E,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAEhE,SAAS,aAAa,CAAC,IAAc,EAAE,OAA4B;IACjE,MAAM,OAAO,GAA8B;QACzC,QAAQ,EAAE,OAAO,CAAC,eAAe;QACjC,QAAQ,EAAE,OAAO,CAAC,eAAe;QACjC,KAAK,EAAE,OAAO,CAAC,aAAa;KAC7B,CAAC;IACF,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,aAAa,CACpB,GAAW,EACX,aAAiC;IAEjC,IAAI,CAAC,aAAa;QAAE,OAAO,IAAI,CAAC;IAChC,OAAO,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,YAAY,CACnB,IAAmB,EACnB,OAA4B,EAC5B,IAAiB;IAEjB,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;IAC9C,CAAC;IAED,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;QACrD,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAC7C,CAAC;IAED,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;QACvC,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAC7C,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,OAA2B,EAC3B,GAAW;IAEX,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAE/B,MAAM,WAAW,GAAG,qBAAqB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IACxD,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,MAAM,aAAa,GAAG,gBAAgB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IACrD,IAAI,kBAAkB,CAAC,aAAa,CAAC;QAAE,OAAO,aAAa,CAAC;IAE5D,MAAM,WAAW,GAAG,qBAAqB,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;IAC9D,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,OAAO,aAAa,CAAC;AACvB,CAAC;AAED,SAAS,qBAAqB,CAC5B,OAAe,EACf,GAAW;IAEX,IAAI,OAAO,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IACvC,OAAO,uBAAuB,CAC5B,8CAA8C,EAC9C,GAAG,EACH,kBAAkB,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,OAAe,EACf,GAAW;IAEX,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,uBAAuB,CAC5B,2BAA2B,OAAO,EAAE,EACpC,GAAG,EACH,kBAAkB,CACnB,CAAC;IACJ,CAAC;AACH,CAAC;AA
ED,SAAS,qBAAqB,CAC5B,OAAe,EACf,GAAW;IAEX,IAAI,SAAS,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACpC,OAAO,uBAAuB,CAC5B,gEAAgE,EAChE,GAAG,EACH,kBAAkB,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,OAAO,CACL,KAAK,KAAK,IAAI;QACd,OAAO,KAAK,KAAK,QAAQ;QACzB,SAAS,IAAI,KAAK;QAClB,KAAK,CAAC,OAAO,CAAE,KAA0B,CAAC,OAAO,CAAC,CACnD,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,IAAY,EAAE,OAAe;IAClD,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;QACjC,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;AACrC,CAAC;AAED,SAAS,aAAa,CAAC,GAAW,EAAE,OAAe;IACjD,OAAO,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC;AAC/D,CAAC;AAED,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAChE,CAAC;AAED,SAAS,aAAa,CAAC,CAAqB,EAAE,EAAW;IACvD,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,IAAI,eAAe,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CACxB,CAAqB,EACrB,EAAW,EACX,OAAe;IAEf,MAAM,IAAI,GAAG,aAAa,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,GAAG;QACT,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,GAAG;QAChC,IAAI,EAAE,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC;KAClC,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,CAAqB,EACrB,EAAW,EACX,OAAe;IAEf,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAEjD,MAAM,GAAG,GAAG,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACxC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,GAAG;QACT,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG;QACtC,IAAI,EAAE,OAAO;KACd,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CACzB,CAAqB,EACrB,OAAe,EACf,OAA4B,EAC5B,IAAiB,EACjB,KAAsB;IAEtB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,CAAC,CA
AC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,iBAAiB,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;QAC/C,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;QACjD,IAAI,MAAM,CAAC,QAAQ;YAAE,QAAQ,IAAI,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,OAAO;QAE7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,iBAAiB,CACxB,CAAqB,EACrB,OAAe,EACf,OAA4B,EAC5B,IAAiB,EACjB,KAAsB;IAEtB,IAAI,CAAC,OAAO,CAAC,aAAa;QAAE,OAAO,CAAC,CAAC;IAErC,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC3B,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;QACjD,IAAI,MAAM,CAAC,QAAQ;YAAE,QAAQ,IAAI,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,OAAO;QAE7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,IAAY,EACZ,OAAe,EACf,OAA4B;IAE5B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,IAAI,QAAQ,GAAG,kBAAkB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;IACpE,QAAQ,IAAI,iBAAiB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;IAEhE,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAC7E,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEzE,OAAO;QACL,KAAK,EAAE,WAAW;QAClB,SAAS,EAAE,WAAW,CAAC,MAAM;QAC7B,QAAQ;QACR,SAAS;KACV,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,eAAe,EAGf,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAgB/B,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,uIAC6F,CAAC;AAiEvI,wBAAsB,qBAAqB,CACzC,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,gBAAgB,CAAC,CAU3B"}
|
|
@@ -1,138 +1,100 @@
|
|
|
1
|
-
import * as cheerio from 'cheerio';
|
|
2
1
|
import { logDebug, logError } from '../../services/logger.js';
|
|
3
2
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
4
|
-
import {
|
|
3
|
+
import { appendHeaderVary } from '../utils/cache-vary.js';
|
|
5
4
|
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
5
|
+
import { extractLinks, resolveFilterPattern, } from './fetch-links/link-extractor.js';
|
|
6
6
|
export const FETCH_LINKS_TOOL_NAME = 'fetch-links';
|
|
7
7
|
export const FETCH_LINKS_TOOL_DESCRIPTION = 'Extracts all hyperlinks from a webpage with anchor text and type classification. Supports filtering, image links, and link limits.';
|
|
8
|
-
function
|
|
9
|
-
|
|
10
|
-
return new URL(href, baseUrl).href;
|
|
11
|
-
}
|
|
12
|
-
catch {
|
|
13
|
-
return null;
|
|
14
|
-
}
|
|
8
|
+
/**
 * Narrowing helper: tells whether `value` is a non-null object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for any object (arrays included), `false` for `null` and primitives.
 */
function isRecord(value) {
    return typeof value === 'object' && value !== null;
}
|
|
16
|
-
function
|
|
17
|
-
if (
|
|
11
|
+
function isToolResponseBase(value) {
|
|
12
|
+
if (!isRecord(value))
|
|
18
13
|
return false;
|
|
19
|
-
if (
|
|
14
|
+
if (!('content' in value))
|
|
20
15
|
return false;
|
|
21
|
-
|
|
22
|
-
return false;
|
|
23
|
-
if (type === 'external' && !options.includeExternal)
|
|
24
|
-
return false;
|
|
25
|
-
return true;
|
|
16
|
+
return Array.isArray(value.content);
|
|
26
17
|
}
|
|
27
|
-
function
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
$('a[href]').each((_, el) => {
|
|
33
|
-
const href = $(el).attr('href');
|
|
34
|
-
if (!href || href.startsWith('#') || href.startsWith('javascript:'))
|
|
35
|
-
return;
|
|
36
|
-
const url = tryResolveUrl(href, baseUrl);
|
|
37
|
-
if (!url)
|
|
38
|
-
return;
|
|
39
|
-
const type = isInternalUrl(url, baseUrl)
|
|
40
|
-
? 'internal'
|
|
41
|
-
: 'external';
|
|
42
|
-
if (!shouldIncludeLink(type, url, options, seen)) {
|
|
43
|
-
if (!seen.has(url))
|
|
44
|
-
filtered++;
|
|
45
|
-
return;
|
|
46
|
-
}
|
|
47
|
-
seen.add(url);
|
|
48
|
-
links.push({ href: url, text: $(el).text().trim() || url, type });
|
|
18
|
+
function logFetchLinksStart(url, options, filterPattern) {
|
|
19
|
+
logDebug('Extracting links', {
|
|
20
|
+
url,
|
|
21
|
+
...options,
|
|
22
|
+
filterPattern,
|
|
49
23
|
});
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
}
|
|
71
|
-
const truncated = options.maxLinks ? links.length > options.maxLinks : false;
|
|
72
|
-
const resultLinks = truncated ? links.slice(0, options.maxLinks) : links;
|
|
24
|
+
}
|
|
25
|
+
/**
 * Runs the shared fetch pipeline for the fetch-links tool.
 *
 * @param {string} url - Page URL to fetch.
 * @param {object} input - Tool input; supplies `customHeaders`, `retries`, `timeout` and the raw `filterPattern`.
 * @param {object} options - Resolved extraction options handed to `extractLinks`.
 * @returns {Promise<object>} Whatever `executeFetchPipeline` resolves to.
 */
async function fetchLinksPipeline(url, input, options) {
    const { customHeaders, retries, timeout } = input;
    const { includeInternal, includeExternal, includeImages, maxLinks } = options;
    // The raw pattern string (or null) goes into the vary key, not the compiled RegExp.
    const varyFields = {
        includeInternal,
        includeExternal,
        includeImages,
        maxLinks,
        filterPattern: input.filterPattern ?? null,
    };
    const cacheVary = appendHeaderVary(varyFields, customHeaders);
    const transform = (html, normalizedUrl) => extractLinks(html, normalizedUrl, options);
    return executeFetchPipeline({
        url,
        cacheNamespace: 'links',
        customHeaders,
        retries,
        timeout,
        cacheVary,
        transform,
    });
}
|
|
42
|
+
function buildLinksResponse(result) {
|
|
43
|
+
const structuredContent = buildLinksStructuredContent(result);
|
|
73
44
|
return {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
45
|
+
content: [
|
|
46
|
+
{
|
|
47
|
+
type: 'text',
|
|
48
|
+
text: JSON.stringify(structuredContent, null, 2),
|
|
49
|
+
},
|
|
50
|
+
],
|
|
51
|
+
structuredContent,
|
|
78
52
|
};
|
|
79
53
|
}
|
|
80
54
|
export async function fetchLinksToolHandler(input) {
|
|
81
|
-
if (!input.url) {
|
|
82
|
-
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
83
|
-
}
|
|
84
|
-
let filterPattern;
|
|
85
|
-
if (input.filterPattern) {
|
|
86
|
-
if (input.filterPattern.length > 200) {
|
|
87
|
-
return createToolErrorResponse('Filter pattern too long (max 200 characters)', input.url, 'VALIDATION_ERROR');
|
|
88
|
-
}
|
|
89
|
-
try {
|
|
90
|
-
filterPattern = new RegExp(input.filterPattern, 'i');
|
|
91
|
-
}
|
|
92
|
-
catch {
|
|
93
|
-
return createToolErrorResponse(`Invalid filter pattern: ${input.filterPattern}`, input.url, 'VALIDATION_ERROR');
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
55
|
try {
|
|
97
|
-
|
|
98
|
-
includeInternal: input.includeInternal ?? true,
|
|
99
|
-
includeExternal: input.includeExternal ?? true,
|
|
100
|
-
includeImages: input.includeImages ?? false,
|
|
101
|
-
maxLinks: input.maxLinks,
|
|
102
|
-
filterPattern,
|
|
103
|
-
};
|
|
104
|
-
logDebug('Extracting links', {
|
|
105
|
-
url: input.url,
|
|
106
|
-
...options,
|
|
107
|
-
filterPattern: input.filterPattern,
|
|
108
|
-
});
|
|
109
|
-
const result = await executeFetchPipeline({
|
|
110
|
-
url: input.url,
|
|
111
|
-
cacheNamespace: 'links',
|
|
112
|
-
customHeaders: input.customHeaders,
|
|
113
|
-
retries: input.retries,
|
|
114
|
-
transform: (html, url) => extractLinks(html, url, options),
|
|
115
|
-
});
|
|
116
|
-
const structuredContent = {
|
|
117
|
-
url: result.url,
|
|
118
|
-
linkCount: result.data.linkCount,
|
|
119
|
-
links: result.data.links,
|
|
120
|
-
...(result.data.filtered > 0 && { filtered: result.data.filtered }),
|
|
121
|
-
...(result.data.truncated && { truncated: result.data.truncated }),
|
|
122
|
-
};
|
|
123
|
-
return {
|
|
124
|
-
content: [
|
|
125
|
-
{
|
|
126
|
-
type: 'text',
|
|
127
|
-
text: JSON.stringify(structuredContent, null, 2),
|
|
128
|
-
},
|
|
129
|
-
],
|
|
130
|
-
structuredContent,
|
|
131
|
-
};
|
|
56
|
+
return await executeFetchLinks(input);
|
|
132
57
|
}
|
|
133
58
|
catch (error) {
|
|
134
59
|
logError('fetch-links tool error', error instanceof Error ? error : undefined);
|
|
135
60
|
return handleToolError(error, input.url, 'Failed to extract links');
|
|
136
61
|
}
|
|
137
62
|
}
|
|
63
|
+
/**
 * Core of the fetch-links tool: validates the input, fetches the page and
 * formats the extracted links.
 *
 * @param {object} input - Tool input (`url`, optional `filterPattern` and extraction flags).
 * @returns {Promise<object>} The links tool response, or a validation error
 *   response when the URL is missing or the filter pattern is rejected.
 */
async function executeFetchLinks(input) {
    const targetUrl = input.url;
    if (!targetUrl) {
        return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
    }
    const resolvedPattern = resolveFilterPattern(input.filterPattern, targetUrl);
    // A tool-response-shaped result here means pattern validation failed.
    if (isToolResponseBase(resolvedPattern)) {
        return resolvedPattern;
    }
    const extractOptions = buildExtractOptions(input, resolvedPattern);
    logFetchLinksStart(targetUrl, extractOptions, input.filterPattern);
    const pipelineResult = await fetchLinksPipeline(targetUrl, input, extractOptions);
    return buildLinksResponse(pipelineResult);
}
|
|
77
|
+
/**
 * Applies defaults to the tool input to produce the extraction options.
 *
 * @param {object} input - Tool input with optional `includeInternal`, `includeExternal`, `includeImages` and `maxLinks`.
 * @param {RegExp | undefined} filterPattern - Compiled filter pattern, if any.
 * @returns {object} Options in which internal and external links default to
 *   included and images default to excluded.
 */
function buildExtractOptions(input, filterPattern) {
    const includeInternal = input.includeInternal ?? true;
    const includeExternal = input.includeExternal ?? true;
    const includeImages = input.includeImages ?? false;
    return {
        includeInternal,
        includeExternal,
        includeImages,
        maxLinks: input.maxLinks,
        filterPattern,
    };
}
|
|
86
|
+
/**
 * Shapes the pipeline result into the structured payload returned by the tool.
 *
 * @param {{url: string, data: {linkCount: number, links: Array<object>, filtered: number, truncated: boolean}}} result
 *   Pipeline result.
 * @returns {object} `url`, `linkCount` and `links`, plus `filtered` only when
 *   it is greater than zero and `truncated` only when it is truthy.
 */
function buildLinksStructuredContent(result) {
    const { url, data } = result;
    const { linkCount, links, filtered, truncated } = data;
    return {
        url,
        linkCount,
        links,
        ...(filtered > 0 ? { filtered } : {}),
        ...(truncated ? { truncated } : {}),
    };
}
|
|
138
100
|
//# sourceMappingURL=fetch-links.tool.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAE9D,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,OAAO,EACL,YAAY,EACZ,oBAAoB,GACrB,MAAM,iCAAiC,CAAC;AAEzC,MAAM,CAAC,MAAM,qBAAqB,GAAG,aAAa,CAAC;AACnD,MAAM,CAAC,MAAM,4BAA4B,GACvC,oIAAoI,CAAC;AAEvI,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,CAAC;AACrD,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACnC,IAAI,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,OAAO,KAAK,CAAC,OAAO,CAAE,KAA0B,CAAC,OAAO,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,kBAAkB,CACzB,GAAW,EACX,OAA4B,EAC5B,aAAiC;IAEjC,QAAQ,CAAC,kBAAkB,EAAE;QAC3B,GAAG;QACH,GAAG,OAAO;QACV,aAAa;KACd,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,GAAW,EACX,KAAsB,EACtB,OAA4B;IAE5B,OAAO,oBAAoB,CAAuB;QAChD,GAAG;QACH,cAAc,EAAE,OAAO;QACvB,aAAa,EAAE,KAAK,CAAC,aAAa;QAClC,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,SAAS,EAAE,gBAAgB,CACzB;YACE,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,aAAa,EAAE,OAAO,CAAC,aAAa;YACpC,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,IAAI;SAC3C,EACD,KAAK,CAAC,aAAa,CACpB;QACD,SAAS,EAAE,CAAC,IAAI,EAAE,aAAa,EAAE,EAAE,CACjC,YAAY,CAAC,IAAI,EAAE,aAAa,EAAE,OAAO,CAAC;KAC7C,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CACzB,MAA4C;IAE5C,MAAM,iBAAiB,GAAG,2BAA2B,CAAC,MAAM,CAAC,CAAC;IAC9D,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;aACjD;SACF;QACD,iBAAiB;KAClB,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,KAAsB;IAEtB,IAAI,CAAC;QACH,OAAO,MAAM,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACxC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,wBAAwB,EACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,E
AAE,yBAAyB,CAAC,CAAC;IACtE,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,KAAsB;IAEtB,MAAM,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;IACtB,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IACD,MAAM,aAAa,GAAG,oBAAoB,CAAC,KAAK,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;IACrE,IAAI,kBAAkB,CAAC,aAAa,CAAC,EAAE,CAAC;QACtC,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,MAAM,OAAO,GAAG,mBAAmB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;IAE1D,kBAAkB,CAAC,GAAG,EAAE,OAAO,EAAE,KAAK,CAAC,aAAa,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IAC7D,OAAO,kBAAkB,CAAC,MAAM,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,mBAAmB,CAC1B,KAAsB,EACtB,aAAiC;IAEjC,OAAO;QACL,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;QAC9C,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;QAC9C,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,KAAK;QAC3C,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,aAAa;KACd,CAAC;AACJ,CAAC;AAED,SAAS,2BAA2B,CAClC,MAA4C;IAE5C,MAAM,iBAAiB,GAA4B;QACjD,GAAG,EAAE,MAAM,CAAC,GAAG;QACf,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS;QAChC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;KACzB,CAAC;IAEF,IAAI,MAAM,CAAC,IAAI,CAAC,QAAQ,GAAG,CAAC,EAAE,CAAC;QAC7B,iBAAiB,CAAC,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;IACpD,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;QAC1B,iBAAiB,CAAC,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC;IACtD,CAAC;IAED,OAAO,iBAAiB,CAAC;AAC3B,CAAC"}
|
|
@@ -1,15 +1,5 @@
|
|
|
1
|
-
import type { FetchMarkdownInput } from '../../config/types.js';
|
|
1
|
+
import type { FetchMarkdownInput, ToolResponseBase } from '../../config/types.js';
|
|
2
2
|
export declare const FETCH_MARKDOWN_TOOL_NAME = "fetch-markdown";
|
|
3
|
-
export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter
|
|
4
|
-
|
|
5
|
-
[x: string]: unknown;
|
|
6
|
-
content: {
|
|
7
|
-
type: 'text';
|
|
8
|
-
text: string;
|
|
9
|
-
}[];
|
|
10
|
-
structuredContent?: Record<string, unknown>;
|
|
11
|
-
isError?: boolean;
|
|
12
|
-
}
|
|
13
|
-
export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<FetchMarkdownToolResponse>;
|
|
14
|
-
export {};
|
|
3
|
+
export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits";
|
|
4
|
+
export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<ToolResponseBase>;
|
|
15
5
|
//# sourceMappingURL=fetch-markdown.tool.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,
|
|
1
|
+
{"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAGlB,gBAAgB,EAEjB,MAAM,uBAAuB,CAAC;AAoB/B,eAAO,MAAM,wBAAwB,mBAAmB,CAAC;AACzD,eAAO,MAAM,+BAA+B,mHACsE,CAAC;AAyGnH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,kBAAkB,GACxB,OAAO,CAAC,gBAAgB,CAAC,CAU3B"}
|
|
@@ -1,100 +1,91 @@
|
|
|
1
|
-
import { extractContent } from '../../services/extractor.js';
|
|
2
1
|
import { logDebug, logError } from '../../services/logger.js';
|
|
3
|
-
import { stripMarkdownLinks } from '../../utils/content-cleaner.js';
|
|
4
2
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
|
|
3
|
+
import { transformHtmlToMarkdown } from '../utils/content-transform.js';
|
|
4
|
+
import { buildToolContentBlocks, performSharedFetch, } from './fetch-single.shared.js';
|
|
8
5
|
export const FETCH_MARKDOWN_TOOL_NAME = 'fetch-markdown';
|
|
9
|
-
export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter
|
|
10
|
-
function
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
.
|
|
14
|
-
.
|
|
15
|
-
|
|
16
|
-
.replace(/--+/g, '-')
|
|
17
|
-
.trim();
|
|
6
|
+
export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits';
|
|
7
|
+
function resolveMarkdownOptions(input) {
|
|
8
|
+
return {
|
|
9
|
+
extractMainContent: input.extractMainContent ?? true,
|
|
10
|
+
includeMetadata: input.includeMetadata ?? true,
|
|
11
|
+
maxContentLength: input.maxContentLength,
|
|
12
|
+
};
|
|
18
13
|
}
|
|
19
|
-
function
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
slug: slugify(rawText),
|
|
33
|
-
});
|
|
14
|
+
function buildMarkdownStructuredContent(pipeline, inlineResult) {
|
|
15
|
+
const structuredContent = {
|
|
16
|
+
url: pipeline.url,
|
|
17
|
+
title: pipeline.data.title,
|
|
18
|
+
fetchedAt: pipeline.fetchedAt,
|
|
19
|
+
contentSize: inlineResult.contentSize,
|
|
20
|
+
cached: pipeline.fromCache,
|
|
21
|
+
};
|
|
22
|
+
if (pipeline.data.truncated || inlineResult.truncated) {
|
|
23
|
+
structuredContent.truncated = true;
|
|
24
|
+
}
|
|
25
|
+
if (typeof inlineResult.content === 'string') {
|
|
26
|
+
structuredContent.markdown = inlineResult.content;
|
|
34
27
|
}
|
|
35
|
-
|
|
28
|
+
if (inlineResult.resourceUri) {
|
|
29
|
+
structuredContent.resourceUri = inlineResult.resourceUri;
|
|
30
|
+
structuredContent.resourceMimeType = inlineResult.resourceMimeType;
|
|
31
|
+
}
|
|
32
|
+
return structuredContent;
|
|
33
|
+
}
|
|
34
|
+
function getInlineErrorResponse(inlineResult, url) {
|
|
35
|
+
if (!inlineResult.error)
|
|
36
|
+
return null;
|
|
37
|
+
return createToolErrorResponse(inlineResult.error, url, 'INTERNAL_ERROR');
|
|
38
|
+
}
|
|
39
|
+
function logFetchMarkdownStart(url, options) {
|
|
40
|
+
logDebug('Fetching markdown', { url, ...options });
|
|
36
41
|
}
|
|
37
|
-
function
|
|
38
|
-
|
|
39
|
-
|
|
42
|
+
function buildMarkdownTransform(options) {
|
|
43
|
+
return (html, url) => {
|
|
44
|
+
const markdownResult = transformHtmlToMarkdown(html, url, options);
|
|
45
|
+
return { ...markdownResult, content: markdownResult.markdown };
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
async function fetchMarkdownPipeline(url, input, options, transformOptions) {
|
|
49
|
+
return performSharedFetch({
|
|
50
|
+
url,
|
|
51
|
+
format: 'markdown',
|
|
52
|
+
extractMainContent: options.extractMainContent,
|
|
53
|
+
includeMetadata: options.includeMetadata,
|
|
54
|
+
maxContentLength: options.maxContentLength,
|
|
55
|
+
customHeaders: input.customHeaders,
|
|
56
|
+
retries: input.retries,
|
|
57
|
+
timeout: input.timeout,
|
|
58
|
+
transform: buildMarkdownTransform(transformOptions),
|
|
40
59
|
});
|
|
41
|
-
const shouldExtractFromArticle = determineContentExtractionSource(options.extractMainContent, article);
|
|
42
|
-
const metadata = createContentMetadataBlock(url, article, extractedMeta, shouldExtractFromArticle, options.includeMetadata);
|
|
43
|
-
const sourceHtml = shouldExtractFromArticle ? article.content : html;
|
|
44
|
-
const title = shouldExtractFromArticle ? article.title : extractedMeta.title;
|
|
45
|
-
let markdown = htmlToMarkdown(sourceHtml, metadata);
|
|
46
|
-
const toc = options.generateToc ? extractToc(markdown) : undefined;
|
|
47
|
-
let truncated = false;
|
|
48
|
-
if (options.maxContentLength && markdown.length > options.maxContentLength) {
|
|
49
|
-
markdown = `${markdown.substring(0, options.maxContentLength)}\n\n...[truncated]`;
|
|
50
|
-
truncated = true;
|
|
51
|
-
}
|
|
52
|
-
return { markdown, title, toc, truncated };
|
|
53
60
|
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
extractMainContent: input.extractMainContent ?? true,
|
|
60
|
-
includeMetadata: input.includeMetadata ?? true,
|
|
61
|
-
generateToc: input.generateToc ?? false,
|
|
62
|
-
maxContentLength: input.maxContentLength,
|
|
61
|
+
function buildMarkdownResponse(pipeline, inlineResult) {
|
|
62
|
+
const structuredContent = buildMarkdownStructuredContent(pipeline, inlineResult);
|
|
63
|
+
return {
|
|
64
|
+
content: buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown'),
|
|
65
|
+
structuredContent,
|
|
63
66
|
};
|
|
64
|
-
|
|
67
|
+
}
|
|
68
|
+
export async function fetchMarkdownToolHandler(input) {
|
|
65
69
|
try {
|
|
66
|
-
|
|
67
|
-
url: input.url,
|
|
68
|
-
cacheNamespace: 'markdown',
|
|
69
|
-
customHeaders: input.customHeaders,
|
|
70
|
-
retries: input.retries,
|
|
71
|
-
transform: (html, url) => transformToMarkdown(html, url, options),
|
|
72
|
-
serialize: (data) => data.markdown,
|
|
73
|
-
deserialize: (cached) => ({
|
|
74
|
-
markdown: cached,
|
|
75
|
-
title: undefined,
|
|
76
|
-
toc: undefined,
|
|
77
|
-
truncated: false,
|
|
78
|
-
}),
|
|
79
|
-
});
|
|
80
|
-
const structuredContent = {
|
|
81
|
-
url: result.url,
|
|
82
|
-
title: result.data.title,
|
|
83
|
-
fetchedAt: result.fetchedAt,
|
|
84
|
-
markdown: result.data.markdown,
|
|
85
|
-
...(result.data.toc && { toc: result.data.toc }),
|
|
86
|
-
cached: result.fromCache,
|
|
87
|
-
...(result.data.truncated && { truncated: result.data.truncated }),
|
|
88
|
-
};
|
|
89
|
-
const jsonOutput = JSON.stringify(structuredContent, result.fromCache ? undefined : null, result.fromCache ? undefined : 2);
|
|
90
|
-
return {
|
|
91
|
-
content: [{ type: 'text', text: jsonOutput }],
|
|
92
|
-
structuredContent,
|
|
93
|
-
};
|
|
70
|
+
return await executeFetchMarkdown(input);
|
|
94
71
|
}
|
|
95
72
|
catch (error) {
|
|
96
73
|
logError('fetch-markdown tool error', error instanceof Error ? error : undefined);
|
|
97
74
|
return handleToolError(error, input.url, 'Failed to fetch markdown');
|
|
98
75
|
}
|
|
99
76
|
}
|
|
77
|
+
async function executeFetchMarkdown(input) {
|
|
78
|
+
const { url } = input;
|
|
79
|
+
if (!url) {
|
|
80
|
+
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
81
|
+
}
|
|
82
|
+
const options = resolveMarkdownOptions(input);
|
|
83
|
+
const transformOptions = { ...options };
|
|
84
|
+
logFetchMarkdownStart(url, transformOptions);
|
|
85
|
+
const { pipeline, inlineResult } = await fetchMarkdownPipeline(url, input, options, transformOptions);
|
|
86
|
+
const inlineError = getInlineErrorResponse(inlineResult, url);
|
|
87
|
+
if (inlineError)
|
|
88
|
+
return inlineError;
|
|
89
|
+
return buildMarkdownResponse(pipeline, inlineResult);
|
|
90
|
+
}
|
|
100
91
|
//# sourceMappingURL=fetch-markdown.tool.js.map
|