@j0hanz/superfetch 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +590 -327
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +251 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +2 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +0 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +13 -47
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +1 -11
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +11 -44
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +0 -3
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +1 -4
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +2 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +9 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +71 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +187 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +6 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +53 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +4 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +30 -36
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +64 -47
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +5 -12
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +7 -4
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +84 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +8 -6
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +5 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +147 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +37 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +116 -2
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +2 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +25 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +240 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +3 -11
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +1 -1
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +4 -3
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
import type { ContentBlockUnion } from '../types
|
|
2
|
-
/**
|
|
3
|
-
* Parses HTML content and extracts semantic blocks
|
|
4
|
-
* @param html - HTML string to parse
|
|
5
|
-
* @returns Array of content blocks (empty array if parsing fails)
|
|
6
|
-
*/
|
|
1
|
+
import type { ContentBlockUnion } from '../config/types.js';
|
|
7
2
|
export declare function parseHtml(html: string): ContentBlockUnion[];
|
|
8
3
|
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAGV,iBAAiB,EAOlB,MAAM,oBAAoB,CAAC;AAsM5B,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CA0C3D"}
|
package/dist/services/parser.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import * as cheerio from 'cheerio';
|
|
2
|
-
import { sanitizeText } from '../utils/sanitizer.js';
|
|
3
2
|
import { config } from '../config/index.js';
|
|
3
|
+
import { cleanCodeBlock, cleanHeading, cleanListItems, cleanParagraph, removeInlineTimestamps, } from '../utils/content-cleaner.js';
|
|
4
|
+
import { detectLanguage } from '../utils/language-detector.js';
|
|
5
|
+
import { sanitizeText } from '../utils/sanitizer.js';
|
|
4
6
|
import { logWarn } from './logger.js';
|
|
5
|
-
// Maximum HTML size to parse (10MB)
|
|
6
7
|
const MAX_HTML_SIZE = 10 * 1024 * 1024;
|
|
7
8
|
function parseHeading($, element) {
|
|
8
|
-
const
|
|
9
|
+
const rawText = sanitizeText($(element).text());
|
|
10
|
+
const text = cleanHeading(rawText);
|
|
9
11
|
if (!text)
|
|
10
12
|
return null;
|
|
11
13
|
return {
|
|
@@ -15,20 +17,25 @@ function parseHeading($, element) {
|
|
|
15
17
|
};
|
|
16
18
|
}
|
|
17
19
|
function parseParagraph($, element) {
|
|
18
|
-
|
|
20
|
+
let rawText = sanitizeText($(element).text());
|
|
21
|
+
// Remove inline timestamps like "13 days ago" from paragraphs
|
|
22
|
+
rawText = removeInlineTimestamps(rawText);
|
|
23
|
+
const text = cleanParagraph(rawText);
|
|
19
24
|
if (!text || text.length < config.extraction.minParagraphLength)
|
|
20
25
|
return null;
|
|
21
26
|
return { type: 'paragraph', text };
|
|
22
27
|
}
|
|
23
28
|
function parseList($, element) {
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
29
|
+
const listItems = $(element).find('li').toArray();
|
|
30
|
+
const rawItems = [];
|
|
31
|
+
// Use for...of instead of .each() to avoid callback overhead
|
|
32
|
+
for (const li of listItems) {
|
|
28
33
|
const text = sanitizeText($(li).text());
|
|
29
34
|
if (text)
|
|
30
|
-
|
|
31
|
-
}
|
|
35
|
+
rawItems.push(text);
|
|
36
|
+
}
|
|
37
|
+
// Clean list items to remove noise
|
|
38
|
+
const items = cleanListItems(rawItems);
|
|
32
39
|
if (items.length === 0)
|
|
33
40
|
return null;
|
|
34
41
|
return {
|
|
@@ -38,14 +45,23 @@ function parseList($, element) {
|
|
|
38
45
|
};
|
|
39
46
|
}
|
|
40
47
|
function parseCode($, element) {
|
|
41
|
-
const
|
|
48
|
+
const rawText = $(element).text().trim();
|
|
49
|
+
const text = cleanCodeBlock(rawText);
|
|
42
50
|
if (!text)
|
|
43
51
|
return null;
|
|
44
|
-
|
|
45
|
-
const
|
|
52
|
+
// Try to get language from class attribute first
|
|
53
|
+
const className = $(element).attr('class') ?? '';
|
|
54
|
+
const dataLang = $(element).attr('data-language') ?? '';
|
|
55
|
+
// Check multiple possible class patterns for language
|
|
56
|
+
const languageMatch = /language-(\w+)/.exec(className) ??
|
|
57
|
+
/lang-(\w+)/.exec(className) ??
|
|
58
|
+
/highlight-(\w+)/.exec(className) ??
|
|
59
|
+
/^(\w+)$/.exec(dataLang);
|
|
60
|
+
// Use detected language from class, or try to detect from content
|
|
61
|
+
const language = languageMatch?.[1] ?? detectLanguage(text);
|
|
46
62
|
return {
|
|
47
63
|
type: 'code',
|
|
48
|
-
language
|
|
64
|
+
language,
|
|
49
65
|
text,
|
|
50
66
|
};
|
|
51
67
|
}
|
|
@@ -53,31 +69,28 @@ function parseTable($, element) {
|
|
|
53
69
|
const headers = [];
|
|
54
70
|
const rows = [];
|
|
55
71
|
const $table = $(element);
|
|
56
|
-
//
|
|
57
|
-
$table.find('thead th, thead td').
|
|
72
|
+
// Use toArray() + for...of instead of .each() callbacks
|
|
73
|
+
const headerCells = $table.find('thead th, thead td').toArray();
|
|
74
|
+
for (const cell of headerCells) {
|
|
58
75
|
headers.push(sanitizeText($(cell).text()));
|
|
59
|
-
}
|
|
76
|
+
}
|
|
60
77
|
if (headers.length === 0) {
|
|
61
|
-
$table
|
|
62
|
-
|
|
63
|
-
.first()
|
|
64
|
-
.find('th, td')
|
|
65
|
-
.each((_, cell) => {
|
|
78
|
+
const firstRowCells = $table.find('tr').first().find('th, td').toArray();
|
|
79
|
+
for (const cell of firstRowCells) {
|
|
66
80
|
headers.push(sanitizeText($(cell).text()));
|
|
67
|
-
}
|
|
81
|
+
}
|
|
68
82
|
}
|
|
69
|
-
// Extract body rows
|
|
70
83
|
const rowsSelector = headers.length > 0 ? 'tbody tr, tr:not(:first)' : 'tbody tr, tr';
|
|
71
|
-
$table.find(rowsSelector).
|
|
84
|
+
const tableRows = $table.find(rowsSelector).toArray();
|
|
85
|
+
for (const row of tableRows) {
|
|
86
|
+
const rowCells = $(row).find('td, th').toArray();
|
|
72
87
|
const cells = [];
|
|
73
|
-
|
|
74
|
-
.find('td, th')
|
|
75
|
-
.each((_, cell) => {
|
|
88
|
+
for (const cell of rowCells) {
|
|
76
89
|
cells.push(sanitizeText($(cell).text()));
|
|
77
|
-
}
|
|
90
|
+
}
|
|
78
91
|
if (cells.length > 0)
|
|
79
92
|
rows.push(cells);
|
|
80
|
-
}
|
|
93
|
+
}
|
|
81
94
|
if (rows.length === 0)
|
|
82
95
|
return null;
|
|
83
96
|
return {
|
|
@@ -93,9 +106,16 @@ function parseImage($, element) {
|
|
|
93
106
|
return {
|
|
94
107
|
type: 'image',
|
|
95
108
|
src,
|
|
96
|
-
alt: $(element).attr('alt')
|
|
109
|
+
alt: $(element).attr('alt') ?? undefined,
|
|
97
110
|
};
|
|
98
111
|
}
|
|
112
|
+
function parseBlockquote($, element) {
|
|
113
|
+
const rawText = sanitizeText($(element).text());
|
|
114
|
+
const text = cleanParagraph(rawText);
|
|
115
|
+
if (!text || text.length < config.extraction.minParagraphLength)
|
|
116
|
+
return null;
|
|
117
|
+
return { type: 'blockquote', text };
|
|
118
|
+
}
|
|
99
119
|
const ELEMENT_PARSERS = {
|
|
100
120
|
h1: parseHeading,
|
|
101
121
|
h2: parseHeading,
|
|
@@ -110,6 +130,7 @@ const ELEMENT_PARSERS = {
|
|
|
110
130
|
code: parseCode,
|
|
111
131
|
table: parseTable,
|
|
112
132
|
img: parseImage,
|
|
133
|
+
blockquote: parseBlockquote,
|
|
113
134
|
};
|
|
114
135
|
function isParseableTag(tag) {
|
|
115
136
|
return tag in ELEMENT_PARSERS;
|
|
@@ -128,6 +149,7 @@ function filterBlocks(blocks) {
|
|
|
128
149
|
case 'paragraph':
|
|
129
150
|
case 'heading':
|
|
130
151
|
case 'code':
|
|
152
|
+
case 'blockquote':
|
|
131
153
|
return block.text.length > 0;
|
|
132
154
|
case 'list':
|
|
133
155
|
return block.items.length > 0;
|
|
@@ -136,40 +158,35 @@ function filterBlocks(blocks) {
|
|
|
136
158
|
}
|
|
137
159
|
});
|
|
138
160
|
}
|
|
139
|
-
/**
|
|
140
|
-
* Parses HTML content and extracts semantic blocks
|
|
141
|
-
* @param html - HTML string to parse
|
|
142
|
-
* @returns Array of content blocks (empty array if parsing fails)
|
|
143
|
-
*/
|
|
144
161
|
export function parseHtml(html) {
|
|
145
|
-
|
|
146
|
-
if (!html || typeof html !== 'string') {
|
|
162
|
+
if (!html || typeof html !== 'string')
|
|
147
163
|
return [];
|
|
148
|
-
|
|
149
|
-
// Size validation to prevent memory issues
|
|
164
|
+
let processedHtml = html;
|
|
150
165
|
if (html.length > MAX_HTML_SIZE) {
|
|
151
166
|
logWarn('HTML content exceeds maximum size, truncating', {
|
|
152
167
|
size: html.length,
|
|
153
168
|
maxSize: MAX_HTML_SIZE,
|
|
154
169
|
});
|
|
155
|
-
|
|
170
|
+
processedHtml = html.substring(0, MAX_HTML_SIZE);
|
|
156
171
|
}
|
|
157
172
|
try {
|
|
158
|
-
const $ = cheerio.load(
|
|
173
|
+
const $ = cheerio.load(processedHtml);
|
|
159
174
|
const blocks = [];
|
|
160
175
|
$('script, style, noscript, iframe, svg').remove();
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
.
|
|
176
|
+
// Use toArray() + for...of instead of .each() to avoid callback overhead
|
|
177
|
+
const elements = $('body')
|
|
178
|
+
.find('h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code:not(pre code), table, img, blockquote')
|
|
179
|
+
.toArray();
|
|
180
|
+
for (const element of elements) {
|
|
164
181
|
try {
|
|
165
182
|
const block = parseElement($, element);
|
|
166
183
|
if (block)
|
|
167
184
|
blocks.push(block);
|
|
168
185
|
}
|
|
169
186
|
catch {
|
|
170
|
-
// Skip
|
|
187
|
+
// Skip element errors
|
|
171
188
|
}
|
|
172
|
-
}
|
|
189
|
+
}
|
|
173
190
|
return filterBlocks(blocks);
|
|
174
191
|
}
|
|
175
192
|
catch (error) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAInC,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAa5C,OAAO,EACL,cAAc,EACd,YAAY,EACZ,cAAc,EACd,cAAc,EACd,sBAAsB,GACvB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEtC,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAEvC,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAChD,MAAM,IAAI,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,OAAO;QACL,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CACrB,CAAa,EACb,OAAgB;IAEhB,IAAI,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC9C,8DAA8D;IAC9D,OAAO,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IACrC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAE7E,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;IAClD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,6DAA6D;IAC7D,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACxC,IAAI,IAAI;YAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,mCAAmC;IACnC,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IACvC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI;QAC/C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IACzC,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IACrC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,iDAAiD;IACjD,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;IAExD,sDAAsD;IACtD,MAAM,aAAa,GACjB,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC;QAChC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;QAC5B,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC;QACjC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAE3B,kEAAkE;IAClE,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC,CAAC,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC;IAE5D,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,QAAQ;QACR,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE1B,wDAAwD;IACxD,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,OAAO,EAAE,CAAC;IAChE,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC7C,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;QACzE,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,cAAc,CAAC;IACnE,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE,CAAC;IAEtD,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;QACjD,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC;QACD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,OAAO;QACL,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,OAAO;QACb,GAAG;QACH,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,SAAS;KACzC,CAAC;AACJ,CAAC;AAED,SAAS,eAAe,CACtB,CAAa,EACb,OAAgB;IAEhB,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAChD,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IACrC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAE7E,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;AACtC,CAAC;AAED,MAAM,eAAe,GAAG;IACtB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,CAAC,EAAE,cAAc;IACjB,EAAE,EAAE,SAAS;IACb,EAAE,EAAE,SAAS;IACb,GAAG,EAAE,SAAS;IACd,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,UAAU;IACjB,GAAG,EAAE,UAAU;IACf,UAAU,EAAE,eAAe;CAI5B,CAAC;AAEF,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG,IAAI,eAAe,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,CAAa,EAAE,IAAa;IAChD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE1E,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,YAAY,CAAC,MAA2B;IAC/C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC7B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,WAAW,CAAC;YACjB,KAAK,SAAS,CAAC;YACf,KAAK,MAAM,CAAC;YACZ,KAAK,YAAY;gBACf,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IAEjD,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,+CAA+C,EAAE;YACvD,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACtC,MAAM,MAAM,GAAwB,EAAE,CAAC;QAEvC,CAAC,CAAC,sCAAsC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEnD,yEAAyE;QACzE,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC;aACvB,IAAI,CACH,oFAAoF,CACrF;aACA,OAAO,EAAE,CAAC;QAEb,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,IAAI,CAAC;gBACH,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBACvC,IAAI,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC;YAAC,MAAM,CAAC;gBACP,sBAAsB;YACxB,CAAC;QACH,CAAC;QAED,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,sBAAsB,EAAE;YAC9B,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;YAC/D,UAAU,EAAE,IAAI,CAAC,MAAM;SACxB,CAAC,CAAC;QACH,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC"}
|
|
@@ -1,24 +1,17 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ExtractedLink, FetchLinksInput } from '../../config/types.js';
|
|
2
2
|
export declare const FETCH_LINKS_TOOL_NAME = "fetch-links";
|
|
3
|
-
export declare const FETCH_LINKS_TOOL_DESCRIPTION = "Extracts all hyperlinks from a webpage with anchor text and type classification";
|
|
4
|
-
|
|
5
|
-
* Tool handler for extracting links from a URL
|
|
6
|
-
*/
|
|
7
|
-
export declare function fetchLinksToolHandler(input: FetchLinksInput): Promise<import("../../utils/tool-error-handler.js").ToolErrorResponse | {
|
|
3
|
+
export declare const FETCH_LINKS_TOOL_DESCRIPTION = "Extracts all hyperlinks from a webpage with anchor text and type classification. Supports filtering, image links, and link limits.";
|
|
4
|
+
export declare function fetchLinksToolHandler(input: FetchLinksInput): Promise<import("../../config/types.js").ToolErrorResponse | {
|
|
8
5
|
content: {
|
|
9
6
|
type: "text";
|
|
10
7
|
text: string;
|
|
11
8
|
}[];
|
|
12
9
|
structuredContent: {
|
|
10
|
+
truncated?: true;
|
|
11
|
+
filtered?: number;
|
|
13
12
|
url: string;
|
|
14
13
|
linkCount: number;
|
|
15
14
|
links: ExtractedLink[];
|
|
16
15
|
};
|
|
17
|
-
} | {
|
|
18
|
-
content: {
|
|
19
|
-
type: "text";
|
|
20
|
-
text: string;
|
|
21
|
-
}[];
|
|
22
|
-
structuredContent?: undefined;
|
|
23
16
|
}>;
|
|
24
17
|
//# sourceMappingURL=fetch-links.tool.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,aAAa,EAEb,eAAe,EAEhB,MAAM,uBAAuB,CAAC;AAW/B,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,uIAC6F,CAAC;AAuFvI,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,eAAe;;;;;;;;;;;;GAiEjE"}
|
|
@@ -1,104 +1,129 @@
|
|
|
1
|
-
import { validateAndNormalizeUrl, isInternalUrl, } from '../../utils/url-validator.js';
|
|
2
|
-
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
|
-
import * as cache from '../../services/cache.js';
|
|
4
1
|
import * as cheerio from 'cheerio';
|
|
5
|
-
import { logError } from '../../services/logger.js';
|
|
2
|
+
import { logDebug, logError } from '../../services/logger.js';
|
|
6
3
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
4
|
+
import { isInternalUrl } from '../../utils/url-validator.js';
|
|
5
|
+
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
7
6
|
export const FETCH_LINKS_TOOL_NAME = 'fetch-links';
|
|
8
|
-
export const FETCH_LINKS_TOOL_DESCRIPTION = 'Extracts all hyperlinks from a webpage with anchor text and type classification';
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
export const FETCH_LINKS_TOOL_DESCRIPTION = 'Extracts all hyperlinks from a webpage with anchor text and type classification. Supports filtering, image links, and link limits.';
|
|
8
|
+
function tryResolveUrl(href, baseUrl) {
|
|
9
|
+
try {
|
|
10
|
+
return new URL(href, baseUrl).href;
|
|
11
|
+
}
|
|
12
|
+
catch {
|
|
13
|
+
return null;
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
function shouldIncludeLink(type, url, options, seen) {
|
|
17
|
+
if (seen.has(url))
|
|
18
|
+
return false;
|
|
19
|
+
if (options.filterPattern && !options.filterPattern.test(url))
|
|
20
|
+
return false;
|
|
21
|
+
if (type === 'internal' && !options.includeInternal)
|
|
22
|
+
return false;
|
|
23
|
+
if (type === 'external' && !options.includeExternal)
|
|
24
|
+
return false;
|
|
25
|
+
return true;
|
|
26
|
+
}
|
|
27
|
+
function extractLinks(html, baseUrl, options) {
|
|
13
28
|
const $ = cheerio.load(html);
|
|
14
29
|
const links = [];
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
|
|
30
|
+
const seen = new Set();
|
|
31
|
+
let filtered = 0;
|
|
32
|
+
$('a[href]').each((_, el) => {
|
|
33
|
+
const href = $(el).attr('href');
|
|
34
|
+
if (!href || href.startsWith('#') || href.startsWith('javascript:'))
|
|
35
|
+
return;
|
|
36
|
+
const url = tryResolveUrl(href, baseUrl);
|
|
37
|
+
if (!url)
|
|
38
|
+
return;
|
|
39
|
+
const type = isInternalUrl(url, baseUrl)
|
|
40
|
+
? 'internal'
|
|
41
|
+
: 'external';
|
|
42
|
+
if (!shouldIncludeLink(type, url, options, seen)) {
|
|
43
|
+
if (!seen.has(url))
|
|
44
|
+
filtered++;
|
|
21
45
|
return;
|
|
22
46
|
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
47
|
+
seen.add(url);
|
|
48
|
+
links.push({ href: url, text: $(el).text().trim() || url, type });
|
|
49
|
+
});
|
|
50
|
+
if (options.includeImages) {
|
|
51
|
+
$('img[src]').each((_, el) => {
|
|
52
|
+
const src = $(el).attr('src');
|
|
53
|
+
if (!src || src.startsWith('data:'))
|
|
27
54
|
return;
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const type = isInternalUrl(absoluteUrl, baseUrl)
|
|
31
|
-
? 'internal'
|
|
32
|
-
: 'external';
|
|
33
|
-
// Filter based on options
|
|
34
|
-
if (type === 'internal' && !options.includeInternal)
|
|
55
|
+
const url = tryResolveUrl(src, baseUrl);
|
|
56
|
+
if (!url)
|
|
35
57
|
return;
|
|
36
|
-
if (
|
|
58
|
+
if (!shouldIncludeLink('image', url, options, seen)) {
|
|
59
|
+
if (!seen.has(url))
|
|
60
|
+
filtered++;
|
|
37
61
|
return;
|
|
62
|
+
}
|
|
63
|
+
seen.add(url);
|
|
38
64
|
links.push({
|
|
39
|
-
href:
|
|
40
|
-
text:
|
|
41
|
-
type,
|
|
65
|
+
href: url,
|
|
66
|
+
text: $(el).attr('alt')?.trim() ?? url,
|
|
67
|
+
type: 'image',
|
|
42
68
|
});
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
const truncated = options.maxLinks ? links.length > options.maxLinks : false;
|
|
72
|
+
const resultLinks = truncated ? links.slice(0, options.maxLinks) : links;
|
|
73
|
+
return {
|
|
74
|
+
links: resultLinks,
|
|
75
|
+
linkCount: resultLinks.length,
|
|
76
|
+
filtered,
|
|
77
|
+
truncated,
|
|
78
|
+
};
|
|
49
79
|
}
|
|
50
|
-
/**
|
|
51
|
-
* Tool handler for extracting links from a URL
|
|
52
|
-
*/
|
|
53
80
|
export async function fetchLinksToolHandler(input) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
// Check cache first
|
|
62
|
-
if (cacheKey) {
|
|
63
|
-
const cached = cache.get(cacheKey);
|
|
64
|
-
if (cached) {
|
|
65
|
-
// Parse the cached content to return as structuredContent
|
|
66
|
-
try {
|
|
67
|
-
const structuredContent = JSON.parse(cached.content);
|
|
68
|
-
return {
|
|
69
|
-
content: [{ type: 'text', text: cached.content }],
|
|
70
|
-
structuredContent,
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
catch {
|
|
74
|
-
return {
|
|
75
|
-
content: [{ type: 'text', text: cached.content }],
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
}
|
|
81
|
+
if (!input.url) {
|
|
82
|
+
return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
|
|
83
|
+
}
|
|
84
|
+
let filterPattern;
|
|
85
|
+
if (input.filterPattern) {
|
|
86
|
+
try {
|
|
87
|
+
filterPattern = new RegExp(input.filterPattern, 'i');
|
|
79
88
|
}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if (!html) {
|
|
83
|
-
return createToolErrorResponse('No content received from URL', url, 'EMPTY_CONTENT');
|
|
89
|
+
catch {
|
|
90
|
+
return createToolErrorResponse(`Invalid filter pattern: ${input.filterPattern}`, input.url, 'VALIDATION_ERROR');
|
|
84
91
|
}
|
|
85
|
-
|
|
86
|
-
|
|
92
|
+
}
|
|
93
|
+
try {
|
|
94
|
+
const options = {
|
|
87
95
|
includeInternal: input.includeInternal ?? true,
|
|
88
96
|
includeExternal: input.includeExternal ?? true,
|
|
97
|
+
includeImages: input.includeImages ?? false,
|
|
98
|
+
maxLinks: input.maxLinks,
|
|
99
|
+
filterPattern,
|
|
100
|
+
};
|
|
101
|
+
logDebug('Extracting links', {
|
|
102
|
+
url: input.url,
|
|
103
|
+
...options,
|
|
104
|
+
filterPattern: input.filterPattern,
|
|
105
|
+
});
|
|
106
|
+
const result = await executeFetchPipeline({
|
|
107
|
+
url: input.url,
|
|
108
|
+
cacheNamespace: 'links',
|
|
109
|
+
customHeaders: input.customHeaders,
|
|
110
|
+
retries: input.retries,
|
|
111
|
+
transform: (html, url) => extractLinks(html, url, options),
|
|
89
112
|
});
|
|
90
113
|
const structuredContent = {
|
|
91
|
-
url,
|
|
92
|
-
linkCount:
|
|
93
|
-
links,
|
|
114
|
+
url: result.url,
|
|
115
|
+
linkCount: result.data.linkCount,
|
|
116
|
+
links: result.data.links,
|
|
117
|
+
...(result.data.filtered > 0 && { filtered: result.data.filtered }),
|
|
118
|
+
...(result.data.truncated && { truncated: result.data.truncated }),
|
|
94
119
|
};
|
|
95
|
-
const outputText = JSON.stringify(structuredContent, null, 2);
|
|
96
|
-
// Cache the result
|
|
97
|
-
if (cacheKey) {
|
|
98
|
-
cache.set(cacheKey, outputText);
|
|
99
|
-
}
|
|
100
120
|
return {
|
|
101
|
-
content: [
|
|
121
|
+
content: [
|
|
122
|
+
{
|
|
123
|
+
type: 'text',
|
|
124
|
+
text: JSON.stringify(structuredContent, null, 2),
|
|
125
|
+
},
|
|
126
|
+
],
|
|
102
127
|
structuredContent,
|
|
103
128
|
};
|
|
104
129
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,
|
|
1
|
+
{"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AASnC,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAE9D,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,MAAM,CAAC,MAAM,qBAAqB,GAAG,aAAa,CAAC;AACnD,MAAM,CAAC,MAAM,4BAA4B,GACvC,oIAAoI,CAAC;AAIvI,SAAS,aAAa,CAAC,IAAY,EAAE,OAAe;IAClD,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CACxB,IAAc,EACd,GAAW,EACX,OAA4B,EAC5B,IAAiB;IAEjB,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,OAAO,CAAC,aAAa,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAC5E,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;QAAE,OAAO,KAAK,CAAC;IAClE,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;QAAE,OAAO,KAAK,CAAC;IAClE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,YAAY,CACnB,IAAY,EACZ,OAAe,EACf,OAA4B;IAE5B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAAE,OAAO;QAE5E,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACzC,IAAI,CAAC,GAAG;YAAE,OAAO;QAEjB,MAAM,IAAI,GAAa,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC;YAChD,CAAC,CAAC,UAAU;YACZ,CAAC,CAAC,UAAU,CAAC;QACf,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,QAAQ,EAAE,CAAC;YAC/B,OAAO;QACT,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC9B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,OAAO;YAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACxC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;gBACpD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,QAAQ,EAAE,CAAC;gBAC/B,OAAO;YACT,CAAC;YAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,KAAK,CAAC,IAAI,CAAC;gBACT,IAAI,EAAE,GAAG;gBACT,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG;gBACtC,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAC7E,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEzE,OAAO;QACL,KAAK,EAAE,WAAW;QAClB,SAAS,EAAE,WAAW,CAAC,MAAM;QAC7B,QAAQ;QACR,SAAS;KACV,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAsB;IAChE,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,aAAiC,CAAC;IACtC,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC;QACxB,IAAI,CAAC;YACH,aAAa,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;QACvD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,uBAAuB,CAC5B,2BAA2B,KAAK,CAAC,aAAa,EAAE,EAChD,KAAK,CAAC,GAAG,EACT,kBAAkB,CACnB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAwB;YACnC,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,KAAK;YAC3C,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,aAAa;SACd,CAAC;QAEF,QAAQ,CAAC,kBAAkB,EAAE;YAC3B,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,GAAG,OAAO;YACV,aAAa,EAAE,KAAK,CAAC,aAAa;SACnC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAuB;YAC9D,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,OAAO;YACvB,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC;SAC3D,CAAC,CAAC;QAEH,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS;YAChC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnE,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;SACnE,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;iBACjD;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,wBAAwB,EACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,yBAAyB,CAAC,CAAC;IACtE,CAAC;AACH,CAAC"}
|
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
import type { FetchMarkdownInput } from '../../types
|
|
1
|
+
import type { FetchMarkdownInput, TocEntry } from '../../config/types.js';
|
|
2
2
|
export declare const FETCH_MARKDOWN_TOOL_NAME = "fetch-markdown";
|
|
3
|
-
export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter";
|
|
4
|
-
export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<import("../../
|
|
3
|
+
export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter, table of contents, and content length limits";
|
|
4
|
+
export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<import("../../config/types.js").ToolErrorResponse | {
|
|
5
5
|
content: {
|
|
6
6
|
type: "text";
|
|
7
7
|
text: string;
|
|
8
8
|
}[];
|
|
9
9
|
structuredContent: {
|
|
10
|
-
|
|
10
|
+
truncated?: true;
|
|
11
11
|
cached: boolean;
|
|
12
|
+
toc?: TocEntry[];
|
|
13
|
+
url: string;
|
|
14
|
+
title: string | undefined;
|
|
12
15
|
fetchedAt: string;
|
|
13
16
|
markdown: string;
|
|
14
17
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAElB,QAAQ,EAET,MAAM,uBAAuB,CAAC;AAe/B,eAAO,MAAM,wBAAwB,mBAAmB,CAAC;AACzD,eAAO,MAAM,+BAA+B,uIAC0F,CAAC;AAuEvI,wBAAsB,wBAAwB,CAAC,KAAK,EAAE,kBAAkB;;;;;;;;;;;;;;GA4DvE"}
|