@j0hanz/superfetch 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -30
- package/dist/config/formatting.d.ts +9 -0
- package/dist/config/formatting.d.ts.map +1 -0
- package/dist/config/formatting.js +11 -0
- package/dist/config/formatting.js.map +1 -0
- package/dist/config/index.d.ts +11 -2
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +35 -13
- package/dist/config/index.js.map +1 -1
- package/dist/config/types/content.d.ts +107 -0
- package/dist/config/types/content.d.ts.map +1 -0
- package/dist/config/types/content.js +2 -0
- package/dist/config/types/content.js.map +1 -0
- package/dist/config/types/runtime.d.ts +78 -0
- package/dist/config/types/runtime.d.ts.map +1 -0
- package/dist/config/types/runtime.js +2 -0
- package/dist/config/types/runtime.js.map +1 -0
- package/dist/config/types/tools.d.ts +99 -0
- package/dist/config/types/tools.d.ts.map +1 -0
- package/dist/config/types/tools.js +2 -0
- package/dist/config/types/tools.js.map +1 -0
- package/dist/config/types.d.ts +3 -297
- package/dist/config/types.d.ts.map +1 -1
- package/dist/http/auth.d.ts +3 -0
- package/dist/http/auth.d.ts.map +1 -0
- package/dist/http/auth.js +34 -0
- package/dist/http/auth.js.map +1 -0
- package/dist/http/cors.d.ts +8 -0
- package/dist/http/cors.d.ts.map +1 -0
- package/dist/http/cors.js +47 -0
- package/dist/http/cors.js.map +1 -0
- package/dist/http/mcp-routes.d.ts +5 -0
- package/dist/http/mcp-routes.d.ts.map +1 -0
- package/dist/http/mcp-routes.js +110 -0
- package/dist/http/mcp-routes.js.map +1 -0
- package/dist/http/mcp-session.d.ts +12 -0
- package/dist/http/mcp-session.d.ts.map +1 -0
- package/dist/http/mcp-session.js +209 -0
- package/dist/http/mcp-session.js.map +1 -0
- package/dist/http/mcp-validation.d.ts +3 -0
- package/dist/http/mcp-validation.d.ts.map +1 -0
- package/dist/http/mcp-validation.js +34 -0
- package/dist/http/mcp-validation.js.map +1 -0
- package/dist/http/rate-limit.d.ts +13 -0
- package/dist/http/rate-limit.d.ts.map +1 -0
- package/dist/http/rate-limit.js +91 -0
- package/dist/http/rate-limit.js.map +1 -0
- package/dist/http/server.d.ts +4 -0
- package/dist/http/server.d.ts.map +1 -0
- package/dist/http/server.js +183 -0
- package/dist/http/server.js.map +1 -0
- package/dist/http/sessions.d.ts +15 -0
- package/dist/http/sessions.d.ts.map +1 -0
- package/dist/http/sessions.js +64 -0
- package/dist/http/sessions.js.map +1 -0
- package/dist/index.js +26 -341
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +2 -2
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +46 -15
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/resources/cached-content.d.ts.map +1 -1
- package/dist/resources/cached-content.js +104 -44
- package/dist/resources/cached-content.js.map +1 -1
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +77 -69
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +9 -3
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +13 -1
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +89 -16
- package/dist/services/cache.js.map +1 -1
- package/dist/services/context.d.ts +1 -1
- package/dist/services/context.d.ts.map +1 -1
- package/dist/services/context.js +1 -1
- package/dist/services/context.js.map +1 -1
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +122 -87
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher/agents.d.ts +4 -0
- package/dist/services/fetcher/agents.d.ts.map +1 -0
- package/dist/services/fetcher/agents.js +111 -0
- package/dist/services/fetcher/agents.js.map +1 -0
- package/dist/services/fetcher/errors.d.ts +5 -0
- package/dist/services/fetcher/errors.d.ts.map +1 -0
- package/dist/services/fetcher/errors.js +71 -0
- package/dist/services/fetcher/errors.js.map +1 -0
- package/dist/services/fetcher/headers.d.ts +2 -0
- package/dist/services/fetcher/headers.d.ts.map +1 -0
- package/dist/services/fetcher/headers.js +28 -0
- package/dist/services/fetcher/headers.js.map +1 -0
- package/dist/services/fetcher/interceptors.d.ts +10 -0
- package/dist/services/fetcher/interceptors.d.ts.map +1 -0
- package/dist/services/fetcher/interceptors.js +82 -0
- package/dist/services/fetcher/interceptors.js.map +1 -0
- package/dist/services/fetcher/redirects.d.ts +6 -0
- package/dist/services/fetcher/redirects.d.ts.map +1 -0
- package/dist/services/fetcher/redirects.js +67 -0
- package/dist/services/fetcher/redirects.js.map +1 -0
- package/dist/services/fetcher/response.d.ts +5 -0
- package/dist/services/fetcher/response.d.ts.map +1 -0
- package/dist/services/fetcher/response.js +39 -0
- package/dist/services/fetcher/response.js.map +1 -0
- package/dist/services/fetcher/retry-policy.d.ts +28 -0
- package/dist/services/fetcher/retry-policy.d.ts.map +1 -0
- package/dist/services/fetcher/retry-policy.js +138 -0
- package/dist/services/fetcher/retry-policy.js.map +1 -0
- package/dist/services/fetcher.d.ts +2 -1
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +62 -315
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.js +4 -4
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -0
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +55 -35
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts +4 -0
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-links/link-extractor.js +163 -0
- package/dist/tools/handlers/fetch-links/link-extractor.js.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +78 -116
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -13
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +74 -83
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-single.shared.d.ts +26 -0
- package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-single.shared.js +49 -0
- package/dist/tools/handlers/fetch-single.shared.js.map +1 -0
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +82 -54
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls/processor.d.ts +13 -0
- package/dist/tools/handlers/fetch-urls/processor.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls/processor.js +153 -0
- package/dist/tools/handlers/fetch-urls/processor.js.map +1 -0
- package/dist/tools/handlers/fetch-urls/response.d.ts +3 -0
- package/dist/tools/handlers/fetch-urls/response.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls/response.js +58 -0
- package/dist/tools/handlers/fetch-urls/response.js.map +1 -0
- package/dist/tools/handlers/fetch-urls/validation.d.ts +6 -0
- package/dist/tools/handlers/fetch-urls/validation.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls/validation.js +18 -0
- package/dist/tools/handlers/fetch-urls/validation.js.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.js +104 -202
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +36 -237
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/schemas.d.ts +357 -0
- package/dist/tools/schemas.d.ts.map +1 -0
- package/dist/tools/schemas.js +272 -0
- package/dist/tools/schemas.js.map +1 -0
- package/dist/tools/utils/cache-vary.d.ts +3 -0
- package/dist/tools/utils/cache-vary.d.ts.map +1 -0
- package/dist/tools/utils/cache-vary.js +44 -0
- package/dist/tools/utils/cache-vary.js.map +1 -0
- package/dist/tools/utils/common.d.ts +2 -2
- package/dist/tools/utils/common.d.ts.map +1 -1
- package/dist/tools/utils/common.js +5 -1
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/content-transform.d.ts +16 -0
- package/dist/tools/utils/content-transform.d.ts.map +1 -0
- package/dist/tools/utils/content-transform.js +49 -0
- package/dist/tools/utils/content-transform.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +32 -18
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/tools/utils/inline-content.d.ts +11 -0
- package/dist/tools/utils/inline-content.d.ts.map +1 -0
- package/dist/tools/utils/inline-content.js +39 -0
- package/dist/tools/utils/inline-content.js.map +1 -0
- package/dist/tools/utils/markdown-toc.d.ts +3 -0
- package/dist/tools/utils/markdown-toc.d.ts.map +1 -0
- package/dist/tools/utils/markdown-toc.js +35 -0
- package/dist/tools/utils/markdown-toc.js.map +1 -0
- package/dist/tools/utils/tool-response.d.ts +9 -0
- package/dist/tools/utils/tool-response.d.ts.map +1 -0
- package/dist/tools/utils/tool-response.js +19 -0
- package/dist/tools/utils/tool-response.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +51 -28
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +82 -111
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/header-normalizer.d.ts +5 -0
- package/dist/utils/header-normalizer.d.ts.map +1 -0
- package/dist/utils/header-normalizer.js +25 -0
- package/dist/utils/header-normalizer.js.map +1 -0
- package/dist/utils/tool-error-handler.d.ts +1 -0
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +29 -1
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -3
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +98 -18
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +11 -6
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-toc.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/markdown-toc.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAetD,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,EAAE,CAYvD"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { stripMarkdownLinks } from '../../utils/content-cleaner.js';
|
|
2
|
+
/**
 * Turns heading text into a lowercase, hyphen-separated slug.
 *
 * The text is first passed through `stripMarkdownLinks` (imported helper;
 * presumably reduces `[label](url)` to its label — confirm in
 * utils/content-cleaner).
 *
 * @param text Raw heading text.
 * @returns The slug; may be an empty string when nothing slug-worthy remains.
 */
function slugify(text) {
    return stripMarkdownLinks(text)
        // Trim BEFORE whitespace becomes hyphens; trimming afterwards is a
        // no-op and leaves leading/trailing "-" on padded headings.
        .trim()
        .toLowerCase()
        // Keep letters, combining marks and digits from any script (plus "_",
        // whitespace and "-") so non-Latin headings do not collapse to "".
        .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, '')
        .replace(/\s+/g, '-')
        .replace(/--+/g, '-');
}
|
|
11
|
+
/**
 * Builds a table of contents from the ATX headings (`#` … `######`) found at
 * the start of a line in a Markdown string.
 *
 * The document is scanned line by line so that:
 *  - `#` lines inside fenced code blocks (``` or ~~~), such as shell or Python
 *    comments, are not reported as headings;
 *  - a bare `#` line cannot absorb the following line as its heading text.
 *
 * @param markdown Markdown source to scan.
 * @returns The non-null results of `buildTocEntry`, in document order.
 */
export function extractToc(markdown) {
    const headingPattern = /^(#{1,6})\s+(.+)$/;
    // Fence line: up to three spaces of indent, then 3+ backticks or tildes,
    // followed by an optional info string.
    const fencePattern = /^ {0,3}(`{3,}|~{3,})(.*)$/;
    const toc = [];
    // Marker of the currently open fence, or null when outside a code block.
    let openFence = null;
    // Split on the same line terminators that `^`/`$` honour in multiline mode.
    for (const line of markdown.split(/\r\n|[\n\r\u2028\u2029]/)) {
        const fence = fencePattern.exec(line);
        if (openFence !== null) {
            // Inside a code block: only a bare fence of the same character and
            // at least the same length closes it.
            if (fence &&
                fence[1][0] === openFence[0] &&
                fence[1].length >= openFence.length &&
                fence[2].trim() === '') {
                openFence = null;
            }
            continue;
        }
        // A backtick fence whose info string contains a backtick is inline code,
        // not a fence opener.
        if (fence && !(fence[1][0] === '`' && fence[2].includes('`'))) {
            openFence = fence[1];
            continue;
        }
        const match = headingPattern.exec(line);
        const entry = match ? buildTocEntry(match) : null;
        if (entry) {
            toc.push(entry);
        }
    }
    return toc;
}
|
|
22
|
+
/**
 * Converts one heading regex match into a table-of-contents entry.
 *
 * @param match Match array whose group 1 is the run of `#` marks and whose
 *   group 2 is the heading text.
 * @returns `{ level, text, slug }`, or `null` when either group is missing or
 *   the heading text is blank.
 */
function buildTocEntry(match) {
    const hashMarks = match[1];
    // Trim once and derive both `text` and `slug` from the same string, so
    // trailing whitespace cannot leak into the slug as a stray hyphen.
    const headingText = match[2]?.trim();
    if (!hashMarks || !headingText) {
        return null;
    }
    return {
        level: hashMarks.length,
        text: stripMarkdownLinks(headingText),
        slug: slugify(headingText),
    };
}
|
|
35
|
+
//# sourceMappingURL=markdown-toc.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-toc.js","sourceRoot":"","sources":["../../../src/tools/utils/markdown-toc.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AAEpE,SAAS,OAAO,CAAC,IAAY;IAC3B,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAE3C,OAAO,SAAS;SACb,WAAW,EAAE;SACb,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC;SACxB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,MAAM,YAAY,GAAG,qBAAqB,CAAC;IAC3C,MAAM,GAAG,GAAe,EAAE,CAAC;IAE3B,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;QACnC,IAAI,KAAK,EAAE,CAAC;YACV,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,KAAsB;IAC3C,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAEzB,IAAI,CAAC,SAAS,IAAI,CAAC,OAAO,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,IAAI,GAAG,kBAAkB,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAChD,OAAO;QACL,KAAK,EAAE,SAAS,CAAC,MAAM;QACvB,IAAI;QACJ,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC;KACvB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ToolResponseBase } from '../../config/types.js';
|
|
2
|
+
/**
 * Fields copied onto the optional `resource_link` content item emitted by
 * `buildJsonToolResponse`.
 */
export interface ResourceLinkDetails {
|
|
3
|
+
/** Value used for the link's `uri` field. */
readonly uri: string;
|
|
4
|
+
/** Value used for the link's `name` field. */
readonly name: string;
|
|
5
|
+
/** Optional value for the link's `mimeType` field. */
readonly mimeType?: string;
|
|
6
|
+
/** Optional value for the link's `description` field. */
readonly description?: string;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Wraps `structuredContent` in a tool response: a `text` content item holding
 * its JSON serialization, optionally followed by a `resource_link` item, with
 * `structuredContent` passed through as given.
 *
 * @param structuredContent Payload to serialize and return.
 * @param fromCache When true the JSON text is compact; otherwise it is indented by two spaces.
 * @param resourceLink When provided, a `resource_link` item built from it is appended.
 */
export declare function buildJsonToolResponse<T extends Record<string, unknown>>(structuredContent: T, fromCache: boolean, resourceLink?: ResourceLinkDetails): ToolResponseBase;
|
|
9
|
+
//# sourceMappingURL=tool-response.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-response.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/tool-response.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;CAC/B;AAED,wBAAgB,qBAAqB,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACrE,iBAAiB,EAAE,CAAC,EACpB,SAAS,EAAE,OAAO,EAClB,YAAY,CAAC,EAAE,mBAAmB,GACjC,gBAAgB,CAuBlB"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Wraps a structured payload in a tool response.
 *
 * @param structuredContent Payload to serialize; returned unchanged alongside the content items.
 * @param fromCache When truthy the JSON text is compact, otherwise indented by two spaces.
 * @param resourceLink Optional link details; when present a `resource_link` item follows the text item.
 * @returns `{ content, structuredContent }`.
 */
export function buildJsonToolResponse(structuredContent, fromCache, resourceLink) {
    const text = fromCache
        ? JSON.stringify(structuredContent)
        : JSON.stringify(structuredContent, null, 2);
    // Zero or one link items, kept as an array so it can be spread below.
    const linkItems = resourceLink
        ? [
            {
                type: 'resource_link',
                uri: resourceLink.uri,
                name: resourceLink.name,
                mimeType: resourceLink.mimeType,
                description: resourceLink.description,
            },
        ]
        : [];
    return {
        content: [{ type: 'text', text }, ...linkItems],
        structuredContent,
    };
}
|
|
19
|
+
//# sourceMappingURL=tool-response.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-response.js","sourceRoot":"","sources":["../../../src/tools/utils/tool-response.ts"],"names":[],"mappings":"AASA,MAAM,UAAU,qBAAqB,CACnC,iBAAoB,EACpB,SAAkB,EAClB,YAAkC;IAElC,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAC/B,iBAAiB,EACjB,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAC5B,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAC1B,CAAC;IAEF,MAAM,OAAO,GAAG;QACd,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,UAAU,EAAE;QAC3C,GAAG,CAAC,YAAY;YACd,CAAC,CAAC;gBACE;oBACE,IAAI,EAAE,eAAwB;oBAC9B,GAAG,EAAE,YAAY,CAAC,GAAG;oBACrB,IAAI,EAAE,YAAY,CAAC,IAAI;oBACvB,QAAQ,EAAE,YAAY,CAAC,QAAQ;oBAC/B,WAAW,EAAE,YAAY,CAAC,WAAW;iBACtC;aACF;YACH,CAAC,CAAC,EAAE,CAAC;KACR,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC;AACxC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAkE3E,wBAAgB,OAAO,CACrB,MAAM,EAAE,SAAS,iBAAiB,EAAE,EACpC,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CAGR"}
|
|
@@ -1,23 +1,36 @@
|
|
|
1
|
+
import { joinLines } from '../config/formatting.js';
|
|
1
2
|
import { config } from '../config/index.js';
|
|
2
3
|
import { truncateText } from '../utils/sanitizer.js';
|
|
4
|
+
const TEXT_BLOCK_TYPES = new Set([
|
|
5
|
+
'paragraph',
|
|
6
|
+
'heading',
|
|
7
|
+
'code',
|
|
8
|
+
'blockquote',
|
|
9
|
+
]);
|
|
10
|
+
/** Reports whether `block` has a `text` property (own or inherited). */
function isTextBlock(block) {
    return Reflect.has(block, 'text');
}
|
|
13
|
+
/** Reports whether `block.type` is exactly `'list'`. */
function isListBlock(block) {
    const { type } = block;
    return type === 'list';
}
|
|
16
|
+
/**
 * Applies `truncateText` to `block.text`.
 *
 * @returns The same block object when the text is unchanged, otherwise a
 *   shallow copy carrying the truncated text.
 */
function truncateTextBlock(block, maxLength) {
    const shortened = truncateText(block.text, maxLength);
    if (shortened === block.text) {
        return block;
    }
    return { ...block, text: shortened };
}
|
|
20
|
+
/**
 * Applies `truncateText` to every entry of `block.items`.
 *
 * @returns The same block object when no item changed, otherwise a shallow
 *   copy carrying the truncated items.
 */
function truncateListBlock(block, maxLength) {
    const shortenedItems = block.items.map((entry) => truncateText(entry, maxLength));
    const untouched = shortenedItems.every((entry, position) => entry === block.items[position]);
    if (untouched) {
        return block;
    }
    return { ...block, items: shortenedItems };
}
|
|
3
25
|
function truncateBlock(block) {
|
|
4
26
|
const maxLength = config.extraction.maxBlockLength;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const truncated = truncateText(block.text, maxLength);
|
|
11
|
-
return truncated === block.text ? block : { ...block, text: truncated };
|
|
12
|
-
}
|
|
13
|
-
case 'list': {
|
|
14
|
-
const truncatedItems = block.items.map((item) => truncateText(item, maxLength));
|
|
15
|
-
const hasChanges = truncatedItems.some((item, index) => item !== block.items[index]);
|
|
16
|
-
return hasChanges ? { ...block, items: truncatedItems } : block;
|
|
17
|
-
}
|
|
18
|
-
default:
|
|
19
|
-
return block;
|
|
27
|
+
if (TEXT_BLOCK_TYPES.has(block.type) && isTextBlock(block)) {
|
|
28
|
+
return truncateTextBlock(block, maxLength);
|
|
29
|
+
}
|
|
30
|
+
if (isListBlock(block)) {
|
|
31
|
+
return truncateListBlock(block, maxLength);
|
|
20
32
|
}
|
|
33
|
+
return block;
|
|
21
34
|
}
|
|
22
35
|
function serializeBlock(block) {
|
|
23
36
|
try {
|
|
@@ -28,19 +41,14 @@ function serializeBlock(block) {
|
|
|
28
41
|
}
|
|
29
42
|
}
|
|
30
43
|
/**
 * Serializes content blocks, with an optional metadata header, as JSON Lines text.
 *
 * @param blocks Blocks handed to `collectJsonlLines`.
 * @param metadata Optional metadata handed to `collectJsonlLines`.
 * @returns The collected lines combined by `joinLines`.
 */
export function toJsonl(blocks, metadata) {
    return joinLines(collectJsonlLines(blocks, metadata));
}
|
|
47
|
+
function collectJsonlLines(blocks, metadata) {
|
|
31
48
|
const lines = [];
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
type: metadata.type,
|
|
36
|
-
title: metadata.title,
|
|
37
|
-
url: metadata.url,
|
|
38
|
-
};
|
|
39
|
-
lines.push(JSON.stringify(minimalMetadata));
|
|
40
|
-
}
|
|
41
|
-
catch {
|
|
42
|
-
/* skip */
|
|
43
|
-
}
|
|
49
|
+
const header = serializeMetadata(metadata);
|
|
50
|
+
if (header) {
|
|
51
|
+
lines.push(header);
|
|
44
52
|
}
|
|
45
53
|
for (const block of blocks) {
|
|
46
54
|
const serialized = serializeBlock(block);
|
|
@@ -48,6 +56,21 @@ export function toJsonl(blocks, metadata) {
|
|
|
48
56
|
lines.push(serialized);
|
|
49
57
|
}
|
|
50
58
|
}
|
|
51
|
-
return lines
|
|
59
|
+
return lines;
|
|
60
|
+
}
|
|
61
|
+
/**
 * Serializes the `type`, `title` and `url` fields of `metadata` as one JSON string.
 *
 * @returns The JSON text, or `null` when `metadata` is falsy or serialization throws.
 */
function serializeMetadata(metadata) {
    if (metadata) {
        try {
            const { type, title, url } = metadata;
            return JSON.stringify({ type, title, url });
        }
        catch {
            // Best effort: fall through to the shared null result.
        }
    }
    return null;
}
|
|
53
76
|
//# sourceMappingURL=jsonl.transformer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;IAC/B,WAAW;IACX,SAAS;IACT,MAAM;IACN,YAAY;CACb,CAAC,CAAC;AAEH,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,MAAM,IAAI,KAAK,CAAC;AACzB,CAAC;AAED,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;AAC/B,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAmD,EACnD,SAAiB;IAEjB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAsD,EACtD,SAAiB;IAEjB,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;IACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAC7C,CAAC;IACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;AAClE,CAAC;AAED,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,IAAI,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3D,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAwB;IAC9C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAG,iBAAiB,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClD,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,iBAAiB,CACxB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC3C,IAAI,MAAM,EAAE,CAAC;QACX,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACrB,CA
AC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAwB;IACjD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,eAAe,GAAG;YACtB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;SAClB,CAAC;QACF,OAAO,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAkIxD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAa7E"}
|
|
@@ -1,101 +1,62 @@
|
|
|
1
1
|
import TurndownService from 'turndown';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
2
|
+
import { CODE_BLOCK, FRONTMATTER_DELIMITER, joinLines, } from '../config/formatting.js';
|
|
3
|
+
import { detectLanguageFromCode } from '../services/parser.js';
|
|
4
|
+
let turndownInstance = null;
|
|
5
|
+
/**
 * Returns the module-level Turndown instance, creating it on first use.
 */
function getTurndown() {
    if (!turndownInstance) {
        turndownInstance = createTurndownInstance();
    }
    return turndownInstance;
}
|
|
11
|
+
/**
 * Creates a TurndownService configured for ATX headings, fenced code blocks,
 * `_` emphasis and `-` bullets, then registers the noise-removal and
 * fenced-code rules on it.
 */
function createTurndownInstance() {
    const options = {
        headingStyle: 'atx',
        codeBlockStyle: 'fenced',
        emDelimiter: '_',
        bulletListMarker: '-',
    };
    const service = new TurndownService(options);
    // Registration order matches rule priority expectations: noise first.
    for (const register of [addNoiseRule, addFencedCodeRule]) {
        register(service);
    }
    return service;
}
|
|
22
|
+
/**
 * Registers the `removeNoise` rule, which replaces the listed non-content
 * elements with an empty string.
 *
 * @param instance Object exposing `addRule(name, rule)`.
 */
function addNoiseRule(instance) {
    const noiseTags = ['script', 'style', 'noscript', 'nav', 'footer', 'aside', 'iframe'];
    const dropContent = () => '';
    instance.addRule('removeNoise', { filter: noiseTags, replacement: dropContent });
}
|
|
28
|
+
/**
 * Registers the `fencedCodeBlockWithLanguage` rule: nodes accepted by
 * `isFencedCodeBlock` are rendered by `formatFencedCodeBlock`.
 *
 * @param instance Object exposing `addRule(name, rule)`.
 */
function addFencedCodeRule(instance) {
    const rule = {
        filter(node, options) {
            return isFencedCodeBlock(node, options);
        },
        // The pre-rendered content is ignored; output is rebuilt from the node.
        replacement(_content, node) {
            return formatFencedCodeBlock(node);
        },
    };
    instance.addRule('fencedCodeBlockWithLanguage', rule);
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
/^[A-Z]$/,
|
|
29
|
-
/^Panel\s+[A-Z]$/i,
|
|
30
|
-
// Empty structural elements that survive HTML->Markdown conversion
|
|
31
|
-
/^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$/,
|
|
32
|
-
/^[,;:\-–—]+$/,
|
|
33
|
-
/^\[\d+\]$/,
|
|
34
|
-
/^\(\d+\)$/,
|
|
35
|
-
];
|
|
36
|
-
const MULTIPLE_NEWLINES = /\n{3,}/g;
|
|
37
|
-
function isNoiseLine(line) {
|
|
38
|
-
const trimmed = line.trim();
|
|
39
|
-
if (!trimmed)
|
|
34
|
+
function isFencedCodeBlock(node, options) {
|
|
35
|
+
if (options.codeBlockStyle !== 'fenced')
|
|
40
36
|
return false;
|
|
41
|
-
|
|
42
|
-
if (markdownPrefixes.some((prefix) => trimmed.startsWith(prefix))) {
|
|
37
|
+
if (node.nodeName !== 'PRE')
|
|
43
38
|
return false;
|
|
44
|
-
}
|
|
45
|
-
|
|
39
|
+
const { firstChild } = node;
|
|
40
|
+
if (!firstChild)
|
|
41
|
+
return false;
|
|
42
|
+
return firstChild.nodeName === 'CODE';
|
|
46
43
|
}
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
const
|
|
50
|
-
const
|
|
51
|
-
|
|
52
|
-
for (const line of lines) {
|
|
53
|
-
if (line.trim().startsWith(CODE_FENCE)) {
|
|
54
|
-
insideCodeBlock = !insideCodeBlock;
|
|
55
|
-
cleanedLines.push(line);
|
|
56
|
-
continue;
|
|
57
|
-
}
|
|
58
|
-
if (insideCodeBlock) {
|
|
59
|
-
cleanedLines.push(line);
|
|
60
|
-
continue;
|
|
61
|
-
}
|
|
62
|
-
if (!isNoiseLine(line)) {
|
|
63
|
-
cleanedLines.push(line);
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
return cleanedLines.join('\n');
|
|
44
|
+
/**
 * Renders a node's first child as a code block via `CODE_BLOCK.format`,
 * using the child's text content and the language from `resolveCodeLanguage`.
 *
 * @param node Node whose first child holds the code text.
 */
function formatFencedCodeBlock(node) {
    const { firstChild: codeElement } = node;
    const source = codeElement.textContent || '';
    const language = resolveCodeLanguage(codeElement, source);
    return CODE_BLOCK.format(source, language);
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}
|
|
78
|
-
turndown.addRule('fencedCodeBlockWithLanguage', {
|
|
79
|
-
filter: (node, options) => {
|
|
80
|
-
return (options.codeBlockStyle === 'fenced' &&
|
|
81
|
-
node.nodeName === 'PRE' &&
|
|
82
|
-
node.firstChild !== null &&
|
|
83
|
-
node.firstChild.nodeName === 'CODE');
|
|
84
|
-
},
|
|
85
|
-
replacement: (_content, node) => {
|
|
86
|
-
const codeNode = node.firstChild;
|
|
87
|
-
const code = codeNode.textContent || '';
|
|
88
|
-
const className = codeNode.getAttribute('class') ?? '';
|
|
89
|
-
const dataLang = codeNode.getAttribute('data-language') ?? '';
|
|
90
|
-
const languageMatch = /language-(\w+)/.exec(className) ??
|
|
91
|
-
/lang-(\w+)/.exec(className) ??
|
|
92
|
-
/highlight-(\w+)/.exec(className) ??
|
|
93
|
-
/^(\w+)$/.exec(dataLang);
|
|
94
|
-
const language = languageMatch?.[1] ?? detectLanguageFromCode(code) ?? '';
|
|
95
|
-
return `\n\n\`\`\`${language}\n${code.replace(/\n$/, '')}\n\`\`\`\n\n`;
|
|
96
|
-
},
|
|
97
|
-
});
|
|
98
|
-
const YAML_SPECIAL_CHARS = /[:[\]{}"\n\r\t'|>&*!?,#]/;
|
|
50
|
+
/**
 * Determines the language label for a code element.
 *
 * Checks, in order, the `class` attribute for `language-*`, `lang-*` and
 * `highlight-*`, then a single-word `data-language` attribute. Falls back to
 * `detectLanguageFromCode(code)`, and finally to an empty string.
 *
 * @param codeNode Element exposing `getAttribute`.
 * @param code Code text handed to the fallback detector.
 */
function resolveCodeLanguage(codeNode, code) {
    const classAttr = codeNode.getAttribute('class') ?? '';
    const langAttr = codeNode.getAttribute('data-language') ?? '';
    // First pattern to match wins; later ones are not consulted.
    const candidates = [
        [/language-(\w+)/, classAttr],
        [/lang-(\w+)/, classAttr],
        [/highlight-(\w+)/, classAttr],
        [/^(\w+)$/, langAttr],
    ];
    for (const [pattern, subject] of candidates) {
        const hit = pattern.exec(subject);
        if (hit) {
            return hit[1] ?? detectLanguageFromCode(code) ?? '';
        }
    }
    return detectLanguageFromCode(code) ?? '';
}
|
|
59
|
+
const YAML_SPECIAL_CHARS = /[:[\]{}"\r\t'|>&*!?,#]|\n/;
|
|
99
60
|
const YAML_NUMERIC = /^[\d.]+$/;
|
|
100
61
|
const YAML_RESERVED_WORDS = /^(true|false|null|yes|no|on|off)$/i;
|
|
101
62
|
const ESCAPE_PATTERNS = {
|
|
@@ -104,14 +65,18 @@ const ESCAPE_PATTERNS = {
|
|
|
104
65
|
newline: /\n/g,
|
|
105
66
|
tab: /\t/g,
|
|
106
67
|
};
|
|
68
|
+
// Leading characters that make an unquoted YAML value invalid or re-typed:
// "-" (sequence entry / negative number / "---"), "@" and "`" (reserved
// indicators), "%" (directive). A lone "~" is read as null.
const YAML_LEADING_INDICATOR = /^[-@`%]|^~$/;
/**
 * Decides whether a frontmatter value must be wrapped in quotes.
 *
 * Quoting is required when the value is empty, has leading or trailing
 * spaces, contains a character matched by `YAML_SPECIAL_CHARS`, starts with a
 * YAML indicator character, or matches `YAML_NUMERIC` / `YAML_RESERVED_WORDS`
 * and would therefore not be read back as a plain string.
 *
 * @param value Candidate scalar value.
 * @returns `true` when the value should be quoted.
 */
function needsYamlQuotes(value) {
    return (value === '' ||
        value.startsWith(' ') ||
        value.endsWith(' ') ||
        YAML_SPECIAL_CHARS.test(value) ||
        YAML_LEADING_INDICATOR.test(value) ||
        YAML_NUMERIC.test(value) ||
        YAML_RESERVED_WORDS.test(value));
}
|
|
107
78
|
function escapeYamlValue(value) {
|
|
108
|
-
|
|
109
|
-
value.startsWith(' ') ||
|
|
110
|
-
value.endsWith(' ') ||
|
|
111
|
-
value === '' ||
|
|
112
|
-
YAML_NUMERIC.test(value) ||
|
|
113
|
-
YAML_RESERVED_WORDS.test(value);
|
|
114
|
-
if (!requiresQuoting) {
|
|
79
|
+
if (!needsYamlQuotes(value)) {
|
|
115
80
|
return value;
|
|
116
81
|
}
|
|
117
82
|
const escaped = value
|
|
@@ -122,30 +87,36 @@ function escapeYamlValue(value) {
|
|
|
122
87
|
return `"${escaped}"`;
|
|
123
88
|
}
|
|
124
89
|
function createFrontmatter(metadata) {
|
|
125
|
-
const lines = [
|
|
90
|
+
const lines = [FRONTMATTER_DELIMITER];
|
|
126
91
|
if (metadata.title) {
|
|
127
92
|
lines.push(`title: ${escapeYamlValue(metadata.title)}`);
|
|
128
93
|
}
|
|
129
94
|
if (metadata.url) {
|
|
130
95
|
lines.push(`source: ${escapeYamlValue(metadata.url)}`);
|
|
131
96
|
}
|
|
132
|
-
lines.push(
|
|
133
|
-
return lines
|
|
97
|
+
lines.push(FRONTMATTER_DELIMITER);
|
|
98
|
+
return joinLines(lines);
|
|
99
|
+
}
|
|
100
|
+
/**
 * Converts HTML to Markdown with the shared Turndown instance and trims the result.
 */
function convertHtmlToMarkdown(html) {
    const service = getTurndown();
    const markdown = service.turndown(html);
    return markdown.trim();
}
|
|
103
|
+
/**
 * Returns the frontmatter produced by `createFrontmatter`, or an empty string
 * when `metadata` is falsy.
 */
function buildFrontmatterBlock(metadata) {
    if (!metadata) {
        return '';
    }
    return createFrontmatter(metadata);
}
|
|
135
106
|
export function htmlToMarkdown(html, metadata) {
|
|
136
|
-
const frontmatter =
|
|
137
|
-
if (!html
|
|
138
|
-
return frontmatter
|
|
107
|
+
const frontmatter = buildFrontmatterBlock(metadata);
|
|
108
|
+
if (!isValidHtmlInput(html)) {
|
|
109
|
+
return frontmatter;
|
|
139
110
|
}
|
|
140
111
|
try {
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
content = cleanMarkdownContent(content);
|
|
144
|
-
content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
|
|
145
|
-
return frontmatter ? `${frontmatter}\n\n${content}` : content;
|
|
112
|
+
const content = convertHtmlToMarkdown(html);
|
|
113
|
+
return frontmatter ? `${frontmatter}\n${content}` : content;
|
|
146
114
|
}
|
|
147
115
|
catch {
|
|
148
|
-
return frontmatter
|
|
116
|
+
return frontmatter;
|
|
149
117
|
}
|
|
150
118
|
}
|
|
119
|
+
/** Reports whether `html` is a non-empty string. */
function isValidHtmlInput(html) {
    return typeof html === 'string' && html.length > 0;
}
|
|
151
122
|
//# sourceMappingURL=markdown.transformer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,OAAO,EACL,UAAU,EACV,qBAAqB,EACrB,SAAS,GACV,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAE/D,IAAI,gBAAgB,GAA2B,IAAI,CAAC;AAEpD,SAAS,WAAW;IAClB,IAAI,gBAAgB;QAAE,OAAO,gBAAgB,CAAC;IAC9C,gBAAgB,GAAG,sBAAsB,EAAE,CAAC;IAC5C,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,SAAS,sBAAsB;IAC7B,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,WAAW,EAAE,GAAG;QAChB,gBAAgB,EAAE,GAAG;KACtB,CAAC,CAAC;IAEH,YAAY,CAAC,QAAQ,CAAC,CAAC;IACvB,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAE5B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,YAAY,CAAC,QAAyB;IAC7C,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE;QAC9B,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC;QAC3E,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;KACtB,CAAC,CAAC;AACL,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAyB;IAClD,QAAQ,CAAC,OAAO,CAAC,6BAA6B,EAAE;QAC9C,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,CAAC;QAC3D,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC;KAC7D,CAAC,CAAC;AACL,CAAC;AAED,SAAS,iBAAiB,CACxB,IAA0B,EAC1B,OAAgC;IAEhC,IAAI,OAAO,CAAC,cAAc,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IACtD,IAAI,IAAI,CAAC,QAAQ,KAAK,KAAK;QAAE,OAAO,KAAK,CAAC;IAC1C,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IAC5B,IAAI,CAAC,UAAU;QAAE,OAAO,KAAK,CAAC;IAC9B,OAAO,UAAU,CAAC,QAAQ,KAAK,MAAM,CAAC;AACxC,CAAC;AAED,SAAS,qBAAqB,CAAC,IAA0B;IACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAyB,CAAC;IAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,WAAW,IAAI,EAAE,CAAC;IACxC,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IACrD,OAAO,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,mBAAmB,CAAC,QAAqB,EAAE,IAAY;IAC9D,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACvD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;IAE9D,MAAM,aAAa,GACjB,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC;QAChC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;QAC5B,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC;QACjC,SAAS
,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAE3B,OAAO,aAAa,EAAE,CAAC,CAAC,CAAC,IAAI,sBAAsB,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;AACvD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AAEjE,MAAM,eAAe,GAAG;IACtB,SAAS,EAAE,KAAK;IAChB,KAAK,EAAE,IAAI;IACX,OAAO,EAAE,KAAK;IACd,GAAG,EAAE,KAAK;CACF,CAAC;AAEX,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,MAAM,GAAG;QACb,CAAC,KAAa,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QACjD,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QAC/D,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,KAAK,EAAE;QAC/B,CAAC,KAAa,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QAC3C,CAAC,KAAa,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC;KACnD,CAAC;IAEF,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,OAAO,GAAG,KAAK;SAClB,OAAO,CAAC,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1C,OAAO,CAAC,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC;SACrC,OAAO,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC;SACvC,OAAO,CAAC,eAAe,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAEvC,OAAO,IAAI,OAAO,GAAG,CAAC;AACxB,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAa,CAAC,qBAAqB,CAAC,CAAC;IAEhD,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,QAAQ,CAAC,GAAG,EAAE,CAAC;QACjB,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAClC,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,qBAAqB,CAAC,IAAY;IACzC,OAAO,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAC7C,CAAC;AAED,SAAS,qBAAqB,CAAC,QAAwB;IACrD,OAAO,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,MAAM,WAAW,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAEpD,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5B,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAA
G,qBAAqB,CAAC,IAAI,CAAC,CAAC;QAC5C,OAAO,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,KAAK,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,WAAW,CAAC;IACrB,CAAC;AACH,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,OAAO,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export interface HeaderNormalizeOptions {
|
|
2
|
+
readonly sanitizeValue?: (value: string) => string;
|
|
3
|
+
}
|
|
4
|
+
export declare function normalizeHeaders(headers?: Record<string, string>, options?: HeaderNormalizeOptions): Record<string, string> | undefined;
|
|
5
|
+
//# sourceMappingURL=header-normalizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"header-normalizer.d.ts","sourceRoot":"","sources":["../../src/utils/header-normalizer.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;CACpD;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,OAAO,CAAC,EAAE,sBAAsB,GAC/B,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,SAAS,CAwBpC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { config } from '../config/index.js';
|
|
2
|
+
export function normalizeHeaders(headers, options) {
|
|
3
|
+
if (!headers || Object.keys(headers).length === 0) {
|
|
4
|
+
return undefined;
|
|
5
|
+
}
|
|
6
|
+
const sanitizeValue = options?.sanitizeValue ?? ((value) => value);
|
|
7
|
+
const { blockedHeaders } = config.security;
|
|
8
|
+
const normalized = new Headers();
|
|
9
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
10
|
+
if (blockedHeaders.has(key.toLowerCase()))
|
|
11
|
+
continue;
|
|
12
|
+
try {
|
|
13
|
+
normalized.set(key, sanitizeValue(value));
|
|
14
|
+
}
|
|
15
|
+
catch {
|
|
16
|
+
continue;
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
const entries = Array.from(normalized.entries());
|
|
20
|
+
if (entries.length === 0) {
|
|
21
|
+
return undefined;
|
|
22
|
+
}
|
|
23
|
+
return Object.fromEntries(entries);
|
|
24
|
+
}
|
|
25
|
+
//# sourceMappingURL=header-normalizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"header-normalizer.js","sourceRoot":"","sources":["../../src/utils/header-normalizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAM5C,MAAM,UAAU,gBAAgB,CAC9B,OAAgC,EAChC,OAAgC;IAEhC,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;IACnE,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC;IAC3C,MAAM,UAAU,GAAG,IAAI,OAAO,EAAE,CAAC;IAEjC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACnD,IAAI,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;YAAE,SAAS;QACpD,IAAI,CAAC;YACH,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,OAAO,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ToolErrorResponse } from '../config/types.js';
|
|
2
|
+
export declare function normalizeToolErrorCode(code: string): string;
|
|
2
3
|
export declare function createToolErrorResponse(message: string, url: string, code: string): ToolErrorResponse;
|
|
3
4
|
export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string): ToolErrorResponse;
|
|
4
5
|
//# sourceMappingURL=tool-error-handler.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAwB5D,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAK3D;AAED,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACX,iBAAiB,CAanB;AAgBD,wBAAgB,eAAe,CAC7B,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,MAAM,EACX,eAAe,SAAqB,GACnC,iBAAiB,CAgBnB"}
|
|
@@ -1,8 +1,36 @@
|
|
|
1
|
+
import { ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
|
1
2
|
import { FetchError } from '../errors/app-error.js';
|
|
2
3
|
const IS_DEVELOPMENT_WITH_STACK_TRACES = process.env.NODE_ENV === 'development' &&
|
|
3
4
|
process.env.EXPOSE_STACK_TRACES === 'true';
|
|
5
|
+
const MCP_ERROR_CODE_MAP = {
|
|
6
|
+
VALIDATION_ERROR: String(ErrorCode.InvalidParams),
|
|
7
|
+
INVALID_PARAMS: String(ErrorCode.InvalidParams),
|
|
8
|
+
INTERNAL_ERROR: String(ErrorCode.InternalError),
|
|
9
|
+
FETCH_ERROR: String(ErrorCode.InternalError),
|
|
10
|
+
BATCH_ERROR: String(ErrorCode.InternalError),
|
|
11
|
+
PROMISE_REJECTED: String(ErrorCode.InternalError),
|
|
12
|
+
UNKNOWN_ERROR: String(ErrorCode.InternalError),
|
|
13
|
+
};
|
|
14
|
+
const NUMERIC_ERROR_CODE = /^-?\d+$/;
|
|
15
|
+
function isNumericErrorCode(code) {
|
|
16
|
+
return NUMERIC_ERROR_CODE.test(code);
|
|
17
|
+
}
|
|
18
|
+
export function normalizeToolErrorCode(code) {
|
|
19
|
+
if (!code)
|
|
20
|
+
return String(ErrorCode.InternalError);
|
|
21
|
+
if (isNumericErrorCode(code))
|
|
22
|
+
return code;
|
|
23
|
+
if (code.startsWith('HTTP_'))
|
|
24
|
+
return String(ErrorCode.InternalError);
|
|
25
|
+
return MCP_ERROR_CODE_MAP[code] ?? code;
|
|
26
|
+
}
|
|
4
27
|
export function createToolErrorResponse(message, url, code) {
|
|
5
|
-
const structuredContent = {
|
|
28
|
+
const structuredContent = {
|
|
29
|
+
error: message,
|
|
30
|
+
url,
|
|
31
|
+
errorCode: normalizeToolErrorCode(code),
|
|
32
|
+
errorType: code,
|
|
33
|
+
};
|
|
6
34
|
return {
|
|
7
35
|
content: [{ type: 'text', text: JSON.stringify(structuredContent) }],
|
|
8
36
|
structuredContent,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"tool-error-handler.js","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,oCAAoC,CAAC;AAI/D,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,gCAAgC,GACpC,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa;IACtC,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,MAAM,CAAC;AAE7C,MAAM,kBAAkB,GAA2B;IACjD,gBAAgB,EAAE,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC;IACjD,cAAc,EAAE,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC;IAC/C,cAAc,EAAE,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC;IAC/C,WAAW,EAAE,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC;IAC5C,WAAW,EAAE,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC;IAC5C,gBAAgB,EAAE,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC;IACjD,aAAa,EAAE,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC;CAC/C,CAAC;AAEF,MAAM,kBAAkB,GAAG,SAAS,CAAC;AAErC,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,IAAI,CAAC,IAAI;QAAE,OAAO,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IAClD,IAAI,kBAAkB,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,MAAM,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IACrE,OAAO,kBAAkB,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,OAAe,EACf,GAAW,EACX,IAAY;IAEZ,MAAM,iBAAiB,GAAG;QACxB,KAAK,EAAE,OAAO;QACd,GAAG;QACH,SAAS,EAAE,sBAAsB,CAAC,IAAI,CAAC;QACvC,SAAS,EAAE,IAAI;KAChB,CAAC;IAEF,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAAE,CAAC;QACpE,iBAAiB;QACjB,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CACzB,WAAmB,EACnB,KAAY,EACZ,QAAiB;IAEjB,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAE3E,IAAI,gCAAgC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QACpD,OAAO,GAAG,OAAO,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;IACtC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAc,EACd,GAAW,EACX,eAAe,GAAG,kBAAkB;IAEpC,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QACzD,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC;IAED,IAAI,KA
AK,YAAY,KAAK,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC;QAC1E,OAAO,uBAAuB,CAAC,OAAO,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,uBAAuB,CAC5B,GAAG,eAAe,iBAAiB,EACnC,GAAG,EACH,eAAe,CAChB,CAAC;AACJ,CAAC"}
|
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Check if an IP address is in a blocked private range
|
|
3
|
-
*/
|
|
4
1
|
export declare function isBlockedIp(ip: string): boolean;
|
|
5
2
|
export declare function validateAndNormalizeUrl(urlString: string): string;
|
|
6
3
|
export declare function isInternalUrl(url: string, baseUrl: string): boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AA6CA,wBAAgB,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAS/C;AAqFD,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAkBjE;AAED,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAOnE"}
|