@j0hanz/superfetch 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +327 -0
- package/dist/config/index.d.ts +30 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +42 -0
- package/dist/config/index.js.map +1 -0
- package/dist/errors/app-error.d.ts +71 -0
- package/dist/errors/app-error.d.ts.map +1 -0
- package/dist/errors/app-error.js +103 -0
- package/dist/errors/app-error.js.map +1 -0
- package/dist/errors/index.d.ts +2 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +2 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +179 -0
- package/dist/index.js.map +1 -0
- package/dist/middleware/error-handler.d.ts +7 -0
- package/dist/middleware/error-handler.d.ts.map +1 -0
- package/dist/middleware/error-handler.js +37 -0
- package/dist/middleware/error-handler.js.map +1 -0
- package/dist/middleware/rate-limiter.d.ts +33 -0
- package/dist/middleware/rate-limiter.d.ts.map +1 -0
- package/dist/middleware/rate-limiter.js +100 -0
- package/dist/middleware/rate-limiter.js.map +1 -0
- package/dist/prompts/index.d.ts +6 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +81 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/resources/index.d.ts +6 -0
- package/dist/resources/index.d.ts.map +1 -0
- package/dist/resources/index.js +44 -0
- package/dist/resources/index.js.map +1 -0
- package/dist/server.d.ts +8 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +39 -0
- package/dist/server.js.map +1 -0
- package/dist/services/cache.d.ts +16 -0
- package/dist/services/cache.d.ts.map +1 -0
- package/dist/services/cache.js +63 -0
- package/dist/services/cache.js.map +1 -0
- package/dist/services/cache.service.d.ts +52 -0
- package/dist/services/cache.service.d.ts.map +1 -0
- package/dist/services/cache.service.js +113 -0
- package/dist/services/cache.service.js.map +1 -0
- package/dist/services/extractor.d.ts +32 -0
- package/dist/services/extractor.d.ts.map +1 -0
- package/dist/services/extractor.js +97 -0
- package/dist/services/extractor.js.map +1 -0
- package/dist/services/extractor.service.d.ts +18 -0
- package/dist/services/extractor.service.d.ts.map +1 -0
- package/dist/services/extractor.service.js +75 -0
- package/dist/services/extractor.service.js.map +1 -0
- package/dist/services/fetcher.d.ts +9 -0
- package/dist/services/fetcher.d.ts.map +1 -0
- package/dist/services/fetcher.js +100 -0
- package/dist/services/fetcher.js.map +1 -0
- package/dist/services/fetcher.service.d.ts +18 -0
- package/dist/services/fetcher.service.d.ts.map +1 -0
- package/dist/services/fetcher.service.js +122 -0
- package/dist/services/fetcher.service.js.map +1 -0
- package/dist/services/logger.d.ts +5 -0
- package/dist/services/logger.d.ts.map +1 -0
- package/dist/services/logger.js +48 -0
- package/dist/services/logger.js.map +1 -0
- package/dist/services/logger.service.d.ts +5 -0
- package/dist/services/logger.service.d.ts.map +1 -0
- package/dist/services/logger.service.js +57 -0
- package/dist/services/logger.service.js.map +1 -0
- package/dist/services/parser.d.ts +6 -0
- package/dist/services/parser.d.ts.map +1 -0
- package/dist/services/parser.js +152 -0
- package/dist/services/parser.js.map +1 -0
- package/dist/services/parser.service.d.ts +42 -0
- package/dist/services/parser.service.d.ts.map +1 -0
- package/dist/services/parser.service.js +209 -0
- package/dist/services/parser.service.js.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts +20 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.js +91 -0
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +17 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-markdown.tool.js +99 -0
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -0
- package/dist/tools/handlers/fetch-url.tool.d.ts +17 -0
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-url.tool.js +103 -0
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -0
- package/dist/tools/index.d.ts +7 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +83 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +4 -0
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -0
- package/dist/transformers/jsonl.transformer.js +42 -0
- package/dist/transformers/jsonl.transformer.js.map +1 -0
- package/dist/transformers/markdown.transformer.d.ts +4 -0
- package/dist/transformers/markdown.transformer.d.ts.map +1 -0
- package/dist/transformers/markdown.transformer.js +104 -0
- package/dist/transformers/markdown.transformer.js.map +1 -0
- package/dist/types/content.types.d.ts +63 -0
- package/dist/types/content.types.d.ts.map +1 -0
- package/dist/types/content.types.js +2 -0
- package/dist/types/content.types.js.map +1 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +3 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/schemas.d.ts +22 -0
- package/dist/types/schemas.d.ts.map +1 -0
- package/dist/types/schemas.js +5 -0
- package/dist/types/schemas.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +9 -0
- package/dist/utils/sanitizer.d.ts.map +1 -0
- package/dist/utils/sanitizer.js +19 -0
- package/dist/utils/sanitizer.js.map +1 -0
- package/dist/utils/url-validator.d.ts +10 -0
- package/dist/utils/url-validator.d.ts.map +1 -0
- package/dist/utils/url-validator.js +69 -0
- package/dist/utils/url-validator.js.map +1 -0
- package/package.json +80 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { fetchUrlToolHandler, FETCH_URL_TOOL_NAME, FETCH_URL_TOOL_DESCRIPTION, } from './handlers/fetch-url.tool.js';
|
|
3
|
+
import { fetchLinksToolHandler, FETCH_LINKS_TOOL_NAME, FETCH_LINKS_TOOL_DESCRIPTION, } from './handlers/fetch-links.tool.js';
|
|
4
|
+
import { fetchMarkdownToolHandler, FETCH_MARKDOWN_TOOL_NAME, FETCH_MARKDOWN_TOOL_DESCRIPTION, } from './handlers/fetch-markdown.tool.js';
|
|
5
|
+
// Zod schemas for runtime validation - single source of truth
|
|
6
|
+
// Input shape for the fetch-url tool: one Zod validator per accepted argument.
const FetchUrlInputSchema = {
    // Required, non-empty string.
    url: z.string().min(1).describe('The URL to fetch'),
    // Boolean switches that fall back to `true` when omitted.
    extractMainContent: z.boolean().optional().default(true).describe('Use Readability to extract main article content'),
    includeMetadata: z.boolean().optional().default(true).describe('Include page metadata (title, description, etc.)'),
    // Optional positive number; no default is supplied here.
    maxContentLength: z.number().positive().optional().describe('Maximum content length in characters'),
    // One of the two listed formats; 'jsonl' when omitted.
    format: z.enum(['jsonl', 'markdown']).optional().default('jsonl').describe('Output format'),
    // Optional record whose values must be strings.
    customHeaders: z.record(z.string()).optional().describe('Custom HTTP headers for the request'),
};
|
|
33
|
+
// Input shape for the fetch-links tool: one Zod validator per accepted argument.
const FetchLinksInputSchema = {
    // Required, non-empty string.
    url: z.string().min(1).describe('The URL to extract links from'),
    // Both link categories are included unless the caller switches one off.
    includeExternal: z.boolean().optional().default(true).describe('Include external links'),
    includeInternal: z.boolean().optional().default(true).describe('Include internal links'),
};
|
|
46
|
+
// Input shape for the fetch-markdown tool: one Zod validator per accepted argument.
const FetchMarkdownInputSchema = {
    // Required, non-empty string.
    url: z.string().min(1).describe('The URL to fetch'),
    // Boolean switches that fall back to `true` when omitted.
    extractMainContent: z.boolean().optional().default(true).describe('Extract main article content using Readability'),
    includeMetadata: z.boolean().optional().default(true).describe('Include YAML frontmatter metadata'),
};
|
|
59
|
+
/**
 * Registers every tool on the given MCP server via `server.registerTool`.
 * Each tool is registered with its title, description and Zod input schema,
 * in the order fetch-url, fetch-links, fetch-markdown.
 *
 * @param server - object exposing `registerTool(name, definition, callback)`
 */
export function registerTools(server) {
    const toolTable = [
        {
            name: FETCH_URL_TOOL_NAME,
            title: 'Fetch URL',
            description: FETCH_URL_TOOL_DESCRIPTION,
            inputSchema: FetchUrlInputSchema,
            handler: fetchUrlToolHandler,
        },
        {
            name: FETCH_LINKS_TOOL_NAME,
            title: 'Fetch Links',
            description: FETCH_LINKS_TOOL_DESCRIPTION,
            inputSchema: FetchLinksInputSchema,
            handler: fetchLinksToolHandler,
        },
        {
            name: FETCH_MARKDOWN_TOOL_NAME,
            title: 'Fetch Markdown',
            description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
            inputSchema: FetchMarkdownInputSchema,
            handler: fetchMarkdownToolHandler,
        },
    ];
    for (const { name, title, description, inputSchema, handler } of toolTable) {
        // Only the parsed arguments are forwarded to the handler.
        server.registerTool(name, { title, description, inputSchema }, async (args) => handler(args));
    }
}
|
|
83
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,0BAA0B,GAC3B,MAAM,8BAA8B,CAAC;AACtC,OAAO,EACL,qBAAqB,EACrB,qBAAqB,EACrB,4BAA4B,GAC7B,MAAM,gCAAgC,CAAC;AACxC,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,+BAA+B,GAChC,MAAM,mCAAmC,CAAC;AAE3C,8DAA8D;AAC9D,MAAM,mBAAmB,GAAG;IAC1B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACnD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,iDAAiD,CAAC;IAC9D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,kDAAkD,CAAC;IAC/D,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,sCAAsC,CAAC;IACnD,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;SAC3B,QAAQ,EAAE;SACV,OAAO,CAAC,OAAO,CAAC;SAChB,QAAQ,CAAC,eAAe,CAAC;IAC5B,aAAa,EAAE,CAAC;SACb,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SAClB,QAAQ,EAAE;SACV,QAAQ,CAAC,qCAAqC,CAAC;CACnD,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,+BAA+B,CAAC;IAChE,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,wBAAwB,CAAC;IACrC,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,wBAAwB,CAAC;CACtC,CAAC;AAEF,MAAM,wBAAwB,GAAG;IAC/B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACnD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,gDAAgD,CAAC;IAC7D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,mCAAmC,CAAC;CACjD,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,MAAiB;IAC7C,0BAA0B;IAC1B,MAAM,CAAC,YAAY,CACjB,mBAAmB,EACnB;QACE,KAAK,EAAE,WAAW;QAClB,WAAW,EAAE,0BAA0B;QACvC,WAAW,EAAE,mBAAmB;KACjC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAC1C,CAAC;IAEF,4BAA4B;IAC5B,MAAM,CAAC,YAAY,CACjB,qBAAqB,EACrB;QACE,KAAK,EAA
E,aAAa;QACpB,WAAW,EAAE,4BAA4B;QACzC,WAAW,EAAE,qBAAqB;KACnC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAC5C,CAAC;IAEF,+BAA+B;IAC/B,MAAM,CAAC,YAAY,CACjB,wBAAwB,EACxB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EAAE,+BAA+B;QAC5C,WAAW,EAAE,wBAAwB;KACtC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAC/C,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ContentBlockUnion, MetadataBlock } from '../types/index.js';
|
|
2
|
+
/**
 * Serializes content blocks to JSONL: one JSON document per line, joined with
 * "\n". When `metadata` is provided it is emitted as the first line. Text in
 * paragraph, heading, code and list blocks is truncated to the configured
 * maximum block length before serialization.
 */
export declare function toJsonl(blocks: ContentBlockUnion[], metadata?: MetadataBlock): string;
|
|
3
|
+
/**
 * Parses JSONL text back into blocks. Blank lines are ignored; a line that is
 * not valid JSON is logged and skipped instead of aborting the parse.
 */
export declare function fromJsonl(jsonl: string): ContentBlockUnion[];
|
|
4
|
+
//# sourceMappingURL=jsonl.transformer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAuB1E,wBAAgB,OAAO,CAAC,MAAM,EAAE,iBAAiB,EAAE,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAUrF;AAED,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAa5D"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { config } from '../config/index.js';
|
|
2
|
+
import { truncateText } from '../utils/sanitizer.js';
|
|
3
|
+
import { logError } from '../services/logger.js';
|
|
4
|
+
/**
 * Caps the text carried by a block at `config.extraction.maxBlockLength`.
 * Paragraph, heading and code blocks get a truncated `text`; list blocks get
 * each item truncated. A copy is returned in those cases; any other block
 * type is returned unchanged.
 */
function truncateBlock(block) {
    const limit = config.extraction.maxBlockLength;
    const kind = block.type;
    if (kind === 'paragraph' || kind === 'heading' || kind === 'code') {
        return { ...block, text: truncateText(block.text, limit) };
    }
    if (kind === 'list') {
        const shortened = block.items.map((entry) => truncateText(entry, limit));
        return { ...block, items: shortened };
    }
    return block;
}
|
|
20
|
+
/**
 * Serializes blocks as JSONL (one JSON document per line). A truthy
 * `metadata` value becomes the first line; every block is passed through
 * `truncateBlock` before being stringified.
 */
export function toJsonl(blocks, metadata) {
    const headerLines = metadata ? [JSON.stringify(metadata)] : [];
    const blockLines = Array.from(blocks, (entry) => JSON.stringify(truncateBlock(entry)));
    return [...headerLines, ...blockLines].join('\n');
}
|
|
29
|
+
/**
 * Parses JSONL text into an array of blocks. Whitespace-only lines are
 * skipped; a line that fails `JSON.parse` is reported through `logError`
 * and left out of the result.
 */
export function fromJsonl(jsonl) {
    const parsed = [];
    jsonl.split('\n').forEach((rawLine) => {
        if (!rawLine.trim()) {
            return;
        }
        try {
            parsed.push(JSON.parse(rawLine));
        }
        catch (error) {
            logError('Failed to parse JSONL line', error instanceof Error ? error : undefined);
        }
    });
    return parsed;
}
|
|
42
|
+
//# sourceMappingURL=jsonl.transformer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEjD,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,WAAW,CAAC;QACjB,KAAK,SAAS,CAAC;QACf,KAAK,MAAM;YACT,OAAO,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,EAAE,CAAC;QACjE,KAAK,MAAM;YACT,OAAO;gBACL,GAAG,KAAK;gBACR,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;aAChE,CAAC;QACJ;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,MAA2B,EAAE,QAAwB;IAC3E,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,QAAQ;QAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC;IAEnD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,KAAa;IACrC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAwB,EAAE,CAAC;IAEvC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAsB,CAAC,CAAC;QACrD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,QAAQ,CAAC,4BAA4B,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACrF,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ContentBlockUnion, MetadataBlock } from '../types/index.js';
|
|
2
|
+
/**
 * Converts an HTML string to Markdown. When `metadata` is provided, a YAML
 * frontmatter block followed by a blank line is placed before the content.
 */
export declare function htmlToMarkdown(html: string, metadata?: MetadataBlock): string;
|
|
3
|
+
/**
 * Renders content blocks as Markdown, separated by blank lines. When
 * `metadata` is provided, a YAML frontmatter block is placed first. The
 * combined result is trimmed.
 */
export declare function blocksToMarkdown(blocks: ContentBlockUnion[], metadata?: MetadataBlock): string;
|
|
4
|
+
//# sourceMappingURL=markdown.transformer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AA6F1E,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAU7E;AAED,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,iBAAiB,EAAE,EAC3B,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CAcR"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import TurndownService from 'turndown';
|
|
2
|
+
// Shared HTML-to-Markdown converter: ATX headings, fenced code blocks, `_` for emphasis.
const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced', emDelimiter: '_' });
// <script>, <style> and <noscript> elements are replaced with nothing.
turndown.addRule('removeScripts', {
    filter: ['script', 'style', 'noscript'],
    replacement() {
        return '';
    },
});
|
|
11
|
+
// Pre-compiled regex patterns for YAML value escaping (performance optimization)
const YAML_SPECIAL_CHARS = /[:[\]{}"\n\r'|>&*!?,#]/;
// Characters that are only a problem in first position: block-sequence dash,
// explicit sign, the reserved `@` and backtick, directive `%`, and null `~`.
const YAML_LEADING_INDICATOR = /^[-+@`%~]/;
// Leading/trailing whitespace of any kind is lost or misread in a plain scalar.
const YAML_EDGE_WHITESPACE = /^\s|\s$/;
const YAML_NUMERIC = /^[\d.]+$/;
// Numeric spellings YAML_NUMERIC misses: hex, octal, exponent notation, .inf/.nan.
const YAML_NUMERIC_EXTENDED = /^(?:0x[\da-f]+|0o[0-7]+|\d[\d.]*e[-+]?\d+|\.(?:inf|nan))$/i;
const YAML_RESERVED_WORDS = /^(true|false|null|yes|no|on|off)$/i;
const ESCAPE_BACKSLASH = /\\/g;
const ESCAPE_QUOTE = /"/g;
const ESCAPE_NEWLINE = /\n/g;
const ESCAPE_CARRIAGE = /\r/g;
const ESCAPE_TAB = /\t/g;
/**
 * Formats a string so it can be written after `key: ` in YAML frontmatter.
 *
 * The value is returned untouched when it is safe as a plain scalar. It is
 * wrapped in double quotes (with backslash, quote, newline, carriage return
 * and tab escaped) when it is empty, contains a YAML indicator character,
 * starts with an indicator such as `-` or `@`, has whitespace at either end,
 * or would otherwise be read back as a number, boolean or null.
 *
 * @param {string} value - raw metadata value
 * @returns {string} plain or double-quoted YAML scalar
 */
function escapeYamlValue(value) {
    const needsQuoting = value === '' ||
        YAML_SPECIAL_CHARS.test(value) ||
        YAML_LEADING_INDICATOR.test(value) ||
        YAML_EDGE_WHITESPACE.test(value) ||
        YAML_NUMERIC.test(value) ||
        YAML_NUMERIC_EXTENDED.test(value) ||
        YAML_RESERVED_WORDS.test(value);
    if (!needsQuoting) {
        return value;
    }
    // Backslashes must be doubled first so later escapes are not re-escaped.
    const escaped = value
        .replace(ESCAPE_BACKSLASH, '\\\\')
        .replace(ESCAPE_QUOTE, '\\"')
        .replace(ESCAPE_NEWLINE, '\\n')
        .replace(ESCAPE_CARRIAGE, '\\r')
        .replace(ESCAPE_TAB, '\\t');
    return `"${escaped}"`;
}
|
|
37
|
+
/**
 * Builds a YAML frontmatter block delimited by `---` lines. Only truthy
 * metadata fields are written, in the order title, description, author,
 * url, fetched_at, each value passed through `escapeYamlValue`.
 */
function createFrontmatter(metadata) {
    const candidates = [
        ['title', metadata.title],
        ['description', metadata.description],
        ['author', metadata.author],
        ['url', metadata.url],
        ['fetched_at', metadata.fetchedAt],
    ];
    const entries = candidates
        .filter(([, raw]) => raw)
        .map(([key, raw]) => `${key}: ${escapeYamlValue(raw)}`);
    return ['---', ...entries, '---'].join('\n');
}
|
|
52
|
+
/**
 * Renders a table block as a pipe-delimited Markdown table.
 *
 * A header row and `---` separator row are written only when `table.headers`
 * is non-empty; every entry of `table.rows` becomes one line. Cell text is
 * made row-safe: `|` is escaped as `\|` and line breaks are collapsed to a
 * single space, since either would otherwise break the column/row structure.
 *
 * @param table - object with optional `headers` (string[]) and `rows` (string[][])
 * @returns {string} Markdown table, or '' when there is nothing to render
 */
function tableToMarkdown(table) {
    const escapeCell = (cell) => (cell == null ? '' : String(cell))
        .replace(/\|/g, '\\|')
        .replace(/\s*[\r\n]+\s*/g, ' ');
    const toRow = (cells) => `| ${cells.map(escapeCell).join(' | ')} |`;
    const lines = [];
    if (table.headers && table.headers.length > 0) {
        lines.push(toRow(table.headers));
        lines.push(toRow(table.headers.map(() => '---')));
    }
    for (const row of table.rows) {
        lines.push(toRow(row));
    }
    return lines.join('\n');
}
|
|
63
|
+
/**
 * Renders a single content block as Markdown.
 *
 * - metadata: '' (metadata is rendered separately as frontmatter)
 * - heading: `#` markers (level clamped to 1..6) followed by the text
 * - paragraph: the text as-is
 * - list: one item per line, numbered when `ordered`, otherwise `- `
 * - code: fenced block; the fence is lengthened when the text itself
 *   contains a run of three or more backticks, so the block cannot end early
 * - table: delegated to `tableToMarkdown`
 * - image: Markdown image syntax with `alt` (or empty) and `src`
 * - anything else: '' so callers never concatenate "undefined"
 *
 * @param block - content block with a `type` discriminator
 * @returns {string} Markdown for the block
 */
function blockToMarkdown(block) {
    switch (block.type) {
        case 'metadata':
            return '';
        case 'heading': {
            // Markdown only defines six heading levels; a bad level must not throw.
            const level = Math.min(6, Math.max(1, Math.trunc(block.level) || 1));
            return `${'#'.repeat(level)} ${block.text}`;
        }
        case 'paragraph':
            return block.text;
        case 'list':
            return block.items
                .map((item, index) => (block.ordered ? `${index + 1}. ` : '- ') + item)
                .join('\n');
        case 'code': {
            const backtickRuns = block.text.match(/`+/g) || [];
            const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
            const fence = '`'.repeat(Math.max(3, longestRun + 1));
            return `${fence}${block.language || ''}\n${block.text}\n${fence}`;
        }
        case 'table':
            return tableToMarkdown(block);
        case 'image':
            return `![${block.alt || ''}](${block.src})`;
        default:
            return '';
    }
}
|
|
83
|
+
/**
 * Converts HTML to Markdown with the shared Turndown instance. A truthy
 * `metadata` value adds a YAML frontmatter block and a blank line in front.
 */
export function htmlToMarkdown(html, metadata) {
    const preamble = metadata ? `${createFrontmatter(metadata)}\n\n` : '';
    return preamble + turndown.turndown(html);
}
|
|
92
|
+
/**
 * Renders blocks as Markdown. Frontmatter (when `metadata` is truthy) and
 * each rendered block are each followed by a blank line; the concatenated
 * result is trimmed at both ends.
 */
export function blocksToMarkdown(blocks, metadata) {
    const preamble = metadata ? [createFrontmatter(metadata)] : [];
    const rendered = Array.from(blocks, (entry) => blockToMarkdown(entry));
    return [...preamble, ...rendered]
        .map((section) => `${section}\n\n`)
        .join('')
        .trim();
}
|
|
104
|
+
//# sourceMappingURL=markdown.transformer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,WAAW,EAAE,GAAG;CACjB,CAAC,CAAC;AAEH,QAAQ,CAAC,OAAO,CAAC,eAAe,EAAE;IAChC,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;IACvC,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;CACtB,CAAC,CAAC;AAEH,iFAAiF;AACjF,MAAM,kBAAkB,GAAG,wBAAwB,CAAC;AACpD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AACjE,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,MAAM,cAAc,GAAG,KAAK,CAAC;AAC7B,MAAM,eAAe,GAAG,KAAK,CAAC;AAC9B,MAAM,UAAU,GAAG,KAAK,CAAC;AAEzB,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,YAAY,GAChB,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QAC9B,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QACnB,KAAK,KAAK,EAAE;QACZ,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QACxB,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,CAAC,YAAY;QAAE,OAAO,KAAK,CAAC;IAEhC,MAAM,OAAO,GAAG,KAAK;SAClB,OAAO,CAAC,gBAAgB,EAAE,MAAM,CAAC;SACjC,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC;SAC5B,OAAO,CAAC,cAAc,EAAE,KAAK,CAAC;SAC9B,OAAO,CAAC,eAAe,EAAE,KAAK,CAAC;SAC/B,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;IAE9B,OAAO,IAAI,OAAO,GAAG,CAAC;AACxB,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC;IAEtB,IAAI,QAAQ,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5E,IAAI,QAAQ,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,gBAAgB,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;IAC9F,IAAI,QAAQ,CAAC,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC/E,IAAI,QAAQ,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACtE,IAAI,QAAQ,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,eAAe,eAAe,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IAEzF,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,eAAe,CAAC,KAA+C;IACtE,IAAI,QAAQ,GAAG,EAAE,CAAC;IAElB,IAAI,KAAK,CAAC,OA
AO,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9C,QAAQ,IAAI,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC;QACtD,QAAQ,IAAI,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC;IACzE,CAAC;IAED,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,QAAQ,IAAI,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC;IAC9C,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,KAAwB;IAC/C,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,UAAU;YACb,OAAO,EAAE,CAAC;QACZ,KAAK,SAAS;YACZ,OAAO,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC;QACpD,KAAK,WAAW;YACd,OAAO,KAAK,CAAC,IAAI,CAAC;QACpB,KAAK,MAAM;YACT,OAAO,KAAK,CAAC,KAAK;iBACf,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;iBACtE,IAAI,CAAC,IAAI,CAAC,CAAC;QAChB,KAAK,MAAM;YACT,OAAO,KAAK,GAAG,CAAC,KAAK,CAAC,QAAQ,IAAI,EAAE,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC,IAAI,GAAG,OAAO,CAAC;QACtE,KAAK,OAAO;YACV,OAAO,eAAe,CAAC,KAAK,CAAC,CAAC;QAChC,KAAK,OAAO;YACV,OAAO,KAAK,KAAK,CAAC,GAAG,IAAI,EAAE,KAAK,KAAK,CAAC,GAAG,GAAG,CAAC;IACjD,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,IAAI,QAAQ,GAAG,EAAE,CAAC;IAElB,IAAI,QAAQ,EAAE,CAAC;QACb,QAAQ,IAAI,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QACxC,QAAQ,IAAI,MAAM,CAAC;IACrB,CAAC;IAED,QAAQ,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACpC,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,MAA2B,EAC3B,QAAwB;IAExB,IAAI,QAAQ,GAAG,EAAE,CAAC;IAElB,IAAI,QAAQ,EAAE,CAAC;QACb,QAAQ,IAAI,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QACxC,QAAQ,IAAI,MAAM,CAAC;IACrB,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,QAAQ,IAAI,eAAe,CAAC,KAAK,CAAC,CAAC;QACnC,QAAQ,IAAI,MAAM,CAAC;IACrB,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/** Discriminator values used in the `type` field of content blocks. */
export type ContentBlockType = 'metadata' | 'heading' | 'paragraph' | 'list' | 'code' | 'table' | 'image';
|
|
2
|
+
/** Base shape shared by the block interfaces: only the `type` discriminator. */
interface ContentBlock {
    type: ContentBlockType;
}
|
|
5
|
+
/**
 * Page-level metadata. `url` and `fetchedAt` are required; the descriptive
 * fields are optional.
 */
export interface MetadataBlock extends ContentBlock {
    type: 'metadata';
    url: string;
    // NOTE(review): presumably a timestamp string; the format is not visible here.
    fetchedAt: string;
    title?: string;
    description?: string;
    author?: string;
}
|
|
13
|
+
/** Heading block: `level` is the number of `#` markers used when rendered as Markdown. */
export interface HeadingBlock extends ContentBlock {
    type: 'heading';
    text: string;
    level: number;
}
|
|
18
|
+
/** Paragraph block carrying its text. */
export interface ParagraphBlock extends ContentBlock {
    type: 'paragraph';
    text: string;
}
|
|
22
|
+
/** List block: `ordered` selects numbered versus bulleted rendering of `items`. */
export interface ListBlock extends ContentBlock {
    type: 'list';
    items: Array<string>;
    ordered: boolean;
}
|
|
27
|
+
/** Code block: `text` is the code, `language` the optional fence label. */
export interface CodeBlock extends ContentBlock {
    type: 'code';
    text: string;
    language?: string;
}
|
|
32
|
+
/** Table block: `rows` is an array of cell arrays; `headers` is optional. */
export interface TableBlock extends ContentBlock {
    type: 'table';
    rows: Array<Array<string>>;
    headers?: Array<string>;
}
|
|
37
|
+
/** Image block: `src` is required, `alt` text is optional. */
export interface ImageBlock extends ContentBlock {
    type: 'image';
    src: string;
    alt?: string;
}
|
|
42
|
+
/** Union of every concrete block interface, discriminated by the `type` field. */
export type ContentBlockUnion = MetadataBlock | HeadingBlock | ParagraphBlock | ListBlock | CodeBlock | TableBlock | ImageBlock;
|
|
43
|
+
/**
 * Result of article extraction. `content` and `textContent` are required;
 * the remaining fields are optional.
 * NOTE(review): presumably `content` is markup and `textContent` plain text; confirm against the extractor.
 */
export interface ExtractedArticle {
    content: string;
    textContent: string;
    title?: string;
    byline?: string;
    excerpt?: string;
    siteName?: string;
}
|
|
51
|
+
/**
 * Cached content for a URL.
 * NOTE(review): `fetchedAt` / `expiresAt` are presumably timestamp strings; confirm against the cache service.
 */
export interface CacheEntry {
    url: string;
    content: string;
    fetchedAt: string;
    expiresAt: string;
}
|
|
57
|
+
/** A link found on a page, classified as internal or external. */
export interface ExtractedLink {
    href: string;
    text: string;
    type: 'external' | 'internal';
}
|
|
62
|
+
export {};
|
|
63
|
+
//# sourceMappingURL=content.types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content.types.d.ts","sourceRoot":"","sources":["../../src/types/content.types.ts"],"names":[],"mappings":"AACA,MAAM,MAAM,gBAAgB,GACxB,UAAU,GACV,SAAS,GACT,WAAW,GACX,MAAM,GACN,MAAM,GACN,OAAO,GACP,OAAO,CAAC;AAGZ,UAAU,YAAY;IACpB,IAAI,EAAE,gBAAgB,CAAC;CACxB;AAGD,MAAM,WAAW,aAAc,SAAQ,YAAY;IACjD,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,YAAa,SAAQ,YAAY;IAChD,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,cAAe,SAAQ,YAAY;IAClD,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,SAAU,SAAQ,YAAY;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;AAGD,MAAM,WAAW,SAAU,SAAQ,YAAY;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,IAAI,EAAE,OAAO,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,IAAI,EAAE,MAAM,EAAE,EAAE,CAAC;CAClB;AAGD,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,IAAI,EAAE,OAAO,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,MAAM,iBAAiB,GACzB,aAAa,GACb,YAAY,GACZ,cAAc,GACd,SAAS,GACT,SAAS,GACT,UAAU,GACV,UAAU,CAAC;AAGf,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,GAAG,UAAU,CAAC;CAC/B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content.types.js","sourceRoot":"","sources":["../../src/types/content.types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,oBAAoB,CAAC;AACnC,cAAc,cAAc,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,oBAAoB,CAAC;AACnC,cAAc,cAAc,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool input types - used for type safety in tool handlers
|
|
3
|
+
*/
|
|
4
|
+
/** Arguments of the fetch-url tool; only `url` is required. */
export interface FetchUrlInput {
    url: string;
    format?: 'markdown' | 'jsonl';
    maxContentLength?: number;
    extractMainContent?: boolean;
    includeMetadata?: boolean;
    customHeaders?: {
        [name: string]: string;
    };
}
|
|
12
|
+
/** Arguments of the fetch-links tool; only `url` is required. */
export interface FetchLinksInput {
    url: string;
    includeInternal?: boolean;
    includeExternal?: boolean;
}
|
|
17
|
+
/** Arguments of the fetch-markdown tool; only `url` is required. */
export interface FetchMarkdownInput {
    url: string;
    includeMetadata?: boolean;
    extractMainContent?: boolean;
}
|
|
22
|
+
//# sourceMappingURL=schemas.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../src/types/schemas.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,MAAM,CAAC;IACZ,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schemas.js","sourceRoot":"","sources":["../../src/types/schemas.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sanitizes text content by collapsing each run of whitespace into a single space and trimming both ends
|
|
3
|
+
*/
|
|
4
|
+
export declare function sanitizeText(text: string): string;
|
|
5
|
+
/**
|
|
6
|
+
* Truncates text to a maximum length with ellipsis
|
|
7
|
+
*/
|
|
8
|
+
export declare function truncateText(text: string, maxLength: number): string;
|
|
9
|
+
//# sourceMappingURL=sanitizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAKjD;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAKpE"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Normalizes whitespace in text: every run of whitespace (spaces, tabs,
 * line breaks) becomes a single space and both ends are trimmed, so the
 * result is always a single line. No other characters are removed.
 *
 * @param {string} text - raw text
 * @returns {string} whitespace-normalized text
 */
export function sanitizeText(text) {
    // A former second pass (/\n\s*\n/g, "remove empty lines") was dropped: it
    // could never match because this replace already removes every line break.
    return text.replace(/\s+/g, ' ').trim();
}
|
|
10
|
+
/**
 * Truncates text to at most `maxLength` UTF-16 code units.
 *
 * Text that already fits is returned unchanged. Otherwise the text is cut
 * and '...' appended so the total stays within `maxLength`. When `maxLength`
 * is too small to hold the ellipsis, the text is hard-cut with no ellipsis
 * rather than returning something longer than requested. A cut never leaves
 * half of a surrogate pair behind.
 *
 * @param {string} text - text to shorten
 * @param {number} maxLength - maximum length of the result
 * @returns {string} the original or shortened text
 */
export function truncateText(text, maxLength) {
    const ELLIPSIS = '...';
    if (text.length <= maxLength) {
        return text;
    }
    // Cuts at `end`, backing off one unit if that would strand a high surrogate.
    const safeCut = (end) => {
        const lastUnit = text.charCodeAt(end - 1);
        const splitsPair = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
        return text.substring(0, splitsPair ? end - 1 : end);
    };
    if (maxLength < ELLIPSIS.length) {
        return safeCut(Math.max(0, maxLength));
    }
    return safeCut(maxLength - ELLIPSIS.length) + ELLIPSIS;
}
|
|
19
|
+
//# sourceMappingURL=sanitizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,OAAO,IAAI;SACR,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,4CAA4C;SACjE,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC,qBAAqB;SAC/C,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC;AAClD,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validates and normalizes a URL, blocking SSRF attack vectors
|
|
3
|
+
* @throws {UrlValidationError} if URL is invalid or blocked
|
|
4
|
+
*/
|
|
5
|
+
export declare function validateAndNormalizeUrl(urlString: string): string;
|
|
6
|
+
/**
|
|
7
|
+
* Checks if a URL is internal (same domain)
|
|
8
|
+
*/
|
|
9
|
+
export declare function isInternalUrl(url: string, baseUrl: string): boolean;
|
|
10
|
+
//# sourceMappingURL=url-validator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-validator.d.ts","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAgCA;;;GAGG;AACH,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAoCjE;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQnE"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { UrlValidationError } from '../errors/app-error.js';
|
|
2
|
+
// Blocked hosts to prevent SSRF attacks. Entries are compared against the
// normalized hostname: lower-case, no IPv6 brackets, no trailing root dot.
const BLOCKED_HOSTS = new Set([
    'localhost',
    '127.0.0.1',
    '0.0.0.0',
    '::', // IPv6 unspecified address
    '::1',
    '169.254.169.254', // AWS metadata endpoint
    'metadata.google.internal', // GCP metadata
    'metadata.azure.com', // Azure metadata
]);
// Blocked IP patterns (private networks)
const BLOCKED_IP_PATTERNS = [
    /^10\./, // Private Class A
    /^172\.(1[6-9]|2\d|3[01])\./, // Private Class B
    /^192\.168\./, // Private Class C
    /^127\./, // Loopback
    /^0\./, // Current network
    /^169\.254\./, // Link-local
    /^f[cd][0-9a-f]{2}:/i, // IPv6 unique local (whole fc00::/7 range)
    /^fe[89ab][0-9a-f]:/i, // IPv6 link-local (whole fe80::/10 range)
];
// Serialized form of an IPv4-mapped IPv6 address, e.g. "::ffff:7f00:1"
const IPV4_MAPPED_PATTERN = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i;
/**
 * Brings a URL hostname into the form used by the block lists:
 * lower-case, without the brackets the URL parser keeps around IPv6 literals,
 * and without trailing root dots ("localhost." names the same host as "localhost").
 */
function normalizeHostname(hostname) {
    let host = hostname.toLowerCase();
    if (host.startsWith('[') && host.endsWith(']')) {
        host = host.slice(1, -1);
    }
    return host.replace(/\.+$/, '');
}
/**
 * Converts an IPv4-mapped IPv6 address ("::ffff:7f00:1") to dotted-quad
 * notation ("127.0.0.1"). Returns null for any other hostname.
 */
function unmapIpv4(hostname) {
    const match = IPV4_MAPPED_PATTERN.exec(hostname);
    if (match === null) {
        return null;
    }
    const high = Number.parseInt(match[1], 16);
    const low = Number.parseInt(match[2], 16);
    return `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
}
/**
 * Checks if a hostname matches blocked IP patterns.
 * IPv4-mapped IPv6 addresses are judged by the IPv4 address they embed.
 */
function isBlockedIp(hostname) {
    const embedded = unmapIpv4(hostname);
    if (embedded !== null) {
        return BLOCKED_HOSTS.has(embedded) || BLOCKED_IP_PATTERNS.some((pattern) => pattern.test(embedded));
    }
    return BLOCKED_IP_PATTERNS.some((pattern) => pattern.test(hostname));
}
/**
 * Validates and normalizes a URL, blocking SSRF attack vectors.
 *
 * Only http: and https: URLs are accepted. The hostname is normalized
 * (lower-case, IPv6 brackets and trailing dots removed) before it is compared
 * with the blocked host list and the private/loopback/link-local IP patterns.
 * Only the literal hostname is inspected; no DNS resolution is performed here.
 *
 * @param {string} urlString - URL to validate
 * @returns {string} The parsed URL's normalized `href`
 * @throws {UrlValidationError} if URL is invalid or blocked
 */
export function validateAndNormalizeUrl(urlString) {
    let url;
    try {
        url = new URL(urlString);
    }
    catch {
        throw new UrlValidationError(`Invalid URL format`, urlString);
    }
    // Only allow HTTP(S) protocols
    if (url.protocol !== 'http:' && url.protocol !== 'https:') {
        throw new UrlValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`, urlString);
    }
    const hostname = normalizeHostname(url.hostname);
    // Block known internal/metadata hosts
    if (BLOCKED_HOSTS.has(hostname)) {
        throw new UrlValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`, urlString);
    }
    // Block private IP ranges
    if (isBlockedIp(hostname)) {
        throw new UrlValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`, urlString);
    }
    return url.href;
}
|
|
56
|
+
/**
 * Reports whether `url`, resolved against `baseUrl`, points at the same
 * hostname as `baseUrl`. Returns false when either value cannot be parsed.
 */
export function isInternalUrl(url, baseUrl) {
    let target;
    let base;
    try {
        target = new URL(url, baseUrl);
        base = new URL(baseUrl);
    }
    catch {
        // An unparsable link or base can never be considered internal.
        return false;
    }
    return base.hostname === target.hostname;
}
|
|
69
|
+
//# sourceMappingURL=url-validator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-validator.js","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAE5D,wCAAwC;AACxC,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC;IAC5B,WAAW;IACX,WAAW;IACX,SAAS;IACT,KAAK;IACL,iBAAiB,EAAE,wBAAwB;IAC3C,0BAA0B,EAAE,eAAe;IAC3C,oBAAoB,EAAE,iBAAiB;CACxC,CAAC,CAAC;AAEH,yCAAyC;AACzC,MAAM,mBAAmB,GAAsB;IAC7C,OAAO,EAAE,kBAAkB;IAC3B,4BAA4B,EAAE,kBAAkB;IAChD,aAAa,EAAE,kBAAkB;IACjC,QAAQ,EAAE,WAAW;IACrB,MAAM,EAAE,kBAAkB;IAC1B,aAAa,EAAE,aAAa;IAC5B,SAAS,EAAE,oBAAoB;IAC/B,SAAS,EAAE,kBAAkB;CAC9B,CAAC;AAEF;;GAEG;AACH,SAAS,WAAW,CAAC,QAAgB;IACnC,OAAO,mBAAmB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;AACvE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CAAC,SAAiB;IACvD,IAAI,GAAQ,CAAC;IAEb,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IAC3B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,kBAAkB,CAAC,oBAAoB,EAAE,SAAS,CAAC,CAAC;IAChE,CAAC;IAED,+BAA+B;IAC/B,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1D,MAAM,IAAI,kBAAkB,CAC1B,qBAAqB,GAAG,CAAC,QAAQ,qCAAqC,EACtE,SAAS,CACV,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAE5C,sCAAsC;IACtC,IAAI,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,kBAAkB,CAC1B,iBAAiB,QAAQ,kCAAkC,EAC3D,SAAS,CACV,CAAC;IACJ,CAAC;IAED,0BAA0B;IAC1B,IAAI,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,kBAAkB,CAC1B,qBAAqB,QAAQ,+BAA+B,EAC5D,SAAS,CACV,CAAC;IACJ,CAAC;IAED,OAAO,GAAG,CAAC,IAAI,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW,EAAE,OAAe;IACxD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACrC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;QACpC,OAAO,MAAM,CAAC,QAAQ,KAAK,UAAU,CAAC,QAAQ,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@j0hanz/superfetch",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable JSONL format",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"superfetch": "./dist/index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"repository": {
|
|
15
|
+
"type": "git",
|
|
16
|
+
"url": "https://github.com/j0hanz/super-fetch-mcp-server.git"
|
|
17
|
+
},
|
|
18
|
+
"homepage": "https://github.com/j0hanz/super-fetch-mcp-server#readme",
|
|
19
|
+
"bugs": {
|
|
20
|
+
"url": "https://github.com/j0hanz/super-fetch-mcp-server/issues"
|
|
21
|
+
},
|
|
22
|
+
"author": "j0hanz",
|
|
23
|
+
"license": "MIT",
|
|
24
|
+
"keywords": [
|
|
25
|
+
"mcp",
|
|
26
|
+
"mcp-server",
|
|
27
|
+
"web-fetching",
|
|
28
|
+
"content-extraction",
|
|
29
|
+
"readability",
|
|
30
|
+
"jsonl",
|
|
31
|
+
"ai-tools"
|
|
32
|
+
],
|
|
33
|
+
"scripts": {
|
|
34
|
+
"dev": "tsx watch src/index.ts",
|
|
35
|
+
"build": "tsc && shx chmod +x dist/*.js",
|
|
36
|
+
"prepare": "npm run build",
|
|
37
|
+
"prepublishOnly": "npm run build && npm run lint",
|
|
38
|
+
"start": "node dist/index.js",
|
|
39
|
+
"format": "prettier --write .",
|
|
40
|
+
"type-check": "tsc --noEmit",
|
|
41
|
+
"lint": "eslint .",
|
|
42
|
+
"lint:fix": "eslint . --fix",
|
|
43
|
+
"knip": "knip",
|
|
44
|
+
"knip:fix": "knip --fix",
|
|
45
|
+
"test": "echo \"Tests not yet implemented - see Phase 4 roadmap\" && exit 0"
|
|
46
|
+
},
|
|
47
|
+
"dependencies": {
|
|
48
|
+
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
49
|
+
"@mozilla/readability": "^0.6.0",
|
|
50
|
+
"axios": "^1.7.9",
|
|
51
|
+
"cheerio": "^1.0.0",
|
|
52
|
+
"domhandler": "^5.0.3",
|
|
53
|
+
"express": "^4.21.2",
|
|
54
|
+
"jsdom": "^25.0.1",
|
|
55
|
+
"node-cache": "^5.1.2",
|
|
56
|
+
"turndown": "^7.2.0",
|
|
57
|
+
"winston": "^3.19.0",
|
|
58
|
+
"zod": "^3.25.76"
|
|
59
|
+
},
|
|
60
|
+
"devDependencies": {
|
|
61
|
+
"@eslint/js": "^9.39.1",
|
|
62
|
+
"@types/express": "^5.0.0",
|
|
63
|
+
"@types/jsdom": "^21.1.7",
|
|
64
|
+
"@types/node": "^24.10.2",
|
|
65
|
+
"@types/turndown": "^5.0.5",
|
|
66
|
+
"@typescript-eslint/eslint-plugin": "^8.19.1",
|
|
67
|
+
"@typescript-eslint/parser": "^8.19.1",
|
|
68
|
+
"eslint": "^9.39.1",
|
|
69
|
+
"eslint-config-prettier": "^10.1.8",
|
|
70
|
+
"knip": "^5.72.0",
|
|
71
|
+
"prettier": "^3.7.4",
|
|
72
|
+
"shx": "^0.3.4",
|
|
73
|
+
"tsx": "^4.21.0",
|
|
74
|
+
"typescript": "^5.9.3",
|
|
75
|
+
"typescript-eslint": "^8.49.0"
|
|
76
|
+
},
|
|
77
|
+
"engines": {
|
|
78
|
+
"node": ">=18.0.0"
|
|
79
|
+
}
|
|
80
|
+
}
|