@j0hanz/superfetch 1.2.5 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/README.md +131 -156
  2. package/dist/config/auth-config.d.ts +16 -0
  3. package/dist/config/auth-config.js +53 -0
  4. package/dist/config/constants.d.ts +11 -13
  5. package/dist/config/constants.js +1 -3
  6. package/dist/config/env-parsers.d.ts +7 -0
  7. package/dist/config/env-parsers.js +84 -0
  8. package/dist/config/formatting.d.ts +2 -2
  9. package/dist/config/index.d.ts +47 -53
  10. package/dist/config/index.js +35 -64
  11. package/dist/config/types/content.d.ts +1 -49
  12. package/dist/config/types/runtime.d.ts +8 -16
  13. package/dist/config/types/tools.d.ts +2 -28
  14. package/dist/http/accept-policy.d.ts +3 -0
  15. package/dist/http/accept-policy.js +45 -0
  16. package/dist/http/async-handler.d.ts +2 -0
  17. package/dist/http/async-handler.js +5 -0
  18. package/dist/http/auth-introspection.d.ts +2 -0
  19. package/dist/http/auth-introspection.js +141 -0
  20. package/dist/http/auth-static.d.ts +2 -0
  21. package/dist/http/auth-static.js +23 -0
  22. package/dist/http/auth.d.ts +3 -2
  23. package/dist/http/auth.js +254 -23
  24. package/dist/http/cors.d.ts +6 -6
  25. package/dist/http/cors.js +7 -42
  26. package/dist/http/download-routes.d.ts +0 -12
  27. package/dist/http/download-routes.js +21 -58
  28. package/dist/http/host-allowlist.d.ts +3 -0
  29. package/dist/http/host-allowlist.js +117 -0
  30. package/dist/http/jsonrpc-http.d.ts +2 -0
  31. package/dist/http/jsonrpc-http.js +10 -0
  32. package/dist/http/mcp-routes.d.ts +8 -3
  33. package/dist/http/mcp-routes.js +137 -31
  34. package/dist/http/mcp-session-eviction.d.ts +3 -0
  35. package/dist/http/mcp-session-eviction.js +24 -0
  36. package/dist/http/mcp-session-helpers.d.ts +0 -1
  37. package/dist/http/mcp-session-helpers.js +1 -1
  38. package/dist/http/mcp-session-init.d.ts +7 -0
  39. package/dist/http/mcp-session-init.js +94 -0
  40. package/dist/http/mcp-session-slots.d.ts +17 -0
  41. package/dist/http/mcp-session-slots.js +55 -0
  42. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  43. package/dist/http/mcp-session-transport-init.js +41 -0
  44. package/dist/http/mcp-session-transport.d.ts +7 -0
  45. package/dist/http/mcp-session-transport.js +57 -0
  46. package/dist/http/mcp-session-types.d.ts +5 -0
  47. package/dist/http/mcp-session-types.js +1 -0
  48. package/dist/http/mcp-session.d.ts +9 -9
  49. package/dist/http/mcp-session.js +15 -137
  50. package/dist/http/mcp-sessions.d.ts +43 -0
  51. package/dist/http/mcp-sessions.js +392 -0
  52. package/dist/http/mcp-validation.d.ts +1 -0
  53. package/dist/http/mcp-validation.js +11 -10
  54. package/dist/http/protocol-policy.d.ts +2 -0
  55. package/dist/http/protocol-policy.js +31 -0
  56. package/dist/http/rate-limit.js +7 -4
  57. package/dist/http/server-config.d.ts +1 -0
  58. package/dist/http/server-config.js +40 -0
  59. package/dist/http/server-middleware.d.ts +7 -9
  60. package/dist/http/server-middleware.js +9 -70
  61. package/dist/http/server-shutdown.d.ts +4 -0
  62. package/dist/http/server-shutdown.js +43 -0
  63. package/dist/http/server.d.ts +10 -0
  64. package/dist/http/server.js +546 -61
  65. package/dist/http/session-cleanup.js +8 -5
  66. package/dist/middleware/error-handler.d.ts +1 -1
  67. package/dist/middleware/error-handler.js +32 -33
  68. package/dist/resources/cached-content-params.d.ts +5 -0
  69. package/dist/resources/cached-content-params.js +36 -0
  70. package/dist/resources/cached-content.js +67 -125
  71. package/dist/resources/index.js +0 -82
  72. package/dist/server.js +50 -29
  73. package/dist/services/cache-events.d.ts +8 -0
  74. package/dist/services/cache-events.js +19 -0
  75. package/dist/services/cache-keys.d.ts +7 -0
  76. package/dist/services/cache-keys.js +57 -0
  77. package/dist/services/cache.d.ts +4 -9
  78. package/dist/services/cache.js +77 -139
  79. package/dist/services/context.d.ts +0 -1
  80. package/dist/services/context.js +0 -7
  81. package/dist/services/extractor.js +55 -116
  82. package/dist/services/fetcher/agents.d.ts +2 -2
  83. package/dist/services/fetcher/agents.js +35 -96
  84. package/dist/services/fetcher/dns-selection.d.ts +2 -0
  85. package/dist/services/fetcher/dns-selection.js +72 -0
  86. package/dist/services/fetcher/interceptors.d.ts +0 -22
  87. package/dist/services/fetcher/interceptors.js +18 -32
  88. package/dist/services/fetcher/redirects.js +16 -7
  89. package/dist/services/fetcher/response.js +79 -34
  90. package/dist/services/fetcher.d.ts +22 -3
  91. package/dist/services/fetcher.js +544 -44
  92. package/dist/services/fifo-queue.d.ts +8 -0
  93. package/dist/services/fifo-queue.js +25 -0
  94. package/dist/services/logger.js +2 -2
  95. package/dist/services/metadata-collector.d.ts +1 -9
  96. package/dist/services/metadata-collector.js +71 -2
  97. package/dist/services/transform-worker-pool.d.ts +4 -14
  98. package/dist/services/transform-worker-pool.js +177 -129
  99. package/dist/services/transform-worker-types.d.ts +32 -0
  100. package/dist/services/transform-worker-types.js +14 -0
  101. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
  102. package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
  103. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -22
  104. package/dist/tools/handlers/fetch-single.shared.js +175 -89
  105. package/dist/tools/handlers/fetch-url.tool.d.ts +7 -1
  106. package/dist/tools/handlers/fetch-url.tool.js +84 -119
  107. package/dist/tools/index.js +21 -40
  108. package/dist/tools/schemas.d.ts +1 -51
  109. package/dist/tools/schemas.js +1 -107
  110. package/dist/tools/utils/cached-markdown.d.ts +5 -0
  111. package/dist/tools/utils/cached-markdown.js +46 -0
  112. package/dist/tools/utils/content-shaping.d.ts +4 -0
  113. package/dist/tools/utils/content-shaping.js +67 -0
  114. package/dist/tools/utils/content-transform.d.ts +5 -17
  115. package/dist/tools/utils/content-transform.js +134 -114
  116. package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
  117. package/dist/tools/utils/fetch-pipeline.js +57 -63
  118. package/dist/tools/utils/frontmatter.d.ts +3 -0
  119. package/dist/tools/utils/frontmatter.js +73 -0
  120. package/dist/tools/utils/inline-content.d.ts +1 -2
  121. package/dist/tools/utils/inline-content.js +4 -7
  122. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  123. package/dist/tools/utils/markdown-heuristics.js +19 -0
  124. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  125. package/dist/tools/utils/markdown-signals.js +19 -0
  126. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  127. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  128. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  129. package/dist/tools/utils/raw-markdown.js +135 -0
  130. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  131. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  132. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  133. package/dist/transformers/markdown/frontmatter.js +45 -0
  134. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  135. package/dist/transformers/markdown/noise-rule.js +80 -0
  136. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  137. package/dist/transformers/markdown/turndown-instance.js +19 -0
  138. package/dist/transformers/markdown.d.ts +2 -0
  139. package/dist/transformers/markdown.js +185 -0
  140. package/dist/transformers/markdown.transformer.js +5 -117
  141. package/dist/utils/cached-payload.d.ts +7 -0
  142. package/dist/utils/cached-payload.js +36 -0
  143. package/dist/utils/code-language-bash.d.ts +1 -0
  144. package/dist/utils/code-language-bash.js +48 -0
  145. package/dist/utils/code-language-core.d.ts +2 -0
  146. package/dist/utils/code-language-core.js +13 -0
  147. package/dist/utils/code-language-detectors.d.ts +5 -0
  148. package/dist/utils/code-language-detectors.js +142 -0
  149. package/dist/utils/code-language-helpers.d.ts +5 -0
  150. package/dist/utils/code-language-helpers.js +62 -0
  151. package/dist/utils/code-language-parsing.d.ts +5 -0
  152. package/dist/utils/code-language-parsing.js +62 -0
  153. package/dist/utils/code-language.d.ts +9 -0
  154. package/dist/utils/code-language.js +250 -46
  155. package/dist/utils/error-details.d.ts +3 -0
  156. package/dist/utils/error-details.js +12 -0
  157. package/dist/utils/error-utils.js +1 -1
  158. package/dist/utils/filename-generator.js +34 -12
  159. package/dist/utils/guards.d.ts +1 -0
  160. package/dist/utils/guards.js +3 -0
  161. package/dist/utils/header-normalizer.d.ts +0 -3
  162. package/dist/utils/header-normalizer.js +3 -3
  163. package/dist/utils/ip-address.d.ts +4 -0
  164. package/dist/utils/ip-address.js +6 -0
  165. package/dist/utils/tool-error-handler.d.ts +2 -2
  166. package/dist/utils/tool-error-handler.js +14 -46
  167. package/dist/utils/url-transformer.d.ts +7 -0
  168. package/dist/utils/url-transformer.js +147 -0
  169. package/dist/utils/url-validator.d.ts +1 -2
  170. package/dist/utils/url-validator.js +53 -114
  171. package/dist/workers/content-transform.worker.d.ts +1 -0
  172. package/dist/workers/content-transform.worker.js +40 -0
  173. package/package.json +17 -18
@@ -1,33 +1,9 @@
1
- import { ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2
1
  import { FetchError } from '../errors/app-error.js';
3
- import { isSystemError } from './error-utils.js';
4
- const IS_DEVELOPMENT_WITH_STACK_TRACES = process.env.NODE_ENV === 'development' &&
5
- process.env.EXPOSE_STACK_TRACES === 'true';
6
- const MCP_ERROR_CODE_MAP = {
7
- VALIDATION_ERROR: String(ErrorCode.InvalidParams),
8
- INVALID_PARAMS: String(ErrorCode.InvalidParams),
9
- INTERNAL_ERROR: String(ErrorCode.InternalError),
10
- FETCH_ERROR: String(ErrorCode.InternalError),
11
- BATCH_ERROR: String(ErrorCode.InternalError),
12
- PROMISE_REJECTED: String(ErrorCode.InternalError),
13
- UNKNOWN_ERROR: String(ErrorCode.InternalError),
14
- };
15
- const NUMERIC_ERROR_CODE = /^-?\d+$/;
16
- function normalizeToolErrorCode(code) {
17
- if (!code)
18
- return String(ErrorCode.InternalError);
19
- if (NUMERIC_ERROR_CODE.test(code))
20
- return code;
21
- if (code.startsWith('HTTP_'))
22
- return String(ErrorCode.InternalError);
23
- return MCP_ERROR_CODE_MAP[code] ?? code;
24
- }
25
- export function createToolErrorResponse(message, url, code, details = {}) {
2
+ import { isSystemError } from './error-details.js';
3
+ export function createToolErrorResponse(message, url) {
26
4
  const structuredContent = {
27
- ...details,
28
5
  error: message,
29
6
  url,
30
- errorCode: normalizeToolErrorCode(code),
31
7
  };
32
8
  return {
33
9
  content: [{ type: 'text', text: JSON.stringify(structuredContent) }],
@@ -35,29 +11,21 @@ export function createToolErrorResponse(message, url, code, details = {}) {
35
11
  isError: true,
36
12
  };
37
13
  }
38
- function formatErrorMessage(baseMessage, error, fallback) {
39
- const message = fallback ? `${fallback}: ${error.message}` : error.message;
40
- if (IS_DEVELOPMENT_WITH_STACK_TRACES && error.stack) {
41
- return `${message}\n${error.stack}`;
42
- }
43
- return message;
44
- }
45
- export function handleToolError(error, url, fallbackMessage = 'Operation failed', details = {}) {
46
- if (isValidationError(error)) {
47
- return createToolErrorResponse(error.message, url, 'VALIDATION_ERROR', details);
48
- }
49
- if (error instanceof FetchError) {
50
- const message = formatErrorMessage(error.message, error);
51
- return createToolErrorResponse(message, url, error.code, details);
52
- }
53
- if (error instanceof Error) {
54
- const message = formatErrorMessage(error.message, error, fallbackMessage);
55
- return createToolErrorResponse(message, url, 'UNKNOWN_ERROR', details);
56
- }
57
- return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url, 'UNKNOWN_ERROR', details);
14
+ export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
15
+ const message = resolveToolErrorMessage(error, fallbackMessage);
16
+ return createToolErrorResponse(message, url);
58
17
  }
59
18
  function isValidationError(error) {
60
19
  return (error instanceof Error &&
61
20
  isSystemError(error) &&
62
21
  error.code === 'VALIDATION_ERROR');
63
22
  }
23
+ function resolveToolErrorMessage(error, fallbackMessage) {
24
+ if (isValidationError(error) || error instanceof FetchError) {
25
+ return error.message;
26
+ }
27
+ if (error instanceof Error) {
28
+ return `${fallbackMessage}: ${error.message}`;
29
+ }
30
+ return `${fallbackMessage}: Unknown error`;
31
+ }
@@ -0,0 +1,7 @@
1
+ export interface TransformResult {
2
+ readonly url: string;
3
+ readonly transformed: boolean;
4
+ readonly platform?: string;
5
+ }
6
+ export declare function transformToRawUrl(url: string): TransformResult;
7
+ export declare function isRawTextContentUrl(url: string): boolean;
@@ -0,0 +1,147 @@
1
+ import { logDebug } from '../services/logger.js';
2
+ const GITHUB_BLOB_RULE = {
3
+ name: 'github',
4
+ pattern: /^https?:\/\/(?:www\.)?github\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)$/i,
5
+ transform: (match) => {
6
+ const owner = match[1] ?? '';
7
+ const repo = match[2] ?? '';
8
+ const branch = match[3] ?? '';
9
+ const path = match[4] ?? '';
10
+ return `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`;
11
+ },
12
+ };
13
+ const GITHUB_GIST_RULE = {
14
+ name: 'github-gist',
15
+ pattern: /^https?:\/\/gist\.github\.com\/([^/]+)\/([a-f0-9]+)(?:#file-(.+)|\/raw\/([^/]+))?$/i,
16
+ transform: (match) => {
17
+ const user = match[1] ?? '';
18
+ const gistId = match[2] ?? '';
19
+ const hashFile = match[3];
20
+ const rawFile = match[4];
21
+ const filename = rawFile ?? hashFile?.replace(/-/g, '.');
22
+ const filePath = filename ? `/${filename}` : '';
23
+ return `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`;
24
+ },
25
+ };
26
+ const GITLAB_BLOB_RULE = {
27
+ name: 'gitlab',
28
+ pattern: /^(https?:\/\/(?:[^/]+\.)?gitlab\.com\/[^/]+\/[^/]+)\/-\/blob\/([^/]+)\/(.+)$/i,
29
+ transform: (match) => {
30
+ const baseUrl = match[1] ?? '';
31
+ const branch = match[2] ?? '';
32
+ const path = match[3] ?? '';
33
+ return `${baseUrl}/-/raw/${branch}/${path}`;
34
+ },
35
+ };
36
+ const BITBUCKET_SRC_RULE = {
37
+ name: 'bitbucket',
38
+ pattern: /^(https?:\/\/(?:www\.)?bitbucket\.org\/[^/]+\/[^/]+)\/src\/([^/]+)\/(.+)$/i,
39
+ transform: (match) => {
40
+ const baseUrl = match[1] ?? '';
41
+ const branch = match[2] ?? '';
42
+ const path = match[3] ?? '';
43
+ return `${baseUrl}/raw/${branch}/${path}`;
44
+ },
45
+ };
46
+ const TRANSFORM_RULES = [
47
+ GITHUB_BLOB_RULE,
48
+ GITHUB_GIST_RULE,
49
+ GITLAB_BLOB_RULE,
50
+ BITBUCKET_SRC_RULE,
51
+ ];
52
+ function isRawUrl(url) {
53
+ const lowerUrl = url.toLowerCase();
54
+ return (lowerUrl.includes('raw.githubusercontent.com') ||
55
+ lowerUrl.includes('gist.githubusercontent.com') ||
56
+ lowerUrl.includes('/-/raw/') ||
57
+ /bitbucket\.org\/[^/]+\/[^/]+\/raw\//.test(lowerUrl));
58
+ }
59
+ function getUrlWithoutParams(url) {
60
+ const hashIndex = url.indexOf('#');
61
+ const queryIndex = url.indexOf('?');
62
+ let endIndex = url.length;
63
+ if (queryIndex !== -1) {
64
+ if (hashIndex !== -1) {
65
+ endIndex = Math.min(queryIndex, hashIndex);
66
+ }
67
+ else {
68
+ endIndex = queryIndex;
69
+ }
70
+ }
71
+ else if (hashIndex !== -1) {
72
+ endIndex = hashIndex;
73
+ }
74
+ const hash = hashIndex !== -1 ? url.slice(hashIndex) : '';
75
+ return {
76
+ base: url.slice(0, endIndex),
77
+ hash,
78
+ };
79
+ }
80
+ function resolveUrlToMatch(rule, base, hash) {
81
+ if (rule.name !== 'github-gist')
82
+ return base;
83
+ if (!hash.startsWith('#file-'))
84
+ return base;
85
+ return base + hash;
86
+ }
87
+ function applyTransformRules(base, hash) {
88
+ for (const rule of TRANSFORM_RULES) {
89
+ const urlToMatch = resolveUrlToMatch(rule, base, hash);
90
+ const match = rule.pattern.exec(urlToMatch);
91
+ if (match) {
92
+ return { url: rule.transform(match), platform: rule.name };
93
+ }
94
+ }
95
+ return null;
96
+ }
97
+ export function transformToRawUrl(url) {
98
+ if (!url)
99
+ return { url, transformed: false };
100
+ if (isRawUrl(url)) {
101
+ return { url, transformed: false };
102
+ }
103
+ const { base, hash } = getUrlWithoutParams(url);
104
+ const result = applyTransformRules(base, hash);
105
+ if (!result)
106
+ return { url, transformed: false };
107
+ logDebug('URL transformed to raw content URL', {
108
+ platform: result.platform,
109
+ original: url.substring(0, 100),
110
+ transformed: result.url.substring(0, 100),
111
+ });
112
+ return {
113
+ url: result.url,
114
+ transformed: true,
115
+ platform: result.platform,
116
+ };
117
+ }
118
+ const RAW_TEXT_EXTENSIONS = new Set([
119
+ '.md',
120
+ '.markdown',
121
+ '.txt',
122
+ '.json',
123
+ '.yaml',
124
+ '.yml',
125
+ '.toml',
126
+ '.xml',
127
+ '.csv',
128
+ '.rst',
129
+ '.adoc',
130
+ '.org',
131
+ ]);
132
+ export function isRawTextContentUrl(url) {
133
+ if (!url)
134
+ return false;
135
+ if (isRawUrl(url))
136
+ return true;
137
+ const { base } = getUrlWithoutParams(url);
138
+ const lowerBase = base.toLowerCase();
139
+ return hasKnownRawTextExtension(lowerBase);
140
+ }
141
+ function hasKnownRawTextExtension(urlBaseLower) {
142
+ for (const ext of RAW_TEXT_EXTENSIONS) {
143
+ if (urlBaseLower.endsWith(ext))
144
+ return true;
145
+ }
146
+ return false;
147
+ }
@@ -1,7 +1,6 @@
1
1
  export declare function isBlockedIp(ip: string): boolean;
2
- export declare function assertResolvedAddressesAllowed(hostname: string): Promise<void>;
3
2
  export declare function normalizeUrl(urlString: string): {
4
3
  normalizedUrl: string;
5
4
  hostname: string;
6
5
  };
7
- export declare function validateAndNormalizeUrl(urlString: string): Promise<string>;
6
+ export declare function validateAndNormalizeUrl(urlString: string): string;
@@ -1,29 +1,38 @@
1
- import { lookup } from 'node:dns/promises';
2
1
  import { BlockList, isIP } from 'node:net';
3
2
  import { config } from '../config/index.js';
4
- import { createErrorWithCode } from './error-utils.js';
3
+ import { createErrorWithCode } from './error-details.js';
4
+ import { buildIpv4, buildIpv6 } from './ip-address.js';
5
5
  const BLOCK_LIST = new BlockList();
6
+ const IPV6_ZERO = buildIpv6([0, 0, 0, 0, 0, 0, 0, 0]);
7
+ const IPV6_LOOPBACK = buildIpv6([0, 0, 0, 0, 0, 0, 0, 1]);
8
+ const IPV6_64_FF9B = buildIpv6(['64', 'ff9b', 0, 0, 0, 0, 0, 0]);
9
+ const IPV6_64_FF9B_1 = buildIpv6(['64', 'ff9b', 1, 0, 0, 0, 0, 0]);
10
+ const IPV6_2001 = buildIpv6(['2001', 0, 0, 0, 0, 0, 0, 0]);
11
+ const IPV6_2002 = buildIpv6(['2002', 0, 0, 0, 0, 0, 0, 0]);
12
+ const IPV6_FC00 = buildIpv6(['fc00', 0, 0, 0, 0, 0, 0, 0]);
13
+ const IPV6_FE80 = buildIpv6(['fe80', 0, 0, 0, 0, 0, 0, 0]);
14
+ const IPV6_FF00 = buildIpv6(['ff00', 0, 0, 0, 0, 0, 0, 0]);
6
15
  const BLOCKED_IPV4_SUBNETS = [
7
- { subnet: '0.0.0.0', prefix: 8 },
8
- { subnet: '10.0.0.0', prefix: 8 },
9
- { subnet: '100.64.0.0', prefix: 10 },
10
- { subnet: '127.0.0.0', prefix: 8 },
11
- { subnet: '169.254.0.0', prefix: 16 },
12
- { subnet: '172.16.0.0', prefix: 12 },
13
- { subnet: '192.168.0.0', prefix: 16 },
14
- { subnet: '224.0.0.0', prefix: 4 },
15
- { subnet: '240.0.0.0', prefix: 4 },
16
+ { subnet: buildIpv4([0, 0, 0, 0]), prefix: 8 },
17
+ { subnet: buildIpv4([10, 0, 0, 0]), prefix: 8 },
18
+ { subnet: buildIpv4([100, 64, 0, 0]), prefix: 10 },
19
+ { subnet: buildIpv4([127, 0, 0, 0]), prefix: 8 },
20
+ { subnet: buildIpv4([169, 254, 0, 0]), prefix: 16 },
21
+ { subnet: buildIpv4([172, 16, 0, 0]), prefix: 12 },
22
+ { subnet: buildIpv4([192, 168, 0, 0]), prefix: 16 },
23
+ { subnet: buildIpv4([224, 0, 0, 0]), prefix: 4 },
24
+ { subnet: buildIpv4([240, 0, 0, 0]), prefix: 4 },
16
25
  ];
17
26
  const BLOCKED_IPV6_SUBNETS = [
18
- { subnet: '::', prefix: 128 },
19
- { subnet: '::1', prefix: 128 },
20
- { subnet: '64:ff9b::', prefix: 96 },
21
- { subnet: '64:ff9b:1::', prefix: 48 },
22
- { subnet: '2001::', prefix: 32 },
23
- { subnet: '2002::', prefix: 16 },
24
- { subnet: 'fc00::', prefix: 7 },
25
- { subnet: 'fe80::', prefix: 10 },
26
- { subnet: 'ff00::', prefix: 8 },
27
+ { subnet: IPV6_ZERO, prefix: 128 },
28
+ { subnet: IPV6_LOOPBACK, prefix: 128 },
29
+ { subnet: IPV6_64_FF9B, prefix: 96 },
30
+ { subnet: IPV6_64_FF9B_1, prefix: 48 },
31
+ { subnet: IPV6_2001, prefix: 32 },
32
+ { subnet: IPV6_2002, prefix: 16 },
33
+ { subnet: IPV6_FC00, prefix: 7 },
34
+ { subnet: IPV6_FE80, prefix: 10 },
35
+ { subnet: IPV6_FF00, prefix: 8 },
27
36
  ];
28
37
  for (const entry of BLOCKED_IPV4_SUBNETS) {
29
38
  BLOCK_LIST.addSubnet(entry.subnet, entry.prefix, 'ipv4');
@@ -31,36 +40,6 @@ for (const entry of BLOCKED_IPV4_SUBNETS) {
31
40
  for (const entry of BLOCKED_IPV6_SUBNETS) {
32
41
  BLOCK_LIST.addSubnet(entry.subnet, entry.prefix, 'ipv6');
33
42
  }
34
- const DNS_LOOKUP_TIMEOUT_MS = 5000;
35
- const DNS_DECISION_TTL_MS = 60000;
36
- const DNS_DECISION_MAX = 1000;
37
- const dnsDecisionCache = new Map();
38
- function getCachedDnsDecision(hostname) {
39
- const cached = dnsDecisionCache.get(hostname);
40
- if (!cached)
41
- return null;
42
- if (cached.expiresAt <= Date.now()) {
43
- dnsDecisionCache.delete(hostname);
44
- return null;
45
- }
46
- return cached;
47
- }
48
- function setCachedDnsDecision(hostname, ok) {
49
- dnsDecisionCache.set(hostname, {
50
- ok,
51
- expiresAt: Date.now() + DNS_DECISION_TTL_MS,
52
- });
53
- if (dnsDecisionCache.size <= DNS_DECISION_MAX)
54
- return;
55
- const evictCount = Math.ceil(DNS_DECISION_MAX * 0.05);
56
- const iterator = dnsDecisionCache.keys();
57
- for (let i = 0; i < evictCount; i++) {
58
- const { value, done } = iterator.next();
59
- if (done)
60
- break;
61
- dnsDecisionCache.delete(value);
62
- }
63
- }
64
43
  function matchesBlockedIpPatterns(resolvedIp) {
65
44
  for (const pattern of config.security.blockedIpPatterns) {
66
45
  if (pattern.test(resolvedIp)) {
@@ -91,55 +70,6 @@ function isBlockedByList(ip, ipType) {
91
70
  }
92
71
  return BLOCK_LIST.check(ip, 'ipv6');
93
72
  }
94
- function lookupWithTimeout(hostname) {
95
- return new Promise((resolve, reject) => {
96
- const timer = setTimeout(() => {
97
- reject(createValidationError(`DNS lookup timed out for ${hostname}`));
98
- }, DNS_LOOKUP_TIMEOUT_MS);
99
- lookup(hostname, { all: true })
100
- .then((result) => {
101
- clearTimeout(timer);
102
- resolve(result);
103
- })
104
- .catch((error) => {
105
- clearTimeout(timer);
106
- reject(error instanceof Error ? error : createValidationError(String(error)));
107
- });
108
- });
109
- }
110
- export async function assertResolvedAddressesAllowed(hostname) {
111
- const cached = getCachedDnsDecision(hostname);
112
- if (cached) {
113
- if (!cached.ok) {
114
- throw createValidationError(`Blocked IP range resolved from hostname: ${hostname}`);
115
- }
116
- return;
117
- }
118
- try {
119
- const result = await lookupWithTimeout(hostname);
120
- const addresses = Array.isArray(result) ? result : [result];
121
- if (addresses.length === 0) {
122
- throw createValidationError(`Unable to resolve hostname: ${hostname}`);
123
- }
124
- for (const { address } of addresses) {
125
- if (isBlockedIp(address.toLowerCase())) {
126
- setCachedDnsDecision(hostname, false);
127
- throw createValidationError(`Blocked IP range resolved from hostname: ${hostname}`);
128
- }
129
- }
130
- setCachedDnsDecision(hostname, true);
131
- }
132
- catch (error) {
133
- const code = error?.code;
134
- if (code === 'ENOTFOUND' || code === 'EAI_AGAIN') {
135
- throw createValidationError(`Unable to resolve hostname: ${hostname}`);
136
- }
137
- if (error instanceof Error) {
138
- throw error;
139
- }
140
- throw createValidationError(String(error));
141
- }
142
- }
143
73
  export function normalizeUrl(urlString) {
144
74
  const trimmedUrl = requireTrimmedUrl(urlString);
145
75
  assertUrlLength(trimmedUrl);
@@ -150,10 +80,8 @@ export function normalizeUrl(urlString) {
150
80
  assertHostnameAllowed(hostname);
151
81
  return { normalizedUrl: url.href, hostname };
152
82
  }
153
- export async function validateAndNormalizeUrl(urlString) {
154
- const { normalizedUrl, hostname } = normalizeUrl(urlString);
155
- await assertResolvedAddressesAllowed(hostname);
156
- return normalizedUrl;
83
+ export function validateAndNormalizeUrl(urlString) {
84
+ return normalizeUrl(urlString).normalizedUrl;
157
85
  }
158
86
  const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
159
87
  function createValidationError(message) {
@@ -175,10 +103,12 @@ function assertUrlLength(url) {
175
103
  throw createValidationError(`URL exceeds maximum length of ${config.constants.maxUrlLength} characters`);
176
104
  }
177
105
  function parseUrl(urlString) {
178
- if (!URL.canParse(urlString)) {
106
+ try {
107
+ return new URL(urlString);
108
+ }
109
+ catch {
179
110
  throw createValidationError('Invalid URL format');
180
111
  }
181
- return new URL(urlString);
182
112
  }
183
113
  function assertHttpProtocol(url) {
184
114
  if (url.protocol === 'http:' || url.protocol === 'https:')
@@ -199,15 +129,24 @@ function normalizeHostname(url) {
199
129
  }
200
130
  const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
201
131
  function assertHostnameAllowed(hostname) {
202
- if (config.security.blockedHosts.has(hostname)) {
203
- throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
204
- }
205
- if (isBlockedIp(hostname)) {
206
- throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
207
- }
208
- if (matchesBlockedSuffix(hostname)) {
209
- throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
210
- }
132
+ assertNotBlockedHost(hostname);
133
+ assertNotBlockedIp(hostname);
134
+ assertNotBlockedHostnameSuffix(hostname);
135
+ }
136
+ function assertNotBlockedHost(hostname) {
137
+ if (!config.security.blockedHosts.has(hostname))
138
+ return;
139
+ throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
140
+ }
141
+ function assertNotBlockedIp(hostname) {
142
+ if (!isBlockedIp(hostname))
143
+ return;
144
+ throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
145
+ }
146
+ function assertNotBlockedHostnameSuffix(hostname) {
147
+ if (!matchesBlockedSuffix(hostname))
148
+ return;
149
+ throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
211
150
  }
212
151
  function matchesBlockedSuffix(hostname) {
213
152
  return BLOCKED_HOST_SUFFIXES.some((suffix) => hostname.endsWith(suffix));
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,40 @@
1
+ import { parentPort } from 'node:worker_threads';
2
+ import { isRecord } from '../utils/guards.js';
3
+ import { transformHtmlToMarkdownSync } from '../tools/utils/content-transform.js';
4
+ const port = parentPort;
5
+ function isWorkerTransformRequest(value) {
6
+ if (!isRecord(value))
7
+ return false;
8
+ return (typeof value.id === 'number' &&
9
+ typeof value.html === 'string' &&
10
+ typeof value.url === 'string' &&
11
+ typeof value.options === 'object');
12
+ }
13
+ function handleMessage(value) {
14
+ if (!port)
15
+ return;
16
+ if (!isWorkerTransformRequest(value))
17
+ return;
18
+ const { id, html, url, options } = value;
19
+ try {
20
+ const result = transformHtmlToMarkdownSync(html, url, options);
21
+ const response = {
22
+ id,
23
+ ok: true,
24
+ result,
25
+ };
26
+ port.postMessage(response);
27
+ }
28
+ catch (error) {
29
+ const response = {
30
+ id,
31
+ ok: false,
32
+ error: error instanceof Error ? error.message : String(error),
33
+ };
34
+ port.postMessage(response);
35
+ }
36
+ }
37
+ if (!port) {
38
+ process.exit(1);
39
+ }
40
+ port.on('message', handleMessage);
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@j0hanz/superfetch",
3
- "version": "1.2.5",
3
+ "version": "2.0.1",
4
4
  "mcpName": "io.github.j0hanz/superfetch",
5
- "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable JSONL format",
5
+ "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",
7
7
  "main": "dist/index.js",
8
8
  "bin": {
@@ -28,54 +28,53 @@
28
28
  "web-fetching",
29
29
  "content-extraction",
30
30
  "readability",
31
- "jsonl",
31
+ "markdown",
32
32
  "ai-tools",
33
33
  "model-context-protocol",
34
34
  "superfetch"
35
35
  ],
36
36
  "scripts": {
37
- "dev": "tsx watch src/index.ts",
38
- "build": "tsc -p tsconfig.build.json && shx chmod +x dist/*.js",
37
+ "build": "tsc -p tsconfig.build.json && node -e \"require('fs').chmodSync('dist/index.js', '755')\"",
39
38
  "prepare": "npm run build",
40
- "prepublishOnly": "npm run build && npm run lint",
39
+ "dev": "tsx watch src/index.ts",
41
40
  "start": "node dist/index.js",
42
- "release": "node scripts/release.js",
43
41
  "format": "prettier --write .",
44
42
  "type-check": "tsc --noEmit",
45
43
  "lint": "eslint .",
46
44
  "lint:fix": "eslint . --fix",
47
45
  "test": "npm run build --silent && node --test --experimental-transform-types",
48
46
  "test:coverage": "npm run build --silent && node --test --experimental-transform-types --experimental-test-coverage",
49
- "bench": "npm run build && node scripts/bench.mjs",
50
47
  "knip": "knip",
51
- "knip:fix": "knip --fix"
48
+ "knip:fix": "knip --fix",
49
+ "inspector": "npx @modelcontextprotocol/inspector",
50
+ "prepublishOnly": "npm run lint && npm run type-check && npm run build"
52
51
  },
53
52
  "dependencies": {
54
- "@modelcontextprotocol/sdk": "^1.25.1",
53
+ "@modelcontextprotocol/sdk": "^1.25.2",
55
54
  "@mozilla/readability": "^0.6.0",
56
- "cheerio": "^1.1.2",
57
- "domhandler": "^5.0.3",
58
55
  "express": "^5.2.1",
59
56
  "linkedom": "^0.18.12",
60
57
  "turndown": "^7.2.2",
61
- "undici": "^6.22.0",
62
- "zod": "^4.3.4"
58
+ "undici": "^6.23.0",
59
+ "zod": "^4.3.5"
63
60
  },
64
61
  "devDependencies": {
65
62
  "@eslint/js": "^9.39.2",
66
- "@trivago/prettier-plugin-sort-imports": "^6.0.0",
63
+ "@trivago/prettier-plugin-sort-imports": "^6.0.2",
67
64
  "@types/express": "^5.0.6",
68
65
  "@types/node": "^22.19.3",
69
66
  "@types/turndown": "^5.0.6",
70
67
  "eslint": "^9.23.2",
71
68
  "eslint-config-prettier": "^10.1.8",
69
+ "eslint-plugin-de-morgan": "^2.0.0",
70
+ "eslint-plugin-depend": "^1.4.0",
71
+ "eslint-plugin-sonarjs": "^3.0.5",
72
72
  "eslint-plugin-unused-imports": "^4.3.0",
73
- "knip": "^5.78.0",
73
+ "knip": "^5.80.1",
74
74
  "prettier": "^3.7.4",
75
- "shx": "^0.4.0",
76
75
  "tsx": "^4.21.0",
77
76
  "typescript": "^5.9.3",
78
- "typescript-eslint": "^8.51.0"
77
+ "typescript-eslint": "^8.52.0"
79
78
  },
80
79
  "engines": {
81
80
  "node": ">=20.12.0"