@j0hanz/superfetch 1.2.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +116 -152
  2. package/dist/config/auth-config.d.ts +16 -0
  3. package/dist/config/auth-config.js +53 -0
  4. package/dist/config/constants.d.ts +11 -13
  5. package/dist/config/constants.js +1 -3
  6. package/dist/config/env-parsers.d.ts +7 -0
  7. package/dist/config/env-parsers.js +84 -0
  8. package/dist/config/formatting.d.ts +2 -2
  9. package/dist/config/index.d.ts +47 -53
  10. package/dist/config/index.js +25 -59
  11. package/dist/config/types/content.d.ts +1 -49
  12. package/dist/config/types/runtime.d.ts +8 -16
  13. package/dist/config/types/tools.d.ts +2 -28
  14. package/dist/http/accept-policy.d.ts +3 -0
  15. package/dist/http/accept-policy.js +45 -0
  16. package/dist/http/async-handler.d.ts +2 -0
  17. package/dist/http/async-handler.js +5 -0
  18. package/dist/http/auth-introspection.d.ts +2 -0
  19. package/dist/http/auth-introspection.js +141 -0
  20. package/dist/http/auth-static.d.ts +2 -0
  21. package/dist/http/auth-static.js +23 -0
  22. package/dist/http/auth.d.ts +3 -2
  23. package/dist/http/auth.js +98 -26
  24. package/dist/http/cors.d.ts +6 -6
  25. package/dist/http/cors.js +7 -42
  26. package/dist/http/download-routes.d.ts +0 -12
  27. package/dist/http/download-routes.js +21 -58
  28. package/dist/http/jsonrpc-http.d.ts +2 -0
  29. package/dist/http/jsonrpc-http.js +10 -0
  30. package/dist/http/mcp-routes.d.ts +0 -1
  31. package/dist/http/mcp-routes.js +43 -30
  32. package/dist/http/mcp-session-helpers.d.ts +0 -1
  33. package/dist/http/mcp-session-helpers.js +1 -1
  34. package/dist/http/mcp-session-transport.d.ts +7 -0
  35. package/dist/http/mcp-session-transport.js +57 -0
  36. package/dist/http/mcp-session.js +60 -73
  37. package/dist/http/mcp-validation.d.ts +1 -0
  38. package/dist/http/mcp-validation.js +11 -10
  39. package/dist/http/protocol-policy.d.ts +2 -0
  40. package/dist/http/protocol-policy.js +31 -0
  41. package/dist/http/rate-limit.js +5 -2
  42. package/dist/http/server-config.d.ts +1 -0
  43. package/dist/http/server-config.js +40 -0
  44. package/dist/http/server-middleware.d.ts +2 -9
  45. package/dist/http/server-middleware.js +96 -43
  46. package/dist/http/server-shutdown.d.ts +4 -0
  47. package/dist/http/server-shutdown.js +43 -0
  48. package/dist/http/server.js +52 -64
  49. package/dist/http/session-cleanup.js +1 -1
  50. package/dist/middleware/error-handler.js +1 -3
  51. package/dist/resources/cached-content.js +50 -108
  52. package/dist/resources/index.js +0 -82
  53. package/dist/server.js +51 -30
  54. package/dist/services/cache-keys.d.ts +7 -0
  55. package/dist/services/cache-keys.js +57 -0
  56. package/dist/services/cache.d.ts +1 -7
  57. package/dist/services/cache.js +53 -119
  58. package/dist/services/context.d.ts +0 -1
  59. package/dist/services/context.js +0 -7
  60. package/dist/services/extractor.js +10 -82
  61. package/dist/services/fetcher/agents.d.ts +2 -2
  62. package/dist/services/fetcher/agents.js +34 -95
  63. package/dist/services/fetcher/dns-selection.d.ts +2 -0
  64. package/dist/services/fetcher/dns-selection.js +72 -0
  65. package/dist/services/fetcher/interceptors.d.ts +0 -22
  66. package/dist/services/fetcher/interceptors.js +30 -13
  67. package/dist/services/fetcher/redirects.js +4 -3
  68. package/dist/services/fetcher/response.js +66 -31
  69. package/dist/services/fetcher.d.ts +1 -3
  70. package/dist/services/fetcher.js +14 -33
  71. package/dist/services/fifo-queue.d.ts +8 -0
  72. package/dist/services/fifo-queue.js +25 -0
  73. package/dist/services/logger.js +2 -2
  74. package/dist/services/metadata-collector.d.ts +1 -9
  75. package/dist/services/metadata-collector.js +71 -2
  76. package/dist/services/transform-worker-pool.d.ts +4 -14
  77. package/dist/services/transform-worker-pool.js +177 -129
  78. package/dist/services/transform-worker-types.d.ts +32 -0
  79. package/dist/services/transform-worker-types.js +14 -0
  80. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
  81. package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
  82. package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
  83. package/dist/tools/handlers/fetch-single.shared.js +44 -87
  84. package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
  85. package/dist/tools/handlers/fetch-url.tool.js +46 -123
  86. package/dist/tools/index.js +21 -40
  87. package/dist/tools/schemas.d.ts +1 -51
  88. package/dist/tools/schemas.js +2 -108
  89. package/dist/tools/utils/cached-markdown.d.ts +5 -0
  90. package/dist/tools/utils/cached-markdown.js +46 -0
  91. package/dist/tools/utils/content-shaping.d.ts +4 -0
  92. package/dist/tools/utils/content-shaping.js +52 -0
  93. package/dist/tools/utils/content-transform.d.ts +2 -17
  94. package/dist/tools/utils/content-transform.js +120 -114
  95. package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
  96. package/dist/tools/utils/fetch-pipeline.js +65 -62
  97. package/dist/tools/utils/inline-content.d.ts +1 -2
  98. package/dist/tools/utils/inline-content.js +4 -7
  99. package/dist/transformers/markdown.transformer.js +109 -34
  100. package/dist/utils/cached-payload.d.ts +7 -0
  101. package/dist/utils/cached-payload.js +36 -0
  102. package/dist/utils/error-utils.js +1 -1
  103. package/dist/utils/filename-generator.js +21 -10
  104. package/dist/utils/guards.d.ts +1 -0
  105. package/dist/utils/guards.js +3 -0
  106. package/dist/utils/header-normalizer.d.ts +0 -3
  107. package/dist/utils/header-normalizer.js +3 -3
  108. package/dist/utils/tool-error-handler.d.ts +2 -2
  109. package/dist/utils/tool-error-handler.js +11 -38
  110. package/dist/utils/url-transformer.d.ts +7 -0
  111. package/dist/utils/url-transformer.js +147 -0
  112. package/dist/utils/url-validator.d.ts +1 -2
  113. package/dist/utils/url-validator.js +20 -93
  114. package/dist/workers/content-transform.worker.d.ts +1 -0
  115. package/dist/workers/content-transform.worker.js +40 -0
  116. package/package.json +13 -16
@@ -1,13 +1,8 @@
1
1
  import TurndownService from 'turndown';
2
2
  import { CODE_BLOCK, FRONTMATTER_DELIMITER, joinLines, } from '../config/formatting.js';
3
3
  import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../utils/code-language.js';
4
+ import { isRecord } from '../utils/guards.js';
4
5
  let turndownInstance = null;
5
- function getTurndown() {
6
- if (turndownInstance)
7
- return turndownInstance;
8
- turndownInstance = createTurndownInstance();
9
- return turndownInstance;
10
- }
11
6
  function createTurndownInstance() {
12
7
  const instance = new TurndownService({
13
8
  headingStyle: 'atx',
@@ -19,12 +14,97 @@ function createTurndownInstance() {
19
14
  addFencedCodeRule(instance);
20
15
  return instance;
21
16
  }
17
+ function getTurndown() {
18
+ turndownInstance ??= createTurndownInstance();
19
+ return turndownInstance;
20
+ }
21
+ function isElement(node) {
22
+ if (!isRecord(node))
23
+ return false;
24
+ return 'getAttribute' in node && typeof node.getAttribute === 'function';
25
+ }
26
+ const STRUCTURAL_TAGS = new Set([
27
+ 'script',
28
+ 'style',
29
+ 'noscript',
30
+ 'iframe',
31
+ 'nav',
32
+ 'footer',
33
+ 'aside',
34
+ 'header',
35
+ 'form',
36
+ 'button',
37
+ 'input',
38
+ 'select',
39
+ 'textarea',
40
+ ]);
41
+ const NAVIGATION_ROLES = new Set([
42
+ 'navigation',
43
+ 'banner',
44
+ 'complementary',
45
+ 'contentinfo',
46
+ 'tree',
47
+ 'menubar',
48
+ 'menu',
49
+ ]);
50
+ const PROMO_PATTERN = /banner|promo|announcement|cta|callout|advert|newsletter|subscribe|cookie|consent|popup|modal|overlay|toast/;
51
+ const FIXED_PATTERN = /\b(fixed|sticky)\b/;
52
+ const HIGH_Z_PATTERN = /\bz-(?:4[0-9]|50)\b/;
53
+ const ISOLATE_PATTERN = /\bisolate\b/;
54
+ function isStructuralNoiseTag(tagName) {
55
+ return (STRUCTURAL_TAGS.has(tagName) || tagName === 'svg' || tagName === 'canvas');
56
+ }
57
+ function isElementHidden(element) {
58
+ return (element.getAttribute('hidden') !== null ||
59
+ element.getAttribute('aria-hidden') === 'true');
60
+ }
61
+ function hasNoiseRole(role) {
62
+ return role ? NAVIGATION_ROLES.has(role) : false;
63
+ }
64
+ function matchesPromoIdOrClass(className, id) {
65
+ const combined = `${className} ${id}`.toLowerCase();
66
+ return PROMO_PATTERN.test(combined);
67
+ }
68
+ function matchesHighZIsolate(className) {
69
+ return HIGH_Z_PATTERN.test(className) && ISOLATE_PATTERN.test(className);
70
+ }
71
+ function matchesFixedOrHighZIsolate(className) {
72
+ if (FIXED_PATTERN.test(className))
73
+ return true;
74
+ return matchesHighZIsolate(className);
75
+ }
22
76
  function addNoiseRule(instance) {
23
77
  instance.addRule('removeNoise', {
24
- filter: ['script', 'style', 'noscript', 'nav', 'footer', 'aside', 'iframe'],
78
+ filter: (node) => isNoiseNode(node),
25
79
  replacement: () => '',
26
80
  });
27
81
  }
82
+ function isNoiseNode(node) {
83
+ if (!isElement(node))
84
+ return false;
85
+ return isNoiseElement(node);
86
+ }
87
+ function readElementMetadata(element) {
88
+ return {
89
+ tagName: element.tagName.toLowerCase(),
90
+ className: element.getAttribute('class') ?? '',
91
+ id: element.getAttribute('id') ?? '',
92
+ role: element.getAttribute('role'),
93
+ isHidden: isElementHidden(element),
94
+ };
95
+ }
96
+ function isNoiseElement(node) {
97
+ const metadata = readElementMetadata(node);
98
+ if (isStructuralNoiseTag(metadata.tagName))
99
+ return true;
100
+ if (metadata.isHidden)
101
+ return true;
102
+ if (hasNoiseRole(metadata.role))
103
+ return true;
104
+ if (matchesFixedOrHighZIsolate(metadata.className))
105
+ return true;
106
+ return matchesPromoIdOrClass(metadata.className, metadata.id);
107
+ }
28
108
  function addFencedCodeRule(instance) {
29
109
  instance.addRule('fencedCodeBlockWithLanguage', {
30
110
  filter: (node, options) => isFencedCodeBlock(node, options),
@@ -41,12 +121,6 @@ function isFencedCodeBlock(node, options) {
41
121
  return false;
42
122
  return firstChild.nodeName === 'CODE';
43
123
  }
44
- function isElement(node) {
45
- return (node !== null &&
46
- typeof node === 'object' &&
47
- 'getAttribute' in node &&
48
- typeof node.getAttribute === 'function');
49
- }
50
124
  function formatFencedCodeBlock(node) {
51
125
  const codeNode = node.firstChild;
52
126
  if (!isElement(codeNode))
@@ -56,11 +130,16 @@ function formatFencedCodeBlock(node) {
56
130
  return CODE_BLOCK.format(code, language);
57
131
  }
58
132
  function resolveCodeLanguage(codeNode, code) {
59
- const className = codeNode.getAttribute('class') ?? '';
60
- const dataLang = codeNode.getAttribute('data-language') ?? '';
61
- const attributeLanguage = resolveLanguageFromAttributes(className, dataLang);
133
+ const { className, dataLanguage } = readCodeAttributes(codeNode);
134
+ const attributeLanguage = resolveLanguageFromAttributes(className, dataLanguage);
62
135
  return attributeLanguage ?? detectLanguageFromCode(code) ?? '';
63
136
  }
137
+ function readCodeAttributes(codeNode) {
138
+ return {
139
+ className: codeNode.getAttribute('class') ?? '',
140
+ dataLanguage: codeNode.getAttribute('data-language') ?? '',
141
+ };
142
+ }
64
143
  const YAML_SPECIAL_CHARS = /[:[\]{}"\r\t'|>&*!?,#]|\n/;
65
144
  const YAML_NUMERIC = /^[\d.]+$/;
66
145
  const YAML_RESERVED_WORDS = /^(true|false|null|yes|no|on|off)$/i;
@@ -91,36 +170,32 @@ function escapeYamlValue(value) {
91
170
  .replace(ESCAPE_PATTERNS.tab, '\\t');
92
171
  return `"${escaped}"`;
93
172
  }
173
+ function appendFrontmatterField(lines, key, value) {
174
+ if (!value)
175
+ return;
176
+ lines.push(`${key}: ${escapeYamlValue(value)}`);
177
+ }
94
178
  function createFrontmatter(metadata) {
95
179
  const lines = [FRONTMATTER_DELIMITER];
96
- if (metadata.title) {
97
- lines.push(`title: ${escapeYamlValue(metadata.title)}`);
98
- }
99
- if (metadata.url) {
100
- lines.push(`source: ${escapeYamlValue(metadata.url)}`);
101
- }
180
+ appendFrontmatterField(lines, 'title', metadata.title);
181
+ appendFrontmatterField(lines, 'source', metadata.url);
102
182
  lines.push(FRONTMATTER_DELIMITER);
103
183
  return joinLines(lines);
104
184
  }
105
- function convertHtmlToMarkdown(html) {
106
- return getTurndown().turndown(html).trim();
107
- }
108
- function buildFrontmatterBlock(metadata) {
109
- return metadata ? createFrontmatter(metadata) : '';
110
- }
111
185
  export function htmlToMarkdown(html, metadata) {
112
- const frontmatter = buildFrontmatterBlock(metadata);
113
- if (!isValidHtmlInput(html)) {
186
+ const frontmatter = buildFrontmatter(metadata);
187
+ if (!html)
114
188
  return frontmatter;
115
- }
116
189
  try {
117
- const content = convertHtmlToMarkdown(html);
190
+ const content = getTurndown().turndown(html).trim();
118
191
  return frontmatter ? `${frontmatter}\n${content}` : content;
119
192
  }
120
193
  catch {
121
194
  return frontmatter;
122
195
  }
123
196
  }
124
- function isValidHtmlInput(html) {
125
- return Boolean(html && typeof html === 'string');
197
+ function buildFrontmatter(metadata) {
198
+ if (!metadata)
199
+ return '';
200
+ return createFrontmatter(metadata);
126
201
  }
@@ -0,0 +1,7 @@
1
+ export interface CachedPayload {
2
+ content?: string;
3
+ markdown?: string;
4
+ title?: string;
5
+ }
6
+ export declare function parseCachedPayload(raw: string): CachedPayload | null;
7
+ export declare function resolveCachedPayloadContent(payload: CachedPayload): string | null;
@@ -0,0 +1,36 @@
1
+ import { isRecord } from './guards.js';
2
+ export function parseCachedPayload(raw) {
3
+ try {
4
+ const parsed = JSON.parse(raw);
5
+ return isCachedPayload(parsed) ? parsed : null;
6
+ }
7
+ catch {
8
+ return null;
9
+ }
10
+ }
11
+ export function resolveCachedPayloadContent(payload) {
12
+ if (typeof payload.markdown === 'string') {
13
+ return payload.markdown;
14
+ }
15
+ if (typeof payload.content === 'string') {
16
+ return payload.content;
17
+ }
18
+ return null;
19
+ }
20
+ function hasOptionalStringProperty(value, key) {
21
+ const prop = value[key];
22
+ if (prop === undefined)
23
+ return true;
24
+ return typeof prop === 'string';
25
+ }
26
+ function isCachedPayload(value) {
27
+ if (!isRecord(value))
28
+ return false;
29
+ if (!hasOptionalStringProperty(value, 'content'))
30
+ return false;
31
+ if (!hasOptionalStringProperty(value, 'markdown'))
32
+ return false;
33
+ if (!hasOptionalStringProperty(value, 'title'))
34
+ return false;
35
+ return true;
36
+ }
@@ -8,5 +8,5 @@ export function createErrorWithCode(message, code) {
8
8
  export function isSystemError(error) {
9
9
  return (error instanceof Error &&
10
10
  'code' in error &&
11
- typeof error.code === 'string');
11
+ typeof Reflect.get(error, 'code') === 'string');
12
12
  }
@@ -16,20 +16,31 @@ export function generateSafeFilename(url, title, hashFallback, extension = DEFAU
16
16
  }
17
17
  return `download-${Date.now()}${extension}`;
18
18
  }
19
+ function getLastPathSegment(url) {
20
+ const segments = url.pathname.split('/').filter(Boolean);
21
+ if (segments.length === 0)
22
+ return null;
23
+ const lastSegment = segments[segments.length - 1];
24
+ return lastSegment ?? null;
25
+ }
26
+ function stripCommonPageExtension(segment) {
27
+ return segment.replace(/\.(html?|php|aspx?|jsp)$/i, '');
28
+ }
29
+ function normalizeUrlFilenameSegment(segment) {
30
+ const cleaned = stripCommonPageExtension(segment);
31
+ if (!cleaned)
32
+ return null;
33
+ if (cleaned === 'index')
34
+ return null;
35
+ return cleaned;
36
+ }
19
37
  function extractFilenameFromUrl(url) {
20
38
  try {
21
39
  const urlObj = new URL(url);
22
- const { pathname } = urlObj;
23
- const segments = pathname.split('/').filter(Boolean);
24
- if (segments.length === 0)
25
- return null;
26
- const lastSegment = segments[segments.length - 1];
40
+ const lastSegment = getLastPathSegment(urlObj);
27
41
  if (!lastSegment)
28
42
  return null;
29
- const cleaned = lastSegment.replace(/\.(html?|php|aspx?|jsp)$/i, '');
30
- if (!cleaned || cleaned === 'index')
31
- return null;
32
- return cleaned;
43
+ return normalizeUrlFilenameSegment(lastSegment);
33
44
  }
34
45
  catch {
35
46
  return null;
@@ -43,7 +54,7 @@ function slugifyTitle(title) {
43
54
  .replace(WHITESPACE_REGEX, '-')
44
55
  .replace(/-+/g, '-')
45
56
  .replace(/^-|-$/g, '');
46
- return slug.length > 0 ? slug : null;
57
+ return slug || null;
47
58
  }
48
59
  function sanitizeFilename(name, extension) {
49
60
  let sanitized = name
@@ -0,0 +1 @@
1
+ export declare function isRecord(value: unknown): value is Record<string, unknown>;
@@ -0,0 +1,3 @@
1
+ export function isRecord(value) {
2
+ return typeof value === 'object' && value !== null;
3
+ }
@@ -2,7 +2,4 @@ interface NormalizeOptions {
2
2
  readonly trimValues?: boolean;
3
3
  }
4
4
  export declare function normalizeHeaderRecord(headers: Record<string, string> | undefined, blockedHeaders: Set<string>, options?: NormalizeOptions): Record<string, string> | undefined;
5
- export declare function normalizeHeaderEntries(headers: Record<string, string>, blockedHeaders: Set<string>, options?: NormalizeOptions): Headers;
6
- export declare function hasHeaderEntries(headers: Headers): boolean;
7
- export declare function headersToRecord(headers: Headers): Record<string, string>;
8
5
  export {};
@@ -6,7 +6,7 @@ export function normalizeHeaderRecord(headers, blockedHeaders, options = {}) {
6
6
  return undefined;
7
7
  return headersToRecord(normalized);
8
8
  }
9
- export function normalizeHeaderEntries(headers, blockedHeaders, options = {}) {
9
+ function normalizeHeaderEntries(headers, blockedHeaders, options = {}) {
10
10
  const normalized = new Headers();
11
11
  for (const [key, value] of Object.entries(headers)) {
12
12
  if (blockedHeaders.has(key.toLowerCase()))
@@ -15,10 +15,10 @@ export function normalizeHeaderEntries(headers, blockedHeaders, options = {}) {
15
15
  }
16
16
  return normalized;
17
17
  }
18
- export function hasHeaderEntries(headers) {
18
+ function hasHeaderEntries(headers) {
19
19
  return !headers.keys().next().done;
20
20
  }
21
- export function headersToRecord(headers) {
21
+ function headersToRecord(headers) {
22
22
  return Object.fromEntries(headers.entries());
23
23
  }
24
24
  function setHeaderValue(headers, key, value, trimValue) {
@@ -1,3 +1,3 @@
1
1
  import type { ToolErrorResponse } from '../config/types/tools.js';
2
- export declare function createToolErrorResponse(message: string, url: string, code: string, details?: Record<string, unknown>): ToolErrorResponse;
3
- export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string, details?: Record<string, unknown>): ToolErrorResponse;
2
+ export declare function createToolErrorResponse(message: string, url: string): ToolErrorResponse;
3
+ export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string): ToolErrorResponse;
@@ -1,33 +1,15 @@
1
- import { ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2
1
  import { FetchError } from '../errors/app-error.js';
3
2
  import { isSystemError } from './error-utils.js';
4
- const IS_DEVELOPMENT_WITH_STACK_TRACES = process.env.NODE_ENV === 'development' &&
5
- process.env.EXPOSE_STACK_TRACES === 'true';
6
- const MCP_ERROR_CODE_MAP = {
7
- VALIDATION_ERROR: String(ErrorCode.InvalidParams),
8
- INVALID_PARAMS: String(ErrorCode.InvalidParams),
9
- INTERNAL_ERROR: String(ErrorCode.InternalError),
10
- FETCH_ERROR: String(ErrorCode.InternalError),
11
- BATCH_ERROR: String(ErrorCode.InternalError),
12
- PROMISE_REJECTED: String(ErrorCode.InternalError),
13
- UNKNOWN_ERROR: String(ErrorCode.InternalError),
14
- };
15
- const NUMERIC_ERROR_CODE = /^-?\d+$/;
16
- function normalizeToolErrorCode(code) {
17
- if (!code)
18
- return String(ErrorCode.InternalError);
19
- if (NUMERIC_ERROR_CODE.test(code))
20
- return code;
21
- if (code.startsWith('HTTP_'))
22
- return String(ErrorCode.InternalError);
23
- return MCP_ERROR_CODE_MAP[code] ?? code;
3
+ function createFallbackErrorResponse(fallbackMessage, url, error) {
4
+ return createToolErrorResponse(`${fallbackMessage}: ${error.message}`, url);
24
5
  }
25
- export function createToolErrorResponse(message, url, code, details = {}) {
6
+ function createUnknownErrorResponse(fallbackMessage, url) {
7
+ return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url);
8
+ }
9
+ export function createToolErrorResponse(message, url) {
26
10
  const structuredContent = {
27
- ...details,
28
11
  error: message,
29
12
  url,
30
- errorCode: normalizeToolErrorCode(code),
31
13
  };
32
14
  return {
33
15
  content: [{ type: 'text', text: JSON.stringify(structuredContent) }],
@@ -35,26 +17,17 @@ export function createToolErrorResponse(message, url, code, details = {}) {
35
17
  isError: true,
36
18
  };
37
19
  }
38
- function formatErrorMessage(baseMessage, error, fallback) {
39
- const message = fallback ? `${fallback}: ${error.message}` : error.message;
40
- if (IS_DEVELOPMENT_WITH_STACK_TRACES && error.stack) {
41
- return `${message}\n${error.stack}`;
42
- }
43
- return message;
44
- }
45
- export function handleToolError(error, url, fallbackMessage = 'Operation failed', details = {}) {
20
+ export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
46
21
  if (isValidationError(error)) {
47
- return createToolErrorResponse(error.message, url, 'VALIDATION_ERROR', details);
22
+ return createToolErrorResponse(error.message, url);
48
23
  }
49
24
  if (error instanceof FetchError) {
50
- const message = formatErrorMessage(error.message, error);
51
- return createToolErrorResponse(message, url, error.code, details);
25
+ return createToolErrorResponse(error.message, url);
52
26
  }
53
27
  if (error instanceof Error) {
54
- const message = formatErrorMessage(error.message, error, fallbackMessage);
55
- return createToolErrorResponse(message, url, 'UNKNOWN_ERROR', details);
28
+ return createFallbackErrorResponse(fallbackMessage, url, error);
56
29
  }
57
- return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url, 'UNKNOWN_ERROR', details);
30
+ return createUnknownErrorResponse(fallbackMessage, url);
58
31
  }
59
32
  function isValidationError(error) {
60
33
  return (error instanceof Error &&
@@ -0,0 +1,7 @@
1
+ export interface TransformResult {
2
+ readonly url: string;
3
+ readonly transformed: boolean;
4
+ readonly platform?: string;
5
+ }
6
+ export declare function transformToRawUrl(url: string): TransformResult;
7
+ export declare function isRawTextContentUrl(url: string): boolean;
@@ -0,0 +1,147 @@
1
+ import { logDebug } from '../services/logger.js';
2
+ const GITHUB_BLOB_RULE = {
3
+ name: 'github',
4
+ pattern: /^https?:\/\/(?:www\.)?github\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)$/i,
5
+ transform: (match) => {
6
+ const owner = match[1] ?? '';
7
+ const repo = match[2] ?? '';
8
+ const branch = match[3] ?? '';
9
+ const path = match[4] ?? '';
10
+ return `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`;
11
+ },
12
+ };
13
+ const GITHUB_GIST_RULE = {
14
+ name: 'github-gist',
15
+ pattern: /^https?:\/\/gist\.github\.com\/([^/]+)\/([a-f0-9]+)(?:#file-(.+)|\/raw\/([^/]+))?$/i,
16
+ transform: (match) => {
17
+ const user = match[1] ?? '';
18
+ const gistId = match[2] ?? '';
19
+ const hashFile = match[3];
20
+ const rawFile = match[4];
21
+ const filename = rawFile ?? hashFile?.replace(/-/g, '.');
22
+ const filePath = filename ? `/${filename}` : '';
23
+ return `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`;
24
+ },
25
+ };
26
+ const GITLAB_BLOB_RULE = {
27
+ name: 'gitlab',
28
+ pattern: /^(https?:\/\/(?:[^/]+\.)?gitlab\.com\/[^/]+\/[^/]+)\/-\/blob\/([^/]+)\/(.+)$/i,
29
+ transform: (match) => {
30
+ const baseUrl = match[1] ?? '';
31
+ const branch = match[2] ?? '';
32
+ const path = match[3] ?? '';
33
+ return `${baseUrl}/-/raw/${branch}/${path}`;
34
+ },
35
+ };
36
+ const BITBUCKET_SRC_RULE = {
37
+ name: 'bitbucket',
38
+ pattern: /^(https?:\/\/(?:www\.)?bitbucket\.org\/[^/]+\/[^/]+)\/src\/([^/]+)\/(.+)$/i,
39
+ transform: (match) => {
40
+ const baseUrl = match[1] ?? '';
41
+ const branch = match[2] ?? '';
42
+ const path = match[3] ?? '';
43
+ return `${baseUrl}/raw/${branch}/${path}`;
44
+ },
45
+ };
46
+ const TRANSFORM_RULES = [
47
+ GITHUB_BLOB_RULE,
48
+ GITHUB_GIST_RULE,
49
+ GITLAB_BLOB_RULE,
50
+ BITBUCKET_SRC_RULE,
51
+ ];
52
+ function isRawUrl(url) {
53
+ const lowerUrl = url.toLowerCase();
54
+ return (lowerUrl.includes('raw.githubusercontent.com') ||
55
+ lowerUrl.includes('gist.githubusercontent.com') ||
56
+ lowerUrl.includes('/-/raw/') ||
57
+ /bitbucket\.org\/[^/]+\/[^/]+\/raw\//.test(lowerUrl));
58
+ }
59
+ function getUrlWithoutParams(url) {
60
+ const hashIndex = url.indexOf('#');
61
+ const queryIndex = url.indexOf('?');
62
+ let endIndex = url.length;
63
+ if (queryIndex !== -1) {
64
+ if (hashIndex !== -1) {
65
+ endIndex = Math.min(queryIndex, hashIndex);
66
+ }
67
+ else {
68
+ endIndex = queryIndex;
69
+ }
70
+ }
71
+ else if (hashIndex !== -1) {
72
+ endIndex = hashIndex;
73
+ }
74
+ const hash = hashIndex !== -1 ? url.slice(hashIndex) : '';
75
+ return {
76
+ base: url.slice(0, endIndex),
77
+ hash,
78
+ };
79
+ }
80
+ function resolveUrlToMatch(rule, base, hash) {
81
+ if (rule.name !== 'github-gist')
82
+ return base;
83
+ if (!hash.startsWith('#file-'))
84
+ return base;
85
+ return base + hash;
86
+ }
87
+ function applyTransformRules(base, hash) {
88
+ for (const rule of TRANSFORM_RULES) {
89
+ const urlToMatch = resolveUrlToMatch(rule, base, hash);
90
+ const match = rule.pattern.exec(urlToMatch);
91
+ if (match) {
92
+ return { url: rule.transform(match), platform: rule.name };
93
+ }
94
+ }
95
+ return null;
96
+ }
97
+ export function transformToRawUrl(url) {
98
+ if (!url)
99
+ return { url, transformed: false };
100
+ if (isRawUrl(url)) {
101
+ return { url, transformed: false };
102
+ }
103
+ const { base, hash } = getUrlWithoutParams(url);
104
+ const result = applyTransformRules(base, hash);
105
+ if (!result)
106
+ return { url, transformed: false };
107
+ logDebug('URL transformed to raw content URL', {
108
+ platform: result.platform,
109
+ original: url.substring(0, 100),
110
+ transformed: result.url.substring(0, 100),
111
+ });
112
+ return {
113
+ url: result.url,
114
+ transformed: true,
115
+ platform: result.platform,
116
+ };
117
+ }
118
+ const RAW_TEXT_EXTENSIONS = new Set([
119
+ '.md',
120
+ '.markdown',
121
+ '.txt',
122
+ '.json',
123
+ '.yaml',
124
+ '.yml',
125
+ '.toml',
126
+ '.xml',
127
+ '.csv',
128
+ '.rst',
129
+ '.adoc',
130
+ '.org',
131
+ ]);
132
+ export function isRawTextContentUrl(url) {
133
+ if (!url)
134
+ return false;
135
+ if (isRawUrl(url))
136
+ return true;
137
+ const { base } = getUrlWithoutParams(url);
138
+ const lowerBase = base.toLowerCase();
139
+ return hasKnownRawTextExtension(lowerBase);
140
+ }
141
+ function hasKnownRawTextExtension(urlBaseLower) {
142
+ for (const ext of RAW_TEXT_EXTENSIONS) {
143
+ if (urlBaseLower.endsWith(ext))
144
+ return true;
145
+ }
146
+ return false;
147
+ }
@@ -1,7 +1,6 @@
1
1
  export declare function isBlockedIp(ip: string): boolean;
2
- export declare function assertResolvedAddressesAllowed(hostname: string): Promise<void>;
3
2
  export declare function normalizeUrl(urlString: string): {
4
3
  normalizedUrl: string;
5
4
  hostname: string;
6
5
  };
7
- export declare function validateAndNormalizeUrl(urlString: string): Promise<string>;
6
+ export declare function validateAndNormalizeUrl(urlString: string): string;