@j0hanz/superfetch 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/README.md +345 -57
  2. package/dist/config/index.d.ts.map +1 -1
  3. package/dist/config/index.js +6 -10
  4. package/dist/config/index.js.map +1 -1
  5. package/dist/config/types.d.ts +256 -0
  6. package/dist/config/types.d.ts.map +1 -0
  7. package/dist/config/types.js +2 -0
  8. package/dist/config/types.js.map +1 -0
  9. package/dist/errors/app-error.d.ts +6 -20
  10. package/dist/errors/app-error.d.ts.map +1 -1
  11. package/dist/errors/app-error.js +7 -18
  12. package/dist/errors/app-error.js.map +1 -1
  13. package/dist/index.js +75 -62
  14. package/dist/index.js.map +1 -1
  15. package/dist/middleware/error-handler.d.ts +1 -5
  16. package/dist/middleware/error-handler.d.ts.map +1 -1
  17. package/dist/middleware/error-handler.js +4 -12
  18. package/dist/middleware/error-handler.js.map +1 -1
  19. package/dist/middleware/rate-limiter.d.ts +2 -20
  20. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  21. package/dist/middleware/rate-limiter.js +22 -47
  22. package/dist/middleware/rate-limiter.js.map +1 -1
  23. package/dist/prompts/index.d.ts +0 -3
  24. package/dist/prompts/index.d.ts.map +1 -1
  25. package/dist/prompts/index.js +2 -10
  26. package/dist/prompts/index.js.map +1 -1
  27. package/dist/resources/cached-content.d.ts +5 -0
  28. package/dist/resources/cached-content.d.ts.map +1 -0
  29. package/dist/resources/cached-content.js +93 -0
  30. package/dist/resources/cached-content.js.map +1 -0
  31. package/dist/resources/index.d.ts +0 -3
  32. package/dist/resources/index.d.ts.map +1 -1
  33. package/dist/resources/index.js +40 -5
  34. package/dist/resources/index.js.map +1 -1
  35. package/dist/server.d.ts +0 -4
  36. package/dist/server.d.ts.map +1 -1
  37. package/dist/server.js +11 -6
  38. package/dist/server.js.map +1 -1
  39. package/dist/services/cache.d.ts +20 -6
  40. package/dist/services/cache.d.ts.map +1 -1
  41. package/dist/services/cache.js +128 -20
  42. package/dist/services/cache.js.map +1 -1
  43. package/dist/services/card-extractor.d.ts +10 -0
  44. package/dist/services/card-extractor.d.ts.map +1 -0
  45. package/dist/services/card-extractor.js +194 -0
  46. package/dist/services/card-extractor.js.map +1 -0
  47. package/dist/services/extractor.d.ts +12 -19
  48. package/dist/services/extractor.d.ts.map +1 -1
  49. package/dist/services/extractor.js +60 -46
  50. package/dist/services/extractor.js.map +1 -1
  51. package/dist/services/fetcher.d.ts +13 -11
  52. package/dist/services/fetcher.d.ts.map +1 -1
  53. package/dist/services/fetcher.js +143 -54
  54. package/dist/services/fetcher.js.map +1 -1
  55. package/dist/services/logger.d.ts.map +1 -1
  56. package/dist/services/logger.js +4 -6
  57. package/dist/services/logger.js.map +1 -1
  58. package/dist/services/parser.d.ts +1 -6
  59. package/dist/services/parser.d.ts.map +1 -1
  60. package/dist/services/parser.js +57 -27
  61. package/dist/services/parser.js.map +1 -1
  62. package/dist/tools/handlers/fetch-links.tool.d.ts +6 -18
  63. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  64. package/dist/tools/handlers/fetch-links.tool.js +104 -79
  65. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  66. package/dist/tools/handlers/fetch-markdown.tool.d.ts +6 -10
  67. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  68. package/dist/tools/handlers/fetch-markdown.tool.js +83 -84
  69. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  70. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -12
  71. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  72. package/dist/tools/handlers/fetch-url.tool.js +51 -93
  73. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  74. package/dist/tools/handlers/fetch-urls.tool.d.ts +12 -0
  75. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
  76. package/dist/tools/handlers/fetch-urls.tool.js +184 -0
  77. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
  78. package/dist/tools/index.d.ts +0 -4
  79. package/dist/tools/index.d.ts.map +1 -1
  80. package/dist/tools/index.js +145 -15
  81. package/dist/tools/index.js.map +1 -1
  82. package/dist/tools/utils/common.d.ts +8 -0
  83. package/dist/tools/utils/common.d.ts.map +1 -0
  84. package/dist/tools/utils/common.js +35 -0
  85. package/dist/tools/utils/common.js.map +1 -0
  86. package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
  87. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
  88. package/dist/tools/utils/fetch-pipeline.js +78 -0
  89. package/dist/tools/utils/fetch-pipeline.js.map +1 -0
  90. package/dist/tools/utils/index.d.ts +4 -0
  91. package/dist/tools/utils/index.d.ts.map +1 -0
  92. package/dist/tools/utils/index.js +3 -0
  93. package/dist/tools/utils/index.js.map +1 -0
  94. package/dist/tools/utils/response-builder.d.ts +3 -0
  95. package/dist/tools/utils/response-builder.d.ts.map +1 -0
  96. package/dist/tools/utils/response-builder.js +24 -0
  97. package/dist/tools/utils/response-builder.js.map +1 -0
  98. package/dist/transformers/jsonl.transformer.d.ts +1 -1
  99. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  100. package/dist/transformers/jsonl.transformer.js +2 -1
  101. package/dist/transformers/jsonl.transformer.js.map +1 -1
  102. package/dist/transformers/markdown.transformer.d.ts +1 -1
  103. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  104. package/dist/transformers/markdown.transformer.js +99 -5
  105. package/dist/transformers/markdown.transformer.js.map +1 -1
  106. package/dist/types/content.types.d.ts +11 -11
  107. package/dist/types/content.types.d.ts.map +1 -1
  108. package/dist/types/index.d.ts +1 -2
  109. package/dist/types/index.d.ts.map +1 -1
  110. package/dist/types/index.js +1 -2
  111. package/dist/types/index.js.map +1 -1
  112. package/dist/types/schemas.d.ts +39 -12
  113. package/dist/types/schemas.d.ts.map +1 -1
  114. package/dist/utils/concurrency.d.ts +6 -0
  115. package/dist/utils/concurrency.d.ts.map +1 -0
  116. package/dist/utils/concurrency.js +38 -0
  117. package/dist/utils/concurrency.js.map +1 -0
  118. package/dist/utils/content-cleaner.d.ts +32 -0
  119. package/dist/utils/content-cleaner.d.ts.map +1 -0
  120. package/dist/utils/content-cleaner.js +238 -0
  121. package/dist/utils/content-cleaner.js.map +1 -0
  122. package/dist/utils/language-detector.d.ts +5 -0
  123. package/dist/utils/language-detector.d.ts.map +1 -0
  124. package/dist/utils/language-detector.js +50 -0
  125. package/dist/utils/language-detector.js.map +1 -0
  126. package/dist/utils/sanitizer.d.ts +0 -10
  127. package/dist/utils/sanitizer.d.ts.map +1 -1
  128. package/dist/utils/sanitizer.js +4 -12
  129. package/dist/utils/sanitizer.js.map +1 -1
  130. package/dist/utils/tool-error-handler.d.ts +1 -15
  131. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  132. package/dist/utils/tool-error-handler.js +34 -6
  133. package/dist/utils/tool-error-handler.js.map +1 -1
  134. package/dist/utils/url-validator.d.ts +0 -8
  135. package/dist/utils/url-validator.d.ts.map +1 -1
  136. package/dist/utils/url-validator.js +17 -31
  137. package/dist/utils/url-validator.js.map +1 -1
  138. package/package.json +81 -79
@@ -1,111 +1,69 @@
1
- import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
2
- import { fetchUrlWithRetry } from '../../services/fetcher.js';
3
1
  import { extractContent } from '../../services/extractor.js';
2
+ import { logDebug, logError } from '../../services/logger.js';
4
3
  import { parseHtml } from '../../services/parser.js';
5
- import { toJsonl } from '../../transformers/jsonl.transformer.js';
6
- import * as cache from '../../services/cache.js';
7
- import { config } from '../../config/index.js';
8
- import { logError } from '../../services/logger.js';
9
4
  import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
5
+ import { buildMetadata, shouldUseArticle, truncateContent, } from '../utils/common.js';
6
+ import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
7
+ import { toJsonl } from '../../transformers/jsonl.transformer.js';
10
8
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
11
- export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks';
12
- function extractContentFromHtml(html, url, options) {
13
- // Use the optimized extractContent that parses JSDOM only once
14
- const { article, metadata: extractedMeta } = extractContent(html, url);
15
- if (options.extractMainContent &&
16
- config.extraction.extractMainContent &&
17
- article) {
18
- const contentBlocks = parseHtml(article.content);
19
- const metadata = options.includeMetadata && config.extraction.includeMetadata
20
- ? {
21
- type: 'metadata',
22
- title: article.title,
23
- author: article.byline,
24
- url,
25
- fetchedAt: new Date().toISOString(),
26
- }
27
- : undefined;
28
- return { contentBlocks, metadata, title: article.title };
29
- }
30
- // Fallback: use parsed HTML directly
31
- const contentBlocks = parseHtml(html);
32
- const metadata = options.includeMetadata && config.extraction.includeMetadata
33
- ? {
34
- type: 'metadata',
35
- title: extractedMeta.title,
36
- description: extractedMeta.description,
37
- author: extractedMeta.author,
38
- url,
39
- fetchedAt: new Date().toISOString(),
40
- }
41
- : undefined;
42
- return { contentBlocks, metadata, title: extractedMeta.title };
9
+ export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.';
10
+ function transformToJsonl(html, url, options) {
11
+ // Only invoke JSDOM when extractMainContent is true (lazy loading optimization)
12
+ const { article, metadata: extractedMeta } = extractContent(html, url, {
13
+ extractArticle: options.extractMainContent,
14
+ });
15
+ const useArticle = shouldUseArticle(options.extractMainContent, article);
16
+ const sourceHtml = useArticle ? article.content : html;
17
+ const contentBlocks = parseHtml(sourceHtml);
18
+ const metadata = buildMetadata(url, article, extractedMeta, useArticle, options.includeMetadata);
19
+ const title = useArticle ? article.title : extractedMeta.title;
20
+ return {
21
+ content: toJsonl(contentBlocks, metadata),
22
+ contentBlocks: contentBlocks.length,
23
+ title,
24
+ };
43
25
  }
44
26
  export async function fetchUrlToolHandler(input) {
27
+ if (!input.url) {
28
+ return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
29
+ }
45
30
  try {
46
- // Validate URL input
47
- if (!input.url) {
48
- return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
49
- }
50
- const url = validateAndNormalizeUrl(input.url);
51
- const cacheKey = cache.createCacheKey('url', url);
52
- // Check cache first
53
- if (cacheKey) {
54
- const cached = cache.get(cacheKey);
55
- if (cached) {
56
- const structuredContent = {
57
- url,
58
- cached: true,
59
- fetchedAt: cached.fetchedAt,
60
- content: cached.content,
61
- format: 'jsonl',
62
- contentBlocks: 0, // Unknown from cache
63
- };
64
- return {
65
- content: [
66
- {
67
- type: 'text',
68
- text: JSON.stringify(structuredContent),
69
- },
70
- ],
71
- structuredContent,
72
- };
73
- }
74
- }
75
- const html = await fetchUrlWithRetry(url, input.customHeaders);
76
- // Validate HTML content was received
77
- if (!html) {
78
- return createToolErrorResponse('No content received from URL', url, 'EMPTY_CONTENT');
79
- }
80
- const { contentBlocks, metadata, title } = extractContentFromHtml(html, url, {
81
- extractMainContent: input.extractMainContent ?? true,
82
- includeMetadata: input.includeMetadata ?? true,
31
+ const extractMainContent = input.extractMainContent ?? true;
32
+ const includeMetadata = input.includeMetadata ?? true;
33
+ logDebug('Fetching URL', {
34
+ url: input.url,
35
+ extractMainContent,
36
+ includeMetadata,
37
+ });
38
+ const result = await executeFetchPipeline({
39
+ url: input.url,
40
+ cacheNamespace: 'url',
41
+ customHeaders: input.customHeaders,
42
+ retries: input.retries,
43
+ transform: (html, url) => transformToJsonl(html, url, { extractMainContent, includeMetadata }),
44
+ serialize: (data) => data.content,
45
+ deserialize: (cached) => ({
46
+ content: cached,
47
+ contentBlocks: 0,
48
+ title: undefined,
49
+ }),
83
50
  });
84
- let jsonlContent = toJsonl(contentBlocks, metadata);
85
- if (input.maxContentLength &&
86
- input.maxContentLength > 0 &&
87
- jsonlContent.length > input.maxContentLength) {
88
- jsonlContent =
89
- jsonlContent.substring(0, input.maxContentLength) + '\n...[truncated]';
90
- }
91
- // Cache the result
92
- if (cacheKey) {
93
- cache.set(cacheKey, jsonlContent);
94
- }
51
+ const { content, truncated } = truncateContent(result.data.content, input.maxContentLength);
95
52
  const structuredContent = {
96
- url,
97
- title,
98
- contentBlocks: contentBlocks.length,
99
- fetchedAt: new Date().toISOString(),
53
+ url: result.url,
54
+ title: result.data.title,
55
+ contentBlocks: result.data.contentBlocks,
56
+ fetchedAt: result.fetchedAt,
100
57
  format: 'jsonl',
101
- content: jsonlContent,
102
- cached: false,
58
+ content,
59
+ cached: result.fromCache,
60
+ ...(truncated && { truncated }),
103
61
  };
104
62
  return {
105
63
  content: [
106
64
  {
107
65
  type: 'text',
108
- text: JSON.stringify(structuredContent, null, 2),
66
+ text: JSON.stringify(structuredContent, result.fromCache ? undefined : null, result.fromCache ? undefined : 2),
109
67
  },
110
68
  ],
111
69
  structuredContent,
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-url.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AACpD,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAO3C,MAAM,CAAC,MAAM,mBAAmB,GAAG,WAAW,CAAC;AAC/C,MAAM,CAAC,MAAM,0BAA0B,GACrC,4FAA4F,CAAC;AAQ/F,SAAS,sBAAsB,CAC7B,IAAY,EACZ,GAAW,EACX,OAAkE;IAElE,+DAA+D;IAC/D,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAEvE,IACE,OAAO,CAAC,kBAAkB;QAC1B,MAAM,CAAC,UAAU,CAAC,kBAAkB;QACpC,OAAO,EACP,CAAC;QACD,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACjD,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,MAAM,CAAC,UAAU,CAAC,eAAe;YAC1D,CAAC,CAAC;gBACE,IAAI,EAAE,UAAmB;gBACzB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,GAAG;gBACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC;YACH,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;IAC3D,CAAC;IAED,qCAAqC;IACrC,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEtC,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,MAAM,CAAC,UAAU,CAAC,eAAe;QAC1D,CAAC,CAAC;YACE,IAAI,EAAE,UAAmB;YACzB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC;QACH,CAAC,CAAC,SAAS,CAAC;IAEhB,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE,aAAa,CAAC,KAAK,EAAE,CAAC;AACjE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,KAAoB;IAC5D,IAAI,CAAC;QACH,qBAAqB;QACrB,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;YACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;QAC5E,CAAC;QAED,MAAM,GAAG,GAAG,uBAAuB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAElD,oBAAoB;QACpB,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,iBAAiB,GAAG;oBACxB,GAAG;oBACH,MAAM,EAAE,IAAI;oBACZ,SAAS,EAAE,MAAM,CAAC,SAAS;oBAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,MAAM,EAAE,OAAgB;oBACxB,aAAa,EAAE,CAAC,EAAE,qBAAqB;iBACxC,CAAC;gBACF,OAAO;oBACL,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,MAAe;4BACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC;yBACxC;qBACF;oBACD,iBAAiB;iBAClB,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,GAAG,EAAE,KAAK,CAAC,aAAa,CAAC,CAAC;QAE/D,qCAAqC;QACrC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,uBAAuB,CAC5B,8BAA8B,EAC9B,GAAG,EACH,eAAe,CAChB,CAAC;QACJ,CAAC;QAED,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,sBAAsB,CAC/D,IAAI,EACJ,GAAG,EACH;YACE,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;SAC/C,CACF,CAAC;QAEF,IAAI,YAAY,GAAG,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;QAEpD,IACE,KAAK,CAAC,gBAAgB;YACtB,KAAK,CAAC,gBAAgB,GAAG,CAAC;YAC1B,YAAY,CAAC,MAAM,GAAG,KAAK,CAAC,gBAAgB,EAC5C,CAAC;YACD,YAAY;gBACV,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,gBAAgB,CAAC,GAAG,kBAAkB,CAAC;QAC3E,CAAC;QAED,mBAAmB;QACnB,IAAI,QAAQ,EAAE,CAAC;YACb,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QACpC,CAAC;QAED,MAAM,iBAAiB,GAAG;YACxB,GAAG;YACH,KAAK;YACL,aAAa,EAAE,aAAa,CAAC,MAAM;YACnC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,MAAM,EAAE,OAAgB;YACxB,OAAO,EAAE,YAAY;YACrB,MAAM,EAAE,KAAK;SACd,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;iBACjD;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sBAAsB,EACtB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,qBAAqB,CAAC,CAAC;IAClE,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"fetch-url.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAElE,MAAM,CAAC,MAAM,mBAAmB,GAAG,WAAW,CAAC;AAC/C,MAAM,CAAC,MAAM,0BAA0B,GACrC,0JAA0J,CAAC;AAE7J,SAAS,gBAAgB,CACvB,IAAY,EACZ,GAAW,EACX,OAAkE;IAElE,gFAAgF;IAChF,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IACH,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;IACzE,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACvD,MAAM,aAAa,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;IAC5C,MAAM,QAAQ,GAAG,aAAa,CAC5B,GAAG,EACH,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE/D,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC;QACzC,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,KAAK;KACN,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,KAAoB;IAK5D,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC;QACH,MAAM,kBAAkB,GAAG,KAAK,CAAC,kBAAkB,IAAI,IAAI,CAAC;QAC5D,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;QAEtD,QAAQ,CAAC,cAAc,EAAE;YACvB,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,kBAAkB;YAClB,eAAe;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAuB;YAC9D,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,KAAK;YACrB,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CACvB,gBAAgB,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,kBAAkB,EAAE,eAAe,EAAE,CAAC;YACtE,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO;YACjC,WAAW,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACxB,OAAO,EAAE,MAAM;gBACf,aAAa,EAAE,CAAC;gBAChB,KAAK,EAAE,SAAS;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,MAAM,CAAC,IAAI,CAAC,OAAO,EACnB,KAAK,CAAC,gBAAgB,CACvB,CAAC;QAEF,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,aAAa,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa;YACxC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,MAAM,EAAE,OAAgB;YACxB,OAAO;YACP,MAAM,EAAE,MAAM,CAAC,SAAS;YACxB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;SAChC,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB,iBAAiB,EACjB,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EACnC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CACjC;iBACF;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sBAAsB,EACtB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,qBAAqB,CAAC,CAAC;IAClE,CAAC;AACH,CAAC"}
@@ -0,0 +1,12 @@
1
+ import type { FetchUrlsInput } from '../../config/types.js';
2
+ export declare const FETCH_URLS_TOOL_NAME = "fetch-urls";
3
+ export declare const FETCH_URLS_TOOL_DESCRIPTION = "Fetches multiple URLs in parallel and converts them to AI-readable format (JSONL or Markdown). Supports concurrency control and continues on individual failures.";
4
+ export declare function fetchUrlsToolHandler(input: FetchUrlsInput): Promise<{
5
+ content: {
6
+ type: 'text';
7
+ text: string;
8
+ }[];
9
+ structuredContent?: Record<string, unknown>;
10
+ isError?: boolean;
11
+ }>;
12
+ //# sourceMappingURL=fetch-urls.tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-urls.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAEV,cAAc,EAGf,MAAM,uBAAuB,CAAC;AAwB/B,eAAO,MAAM,oBAAoB,eAAe,CAAC;AACjD,eAAO,MAAM,2BAA2B,sKAC6H,CAAC;AA6HtK,wBAAsB,oBAAoB,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC;IACzE,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC1C,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC,CAsHD"}
@@ -0,0 +1,184 @@
1
+ import * as cheerio from 'cheerio';
2
+ import * as cache from '../../services/cache.js';
3
+ import { extractContent, extractMetadataWithCheerio, } from '../../services/extractor.js';
4
+ import { fetchUrlWithRetry } from '../../services/fetcher.js';
5
+ import { logDebug, logError, logWarn } from '../../services/logger.js';
6
+ import { parseHtml } from '../../services/parser.js';
7
+ import { runWithConcurrency } from '../../utils/concurrency.js';
8
+ import { createToolErrorResponse } from '../../utils/tool-error-handler.js';
9
+ import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
10
+ import { buildMetadata, shouldUseArticle, truncateContent, } from '../utils/common.js';
11
+ import { createBatchResponse } from '../utils/response-builder.js';
12
+ import { toJsonl } from '../../transformers/jsonl.transformer.js';
13
+ import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
14
+ export const FETCH_URLS_TOOL_NAME = 'fetch-urls';
15
+ export const FETCH_URLS_TOOL_DESCRIPTION = 'Fetches multiple URLs in parallel and converts them to AI-readable format (JSONL or Markdown). Supports concurrency control and continues on individual failures.';
16
+ const MAX_URLS = 10;
17
+ const DEFAULT_CONCURRENCY = 3;
18
+ async function processSingleUrl(url, options) {
19
+ try {
20
+ const normalizedUrl = validateAndNormalizeUrl(url);
21
+ const cacheNamespace = options.format === 'markdown' ? 'markdown' : 'url';
22
+ const cacheKey = cache.createCacheKey(cacheNamespace, normalizedUrl);
23
+ if (cacheKey) {
24
+ const cached = cache.get(cacheKey);
25
+ if (cached) {
26
+ logDebug('Batch cache hit', { url: normalizedUrl });
27
+ return {
28
+ url: normalizedUrl,
29
+ success: true,
30
+ content: cached.content,
31
+ cached: true,
32
+ };
33
+ }
34
+ }
35
+ const fetchResult = await fetchUrlWithRetry(normalizedUrl);
36
+ let sourceHtml;
37
+ let title;
38
+ let metadata;
39
+ // Fast path: Skip JSDOM entirely when extractMainContent is false
40
+ if (!options.extractMainContent) {
41
+ sourceHtml = fetchResult.html;
42
+ const $ = cheerio.load(fetchResult.html);
43
+ const extractedMeta = extractMetadataWithCheerio($);
44
+ ({ title } = extractedMeta);
45
+ if (options.includeMetadata) {
46
+ metadata = {
47
+ type: 'metadata',
48
+ url: normalizedUrl,
49
+ fetchedAt: new Date().toISOString(),
50
+ title: extractedMeta.title,
51
+ description: extractedMeta.description,
52
+ author: extractedMeta.author,
53
+ };
54
+ }
55
+ }
56
+ else {
57
+ // Slow path: Use JSDOM only when article extraction is needed
58
+ const { article, metadata: extractedMeta } = extractContent(fetchResult.html, normalizedUrl, {
59
+ extractArticle: true,
60
+ });
61
+ const useArticle = shouldUseArticle(true, article);
62
+ metadata = buildMetadata(normalizedUrl, article, extractedMeta, useArticle, options.includeMetadata);
63
+ sourceHtml = useArticle ? article.content : fetchResult.html;
64
+ title = useArticle ? article.title : extractedMeta.title;
65
+ }
66
+ let content;
67
+ let contentBlocks;
68
+ if (options.format === 'markdown') {
69
+ content = htmlToMarkdown(sourceHtml, metadata);
70
+ }
71
+ else {
72
+ const blocks = parseHtml(sourceHtml);
73
+ contentBlocks = blocks.length;
74
+ content = toJsonl(blocks, metadata);
75
+ }
76
+ const { content: truncatedContent } = truncateContent(content, options.maxContentLength);
77
+ content = truncatedContent;
78
+ if (cacheKey)
79
+ cache.set(cacheKey, content);
80
+ return {
81
+ url: normalizedUrl,
82
+ success: true,
83
+ title,
84
+ content,
85
+ contentBlocks,
86
+ cached: false,
87
+ };
88
+ }
89
+ catch (error) {
90
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
91
+ const errorCode = error instanceof Error &&
92
+ 'code' in error &&
93
+ typeof error.code === 'string'
94
+ ? error.code
95
+ : 'FETCH_ERROR';
96
+ logWarn('Batch URL processing failed', { url, error: errorMessage });
97
+ return {
98
+ url,
99
+ success: false,
100
+ cached: false,
101
+ error: errorMessage,
102
+ errorCode,
103
+ };
104
+ }
105
+ }
106
+ export async function fetchUrlsToolHandler(input) {
107
+ try {
108
+ // Validate input - urls array is guaranteed by Zod schema but check for empty
109
+ if (input.urls.length === 0) {
110
+ return createToolErrorResponse('At least one URL is required', '', 'VALIDATION_ERROR');
111
+ }
112
+ // Enforce max URLs limit
113
+ if (input.urls.length > MAX_URLS) {
114
+ return createToolErrorResponse(`Maximum ${MAX_URLS} URLs allowed per batch`, '', 'VALIDATION_ERROR');
115
+ }
116
+ // Filter out empty URLs
117
+ const validUrls = input.urls.filter((url) => typeof url === 'string' && url.trim().length > 0);
118
+ if (validUrls.length === 0) {
119
+ return createToolErrorResponse('No valid URLs provided', '', 'VALIDATION_ERROR');
120
+ }
121
+ const concurrency = Math.min(Math.max(1, input.concurrency ?? DEFAULT_CONCURRENCY), 5);
122
+ const continueOnError = input.continueOnError ?? true;
123
+ const format = input.format ?? 'jsonl';
124
+ logDebug('Starting batch URL fetch', {
125
+ urlCount: validUrls.length,
126
+ concurrency,
127
+ format,
128
+ });
129
+ // Create tasks for each URL
130
+ const tasks = validUrls.map((url) => async () => processSingleUrl(url, {
131
+ extractMainContent: input.extractMainContent ?? true,
132
+ includeMetadata: input.includeMetadata ?? true,
133
+ maxContentLength: input.maxContentLength,
134
+ format,
135
+ }));
136
+ // Execute with concurrency control
137
+ const settledResults = await runWithConcurrency(concurrency, tasks, {
138
+ onProgress: (completed, total) => {
139
+ logDebug('Batch progress', {
140
+ completed,
141
+ total,
142
+ percentage: Math.round((completed / total) * 100),
143
+ });
144
+ },
145
+ });
146
+ // Helper to safely extract error message from rejected promise
147
+ const getErrorMessage = ({ reason }) => {
148
+ const typedReason = reason;
149
+ return typedReason instanceof Error
150
+ ? typedReason.message
151
+ : String(typedReason);
152
+ };
153
+ // Process results
154
+ const results = settledResults.map((result, index) => {
155
+ if (result.status === 'fulfilled') {
156
+ return result.value;
157
+ }
158
+ else {
159
+ // Promise rejection (shouldn't happen as processSingleUrl catches errors)
160
+ return {
161
+ url: validUrls[index] ?? 'unknown',
162
+ success: false,
163
+ cached: false,
164
+ error: getErrorMessage(result),
165
+ errorCode: 'PROMISE_REJECTED',
166
+ };
167
+ }
168
+ });
169
+ // Check if we should fail fast on errors
170
+ if (!continueOnError) {
171
+ const firstError = results.find((r) => !r.success);
172
+ if (firstError && !firstError.success) {
173
+ const errorMsg = firstError.error ?? 'Unknown error';
174
+ return createToolErrorResponse(`Batch failed: ${errorMsg}`, firstError.url, firstError.errorCode ?? 'BATCH_ERROR');
175
+ }
176
+ }
177
+ return createBatchResponse(results);
178
+ }
179
+ catch (error) {
180
+ logError('fetch-urls tool error', error instanceof Error ? error : undefined);
181
+ return createToolErrorResponse(error instanceof Error ? error.message : 'Failed to fetch URLs', '', 'BATCH_ERROR');
182
+ }
183
+ }
184
+ //# sourceMappingURL=fetch-urls.tool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-urls.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AASnC,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EACL,cAAc,EACd,0BAA0B,GAC3B,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AACvE,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mCAAmC,CAAC;AAC5E,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAEnE,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,MAAM,CAAC,MAAM,oBAAoB,GAAG,YAAY,CAAC;AACjD,MAAM,CAAC,MAAM,2BAA2B,GACtC,mKAAmK,CAAC;AAEtK,MAAM,QAAQ,GAAG,EAAE,CAAC;AACpB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAS9B,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,OAAuB;IAEvB,IAAI,CAAC;QACH,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;QACnD,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1E,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;QAErE,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,QAAQ,CAAC,iBAAiB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;gBACpD,OAAO;oBACL,GAAG,EAAE,aAAa;oBAClB,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,MAAM,EAAE,IAAI;iBACb,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,aAAa,CAAC,CAAC;QAE3D,IAAI,UAAkB,CAAC;QACvB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAmC,CAAC;QAExC,kEAAkE;QAClE,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,CAAC;YAChC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC;YAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YACzC,MAAM,aAAa,GAAG,0BAA0B,CAAC,CAAC,CAAC,CAAC;YACpD,CAAC,EAAE,KAAK,EAAE,GAAG,aAAa,CAAC,CAAC;YAE5B,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,QAAQ,GAAG;oBACT,IAAI,EAAE,UAAmB;oBACzB,GAAG,EAAE,aAAa;oBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;oBACnC,KAAK,EAAE,aAAa,CAAC,KAAK;oBAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;oBACtC,MAAM,EAAE,aAAa,CAAC,MAAM;iBAC7B,CAAC;YACJ,CAAC;QACH,CAAC;aAAM,CAAC;YACN,8DAA8D;YAC9D,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CACzD,WAAW,CAAC,IAAI,EAChB,aAAa,EACb;gBACE,cAAc,EAAE,IAAI;aACrB,CACF,CAAC;YACF,MAAM,UAAU,GAAG,gBAAgB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACnD,QAAQ,GAAG,aAAa,CACtB,aAAa,EACb,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;YACF,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC;YAC7D,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;QAC3D,CAAC;QAED,IAAI,OAAe,CAAC;QACpB,IAAI,aAAiC,CAAC;QAEtC,IAAI,OAAO,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAClC,OAAO,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;YACrC,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC;YAC9B,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QACtC,CAAC;QAED,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,GAAG,eAAe,CACnD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;QACF,OAAO,GAAG,gBAAgB,CAAC;QAC3B,IAAI,QAAQ;YAAE,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE3C,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,IAAI;YACb,KAAK;YACL,OAAO;YACP,aAAa;YACb,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QAC3D,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;YACtB,MAAM,IAAI,KAAK;YACf,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ;YAC5B,CAAC,CAAC,KAAK,CAAC,IAAI;YACZ,CAAC,CAAC,aAAa,CAAC;QAEpB,OAAO,CAAC,6BAA6B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC;QACrE,OAAO;YACL,GAAG;YACH,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,YAAY;YACnB,SAAS;SACV,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAqB;IAK9D,IAAI,CAAC;QACH,8EAA8E;QAC9E,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,uBAAuB,CAC5B,8BAA8B,EAC9B,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,yBAAyB;QACzB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;YACjC,OAAO,uBAAuB,CAC5B,WAAW,QAAQ,yBAAyB,EAC5C,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CACjC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAC1D,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,uBAAuB,CAC5B,wBAAwB,EACxB,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,WAAW,IAAI,mBAAmB,CAAC,EACrD,CAAC,CACF,CAAC;QACF,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;QACtD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC;QAEvC,QAAQ,CAAC,0BAA0B,EAAE;YACnC,QAAQ,EAAE,SAAS,CAAC,MAAM;YAC1B,WAAW;YACX,MAAM;SACP,CAAC,CAAC;QAEH,4BAA4B;QAC5B,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CACzB,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE,CAClB,gBAAgB,CAAC,GAAG,EAAE;YACpB,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;YACxC,MAAM;SACP,CAAC,CACL,CAAC;QAEF,mCAAmC;QACnC,MAAM,cAAc,GAAG,MAAM,kBAAkB,CAAC,WAAW,EAAE,KAAK,EAAE;YAClE,UAAU,EAAE,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;gBAC/B,QAAQ,CAAC,gBAAgB,EAAE;oBACzB,SAAS;oBACT,KAAK;oBACL,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;iBAClD,CAAC,CAAC;YACL,CAAC;SACF,CAAC,CAAC;QAEH,+DAA+D;QAC/D,MAAM,eAAe,GAAG,CAAC,EAAE,MAAM,EAAyB,EAAU,EAAE;YACpE,MAAM,WAAW,GAAY,MAAM,CAAC;YACpC,OAAO,WAAW,YAAY,KAAK;gBACjC,CAAC,CAAC,WAAW,CAAC,OAAO;gBACrB,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAC1B,CAAC,CAAC;QAEF,kBAAkB;QAClB,MAAM,OAAO,GAAqB,cAAc,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YACrE,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClC,OAAO,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC;iBAAM,CAAC;gBACN,0EAA0E;gBAC1E,OAAO;oBACL,GAAG,EAAE,SAAS,CAAC,KAAK,CAAC,IAAI,SAAS;oBAClC,OAAO,EAAE,KAAc;oBACvB,MAAM,EAAE,KAAc;oBACtB,KAAK,EAAE,eAAe,CAAC,MAAM,CAAC;oBAC9B,SAAS,EAAE,kBAAkB;iBAC9B,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,yCAAyC;QACzC,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACnD,IAAI,UAAU,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;gBACtC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;gBACrD,OAAO,uBAAuB,CAC5B,iBAAiB,QAAQ,EAAE,EAC3B,UAAU,CAAC,GAAG,EACd,UAAU,CAAC,SAAS,IAAI,aAAa,CACtC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,mBAAmB,CAAC,OAAO,CAAC,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,uBAAuB,EACvB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QAEF,OAAO,uBAAuB,CAC5B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB,EAC/D,EAAE,EACF,aAAa,CACd,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -1,7 +1,3 @@
1
1
  import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
- /**
3
- * Registers all tools with the MCP server using the modern McpServer API
4
- * Tools are registered with Zod schemas for automatic validation
5
- */
6
2
  export declare function registerTools(server: McpServer): void;
7
3
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAwHzE;;;GAGG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAoCrD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAsQzE,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CA4CrD"}
@@ -1,8 +1,28 @@
1
1
  import { z } from 'zod';
2
- import { fetchUrlToolHandler, FETCH_URL_TOOL_NAME, FETCH_URL_TOOL_DESCRIPTION, } from './handlers/fetch-url.tool.js';
3
- import { fetchLinksToolHandler, FETCH_LINKS_TOOL_NAME, FETCH_LINKS_TOOL_DESCRIPTION, } from './handlers/fetch-links.tool.js';
4
- import { fetchMarkdownToolHandler, FETCH_MARKDOWN_TOOL_NAME, FETCH_MARKDOWN_TOOL_DESCRIPTION, } from './handlers/fetch-markdown.tool.js';
2
+ import { FETCH_LINKS_TOOL_DESCRIPTION, FETCH_LINKS_TOOL_NAME, fetchLinksToolHandler, } from './handlers/fetch-links.tool.js';
3
+ import { FETCH_MARKDOWN_TOOL_DESCRIPTION, FETCH_MARKDOWN_TOOL_NAME, fetchMarkdownToolHandler, } from './handlers/fetch-markdown.tool.js';
4
+ import { FETCH_URL_TOOL_DESCRIPTION, FETCH_URL_TOOL_NAME, fetchUrlToolHandler, } from './handlers/fetch-url.tool.js';
5
+ import { FETCH_URLS_TOOL_DESCRIPTION, FETCH_URLS_TOOL_NAME, fetchUrlsToolHandler, } from './handlers/fetch-urls.tool.js';
5
6
  // Zod schemas for runtime validation - single source of truth
7
+ // Common request options shared across tools
8
+ const RequestOptionsSchema = {
9
+ customHeaders: z
10
+ .record(z.string())
11
+ .optional()
12
+ .describe('Custom HTTP headers for the request'),
13
+ timeout: z
14
+ .number()
15
+ .min(1000)
16
+ .max(60000)
17
+ .optional()
18
+ .describe('Request timeout in milliseconds (1000-60000)'),
19
+ retries: z
20
+ .number()
21
+ .min(1)
22
+ .max(10)
23
+ .optional()
24
+ .describe('Number of retry attempts (1-10)'),
25
+ };
6
26
  // Input schemas
7
27
  const FetchUrlInputSchema = {
8
28
  url: z.string().min(1).describe('The URL to fetch'),
@@ -26,10 +46,7 @@ const FetchUrlInputSchema = {
26
46
  .optional()
27
47
  .default('jsonl')
28
48
  .describe('Output format'),
29
- customHeaders: z
30
- .record(z.string())
31
- .optional()
32
- .describe('Custom HTTP headers for the request'),
49
+ ...RequestOptionsSchema,
33
50
  };
34
51
  const FetchLinksInputSchema = {
35
52
  url: z.string().min(1).describe('The URL to extract links from'),
@@ -43,6 +60,22 @@ const FetchLinksInputSchema = {
43
60
  .optional()
44
61
  .default(true)
45
62
  .describe('Include internal links'),
63
+ maxLinks: z
64
+ .number()
65
+ .positive()
66
+ .max(1000)
67
+ .optional()
68
+ .describe('Maximum number of links to return (1-1000)'),
69
+ filterPattern: z
70
+ .string()
71
+ .optional()
72
+ .describe('Regex pattern to filter links (matches against href)'),
73
+ includeImages: z
74
+ .boolean()
75
+ .optional()
76
+ .default(false)
77
+ .describe('Include image links (img src attributes)'),
78
+ ...RequestOptionsSchema,
46
79
  };
47
80
  const FetchMarkdownInputSchema = {
48
81
  url: z.string().min(1).describe('The URL to fetch'),
@@ -56,6 +89,57 @@ const FetchMarkdownInputSchema = {
56
89
  .optional()
57
90
  .default(true)
58
91
  .describe('Include YAML frontmatter metadata'),
92
+ maxContentLength: z
93
+ .number()
94
+ .positive()
95
+ .optional()
96
+ .describe('Maximum content length in characters'),
97
+ generateToc: z
98
+ .boolean()
99
+ .optional()
100
+ .default(false)
101
+ .describe('Generate table of contents from headings'),
102
+ ...RequestOptionsSchema,
103
+ };
104
+ const FetchUrlsInputSchema = {
105
+ urls: z
106
+ .array(z.string().min(1))
107
+ .min(1)
108
+ .max(10)
109
+ .describe('Array of URLs to fetch (1-10 URLs)'),
110
+ extractMainContent: z
111
+ .boolean()
112
+ .optional()
113
+ .default(true)
114
+ .describe('Use Readability to extract main article content'),
115
+ includeMetadata: z
116
+ .boolean()
117
+ .optional()
118
+ .default(true)
119
+ .describe('Include page metadata (title, description, etc.)'),
120
+ maxContentLength: z
121
+ .number()
122
+ .positive()
123
+ .optional()
124
+ .describe('Maximum content length per URL in characters'),
125
+ format: z
126
+ .enum(['jsonl', 'markdown'])
127
+ .optional()
128
+ .default('jsonl')
129
+ .describe('Output format for all URLs'),
130
+ concurrency: z
131
+ .number()
132
+ .min(1)
133
+ .max(5)
134
+ .optional()
135
+ .default(3)
136
+ .describe('Maximum concurrent requests (1-5)'),
137
+ continueOnError: z
138
+ .boolean()
139
+ .optional()
140
+ .default(true)
141
+ .describe('Continue processing if some URLs fail'),
142
+ ...RequestOptionsSchema,
59
143
  };
60
144
  // Output schemas for structured content validation
61
145
  const FetchUrlOutputSchema = {
@@ -78,9 +162,17 @@ const FetchLinksOutputSchema = {
78
162
  .array(z.object({
79
163
  href: z.string().describe('The link URL'),
80
164
  text: z.string().describe('The link anchor text'),
81
- type: z.enum(['internal', 'external']).describe('Link type'),
165
+ type: z.enum(['internal', 'external', 'image']).describe('Link type'),
82
166
  }))
83
167
  .describe('Array of extracted links'),
168
+ filtered: z
169
+ .number()
170
+ .optional()
171
+ .describe('Number of links filtered out by pattern'),
172
+ truncated: z
173
+ .boolean()
174
+ .optional()
175
+ .describe('Whether results were truncated by maxLinks'),
84
176
  error: z.string().optional().describe('Error message if the request failed'),
85
177
  errorCode: z.string().optional().describe('Error code if the request failed'),
86
178
  };
@@ -91,35 +183,73 @@ const FetchMarkdownOutputSchema = {
91
183
  .string()
92
184
  .describe('ISO timestamp of when the content was fetched'),
93
185
  markdown: z.string().describe('The extracted content in Markdown format'),
186
+ toc: z
187
+ .array(z.object({
188
+ level: z.number().describe('Heading level (1-6)'),
189
+ text: z.string().describe('Heading text'),
190
+ slug: z.string().describe('URL-friendly anchor slug'),
191
+ }))
192
+ .optional()
193
+ .describe('Table of contents (if generateToc is true)'),
94
194
  cached: z.boolean().describe('Whether the result was served from cache'),
195
+ truncated: z
196
+ .boolean()
197
+ .optional()
198
+ .describe('Whether content was truncated by maxContentLength'),
95
199
  error: z.string().optional().describe('Error message if the request failed'),
96
200
  errorCode: z.string().optional().describe('Error code if the request failed'),
97
201
  };
98
- /**
99
- * Registers all tools with the MCP server using the modern McpServer API
100
- * Tools are registered with Zod schemas for automatic validation
101
- */
202
+ const FetchUrlsOutputSchema = {
203
+ results: z
204
+ .array(z.object({
205
+ url: z.string().describe('The fetched URL'),
206
+ success: z.boolean().describe('Whether the fetch was successful'),
207
+ title: z.string().optional().describe('Page title'),
208
+ content: z.string().optional().describe('The extracted content'),
209
+ contentBlocks: z
210
+ .number()
211
+ .optional()
212
+ .describe('Number of content blocks (JSONL only)'),
213
+ cached: z.boolean().optional().describe('Whether served from cache'),
214
+ error: z.string().optional().describe('Error message if failed'),
215
+ errorCode: z.string().optional().describe('Error code if failed'),
216
+ }))
217
+ .describe('Array of results for each URL'),
218
+ summary: z
219
+ .object({
220
+ total: z.number().describe('Total URLs processed'),
221
+ successful: z.number().describe('Number of successful fetches'),
222
+ failed: z.number().describe('Number of failed fetches'),
223
+ cached: z.number().describe('Number served from cache'),
224
+ totalContentBlocks: z.number().describe('Total content blocks extracted'),
225
+ })
226
+ .describe('Summary statistics'),
227
+ fetchedAt: z.string().describe('ISO timestamp of batch completion'),
228
+ };
102
229
  export function registerTools(server) {
103
- // Register fetch-url tool
104
230
  server.registerTool(FETCH_URL_TOOL_NAME, {
105
231
  title: 'Fetch URL',
106
232
  description: FETCH_URL_TOOL_DESCRIPTION,
107
233
  inputSchema: FetchUrlInputSchema,
108
234
  outputSchema: FetchUrlOutputSchema,
109
235
  }, async (args) => fetchUrlToolHandler(args));
110
- // Register fetch-links tool
111
236
  server.registerTool(FETCH_LINKS_TOOL_NAME, {
112
237
  title: 'Fetch Links',
113
238
  description: FETCH_LINKS_TOOL_DESCRIPTION,
114
239
  inputSchema: FetchLinksInputSchema,
115
240
  outputSchema: FetchLinksOutputSchema,
116
241
  }, async (args) => fetchLinksToolHandler(args));
117
- // Register fetch-markdown tool
118
242
  server.registerTool(FETCH_MARKDOWN_TOOL_NAME, {
119
243
  title: 'Fetch Markdown',
120
244
  description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
121
245
  inputSchema: FetchMarkdownInputSchema,
122
246
  outputSchema: FetchMarkdownOutputSchema,
123
247
  }, async (args) => fetchMarkdownToolHandler(args));
248
+ server.registerTool(FETCH_URLS_TOOL_NAME, {
249
+ title: 'Fetch URLs (Batch)',
250
+ description: FETCH_URLS_TOOL_DESCRIPTION,
251
+ inputSchema: FetchUrlsInputSchema,
252
+ outputSchema: FetchUrlsOutputSchema,
253
+ }, async (args) => fetchUrlsToolHandler(args));
124
254
  }
125
255
  //# sourceMappingURL=index.js.map