@j0hanz/superfetch 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/README.md +57 -32
  2. package/dist/config/formatting.d.ts +9 -0
  3. package/dist/config/formatting.d.ts.map +1 -0
  4. package/dist/config/formatting.js +11 -0
  5. package/dist/config/formatting.js.map +1 -0
  6. package/dist/config/index.d.ts +16 -2
  7. package/dist/config/index.d.ts.map +1 -1
  8. package/dist/config/index.js +43 -14
  9. package/dist/config/index.js.map +1 -1
  10. package/dist/config/types/content.d.ts +107 -0
  11. package/dist/config/types/content.d.ts.map +1 -0
  12. package/dist/config/types/content.js +2 -0
  13. package/dist/config/types/content.js.map +1 -0
  14. package/dist/config/types/runtime.d.ts +78 -0
  15. package/dist/config/types/runtime.d.ts.map +1 -0
  16. package/dist/config/types/runtime.js +2 -0
  17. package/dist/config/types/runtime.js.map +1 -0
  18. package/dist/config/types/tools.d.ts +99 -0
  19. package/dist/config/types/tools.d.ts.map +1 -0
  20. package/dist/config/types/tools.js +2 -0
  21. package/dist/config/types/tools.js.map +1 -0
  22. package/dist/config/types.d.ts +3 -296
  23. package/dist/config/types.d.ts.map +1 -1
  24. package/dist/http/auth.d.ts +3 -0
  25. package/dist/http/auth.d.ts.map +1 -0
  26. package/dist/http/auth.js +34 -0
  27. package/dist/http/auth.js.map +1 -0
  28. package/dist/http/cors.d.ts +8 -0
  29. package/dist/http/cors.d.ts.map +1 -0
  30. package/dist/http/cors.js +47 -0
  31. package/dist/http/cors.js.map +1 -0
  32. package/dist/http/mcp-routes.d.ts +5 -0
  33. package/dist/http/mcp-routes.d.ts.map +1 -0
  34. package/dist/http/mcp-routes.js +110 -0
  35. package/dist/http/mcp-routes.js.map +1 -0
  36. package/dist/http/mcp-session.d.ts +12 -0
  37. package/dist/http/mcp-session.d.ts.map +1 -0
  38. package/dist/http/mcp-session.js +209 -0
  39. package/dist/http/mcp-session.js.map +1 -0
  40. package/dist/http/mcp-validation.d.ts +3 -0
  41. package/dist/http/mcp-validation.d.ts.map +1 -0
  42. package/dist/http/mcp-validation.js +34 -0
  43. package/dist/http/mcp-validation.js.map +1 -0
  44. package/dist/http/rate-limit.d.ts +13 -0
  45. package/dist/http/rate-limit.d.ts.map +1 -0
  46. package/dist/http/rate-limit.js +91 -0
  47. package/dist/http/rate-limit.js.map +1 -0
  48. package/dist/http/server.d.ts +4 -0
  49. package/dist/http/server.d.ts.map +1 -0
  50. package/dist/http/server.js +183 -0
  51. package/dist/http/server.js.map +1 -0
  52. package/dist/http/sessions.d.ts +15 -0
  53. package/dist/http/sessions.d.ts.map +1 -0
  54. package/dist/http/sessions.js +64 -0
  55. package/dist/http/sessions.js.map +1 -0
  56. package/dist/index.js +26 -223
  57. package/dist/index.js.map +1 -1
  58. package/dist/middleware/error-handler.d.ts +2 -2
  59. package/dist/middleware/error-handler.d.ts.map +1 -1
  60. package/dist/middleware/error-handler.js +46 -15
  61. package/dist/middleware/error-handler.js.map +1 -1
  62. package/dist/resources/cached-content.d.ts.map +1 -1
  63. package/dist/resources/cached-content.js +104 -44
  64. package/dist/resources/cached-content.js.map +1 -1
  65. package/dist/resources/index.d.ts.map +1 -1
  66. package/dist/resources/index.js +77 -69
  67. package/dist/resources/index.js.map +1 -1
  68. package/dist/server.d.ts.map +1 -1
  69. package/dist/server.js +9 -3
  70. package/dist/server.js.map +1 -1
  71. package/dist/services/cache.d.ts +13 -1
  72. package/dist/services/cache.d.ts.map +1 -1
  73. package/dist/services/cache.js +90 -13
  74. package/dist/services/cache.js.map +1 -1
  75. package/dist/services/context.d.ts +9 -0
  76. package/dist/services/context.d.ts.map +1 -0
  77. package/dist/services/context.js +9 -0
  78. package/dist/services/context.js.map +1 -0
  79. package/dist/services/extractor.d.ts.map +1 -1
  80. package/dist/services/extractor.js +122 -87
  81. package/dist/services/extractor.js.map +1 -1
  82. package/dist/services/fetcher/agents.d.ts +4 -0
  83. package/dist/services/fetcher/agents.d.ts.map +1 -0
  84. package/dist/services/fetcher/agents.js +111 -0
  85. package/dist/services/fetcher/agents.js.map +1 -0
  86. package/dist/services/fetcher/errors.d.ts +5 -0
  87. package/dist/services/fetcher/errors.d.ts.map +1 -0
  88. package/dist/services/fetcher/errors.js +71 -0
  89. package/dist/services/fetcher/errors.js.map +1 -0
  90. package/dist/services/fetcher/headers.d.ts +2 -0
  91. package/dist/services/fetcher/headers.d.ts.map +1 -0
  92. package/dist/services/fetcher/headers.js +28 -0
  93. package/dist/services/fetcher/headers.js.map +1 -0
  94. package/dist/services/fetcher/interceptors.d.ts +10 -0
  95. package/dist/services/fetcher/interceptors.d.ts.map +1 -0
  96. package/dist/services/fetcher/interceptors.js +82 -0
  97. package/dist/services/fetcher/interceptors.js.map +1 -0
  98. package/dist/services/fetcher/redirects.d.ts +6 -0
  99. package/dist/services/fetcher/redirects.d.ts.map +1 -0
  100. package/dist/services/fetcher/redirects.js +67 -0
  101. package/dist/services/fetcher/redirects.js.map +1 -0
  102. package/dist/services/fetcher/response.d.ts +5 -0
  103. package/dist/services/fetcher/response.d.ts.map +1 -0
  104. package/dist/services/fetcher/response.js +39 -0
  105. package/dist/services/fetcher/response.js.map +1 -0
  106. package/dist/services/fetcher/retry-policy.d.ts +28 -0
  107. package/dist/services/fetcher/retry-policy.d.ts.map +1 -0
  108. package/dist/services/fetcher/retry-policy.js +138 -0
  109. package/dist/services/fetcher/retry-policy.js.map +1 -0
  110. package/dist/services/fetcher.d.ts +2 -1
  111. package/dist/services/fetcher.d.ts.map +1 -1
  112. package/dist/services/fetcher.js +61 -254
  113. package/dist/services/fetcher.js.map +1 -1
  114. package/dist/services/logger.d.ts.map +1 -1
  115. package/dist/services/logger.js +14 -5
  116. package/dist/services/logger.js.map +1 -1
  117. package/dist/services/parser.d.ts +1 -0
  118. package/dist/services/parser.d.ts.map +1 -1
  119. package/dist/services/parser.js +55 -35
  120. package/dist/services/parser.js.map +1 -1
  121. package/dist/tools/handlers/fetch-links/link-extractor.d.ts +4 -0
  122. package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +1 -0
  123. package/dist/tools/handlers/fetch-links/link-extractor.js +163 -0
  124. package/dist/tools/handlers/fetch-links/link-extractor.js.map +1 -0
  125. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  126. package/dist/tools/handlers/fetch-links.tool.js +78 -116
  127. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  128. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -13
  129. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  130. package/dist/tools/handlers/fetch-markdown.tool.js +74 -83
  131. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  132. package/dist/tools/handlers/fetch-single.shared.d.ts +26 -0
  133. package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -0
  134. package/dist/tools/handlers/fetch-single.shared.js +49 -0
  135. package/dist/tools/handlers/fetch-single.shared.js.map +1 -0
  136. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  137. package/dist/tools/handlers/fetch-url.tool.js +82 -54
  138. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  139. package/dist/tools/handlers/fetch-urls/processor.d.ts +13 -0
  140. package/dist/tools/handlers/fetch-urls/processor.d.ts.map +1 -0
  141. package/dist/tools/handlers/fetch-urls/processor.js +153 -0
  142. package/dist/tools/handlers/fetch-urls/processor.js.map +1 -0
  143. package/dist/tools/handlers/fetch-urls/response.d.ts +3 -0
  144. package/dist/tools/handlers/fetch-urls/response.d.ts.map +1 -0
  145. package/dist/tools/handlers/fetch-urls/response.js +58 -0
  146. package/dist/tools/handlers/fetch-urls/response.js.map +1 -0
  147. package/dist/tools/handlers/fetch-urls/validation.d.ts +6 -0
  148. package/dist/tools/handlers/fetch-urls/validation.d.ts.map +1 -0
  149. package/dist/tools/handlers/fetch-urls/validation.js +18 -0
  150. package/dist/tools/handlers/fetch-urls/validation.js.map +1 -0
  151. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
  152. package/dist/tools/handlers/fetch-urls.tool.js +104 -197
  153. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
  154. package/dist/tools/index.d.ts.map +1 -1
  155. package/dist/tools/index.js +36 -237
  156. package/dist/tools/index.js.map +1 -1
  157. package/dist/tools/schemas.d.ts +357 -0
  158. package/dist/tools/schemas.d.ts.map +1 -0
  159. package/dist/tools/schemas.js +272 -0
  160. package/dist/tools/schemas.js.map +1 -0
  161. package/dist/tools/utils/cache-vary.d.ts +3 -0
  162. package/dist/tools/utils/cache-vary.d.ts.map +1 -0
  163. package/dist/tools/utils/cache-vary.js +44 -0
  164. package/dist/tools/utils/cache-vary.js.map +1 -0
  165. package/dist/tools/utils/common.d.ts +2 -2
  166. package/dist/tools/utils/common.d.ts.map +1 -1
  167. package/dist/tools/utils/common.js +5 -1
  168. package/dist/tools/utils/common.js.map +1 -1
  169. package/dist/tools/utils/content-transform.d.ts +16 -0
  170. package/dist/tools/utils/content-transform.d.ts.map +1 -0
  171. package/dist/tools/utils/content-transform.js +49 -0
  172. package/dist/tools/utils/content-transform.js.map +1 -0
  173. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
  174. package/dist/tools/utils/fetch-pipeline.js +32 -18
  175. package/dist/tools/utils/fetch-pipeline.js.map +1 -1
  176. package/dist/tools/utils/inline-content.d.ts +11 -0
  177. package/dist/tools/utils/inline-content.d.ts.map +1 -0
  178. package/dist/tools/utils/inline-content.js +39 -0
  179. package/dist/tools/utils/inline-content.js.map +1 -0
  180. package/dist/tools/utils/markdown-toc.d.ts +3 -0
  181. package/dist/tools/utils/markdown-toc.d.ts.map +1 -0
  182. package/dist/tools/utils/markdown-toc.js +35 -0
  183. package/dist/tools/utils/markdown-toc.js.map +1 -0
  184. package/dist/tools/utils/tool-response.d.ts +9 -0
  185. package/dist/tools/utils/tool-response.d.ts.map +1 -0
  186. package/dist/tools/utils/tool-response.js +19 -0
  187. package/dist/tools/utils/tool-response.js.map +1 -0
  188. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  189. package/dist/transformers/jsonl.transformer.js +51 -28
  190. package/dist/transformers/jsonl.transformer.js.map +1 -1
  191. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  192. package/dist/transformers/markdown.transformer.js +82 -111
  193. package/dist/transformers/markdown.transformer.js.map +1 -1
  194. package/dist/utils/header-normalizer.d.ts +5 -0
  195. package/dist/utils/header-normalizer.d.ts.map +1 -0
  196. package/dist/utils/header-normalizer.js +25 -0
  197. package/dist/utils/header-normalizer.js.map +1 -0
  198. package/dist/utils/tool-error-handler.d.ts +1 -0
  199. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  200. package/dist/utils/tool-error-handler.js +29 -1
  201. package/dist/utils/tool-error-handler.js.map +1 -1
  202. package/dist/utils/url-validator.d.ts +0 -3
  203. package/dist/utils/url-validator.d.ts.map +1 -1
  204. package/dist/utils/url-validator.js +98 -18
  205. package/dist/utils/url-validator.js.map +1 -1
  206. package/package.json +11 -6
@@ -0,0 +1,163 @@
1
+ import * as cheerio from 'cheerio';
2
+ import safeRegex from 'safe-regex';
3
+ import { createToolErrorResponse } from '../../../utils/tool-error-handler.js';
4
+ import { isInternalUrl } from '../../../utils/url-validator.js';
5
+ function isLinkAllowed(type, options) {
6
+ const allowed = {
7
+ internal: options.includeInternal,
8
+ external: options.includeExternal,
9
+ image: options.includeImages,
10
+ };
11
+ return allowed[type];
12
+ }
13
+ function matchesFilter(url, filterPattern) {
14
+ if (!filterPattern)
15
+ return true;
16
+ return filterPattern.test(url);
17
+ }
18
+ function evaluateLink(link, options, seen) {
19
+ if (seen.has(link.href)) {
20
+ return { accepted: false, filtered: false };
21
+ }
22
+ if (!matchesFilter(link.href, options.filterPattern)) {
23
+ return { accepted: false, filtered: true };
24
+ }
25
+ if (!isLinkAllowed(link.type, options)) {
26
+ return { accepted: false, filtered: true };
27
+ }
28
+ return { accepted: true, filtered: false };
29
+ }
30
+ export function resolveFilterPattern(pattern, url) {
31
+ if (!pattern)
32
+ return undefined;
33
+ const lengthError = validatePatternLength(pattern, url);
34
+ if (lengthError)
35
+ return lengthError;
36
+ const filterPattern = buildFilterRegex(pattern, url);
37
+ if (isToolResponseBase(filterPattern))
38
+ return filterPattern;
39
+ const safetyError = validatePatternSafety(filterPattern, url);
40
+ if (safetyError)
41
+ return safetyError;
42
+ return filterPattern;
43
+ }
44
+ function validatePatternLength(pattern, url) {
45
+ if (pattern.length <= 200)
46
+ return null;
47
+ return createToolErrorResponse('Filter pattern too long (max 200 characters)', url, 'VALIDATION_ERROR');
48
+ }
49
+ function buildFilterRegex(pattern, url) {
50
+ try {
51
+ return new RegExp(pattern, 'i');
52
+ }
53
+ catch {
54
+ return createToolErrorResponse(`Invalid filter pattern: ${pattern}`, url, 'VALIDATION_ERROR');
55
+ }
56
+ }
57
+ function validatePatternSafety(pattern, url) {
58
+ if (safeRegex(pattern))
59
+ return null;
60
+ return createToolErrorResponse('Filter pattern is unsafe (potential catastrophic backtracking)', url, 'VALIDATION_ERROR');
61
+ }
62
+ function isToolResponseBase(value) {
63
+ return (value !== null &&
64
+ typeof value === 'object' &&
65
+ 'content' in value &&
66
+ Array.isArray(value.content));
67
+ }
68
+ function tryResolveUrl(href, baseUrl) {
69
+ if (!URL.canParse(href, baseUrl)) {
70
+ return null;
71
+ }
72
+ return new URL(href, baseUrl).href;
73
+ }
74
+ function buildLinkType(url, baseUrl) {
75
+ return isInternalUrl(url, baseUrl) ? 'internal' : 'external';
76
+ }
77
+ function isSkippableHref(href) {
78
+ return href.startsWith('#') || href.startsWith('javascript:');
79
+ }
80
+ function getAnchorHref($, el) {
81
+ const href = $(el).attr('href');
82
+ if (!href)
83
+ return null;
84
+ if (isSkippableHref(href))
85
+ return null;
86
+ return href;
87
+ }
88
+ function resolveAnchorLink($, el, baseUrl) {
89
+ const href = getAnchorHref($, el);
90
+ if (!href)
91
+ return null;
92
+ const url = tryResolveUrl(href, baseUrl);
93
+ if (!url)
94
+ return null;
95
+ return {
96
+ href: url,
97
+ text: $(el).text().trim() || url,
98
+ type: buildLinkType(url, baseUrl),
99
+ };
100
+ }
101
+ function resolveImageLink($, el, baseUrl) {
102
+ const src = $(el).attr('src');
103
+ if (!src || src.startsWith('data:'))
104
+ return null;
105
+ const url = tryResolveUrl(src, baseUrl);
106
+ if (!url)
107
+ return null;
108
+ return {
109
+ href: url,
110
+ text: $(el).attr('alt')?.trim() ?? url,
111
+ type: 'image',
112
+ };
113
+ }
114
+ function collectAnchorLinks($, baseUrl, options, seen, links) {
115
+ let filtered = 0;
116
+ $('a[href]').each((_, el) => {
117
+ const link = resolveAnchorLink($, el, baseUrl);
118
+ if (!link)
119
+ return;
120
+ const result = evaluateLink(link, options, seen);
121
+ if (result.filtered)
122
+ filtered += 1;
123
+ if (!result.accepted)
124
+ return;
125
+ seen.add(link.href);
126
+ links.push(link);
127
+ });
128
+ return filtered;
129
+ }
130
+ function collectImageLinks($, baseUrl, options, seen, links) {
131
+ if (!options.includeImages)
132
+ return 0;
133
+ let filtered = 0;
134
+ $('img[src]').each((_, el) => {
135
+ const link = resolveImageLink($, el, baseUrl);
136
+ if (!link)
137
+ return;
138
+ const result = evaluateLink(link, options, seen);
139
+ if (result.filtered)
140
+ filtered += 1;
141
+ if (!result.accepted)
142
+ return;
143
+ seen.add(link.href);
144
+ links.push(link);
145
+ });
146
+ return filtered;
147
+ }
148
+ export function extractLinks(html, baseUrl, options) {
149
+ const $ = cheerio.load(html);
150
+ const links = [];
151
+ const seen = new Set();
152
+ let filtered = collectAnchorLinks($, baseUrl, options, seen, links);
153
+ filtered += collectImageLinks($, baseUrl, options, seen, links);
154
+ const truncated = options.maxLinks ? links.length > options.maxLinks : false;
155
+ const resultLinks = truncated ? links.slice(0, options.maxLinks) : links;
156
+ return {
157
+ links: resultLinks,
158
+ linkCount: resultLinks.length,
159
+ filtered,
160
+ truncated,
161
+ };
162
+ }
163
+ //# sourceMappingURL=link-extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"link-extractor.js","sourceRoot":"","sources":["../../../../src/tools/handlers/fetch-links/link-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,SAAS,MAAM,YAAY,CAAC;AAUnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,sCAAsC,CAAC;AAC/E,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAEhE,SAAS,aAAa,CAAC,IAAc,EAAE,OAA4B;IACjE,MAAM,OAAO,GAA8B;QACzC,QAAQ,EAAE,OAAO,CAAC,eAAe;QACjC,QAAQ,EAAE,OAAO,CAAC,eAAe;QACjC,KAAK,EAAE,OAAO,CAAC,aAAa;KAC7B,CAAC;IACF,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,aAAa,CACpB,GAAW,EACX,aAAiC;IAEjC,IAAI,CAAC,aAAa;QAAE,OAAO,IAAI,CAAC;IAChC,OAAO,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,YAAY,CACnB,IAAmB,EACnB,OAA4B,EAC5B,IAAiB;IAEjB,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;IAC9C,CAAC;IAED,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;QACrD,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAC7C,CAAC;IAED,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;QACvC,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAC7C,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,OAA2B,EAC3B,GAAW;IAEX,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAE/B,MAAM,WAAW,GAAG,qBAAqB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IACxD,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,MAAM,aAAa,GAAG,gBAAgB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IACrD,IAAI,kBAAkB,CAAC,aAAa,CAAC;QAAE,OAAO,aAAa,CAAC;IAE5D,MAAM,WAAW,GAAG,qBAAqB,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;IAC9D,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,OAAO,aAAa,CAAC;AACvB,CAAC;AAED,SAAS,qBAAqB,CAC5B,OAAe,EACf,GAAW;IAEX,IAAI,OAAO,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IACvC,OAAO,uBAAuB,CAC5B,8CAA8C,EAC9C,GAAG,EACH,kBAAkB,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,OAAe,EACf,GAAW;IAEX,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,uBAAuB,CAC5B,2BAA2B,OAAO,EAAE,EACpC,GAAG,EACH,kBAAkB,CACnB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,qBAAqB,CAC5B,OAAe,EACf,GAAW;IAEX,IAAI,SAAS,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACpC,OAAO,uBAAuB,CAC5B,gEAAgE,EAChE,GAAG,EACH,kBAAkB,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,OAAO,CACL,KAAK,KAAK,IAAI;QACd,OAAO,KAAK,KAAK,QAAQ;QACzB,SAAS,IAAI,KAAK;QAClB,KAAK,CAAC,OAAO,CAAE,KAA0B,CAAC,OAAO,CAAC,CACnD,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,IAAY,EAAE,OAAe;IAClD,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;QACjC,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;AACrC,CAAC;AAED,SAAS,aAAa,CAAC,GAAW,EAAE,OAAe;IACjD,OAAO,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC;AAC/D,CAAC;AAED,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAChE,CAAC;AAED,SAAS,aAAa,CAAC,CAAqB,EAAE,EAAW;IACvD,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,IAAI,eAAe,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CACxB,CAAqB,EACrB,EAAW,EACX,OAAe;IAEf,MAAM,IAAI,GAAG,aAAa,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,GAAG;QACT,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,GAAG;QAChC,IAAI,EAAE,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC;KAClC,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,CAAqB,EACrB,EAAW,EACX,OAAe;IAEf,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAEjD,MAAM,GAAG,GAAG,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACxC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,GAAG;QACT,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG;QACtC,IAAI,EAAE,OAAO;KACd,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CACzB,CAAqB,EACrB,OAAe,EACf,OAA4B,EAC5B,IAAiB,EACjB,KAAsB;IAEtB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,iBAAiB,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;QAC/C,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;QACjD,IAAI,MAAM,CAAC,QAAQ;YAAE,QAAQ,IAAI,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,OAAO;QAE7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,iBAAiB,CACxB,CAAqB,EACrB,OAAe,EACf,OAA4B,EAC5B,IAAiB,EACjB,KAAsB;IAEtB,IAAI,CAAC,OAAO,CAAC,aAAa;QAAE,OAAO,CAAC,CAAC;IAErC,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC3B,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;QACjD,IAAI,MAAM,CAAC,QAAQ;YAAE,QAAQ,IAAI,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,OAAO;QAE7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,IAAY,EACZ,OAAe,EACf,OAA4B;IAE5B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,IAAI,QAAQ,GAAG,kBAAkB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;IACpE,QAAQ,IAAI,iBAAiB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;IAEhE,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAC7E,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEzE,OAAO;QACL,KAAK,EAAE,WAAW;QAClB,SAAS,EAAE,WAAW,CAAC,MAAM;QAC7B,QAAQ;QACR,SAAS;KACV,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAGV,eAAe,EAGf,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAW/B,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,uIAC6F,CAAC;AAqFvI,wBAAsB,qBAAqB,CACzC,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,gBAAgB,CAAC,CAwE3B"}
1
+ {"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,eAAe,EAGf,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAgB/B,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,uIAC6F,CAAC;AAiEvI,wBAAsB,qBAAqB,CACzC,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,gBAAgB,CAAC,CAU3B"}
@@ -1,138 +1,100 @@
1
- import * as cheerio from 'cheerio';
2
1
  import { logDebug, logError } from '../../services/logger.js';
3
2
  import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
4
- import { isInternalUrl } from '../../utils/url-validator.js';
3
+ import { appendHeaderVary } from '../utils/cache-vary.js';
5
4
  import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
5
+ import { extractLinks, resolveFilterPattern, } from './fetch-links/link-extractor.js';
6
6
  export const FETCH_LINKS_TOOL_NAME = 'fetch-links';
7
7
  export const FETCH_LINKS_TOOL_DESCRIPTION = 'Extracts all hyperlinks from a webpage with anchor text and type classification. Supports filtering, image links, and link limits.';
8
- function tryResolveUrl(href, baseUrl) {
9
- try {
10
- return new URL(href, baseUrl).href;
11
- }
12
- catch {
13
- return null;
14
- }
8
+ function isRecord(value) {
9
+ return value !== null && typeof value === 'object';
15
10
  }
16
- function shouldIncludeLink(type, url, options, seen) {
17
- if (seen.has(url))
11
+ function isToolResponseBase(value) {
12
+ if (!isRecord(value))
18
13
  return false;
19
- if (options.filterPattern && !options.filterPattern.test(url))
14
+ if (!('content' in value))
20
15
  return false;
21
- if (type === 'internal' && !options.includeInternal)
22
- return false;
23
- if (type === 'external' && !options.includeExternal)
24
- return false;
25
- return true;
16
+ return Array.isArray(value.content);
26
17
  }
27
- function extractLinks(html, baseUrl, options) {
28
- const $ = cheerio.load(html);
29
- const links = [];
30
- const seen = new Set();
31
- let filtered = 0;
32
- $('a[href]').each((_, el) => {
33
- const href = $(el).attr('href');
34
- if (!href || href.startsWith('#') || href.startsWith('javascript:'))
35
- return;
36
- const url = tryResolveUrl(href, baseUrl);
37
- if (!url)
38
- return;
39
- const type = isInternalUrl(url, baseUrl)
40
- ? 'internal'
41
- : 'external';
42
- if (!shouldIncludeLink(type, url, options, seen)) {
43
- if (!seen.has(url))
44
- filtered++;
45
- return;
46
- }
47
- seen.add(url);
48
- links.push({ href: url, text: $(el).text().trim() || url, type });
18
+ function logFetchLinksStart(url, options, filterPattern) {
19
+ logDebug('Extracting links', {
20
+ url,
21
+ ...options,
22
+ filterPattern,
49
23
  });
50
- if (options.includeImages) {
51
- $('img[src]').each((_, el) => {
52
- const src = $(el).attr('src');
53
- if (!src || src.startsWith('data:'))
54
- return;
55
- const url = tryResolveUrl(src, baseUrl);
56
- if (!url)
57
- return;
58
- if (!shouldIncludeLink('image', url, options, seen)) {
59
- if (!seen.has(url))
60
- filtered++;
61
- return;
62
- }
63
- seen.add(url);
64
- links.push({
65
- href: url,
66
- text: $(el).attr('alt')?.trim() ?? url,
67
- type: 'image',
68
- });
69
- });
70
- }
71
- const truncated = options.maxLinks ? links.length > options.maxLinks : false;
72
- const resultLinks = truncated ? links.slice(0, options.maxLinks) : links;
24
+ }
25
+ async function fetchLinksPipeline(url, input, options) {
26
+ return executeFetchPipeline({
27
+ url,
28
+ cacheNamespace: 'links',
29
+ customHeaders: input.customHeaders,
30
+ retries: input.retries,
31
+ timeout: input.timeout,
32
+ cacheVary: appendHeaderVary({
33
+ includeInternal: options.includeInternal,
34
+ includeExternal: options.includeExternal,
35
+ includeImages: options.includeImages,
36
+ maxLinks: options.maxLinks,
37
+ filterPattern: input.filterPattern ?? null,
38
+ }, input.customHeaders),
39
+ transform: (html, normalizedUrl) => extractLinks(html, normalizedUrl, options),
40
+ });
41
+ }
42
+ function buildLinksResponse(result) {
43
+ const structuredContent = buildLinksStructuredContent(result);
73
44
  return {
74
- links: resultLinks,
75
- linkCount: resultLinks.length,
76
- filtered,
77
- truncated,
45
+ content: [
46
+ {
47
+ type: 'text',
48
+ text: JSON.stringify(structuredContent, null, 2),
49
+ },
50
+ ],
51
+ structuredContent,
78
52
  };
79
53
  }
80
54
  export async function fetchLinksToolHandler(input) {
81
- if (!input.url) {
82
- return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
83
- }
84
- let filterPattern;
85
- if (input.filterPattern) {
86
- if (input.filterPattern.length > 200) {
87
- return createToolErrorResponse('Filter pattern too long (max 200 characters)', input.url, 'VALIDATION_ERROR');
88
- }
89
- try {
90
- filterPattern = new RegExp(input.filterPattern, 'i');
91
- }
92
- catch {
93
- return createToolErrorResponse(`Invalid filter pattern: ${input.filterPattern}`, input.url, 'VALIDATION_ERROR');
94
- }
95
- }
96
55
  try {
97
- const options = {
98
- includeInternal: input.includeInternal ?? true,
99
- includeExternal: input.includeExternal ?? true,
100
- includeImages: input.includeImages ?? false,
101
- maxLinks: input.maxLinks,
102
- filterPattern,
103
- };
104
- logDebug('Extracting links', {
105
- url: input.url,
106
- ...options,
107
- filterPattern: input.filterPattern,
108
- });
109
- const result = await executeFetchPipeline({
110
- url: input.url,
111
- cacheNamespace: 'links',
112
- customHeaders: input.customHeaders,
113
- retries: input.retries,
114
- transform: (html, url) => extractLinks(html, url, options),
115
- });
116
- const structuredContent = {
117
- url: result.url,
118
- linkCount: result.data.linkCount,
119
- links: result.data.links,
120
- ...(result.data.filtered > 0 && { filtered: result.data.filtered }),
121
- ...(result.data.truncated && { truncated: result.data.truncated }),
122
- };
123
- return {
124
- content: [
125
- {
126
- type: 'text',
127
- text: JSON.stringify(structuredContent, null, 2),
128
- },
129
- ],
130
- structuredContent,
131
- };
56
+ return await executeFetchLinks(input);
132
57
  }
133
58
  catch (error) {
134
59
  logError('fetch-links tool error', error instanceof Error ? error : undefined);
135
60
  return handleToolError(error, input.url, 'Failed to extract links');
136
61
  }
137
62
  }
63
+ async function executeFetchLinks(input) {
64
+ const { url } = input;
65
+ if (!url) {
66
+ return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
67
+ }
68
+ const filterPattern = resolveFilterPattern(input.filterPattern, url);
69
+ if (isToolResponseBase(filterPattern)) {
70
+ return filterPattern;
71
+ }
72
+ const options = buildExtractOptions(input, filterPattern);
73
+ logFetchLinksStart(url, options, input.filterPattern);
74
+ const result = await fetchLinksPipeline(url, input, options);
75
+ return buildLinksResponse(result);
76
+ }
77
+ function buildExtractOptions(input, filterPattern) {
78
+ return {
79
+ includeInternal: input.includeInternal ?? true,
80
+ includeExternal: input.includeExternal ?? true,
81
+ includeImages: input.includeImages ?? false,
82
+ maxLinks: input.maxLinks,
83
+ filterPattern,
84
+ };
85
+ }
86
+ function buildLinksStructuredContent(result) {
87
+ const structuredContent = {
88
+ url: result.url,
89
+ linkCount: result.data.linkCount,
90
+ links: result.data.links,
91
+ };
92
+ if (result.data.filtered > 0) {
93
+ structuredContent.filtered = result.data.filtered;
94
+ }
95
+ if (result.data.truncated) {
96
+ structuredContent.truncated = result.data.truncated;
97
+ }
98
+ return structuredContent;
99
+ }
138
100
  //# sourceMappingURL=fetch-links.tool.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAWnC,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAE9D,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,MAAM,CAAC,MAAM,qBAAqB,GAAG,aAAa,CAAC;AACnD,MAAM,CAAC,MAAM,4BAA4B,GACvC,oIAAoI,CAAC;AAEvI,SAAS,aAAa,CAAC,IAAY,EAAE,OAAe;IAClD,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CACxB,IAAc,EACd,GAAW,EACX,OAA4B,EAC5B,IAAiB;IAEjB,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,OAAO,CAAC,aAAa,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAC5E,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;QAAE,OAAO,KAAK,CAAC;IAClE,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;QAAE,OAAO,KAAK,CAAC;IAClE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,YAAY,CACnB,IAAY,EACZ,OAAe,EACf,OAA4B;IAE5B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAAE,OAAO;QAE5E,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACzC,IAAI,CAAC,GAAG;YAAE,OAAO;QAEjB,MAAM,IAAI,GAAa,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC;YAChD,CAAC,CAAC,UAAU;YACZ,CAAC,CAAC,UAAU,CAAC;QACf,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,QAAQ,EAAE,CAAC;YAC/B,OAAO;QACT,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC9B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,OAAO;YAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACxC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;gBACpD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,QAAQ,EAAE,CAAC;gBAC/B,OAAO;YACT,CAAC;YAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,KAAK,CAAC,IAAI,CAAC;gBACT,IAAI,EAAE,GAAG;gBACT,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG;gBACtC,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAC7E,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEzE,OAAO;QACL,KAAK,EAAE,WAAW;QAClB,SAAS,EAAE,WAAW,CAAC,MAAM;QAC7B,QAAQ;QACR,SAAS;KACV,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,KAAsB;IAEtB,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;QACf,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,aAAiC,CAAC;IACtC,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC;QACxB,IAAI,KAAK,CAAC,aAAa,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACrC,OAAO,uBAAuB,CAC5B,8CAA8C,EAC9C,KAAK,CAAC,GAAG,EACT,kBAAkB,CACnB,CAAC;QACJ,CAAC;QACD,IAAI,CAAC;YACH,aAAa,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;QACvD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,uBAAuB,CAC5B,2BAA2B,KAAK,CAAC,aAAa,EAAE,EAChD,KAAK,CAAC,GAAG,EACT,kBAAkB,CACnB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAwB;YACnC,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,KAAK;YAC3C,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,aAAa;SACd,CAAC;QAEF,QAAQ,CAAC,kBAAkB,EAAE;YAC3B,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,GAAG,OAAO;YACV,aAAa,EAAE,KAAK,CAAC,aAAa;SACnC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAuB;YAC9D,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,cAAc,EAAE,OAAO;YACvB,aAAa,EAAE,KAAK,CAAC,aAAa;YAClC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC;SAC3D,CAAC,CAAC;QAEH,MAAM,iBAAiB,GAAG;YACxB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS;YAChC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;YACxB,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnE,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;SACnE,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;iBACjD;aACF;YACD,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,wBAAwB,EACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,yBAAyB,CAAC,CAAC;IACtE,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAE9D,OAAO,EACL,uBAAuB,EACvB,eAAe,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAElE,OAAO,EACL,YAAY,EACZ,oBAAoB,GACrB,MAAM,iCAAiC,CAAC;AAEzC,MAAM,CAAC,MAAM,qBAAqB,GAAG,aAAa,CAAC;AACnD,MAAM,CAAC,MAAM,4BAA4B,GACvC,oIAAoI,CAAC;AAEvI,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,CAAC;AACrD,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACnC,IAAI,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,OAAO,KAAK,CAAC,OAAO,CAAE,KAA0B,CAAC,OAAO,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,kBAAkB,CACzB,GAAW,EACX,OAA4B,EAC5B,aAAiC;IAEjC,QAAQ,CAAC,kBAAkB,EAAE;QAC3B,GAAG;QACH,GAAG,OAAO;QACV,aAAa;KACd,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,GAAW,EACX,KAAsB,EACtB,OAA4B;IAE5B,OAAO,oBAAoB,CAAuB;QAChD,GAAG;QACH,cAAc,EAAE,OAAO;QACvB,aAAa,EAAE,KAAK,CAAC,aAAa;QAClC,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,SAAS,EAAE,gBAAgB,CACzB;YACE,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,aAAa,EAAE,OAAO,CAAC,aAAa;YACpC,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,IAAI;SAC3C,EACD,KAAK,CAAC,aAAa,CACpB;QACD,SAAS,EAAE,CAAC,IAAI,EAAE,aAAa,EAAE,EAAE,CACjC,YAAY,CAAC,IAAI,EAAE,aAAa,EAAE,OAAO,CAAC;KAC7C,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CACzB,MAA4C;IAE5C,MAAM,iBAAiB,GAAG,2BAA2B,CAAC,MAAM,CAAC,CAAC;IAC9D,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;aACjD;SACF;QACD,iBAAiB;KAClB,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,KAAsB;IAEtB,IAAI,CAAC;QACH,OAAO,MAAM,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACxC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,wBAAwB,EACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,EAAE,yBAAyB,CAAC,CAAC;IACtE,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,KAAsB;IAEtB,MAAM,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;IACtB,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,uBAAuB,CAAC,iBAAiB,EAAE,EAAE,EAAE,kBAAkB,CAAC,CAAC;IAC5E,CAAC;IACD,MAAM,aAAa,GAAG,oBAAoB,CAAC,KAAK,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;IACrE,IAAI,kBAAkB,CAAC,aAAa,CAAC,EAAE,CAAC;QACtC,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,MAAM,OAAO,GAAG,mBAAmB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;IAE1D,kBAAkB,CAAC,GAAG,EAAE,OAAO,EAAE,KAAK,CAAC,aAAa,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IAC7D,OAAO,kBAAkB,CAAC,MAAM,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,mBAAmB,CAC1B,KAAsB,EACtB,aAAiC;IAEjC,OAAO;QACL,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;QAC9C,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;QAC9C,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,KAAK;QAC3C,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,aAAa;KACd,CAAC;AACJ,CAAC;AAED,SAAS,2BAA2B,CAClC,MAA4C;IAE5C,MAAM,iBAAiB,GAA4B;QACjD,GAAG,EAAE,MAAM,CAAC,GAAG;QACf,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS;QAChC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;KACzB,CAAC;IAEF,IAAI,MAAM,CAAC,IAAI,CAAC,QAAQ,GAAG,CAAC,EAAE,CAAC;QAC7B,iBAAiB,CAAC,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;IACpD,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;QAC1B,iBAAiB,CAAC,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC;IACtD,CAAC;IAED,OAAO,iBAAiB,CAAC;AAC3B,CAAC"}
@@ -1,15 +1,5 @@
1
- import type { FetchMarkdownInput } from '../../config/types.js';
1
+ import type { FetchMarkdownInput, ToolResponseBase } from '../../config/types.js';
2
2
  export declare const FETCH_MARKDOWN_TOOL_NAME = "fetch-markdown";
3
- export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter, table of contents, and content length limits";
4
- interface FetchMarkdownToolResponse {
5
- [x: string]: unknown;
6
- content: {
7
- type: 'text';
8
- text: string;
9
- }[];
10
- structuredContent?: Record<string, unknown>;
11
- isError?: boolean;
12
- }
13
- export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<FetchMarkdownToolResponse>;
14
- export {};
3
+ export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits";
4
+ export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<ToolResponseBase>;
15
5
  //# sourceMappingURL=fetch-markdown.tool.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAInB,MAAM,uBAAuB,CAAC;AAkB/B,eAAO,MAAM,wBAAwB,mBAAmB,CAAC;AACzD,eAAO,MAAM,+BAA+B,uIAC0F,CAAC;AAEvI,UAAU,yBAAyB;IACjC,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IACrB,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC1C,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAwED,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,kBAAkB,GACxB,OAAO,CAAC,yBAAyB,CAAC,CAyDpC"}
1
+ {"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAGlB,gBAAgB,EAEjB,MAAM,uBAAuB,CAAC;AAoB/B,eAAO,MAAM,wBAAwB,mBAAmB,CAAC;AACzD,eAAO,MAAM,+BAA+B,mHACsE,CAAC;AAyGnH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,kBAAkB,GACxB,OAAO,CAAC,gBAAgB,CAAC,CAU3B"}
@@ -1,100 +1,91 @@
1
- import { extractContent } from '../../services/extractor.js';
2
1
  import { logDebug, logError } from '../../services/logger.js';
3
- import { stripMarkdownLinks } from '../../utils/content-cleaner.js';
4
2
  import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
5
- import { createContentMetadataBlock, determineContentExtractionSource, } from '../utils/common.js';
6
- import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
7
- import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
3
+ import { transformHtmlToMarkdown } from '../utils/content-transform.js';
4
+ import { buildToolContentBlocks, performSharedFetch, } from './fetch-single.shared.js';
8
5
  export const FETCH_MARKDOWN_TOOL_NAME = 'fetch-markdown';
9
- export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter, table of contents, and content length limits';
10
- function slugify(text) {
11
- const cleanText = stripMarkdownLinks(text);
12
- return cleanText
13
- .toLowerCase()
14
- .replace(/[^\w\s-]/g, '')
15
- .replace(/\s+/g, '-')
16
- .replace(/--+/g, '-')
17
- .trim();
6
+ export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits';
7
+ function resolveMarkdownOptions(input) {
8
+ return {
9
+ extractMainContent: input.extractMainContent ?? true,
10
+ includeMetadata: input.includeMetadata ?? true,
11
+ maxContentLength: input.maxContentLength,
12
+ };
18
13
  }
19
- function extractToc(markdown) {
20
- const headingRegex = /^(#{1,6})\s+(.+)$/gm;
21
- const toc = [];
22
- let match;
23
- while ((match = headingRegex.exec(markdown)) !== null) {
24
- const hashMarks = match[1];
25
- const rawText = match[2];
26
- if (!hashMarks || !rawText)
27
- continue;
28
- const text = stripMarkdownLinks(rawText.trim());
29
- toc.push({
30
- level: hashMarks.length,
31
- text,
32
- slug: slugify(rawText),
33
- });
14
+ function buildMarkdownStructuredContent(pipeline, inlineResult) {
15
+ const structuredContent = {
16
+ url: pipeline.url,
17
+ title: pipeline.data.title,
18
+ fetchedAt: pipeline.fetchedAt,
19
+ contentSize: inlineResult.contentSize,
20
+ cached: pipeline.fromCache,
21
+ };
22
+ if (pipeline.data.truncated || inlineResult.truncated) {
23
+ structuredContent.truncated = true;
24
+ }
25
+ if (typeof inlineResult.content === 'string') {
26
+ structuredContent.markdown = inlineResult.content;
34
27
  }
35
- return toc;
28
+ if (inlineResult.resourceUri) {
29
+ structuredContent.resourceUri = inlineResult.resourceUri;
30
+ structuredContent.resourceMimeType = inlineResult.resourceMimeType;
31
+ }
32
+ return structuredContent;
33
+ }
34
+ function getInlineErrorResponse(inlineResult, url) {
35
+ if (!inlineResult.error)
36
+ return null;
37
+ return createToolErrorResponse(inlineResult.error, url, 'INTERNAL_ERROR');
38
+ }
39
+ function logFetchMarkdownStart(url, options) {
40
+ logDebug('Fetching markdown', { url, ...options });
36
41
  }
37
- function transformToMarkdown(html, url, options) {
38
- const { article, metadata: extractedMeta } = extractContent(html, url, {
39
- extractArticle: options.extractMainContent,
42
+ function buildMarkdownTransform(options) {
43
+ return (html, url) => {
44
+ const markdownResult = transformHtmlToMarkdown(html, url, options);
45
+ return { ...markdownResult, content: markdownResult.markdown };
46
+ };
47
+ }
48
+ async function fetchMarkdownPipeline(url, input, options, transformOptions) {
49
+ return performSharedFetch({
50
+ url,
51
+ format: 'markdown',
52
+ extractMainContent: options.extractMainContent,
53
+ includeMetadata: options.includeMetadata,
54
+ maxContentLength: options.maxContentLength,
55
+ customHeaders: input.customHeaders,
56
+ retries: input.retries,
57
+ timeout: input.timeout,
58
+ transform: buildMarkdownTransform(transformOptions),
40
59
  });
41
- const shouldExtractFromArticle = determineContentExtractionSource(options.extractMainContent, article);
42
- const metadata = createContentMetadataBlock(url, article, extractedMeta, shouldExtractFromArticle, options.includeMetadata);
43
- const sourceHtml = shouldExtractFromArticle ? article.content : html;
44
- const title = shouldExtractFromArticle ? article.title : extractedMeta.title;
45
- let markdown = htmlToMarkdown(sourceHtml, metadata);
46
- const toc = options.generateToc ? extractToc(markdown) : undefined;
47
- let truncated = false;
48
- if (options.maxContentLength && markdown.length > options.maxContentLength) {
49
- markdown = `${markdown.substring(0, options.maxContentLength)}\n\n...[truncated]`;
50
- truncated = true;
51
- }
52
- return { markdown, title, toc, truncated };
53
60
  }
54
- export async function fetchMarkdownToolHandler(input) {
55
- if (!input.url) {
56
- return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
57
- }
58
- const options = {
59
- extractMainContent: input.extractMainContent ?? true,
60
- includeMetadata: input.includeMetadata ?? true,
61
- generateToc: input.generateToc ?? false,
62
- maxContentLength: input.maxContentLength,
61
+ function buildMarkdownResponse(pipeline, inlineResult) {
62
+ const structuredContent = buildMarkdownStructuredContent(pipeline, inlineResult);
63
+ return {
64
+ content: buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown'),
65
+ structuredContent,
63
66
  };
64
- logDebug('Fetching markdown', { url: input.url, ...options });
67
+ }
68
+ export async function fetchMarkdownToolHandler(input) {
65
69
  try {
66
- const result = await executeFetchPipeline({
67
- url: input.url,
68
- cacheNamespace: 'markdown',
69
- customHeaders: input.customHeaders,
70
- retries: input.retries,
71
- transform: (html, url) => transformToMarkdown(html, url, options),
72
- serialize: (data) => data.markdown,
73
- deserialize: (cached) => ({
74
- markdown: cached,
75
- title: undefined,
76
- toc: undefined,
77
- truncated: false,
78
- }),
79
- });
80
- const structuredContent = {
81
- url: result.url,
82
- title: result.data.title,
83
- fetchedAt: result.fetchedAt,
84
- markdown: result.data.markdown,
85
- ...(result.data.toc && { toc: result.data.toc }),
86
- cached: result.fromCache,
87
- ...(result.data.truncated && { truncated: result.data.truncated }),
88
- };
89
- const jsonOutput = JSON.stringify(structuredContent, result.fromCache ? undefined : null, result.fromCache ? undefined : 2);
90
- return {
91
- content: [{ type: 'text', text: jsonOutput }],
92
- structuredContent,
93
- };
70
+ return await executeFetchMarkdown(input);
94
71
  }
95
72
  catch (error) {
96
73
  logError('fetch-markdown tool error', error instanceof Error ? error : undefined);
97
74
  return handleToolError(error, input.url, 'Failed to fetch markdown');
98
75
  }
99
76
  }
77
+ async function executeFetchMarkdown(input) {
78
+ const { url } = input;
79
+ if (!url) {
80
+ return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
81
+ }
82
+ const options = resolveMarkdownOptions(input);
83
+ const transformOptions = { ...options };
84
+ logFetchMarkdownStart(url, transformOptions);
85
+ const { pipeline, inlineResult } = await fetchMarkdownPipeline(url, input, options, transformOptions);
86
+ const inlineError = getInlineErrorResponse(inlineResult, url);
87
+ if (inlineError)
88
+ return inlineError;
89
+ return buildMarkdownResponse(pipeline, inlineResult);
90
+ }
100
91
  //# sourceMappingURL=fetch-markdown.tool.js.map