@j0hanz/superfetch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/README.md +327 -0
  2. package/dist/config/index.d.ts +30 -0
  3. package/dist/config/index.d.ts.map +1 -0
  4. package/dist/config/index.js +42 -0
  5. package/dist/config/index.js.map +1 -0
  6. package/dist/errors/app-error.d.ts +71 -0
  7. package/dist/errors/app-error.d.ts.map +1 -0
  8. package/dist/errors/app-error.js +103 -0
  9. package/dist/errors/app-error.js.map +1 -0
  10. package/dist/errors/index.d.ts +2 -0
  11. package/dist/errors/index.d.ts.map +1 -0
  12. package/dist/errors/index.js +2 -0
  13. package/dist/errors/index.js.map +1 -0
  14. package/dist/index.d.ts +3 -0
  15. package/dist/index.d.ts.map +1 -0
  16. package/dist/index.js +179 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/middleware/error-handler.d.ts +7 -0
  19. package/dist/middleware/error-handler.d.ts.map +1 -0
  20. package/dist/middleware/error-handler.js +37 -0
  21. package/dist/middleware/error-handler.js.map +1 -0
  22. package/dist/middleware/rate-limiter.d.ts +33 -0
  23. package/dist/middleware/rate-limiter.d.ts.map +1 -0
  24. package/dist/middleware/rate-limiter.js +100 -0
  25. package/dist/middleware/rate-limiter.js.map +1 -0
  26. package/dist/prompts/index.d.ts +6 -0
  27. package/dist/prompts/index.d.ts.map +1 -0
  28. package/dist/prompts/index.js +81 -0
  29. package/dist/prompts/index.js.map +1 -0
  30. package/dist/resources/index.d.ts +6 -0
  31. package/dist/resources/index.d.ts.map +1 -0
  32. package/dist/resources/index.js +44 -0
  33. package/dist/resources/index.js.map +1 -0
  34. package/dist/server.d.ts +8 -0
  35. package/dist/server.d.ts.map +1 -0
  36. package/dist/server.js +39 -0
  37. package/dist/server.js.map +1 -0
  38. package/dist/services/cache.d.ts +16 -0
  39. package/dist/services/cache.d.ts.map +1 -0
  40. package/dist/services/cache.js +63 -0
  41. package/dist/services/cache.js.map +1 -0
  42. package/dist/services/cache.service.d.ts +52 -0
  43. package/dist/services/cache.service.d.ts.map +1 -0
  44. package/dist/services/cache.service.js +113 -0
  45. package/dist/services/cache.service.js.map +1 -0
  46. package/dist/services/extractor.d.ts +32 -0
  47. package/dist/services/extractor.d.ts.map +1 -0
  48. package/dist/services/extractor.js +97 -0
  49. package/dist/services/extractor.js.map +1 -0
  50. package/dist/services/extractor.service.d.ts +18 -0
  51. package/dist/services/extractor.service.d.ts.map +1 -0
  52. package/dist/services/extractor.service.js +75 -0
  53. package/dist/services/extractor.service.js.map +1 -0
  54. package/dist/services/fetcher.d.ts +9 -0
  55. package/dist/services/fetcher.d.ts.map +1 -0
  56. package/dist/services/fetcher.js +100 -0
  57. package/dist/services/fetcher.js.map +1 -0
  58. package/dist/services/fetcher.service.d.ts +18 -0
  59. package/dist/services/fetcher.service.d.ts.map +1 -0
  60. package/dist/services/fetcher.service.js +122 -0
  61. package/dist/services/fetcher.service.js.map +1 -0
  62. package/dist/services/logger.d.ts +5 -0
  63. package/dist/services/logger.d.ts.map +1 -0
  64. package/dist/services/logger.js +48 -0
  65. package/dist/services/logger.js.map +1 -0
  66. package/dist/services/logger.service.d.ts +5 -0
  67. package/dist/services/logger.service.d.ts.map +1 -0
  68. package/dist/services/logger.service.js +57 -0
  69. package/dist/services/logger.service.js.map +1 -0
  70. package/dist/services/parser.d.ts +6 -0
  71. package/dist/services/parser.d.ts.map +1 -0
  72. package/dist/services/parser.js +152 -0
  73. package/dist/services/parser.js.map +1 -0
  74. package/dist/services/parser.service.d.ts +42 -0
  75. package/dist/services/parser.service.d.ts.map +1 -0
  76. package/dist/services/parser.service.js +209 -0
  77. package/dist/services/parser.service.js.map +1 -0
  78. package/dist/tools/handlers/fetch-links.tool.d.ts +20 -0
  79. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -0
  80. package/dist/tools/handlers/fetch-links.tool.js +91 -0
  81. package/dist/tools/handlers/fetch-links.tool.js.map +1 -0
  82. package/dist/tools/handlers/fetch-markdown.tool.d.ts +17 -0
  83. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -0
  84. package/dist/tools/handlers/fetch-markdown.tool.js +99 -0
  85. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -0
  86. package/dist/tools/handlers/fetch-url.tool.d.ts +17 -0
  87. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -0
  88. package/dist/tools/handlers/fetch-url.tool.js +103 -0
  89. package/dist/tools/handlers/fetch-url.tool.js.map +1 -0
  90. package/dist/tools/index.d.ts +7 -0
  91. package/dist/tools/index.d.ts.map +1 -0
  92. package/dist/tools/index.js +83 -0
  93. package/dist/tools/index.js.map +1 -0
  94. package/dist/transformers/jsonl.transformer.d.ts +4 -0
  95. package/dist/transformers/jsonl.transformer.d.ts.map +1 -0
  96. package/dist/transformers/jsonl.transformer.js +42 -0
  97. package/dist/transformers/jsonl.transformer.js.map +1 -0
  98. package/dist/transformers/markdown.transformer.d.ts +4 -0
  99. package/dist/transformers/markdown.transformer.d.ts.map +1 -0
  100. package/dist/transformers/markdown.transformer.js +104 -0
  101. package/dist/transformers/markdown.transformer.js.map +1 -0
  102. package/dist/types/content.types.d.ts +63 -0
  103. package/dist/types/content.types.d.ts.map +1 -0
  104. package/dist/types/content.types.js +2 -0
  105. package/dist/types/content.types.js.map +1 -0
  106. package/dist/types/index.d.ts +3 -0
  107. package/dist/types/index.d.ts.map +1 -0
  108. package/dist/types/index.js +3 -0
  109. package/dist/types/index.js.map +1 -0
  110. package/dist/types/schemas.d.ts +22 -0
  111. package/dist/types/schemas.d.ts.map +1 -0
  112. package/dist/types/schemas.js +5 -0
  113. package/dist/types/schemas.js.map +1 -0
  114. package/dist/utils/sanitizer.d.ts +9 -0
  115. package/dist/utils/sanitizer.d.ts.map +1 -0
  116. package/dist/utils/sanitizer.js +19 -0
  117. package/dist/utils/sanitizer.js.map +1 -0
  118. package/dist/utils/url-validator.d.ts +10 -0
  119. package/dist/utils/url-validator.d.ts.map +1 -0
  120. package/dist/utils/url-validator.js +69 -0
  121. package/dist/utils/url-validator.js.map +1 -0
  122. package/package.json +80 -0
@@ -0,0 +1,209 @@
1
+ import * as cheerio from 'cheerio';
2
+ import { sanitizeText } from '../utils/sanitizer.js';
3
+ import { config } from '../config/index.js';
4
class ParserService {
    /**
     * Parses an HTML document and extracts its semantic content blocks.
     *
     * `script`, `style`, `noscript`, `iframe` and `svg` elements are removed
     * first. Headings, paragraphs, lists, code, tables and images found under
     * `<body>` are then converted one by one, in the order cheerio yields them.
     *
     * @param html Raw HTML markup.
     * @returns The extracted blocks, after `filterBlocks` has dropped empty ones.
     */
    parseHtml(html) {
        const $ = cheerio.load(html);
        const blocks = [];
        // Remove script, style, and other non-content elements
        $('script, style, noscript, iframe, svg').remove();
        // Parse the body content
        $('body')
            .find('h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code, table, img')
            .each((_, element) => {
            const block = this.parseElement($, element);
            if (block) {
                blocks.push(block);
            }
        });
        return this.filterBlocks(blocks);
    }
    /**
     * Dispatches a single node to the parser matching its tag name.
     *
     * @param $ Loaded cheerio instance the node belongs to.
     * @param node Node yielded by cheerio.
     * @returns A content block, or `null` when the node has no string
     * `tagName`, the tag is unsupported, or the specific parser rejects it.
     */
    parseElement($, node) {
        // Only nodes exposing a string tagName can be dispatched
        if (!('tagName' in node) || typeof node.tagName !== 'string') {
            return null;
        }
        const element = node;
        const tagName = element.tagName.toLowerCase();
        switch (tagName) {
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
                return this.parseHeading($, element);
            case 'p':
                return this.parseParagraph($, element);
            case 'ul':
            case 'ol':
                return this.parseList($, element);
            case 'pre':
            case 'code':
                return this.parseCode($, element);
            case 'table':
                return this.parseTable($, element);
            case 'img':
                return this.parseImage($, element);
            default:
                return null;
        }
    }
    /**
     * Parses a heading element (`h1`-`h6`).
     *
     * @returns `{ type: 'heading', level, text }`, or `null` when the
     * sanitized text is empty. `level` is the digit taken from the tag name.
     */
    parseHeading($, element) {
        const text = sanitizeText($(element).text());
        if (!text) {
            return null;
        }
        const level = parseInt(element.tagName.substring(1), 10);
        return {
            type: 'heading',
            level,
            text,
        };
    }
    /**
     * Parses a paragraph element.
     *
     * @returns `{ type: 'paragraph', text }`, or `null` when the sanitized
     * text is empty or shorter than `config.extraction.minParagraphLength`.
     */
    parseParagraph($, element) {
        const text = sanitizeText($(element).text());
        if (!text || text.length < config.extraction.minParagraphLength) {
            // Skip very short paragraphs
            return null;
        }
        return {
            type: 'paragraph',
            text,
        };
    }
    /**
     * Parses a list element (`ul` or `ol`).
     *
     * Every descendant `li` contributes one item, so items of nested lists
     * are included as well.
     *
     * @returns `{ type: 'list', ordered, items }`, or `null` when no item has
     * non-empty sanitized text. `ordered` is `true` for `ol`.
     */
    parseList($, element) {
        const items = [];
        $(element)
            .find('li')
            .each((_, li) => {
            const text = sanitizeText($(li).text());
            if (text) {
                items.push(text);
            }
        });
        if (items.length === 0) {
            return null;
        }
        const ordered = element.tagName.toLowerCase() === 'ol';
        return {
            type: 'list',
            ordered,
            items,
        };
    }
    /**
     * Parses a code element (`pre` or `code`).
     *
     * A `<code>` nested inside a `<pre>` is skipped, because the enclosing
     * `<pre>` already produces a block with the same text; emitting both would
     * duplicate the snippet. For a `<pre>`, the language is looked up on its
     * own `class` first and then on the first nested `<code>`.
     *
     * @returns `{ type: 'code', language, text }`, or `null` when the element
     * is a `<code>` inside a `<pre>` or its trimmed text is empty. `language`
     * is `undefined` when no `language-*` class is found.
     */
    parseCode($, element) {
        const $element = $(element);
        const isPre = element.tagName.toLowerCase() === 'pre';
        if (!isPre && $element.closest('pre').length > 0) {
            return null;
        }
        const text = $element.text().trim();
        if (!text) {
            return null;
        }
        // Try to detect language from class name
        const languagePattern = /language-(\w+)/;
        const ownClass = $element.attr('class') || '';
        const nestedClass = isPre
            ? $element.find('code').first().attr('class') || ''
            : '';
        const languageMatch = ownClass.match(languagePattern) ?? nestedClass.match(languagePattern);
        const language = languageMatch ? languageMatch[1] : undefined;
        return {
            type: 'code',
            language,
            text,
        };
    }
    /**
     * Parses a table element.
     *
     * Headers come from the cells inside `<thead>`. When there are none and
     * the table has more than one row, the first row is promoted to headers.
     * Rows inside `<thead>` and a promoted header row are never reported as
     * data rows. The promoted row has to be excluded by identity: HTML parsers
     * wrap bare `<tr>` elements in an implicit `<tbody>`, so a selector such as
     * `tbody tr` would match it again.
     *
     * @returns `{ type: 'table', headers, rows }`, or `null` when no data row
     * contains cells. `headers` is `undefined` when none were found.
     */
    parseTable($, element) {
        const $table = $(element);
        // Collects the sanitized text of every matching cell below `scope`
        const readCells = (scope, selector) => {
            const cells = [];
            $(scope)
                .find(selector)
                .each((_, cell) => {
                cells.push(sanitizeText($(cell).text()));
            });
            return cells;
        };
        // Extract headers from thead or first row
        let headers = readCells(element, 'thead th, thead td');
        const $rows = $table.find('tr');
        let headerRow = null;
        // A single-row table keeps its only row as data instead of losing it
        if (headers.length === 0 && $rows.length > 1) {
            const firstRow = $rows.get(0);
            headers = readCells(firstRow, 'th, td');
            if (headers.length > 0) {
                headerRow = firstRow;
            }
        }
        // Extract body rows
        const rows = [];
        $rows.each((_, row) => {
            if (row === headerRow || $(row).closest('thead').length > 0) {
                return;
            }
            const cells = readCells(row, 'td, th');
            if (cells.length > 0) {
                rows.push(cells);
            }
        });
        if (rows.length === 0) {
            return null;
        }
        return {
            type: 'table',
            headers: headers.length > 0 ? headers : undefined,
            rows,
        };
    }
    /**
     * Parses an image element.
     *
     * @returns `{ type: 'image', src, alt }`, or `null` when the element has
     * no `src`. An empty or missing `alt` is reported as `undefined`.
     */
    parseImage($, element) {
        const src = $(element).attr('src');
        if (!src) {
            return null;
        }
        const alt = $(element).attr('alt') || undefined;
        return {
            type: 'image',
            src,
            alt,
        };
    }
    /**
     * Filters out empty blocks.
     *
     * Paragraph, heading and code blocks need non-empty `text`; list blocks
     * need at least one item. Every other block type is kept as is.
     *
     * @param blocks Blocks produced by the element parsers.
     * @returns A new array containing only the blocks that passed.
     */
    filterBlocks(blocks) {
        return blocks.filter((block) => {
            switch (block.type) {
                case 'paragraph':
                case 'heading':
                case 'code':
                    return block.text.length > 0;
                case 'list':
                    return block.items.length > 0;
                default:
                    return true;
            }
        });
    }
}
// Singleton instance
export const parserService = new ParserService();
209
+ //# sourceMappingURL=parser.service.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.service.js","sourceRoot":"","sources":["../../src/services/parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAW5C,MAAM,aAAa;IACjB;;OAEG;IACH,SAAS,CAAC,IAAY;QACpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAwB,EAAE,CAAC;QAEvC,uDAAuD;QACvD,CAAC,CAAC,sCAAsC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEnD,yBAAyB;QACzB,CAAC,CAAC,MAAM,CAAC;aACN,IAAI,CAAC,0DAA0D,CAAC;aAChE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;YACnB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAC5C,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC,CAAC,CAAC;QAEL,OAAO,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IACnC,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,CAAa,EAAE,IAAa;QAC/C,oDAAoD;QACpD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC7D,OAAO,IAAI,CAAC;QACd,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC;QACrB,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;QAE9C,QAAQ,OAAO,EAAE,CAAC;YAChB,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,IAAI;gBACP,OAAO,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEvC,KAAK,GAAG;gBACN,OAAO,IAAI,CAAC,cAAc,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEzC,KAAK,IAAI,CAAC;YACV,KAAK,IAAI;gBACP,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEpC,KAAK,KAAK,CAAC;YACX,KAAK,MAAM;gBACT,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEpC,KAAK,OAAO;gBACV,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAErC,KAAK,KAAK;gBACR,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAErC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,CAAa,EAAE,OAAgB;QAClD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC7C,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEzD,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK;YACL,
IAAI;SACL,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CACpB,CAAa,EACb,OAAgB;QAEhB,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB,EAAE,CAAC;YAChE,6BAA6B;YAC7B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO;YACL,IAAI,EAAE,WAAW;YACjB,IAAI;SACL,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,SAAS,CAAC,CAAa,EAAE,OAAgB;QAC/C,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,CAAC,CAAC,OAAO,CAAC;aACP,IAAI,CAAC,IAAI,CAAC;aACV,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YACd,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACxC,IAAI,IAAI,EAAE,CAAC;gBACT,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC;QACH,CAAC,CAAC,CAAC;QAEL,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC;QAEvD,OAAO;YACL,IAAI,EAAE,MAAM;YACZ,OAAO;YACP,KAAK;SACN,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,SAAS,CAAC,CAAa,EAAE,OAAgB;QAC/C,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QACd,CAAC;QAED,yCAAyC;QACzC,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACjD,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAE9D,OAAO;YACL,IAAI,EAAE,MAAM;YACZ,QAAQ;YACR,IAAI;SACL,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,CAAa,EAAE,OAAgB;QAChD,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAe,EAAE,CAAC;QAE5B,0CAA0C;QAC1C,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;QAC1B,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YACjD,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,4BAA4B;QAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM;iBACH,IAAI,CAAC,IAAI,CAAC;iBACV,KAAK,EAAE;iBACP,IAAI,CAAC,QAAQ,CAAC;iBACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;gBAChB,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7
C,CAAC,CAAC,CAAC;QACP,CAAC;QAED,oBAAoB;QACpB,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,cAAc,CAAC;QACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;YACxC,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,CAAC,CAAC,GAAG,CAAC;iBACH,IAAI,CAAC,QAAQ,CAAC;iBACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;gBAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,CAAC;YACL,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO;YACL,IAAI,EAAE,OAAO;YACb,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;YACjD,IAAI;SACL,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,CAAa,EAAE,OAAgB;QAChD,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnC,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,SAAS,CAAC;QAEhD,OAAO;YACL,IAAI,EAAE,OAAO;YACb,GAAG;YACH,GAAG;SACJ,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,MAA2B;QAC9C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;YAC7B,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC/B,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,CAAC;YACD,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBAC7B,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,CAAC;YACD,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC1B,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAChC,CAAC;YACD,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC1B,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAED,qBAAqB;AACrB,MAAM,CAAC,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC"}
@@ -0,0 +1,20 @@
1
+ import type { FetchLinksInput } from '../../types/index.js';
2
+ export declare const FETCH_LINKS_TOOL_NAME = "fetch-links";
3
+ export declare const FETCH_LINKS_TOOL_DESCRIPTION = "Extracts all hyperlinks from a webpage with anchor text and type classification";
4
+ /**
5
+ * Tool handler for extracting links from a URL
 *
 * The returned promise resolves to one of two result shapes. Both carry a
 * `content` array of `{ type: "text", text }` items; the second shape also
 * carries a boolean `isError`, which the implementation sets to `true` when
 * it reports a failure instead of throwing.
 *
 * @param input Tool arguments, typed as `FetchLinksInput`.
6
+ */
7
+ export declare function fetchLinksToolHandler(input: FetchLinksInput): Promise<{
8
+ content: {
9
+ type: "text";
10
+ text: string;
11
+ }[];
12
+ isError?: undefined;
13
+ } | {
14
+ content: {
15
+ type: "text";
16
+ text: string;
17
+ }[];
18
+ isError: boolean;
19
+ }>;
20
+ //# sourceMappingURL=fetch-links.tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,eAAe,EAAiB,MAAM,sBAAsB,CAAC;AAE3E,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,oFAC0C,CAAC;AAmDpF;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,eAAe;;;;;;;;;;;;GA+CjE"}
@@ -0,0 +1,91 @@
1
+ import { validateAndNormalizeUrl, isInternalUrl, } from '../../utils/url-validator.js';
2
+ import { fetchUrlWithRetry } from '../../services/fetcher.js';
3
+ import * as cache from '../../services/cache.js';
4
+ import * as cheerio from 'cheerio';
5
+ export const FETCH_LINKS_TOOL_NAME = 'fetch-links';
6
+ export const FETCH_LINKS_TOOL_DESCRIPTION = 'Extracts all hyperlinks from a webpage with anchor text and type classification';
7
/**
 * Extracts links from HTML, filtering by type and deduplicating.
 *
 * Every `a[href]` is resolved against `baseUrl`. Empty hrefs, fragment-only
 * hrefs and `javascript:` URLs are skipped. The scheme is checked on the
 * parsed URL, whose protocol is normalized to lower case, so spellings such
 * as `JavaScript:` or an href with leading whitespace are rejected as well.
 * Hrefs that cannot be parsed are skipped silently.
 *
 * @param html Raw HTML markup of the page.
 * @param baseUrl URL used to resolve relative hrefs and to classify links.
 * @param options `includeInternal` / `includeExternal` switches.
 * @returns One `{ href, text, type }` entry per distinct absolute URL that
 * passes the filters; `text` falls back to the URL when the anchor is empty.
 */
function extractLinksFromHtml(html, baseUrl, options) {
    const $ = cheerio.load(html);
    const links = [];
    const seenUrls = new Set();
    $('a[href]').each((_, element) => {
        const href = ($(element).attr('href') ?? '').trim();
        const text = $(element).text().trim();
        // Skip empty and fragment-only hrefs
        if (!href || href.startsWith('#')) {
            return;
        }
        try {
            const parsed = new URL(href, baseUrl);
            // Skip script pseudo-links regardless of how the scheme was spelled
            if (parsed.protocol === 'javascript:') {
                return;
            }
            const absoluteUrl = parsed.href;
            // Skip duplicates
            if (seenUrls.has(absoluteUrl)) {
                return;
            }
            seenUrls.add(absoluteUrl);
            const type = isInternalUrl(absoluteUrl, baseUrl) ? 'internal' : 'external';
            // Filter based on options
            if (type === 'internal' && !options.includeInternal)
                return;
            if (type === 'external' && !options.includeExternal)
                return;
            links.push({
                href: absoluteUrl,
                text: text || absoluteUrl,
                type,
            });
        }
        catch {
            // Skip invalid URLs silently
        }
    });
    return links;
}
46
/**
 * Tool handler for extracting links from a URL.
 *
 * Validates the URL, serves a cached result when one exists, otherwise
 * fetches the page, extracts its links and caches the serialized output.
 * The cache namespace encodes the link filters whenever they differ from the
 * defaults, so a result computed with one filter combination is never
 * returned for another.
 *
 * @param input Tool arguments: `url`, plus optional `includeInternal` and
 * `includeExternal` flags that both default to `true`.
 * @returns A result whose `content` holds the JSON output as text. Failures
 * are not thrown; they are returned with `isError: true` and a JSON payload
 * containing the error message and the requested URL.
 */
export async function fetchLinksToolHandler(input) {
    try {
        const url = validateAndNormalizeUrl(input.url);
        const includeInternal = input.includeInternal ?? true;
        const includeExternal = input.includeExternal ?? true;
        // The default filter combination keeps the plain 'links' namespace.
        // NOTE(review): assumes createCacheKey accepts any namespace string - confirm.
        const namespace = includeInternal && includeExternal
            ? 'links'
            : `links:internal=${includeInternal}:external=${includeExternal}`;
        const cacheKey = cache.createCacheKey(namespace, url);
        const cached = cache.get(cacheKey);
        if (cached) {
            return {
                content: [{ type: 'text', text: cached.content }],
            };
        }
        const html = await fetchUrlWithRetry(url);
        // Extract links
        const links = extractLinksFromHtml(html, url, {
            includeInternal,
            includeExternal,
        });
        const output = {
            url,
            linkCount: links.length,
            links,
        };
        const outputText = JSON.stringify(output, null, 2);
        cache.set(cacheKey, outputText);
        return {
            content: [{ type: 'text', text: outputText }],
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: `Failed to extract links: ${error instanceof Error ? error.message : 'Unknown error'}`,
                        url: input.url,
                    }),
                },
            ],
            isError: true,
        };
    }
}
91
+ //# sourceMappingURL=fetch-links.tool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-links.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,aAAa,GACd,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,CAAC,MAAM,qBAAqB,GAAG,aAAa,CAAC;AACnD,MAAM,CAAC,MAAM,4BAA4B,GACvC,iFAAiF,CAAC;AAEpF;;GAEG;AACH,SAAS,oBAAoB,CAC3B,IAAY,EACZ,OAAe,EACf,OAA+D;IAE/D,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QAC/B,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAEtC,qBAAqB;QACrB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;YACpE,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAEhD,kBAAkB;YAClB,IAAI,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC9B,OAAO;YACT,CAAC;YACD,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YAE1B,MAAM,IAAI,GAAG,aAAa,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC;YAE3E,0BAA0B;YAC1B,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;gBAAE,OAAO;YAC5D,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,OAAO,CAAC,eAAe;gBAAE,OAAO;YAE5D,KAAK,CAAC,IAAI,CAAC;gBACT,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,IAAI,IAAI,WAAW;gBACzB,IAAI;aACL,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,6BAA6B;QAC/B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAsB;IAChE,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,uBAAuB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAEpD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC;aAC3D,CAAC;QACJ,CAAC;QAED,MAAM,IAAI
,GAAG,MAAM,iBAAiB,CAAC,GAAG,CAAC,CAAC;QAE1C,gBAAgB;QAChB,MAAM,KAAK,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE;YAC5C,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;SAC/C,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,GAAG;YACH,SAAS,EAAE,KAAK,CAAC,MAAM;YACvB,KAAK;SACN,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QAEnD,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEhC,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;SACvD,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,KAAK,EAAE,4BAA4B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE;wBAC7F,GAAG,EAAE,KAAK,CAAC,GAAG;qBACf,CAAC;iBACH;aACF;YACD,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,17 @@
1
+ import type { FetchMarkdownInput } from '../../types/index.js';
2
+ export declare const FETCH_MARKDOWN_TOOL_NAME = "fetch-markdown";
3
+ export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter";
4
/**
 * Tool handler that fetches a webpage and returns it as Markdown.
 *
 * The returned promise resolves to one of two result shapes. Both carry a
 * `content` array of `{ type: "text", text }` items; the second shape also
 * carries a boolean `isError`, which the implementation sets to `true` when
 * it reports a failure instead of throwing.
 *
 * @param input Tool arguments, typed as `FetchMarkdownInput`.
 */
+ export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<{
5
+ content: {
6
+ type: "text";
7
+ text: string;
8
+ }[];
9
+ isError?: undefined;
10
+ } | {
11
+ content: {
12
+ type: "text";
13
+ text: string;
14
+ }[];
15
+ isError: boolean;
16
+ }>;
17
+ //# sourceMappingURL=fetch-markdown.tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-markdown.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAE/D,eAAO,MAAM,wBAAwB,mBAAmB,CAAC;AACzD,eAAO,MAAM,+BAA+B,yFAC4C,CAAC;AA+CzF,wBAAsB,wBAAwB,CAAC,KAAK,EAAE,kBAAkB;;;;;;;;;;;;GA+DvE"}
@@ -0,0 +1,99 @@
1
+ import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
2
+ import { fetchUrlWithRetry } from '../../services/fetcher.js';
3
+ import { extractContent } from '../../services/extractor.js';
4
+ import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
5
+ import * as cache from '../../services/cache.js';
6
+ import { config } from '../../config/index.js';
7
+ export const FETCH_MARKDOWN_TOOL_NAME = 'fetch-markdown';
8
+ export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter';
9
/**
 * Converts fetched HTML to Markdown.
 *
 * When main-content extraction is enabled both in `options` and in
 * `config.extraction`, and an article was extracted, only the article content
 * is converted. Otherwise the full HTML is converted. A metadata record is
 * passed to the converter only when metadata is enabled in both places.
 *
 * @param html Raw HTML markup of the page.
 * @param url URL recorded in the metadata.
 * @param options `extractMainContent` / `includeMetadata` switches.
 * @returns `{ markdown, title }`.
 */
function extractAndConvertToMarkdown(html, url, options) {
    // extractContent yields both the article and the page-level metadata
    const extraction = extractContent(html, url);
    const article = extraction.article;
    const pageMeta = extraction.metadata;
    // Builds the metadata record, or undefined when metadata is switched off
    const buildMetadata = (fields) => {
        if (!(options.includeMetadata && config.extraction.includeMetadata)) {
            return undefined;
        }
        return {
            type: 'metadata',
            ...fields,
            url,
            fetchedAt: new Date().toISOString(),
        };
    };
    const useArticle = options.extractMainContent && config.extraction.extractMainContent && article;
    if (!useArticle) {
        // Fallback: convert full HTML
        const pageMetadata = buildMetadata({
            title: pageMeta.title,
            description: pageMeta.description,
            author: pageMeta.author,
        });
        return {
            markdown: htmlToMarkdown(html, pageMetadata),
            title: pageMeta.title,
        };
    }
    const articleMetadata = buildMetadata({
        title: article.title,
        author: article.byline,
    });
    return {
        markdown: htmlToMarkdown(article.content, articleMetadata),
        title: article.title,
    };
}
43
/**
 * Tool handler that fetches a webpage and returns it as Markdown.
 *
 * Validates the URL, serves cached Markdown when it exists, otherwise fetches
 * the page, converts it and caches the Markdown. The cache namespace encodes
 * the conversion options whenever they differ from the defaults, so Markdown
 * produced with one option set is never returned for another.
 *
 * @param input Tool arguments: `url`, plus optional `extractMainContent` and
 * `includeMetadata` flags that both default to `true`.
 * @returns A result whose `content` holds a JSON document as text, with
 * `url`, `markdown`, `fetchedAt` and `cached` (plus `title` on a fresh
 * fetch). Failures are not thrown; they are returned with `isError: true`
 * and a JSON payload containing the error message and the requested URL.
 */
export async function fetchMarkdownToolHandler(input) {
    try {
        const url = validateAndNormalizeUrl(input.url);
        const extractMainContent = input.extractMainContent ?? true;
        const includeMetadata = input.includeMetadata ?? true;
        // The default option set keeps the plain 'markdown' namespace.
        // NOTE(review): assumes createCacheKey accepts any namespace string - confirm.
        const namespace = extractMainContent && includeMetadata
            ? 'markdown'
            : `markdown:main=${extractMainContent}:meta=${includeMetadata}`;
        const cacheKey = cache.createCacheKey(namespace, url);
        const cached = cache.get(cacheKey);
        if (cached) {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({
                            url,
                            cached: true,
                            fetchedAt: cached.fetchedAt,
                            markdown: cached.content,
                        }),
                    },
                ],
            };
        }
        const html = await fetchUrlWithRetry(url);
        const { markdown, title } = extractAndConvertToMarkdown(html, url, {
            extractMainContent,
            includeMetadata,
        });
        cache.set(cacheKey, markdown);
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        url,
                        title,
                        fetchedAt: new Date().toISOString(),
                        markdown,
                        cached: false,
                    }, null, 2),
                },
            ],
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: `Failed to fetch markdown: ${error instanceof Error ? error.message : 'Unknown error'}`,
                        url: input.url,
                    }),
                },
            ],
            isError: true,
        };
    }
}
99
+ //# sourceMappingURL=fetch-markdown.tool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-markdown.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-markdown.tool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAC5E,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAG/C,MAAM,CAAC,MAAM,wBAAwB,GAAG,gBAAgB,CAAC;AACzD,MAAM,CAAC,MAAM,+BAA+B,GAC1C,sFAAsF,CAAC;AAEzF,SAAS,2BAA2B,CAClC,IAAY,EACZ,GAAW,EACX,OAAkE;IAElE,+DAA+D;IAC/D,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAEvE,IAAI,OAAO,CAAC,kBAAkB,IAAI,MAAM,CAAC,UAAU,CAAC,kBAAkB,IAAI,OAAO,EAAE,CAAC;QAClF,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,MAAM,CAAC,UAAU,CAAC,eAAe;YAC1D,CAAC,CAAC;gBACE,IAAI,EAAE,UAAmB;gBACzB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,GAAG;gBACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC;YACH,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACL,QAAQ,EAAE,cAAc,CAAC,OAAO,CAAC,OAAO,EAAE,QAAQ,CAAC;YACnD,KAAK,EAAE,OAAO,CAAC,KAAK;SACrB,CAAC;IACJ,CAAC;IAED,8BAA8B;IAC9B,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,MAAM,CAAC,UAAU,CAAC,eAAe;QAC1D,CAAC,CAAC;YACE,IAAI,EAAE,UAAmB;YACzB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC;QACH,CAAC,CAAC,SAAS,CAAC;IAEhB,OAAO;QACL,QAAQ,EAAE,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC;QACxC,KAAK,EAAE,aAAa,CAAC,KAAK;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,KAAyB;IACtE,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,uBAAuB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAEvD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACnB,GAAG;4BACH,MAAM,EAAE,IAAI;4BACZ,SAAS,EAAE,MAAM,CAAC,SAAS;4BAC3B,QAAQ,EAAE,MAAM,CAAC,OAAO;yBACzB,CAAC;qBACH;iBACF;aACF,CAAC;QACJ,CAAC;Q
AED,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,GAAG,CAAC,CAAC;QAE1C,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,2BAA2B,CAAC,IAAI,EAAE,GAAG,EAAE;YACjE,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;SAC/C,CAAC,CAAC;QAEH,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAE9B,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB;wBACE,GAAG;wBACH,KAAK;wBACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,QAAQ;wBACR,MAAM,EAAE,KAAK;qBACd,EACD,IAAI,EACJ,CAAC,CACF;iBACF;aACF;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,KAAK,EAAE,6BAA6B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE;wBAC9F,GAAG,EAAE,KAAK,CAAC,GAAG;qBACf,CAAC;iBACH;aACF;YACD,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,17 @@
1
+ import type { FetchUrlInput } from '../../types/index.js';
2
+ export declare const FETCH_URL_TOOL_NAME = "fetch-url";
3
+ export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks";
4
/**
 * Tool handler that fetches a webpage and returns it in JSONL form.
 *
 * The returned promise resolves to one of two result shapes. Both carry a
 * `content` array of `{ type: "text", text }` items; the second shape also
 * carries a boolean `isError`.
 * NOTE(review): `isError` presumably marks a failure reported without
 * throwing, as in the sibling handlers - confirm against the implementation.
 *
 * @param input Tool arguments, typed as `FetchUrlInput`.
 */
+ export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<{
5
+ content: {
6
+ type: "text";
7
+ text: string;
8
+ }[];
9
+ isError?: undefined;
10
+ } | {
11
+ content: {
12
+ type: "text";
13
+ text: string;
14
+ }[];
15
+ isError: boolean;
16
+ }>;
17
+ //# sourceMappingURL=fetch-url.tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-url.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EACV,aAAa,EAGd,MAAM,sBAAsB,CAAC;AAE9B,eAAO,MAAM,mBAAmB,cAAc,CAAC;AAC/C,eAAO,MAAM,0BAA0B,+FACuD,CAAC;AAkD/F,wBAAsB,mBAAmB,CAAC,KAAK,EAAE,aAAa;;;;;;;;;;;;GAwE7D"}
@@ -0,0 +1,103 @@
1
+ import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
2
+ import { fetchUrlWithRetry } from '../../services/fetcher.js';
3
+ import { extractContent } from '../../services/extractor.js';
4
+ import { parseHtml } from '../../services/parser.js';
5
+ import { toJsonl } from '../../transformers/jsonl.transformer.js';
6
+ import * as cache from '../../services/cache.js';
7
+ import { config } from '../../config/index.js';
8
/** Identifier of the fetch-url tool. */
export const FETCH_URL_TOOL_NAME = 'fetch-url';

/** Human-readable description of what the fetch-url tool produces. */
export const FETCH_URL_TOOL_DESCRIPTION =
    'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks';
10
/**
 * Assembles the metadata record that accompanies the content blocks.
 *
 * @param {{ title?: string, description?: string, author?: string }} fields
 *   Descriptive fields to embed.
 * @param {string} url - URL recorded in the record.
 * @returns {object} Record tagged `type: 'metadata'` and stamped with the
 *   current time as an ISO string.
 */
function buildMetadataBlock(fields, url) {
    return {
        type: 'metadata',
        title: fields.title,
        description: fields.description,
        author: fields.author,
        url,
        fetchedAt: new Date().toISOString(),
    };
}

/**
 * Turns raw HTML into parsed content blocks plus an optional metadata record.
 *
 * The article returned by `extractContent` is used only when main-content
 * extraction is enabled both by the caller and by `config.extraction`, and an
 * article was actually produced; otherwise the whole document is parsed.
 * Metadata is emitted only when enabled both by the caller and by
 * `config.extraction`.
 *
 * In the article case the page-level metadata is used as a fallback for a
 * missing/empty title or byline, and the page description is carried over, so
 * both paths yield the same metadata fields.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} url - URL handed to `extractContent` and recorded in metadata.
 * @param {{ extractMainContent: boolean, includeMetadata: boolean }} options
 *   Per-request switches.
 * @returns {{ contentBlocks: any, metadata: (object|undefined), title: any }}
 */
function extractContentFromHtml(html, url, options) {
    // extractContent yields the article and the page metadata from one call.
    const { article, metadata: pageMeta } = extractContent(html, url);
    const wantsMetadata = Boolean(options.includeMetadata && config.extraction.includeMetadata);
    const useArticle = Boolean(
        options.extractMainContent && config.extraction.extractMainContent && article
    );

    if (useArticle) {
        // `||` (not `??`) so an empty-string title/byline also falls back.
        // NOTE(review): pageMeta is accessed defensively here because the
        // original article path never read it — confirm it is always present.
        const title = article.title || pageMeta?.title;
        const author = article.byline || pageMeta?.author;
        const contentBlocks = parseHtml(article.content);
        const metadata = wantsMetadata
            ? buildMetadataBlock({ title, description: pageMeta?.description, author }, url)
            : undefined;
        return { contentBlocks, metadata, title };
    }

    // Fallback: parse the complete document.
    const contentBlocks = parseHtml(html);
    const metadata = wantsMetadata
        ? buildMetadataBlock(
              {
                  title: pageMeta.title,
                  description: pageMeta.description,
                  author: pageMeta.author,
              },
              url
          )
        : undefined;
    return { contentBlocks, metadata, title: pageMeta.title };
}
40
/**
 * Applies the caller's length limit to a JSONL string.
 *
 * @param {string} jsonl - JSONL content to limit.
 * @param {number|undefined} maxLength - Maximum number of characters; a falsy
 *   value (including 0) means "no limit".
 * @returns {string} The input unchanged when no limit applies, otherwise the
 *   first `maxLength` characters followed by a `...[truncated]` marker line.
 */
function limitJsonlLength(jsonl, maxLength) {
    if (!maxLength || typeof jsonl !== 'string' || jsonl.length <= maxLength) {
        return jsonl;
    }
    return jsonl.substring(0, maxLength) + '\n...[truncated]';
}

/**
 * Fetches a URL, converts the page to JSONL content blocks and returns the
 * result as a single text content item.
 *
 * The full (untruncated) JSONL is what gets cached; `input.maxContentLength`
 * is applied when the response is built, for cache hits and fresh fetches
 * alike. Caching the truncated text would let one caller's limit cut the
 * content served to every later caller of the same URL.
 *
 * @param input - Tool arguments: `url`, plus optional `customHeaders`,
 *   `extractMainContent` (default true), `includeMetadata` (default true) and
 *   `maxContentLength`.
 * @returns A result whose `content` holds one JSON-encoded text item. Any
 *   thrown error is converted into a result with `isError: true` rather than
 *   being rethrown.
 */
export async function fetchUrlToolHandler(input) {
    try {
        const url = validateAndNormalizeUrl(input.url);
        // NOTE(review): the key is derived from the URL only, so a cache hit
        // reuses content produced with whatever extractMainContent /
        // includeMetadata / customHeaders the first request used — confirm
        // whether those should be part of the key.
        const cacheKey = cache.createCacheKey('url', url);
        const cached = cache.get(cacheKey);

        if (cached) {
            const cachedPayload = {
                url,
                cached: true,
                fetchedAt: cached.fetchedAt,
                content: limitJsonlLength(cached.content, input.maxContentLength),
            };
            return {
                content: [{ type: 'text', text: JSON.stringify(cachedPayload) }],
            };
        }

        const html = await fetchUrlWithRetry(url, input.customHeaders);
        const { contentBlocks, metadata, title } = extractContentFromHtml(html, url, {
            extractMainContent: input.extractMainContent ?? true,
            includeMetadata: input.includeMetadata ?? true,
        });

        const fullJsonl = toJsonl(contentBlocks, metadata);
        // Store the complete content; the per-request limit is applied below.
        cache.set(cacheKey, fullJsonl);

        const freshPayload = {
            url,
            title,
            contentBlocks: contentBlocks.length,
            fetchedAt: new Date().toISOString(),
            format: 'jsonl',
            content: limitJsonlLength(fullJsonl, input.maxContentLength),
            cached: false,
        };
        return {
            content: [{ type: 'text', text: JSON.stringify(freshPayload, null, 2) }],
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: `Failed to fetch URL: ${reason}`,
                        url: input.url,
                    }),
                },
            ],
            isError: true,
        };
    }
}
103
+ //# sourceMappingURL=fetch-url.tool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-url.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-url.tool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAO/C,MAAM,CAAC,MAAM,mBAAmB,GAAG,WAAW,CAAC;AAC/C,MAAM,CAAC,MAAM,0BAA0B,GACrC,4FAA4F,CAAC;AAQ/F,SAAS,sBAAsB,CAC7B,IAAY,EACZ,GAAW,EACX,OAAkE;IAElE,+DAA+D;IAC/D,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAEvE,IAAI,OAAO,CAAC,kBAAkB,IAAI,MAAM,CAAC,UAAU,CAAC,kBAAkB,IAAI,OAAO,EAAE,CAAC;QAClF,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACjD,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,MAAM,CAAC,UAAU,CAAC,eAAe;YAC1D,CAAC,CAAC;gBACE,IAAI,EAAE,UAAmB;gBACzB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,GAAG;gBACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC;YACH,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;IAC3D,CAAC;IAED,qCAAqC;IACrC,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEtC,MAAM,QAAQ,GACZ,OAAO,CAAC,eAAe,IAAI,MAAM,CAAC,UAAU,CAAC,eAAe;QAC1D,CAAC,CAAC;YACE,IAAI,EAAE,UAAmB;YACzB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC;QACH,CAAC,CAAC,SAAS,CAAC;IAEhB,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE,aAAa,CAAC,KAAK,EAAE,CAAC;AACjE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,KAAoB;IAC5D,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,uBAAuB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAElD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACnB,GAAG;4BACH,MAAM,EAAE,IAAI;4BACZ,SAAS,EAAE,MAAM,CAAC,SAAS;4BAC3B
,OAAO,EAAE,MAAM,CAAC,OAAO;yBACxB,CAAC;qBACH;iBACF;aACF,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,GAAG,EAAE,KAAK,CAAC,aAAa,CAAC,CAAC;QAE/D,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,sBAAsB,CAAC,IAAI,EAAE,GAAG,EAAE;YAC3E,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;SAC/C,CAAC,CAAC;QAEH,IAAI,YAAY,GAAG,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;QAEpD,IAAI,KAAK,CAAC,gBAAgB,IAAI,YAAY,CAAC,MAAM,GAAG,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC3E,YAAY;gBACV,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,gBAAgB,CAAC,GAAG,kBAAkB,CAAC;QAC3E,CAAC;QAED,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAElC,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB;wBACE,GAAG;wBACH,KAAK;wBACL,aAAa,EAAE,aAAa,CAAC,MAAM;wBACnC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACnC,MAAM,EAAE,OAAO;wBACf,OAAO,EAAE,YAAY;wBACrB,MAAM,EAAE,KAAK;qBACd,EACD,IAAI,EACJ,CAAC,CACF;iBACF;aACF;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,KAAK,EAAE,wBAAwB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE;wBACzF,GAAG,EAAE,KAAK,CAAC,GAAG;qBACf,CAAC;iBACH;aACF;YACD,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
/**
 * Registers every tool on the given MCP server through the McpServer API.
 * Each tool is registered with a Zod schema so its input is validated
 * automatically.
 *
 * @param server - MCP server instance that receives the tool registrations.
 */
export declare function registerTools(server: McpServer): void;
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AA2EzE;;;GAGG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAiCrD"}