@j0hanz/superfetch 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/README.md +345 -57
  2. package/dist/config/index.d.ts.map +1 -1
  3. package/dist/config/index.js +6 -10
  4. package/dist/config/index.js.map +1 -1
  5. package/dist/config/types.d.ts +256 -0
  6. package/dist/config/types.d.ts.map +1 -0
  7. package/dist/config/types.js +2 -0
  8. package/dist/config/types.js.map +1 -0
  9. package/dist/errors/app-error.d.ts +6 -20
  10. package/dist/errors/app-error.d.ts.map +1 -1
  11. package/dist/errors/app-error.js +7 -18
  12. package/dist/errors/app-error.js.map +1 -1
  13. package/dist/index.js +75 -62
  14. package/dist/index.js.map +1 -1
  15. package/dist/middleware/error-handler.d.ts +1 -5
  16. package/dist/middleware/error-handler.d.ts.map +1 -1
  17. package/dist/middleware/error-handler.js +4 -12
  18. package/dist/middleware/error-handler.js.map +1 -1
  19. package/dist/middleware/rate-limiter.d.ts +2 -20
  20. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  21. package/dist/middleware/rate-limiter.js +22 -47
  22. package/dist/middleware/rate-limiter.js.map +1 -1
  23. package/dist/prompts/index.d.ts +0 -3
  24. package/dist/prompts/index.d.ts.map +1 -1
  25. package/dist/prompts/index.js +2 -10
  26. package/dist/prompts/index.js.map +1 -1
  27. package/dist/resources/cached-content.d.ts +5 -0
  28. package/dist/resources/cached-content.d.ts.map +1 -0
  29. package/dist/resources/cached-content.js +93 -0
  30. package/dist/resources/cached-content.js.map +1 -0
  31. package/dist/resources/index.d.ts +0 -3
  32. package/dist/resources/index.d.ts.map +1 -1
  33. package/dist/resources/index.js +40 -5
  34. package/dist/resources/index.js.map +1 -1
  35. package/dist/server.d.ts +0 -4
  36. package/dist/server.d.ts.map +1 -1
  37. package/dist/server.js +11 -6
  38. package/dist/server.js.map +1 -1
  39. package/dist/services/cache.d.ts +20 -6
  40. package/dist/services/cache.d.ts.map +1 -1
  41. package/dist/services/cache.js +128 -20
  42. package/dist/services/cache.js.map +1 -1
  43. package/dist/services/card-extractor.d.ts +10 -0
  44. package/dist/services/card-extractor.d.ts.map +1 -0
  45. package/dist/services/card-extractor.js +194 -0
  46. package/dist/services/card-extractor.js.map +1 -0
  47. package/dist/services/extractor.d.ts +12 -19
  48. package/dist/services/extractor.d.ts.map +1 -1
  49. package/dist/services/extractor.js +60 -46
  50. package/dist/services/extractor.js.map +1 -1
  51. package/dist/services/fetcher.d.ts +13 -11
  52. package/dist/services/fetcher.d.ts.map +1 -1
  53. package/dist/services/fetcher.js +143 -54
  54. package/dist/services/fetcher.js.map +1 -1
  55. package/dist/services/logger.d.ts.map +1 -1
  56. package/dist/services/logger.js +4 -6
  57. package/dist/services/logger.js.map +1 -1
  58. package/dist/services/parser.d.ts +1 -6
  59. package/dist/services/parser.d.ts.map +1 -1
  60. package/dist/services/parser.js +57 -27
  61. package/dist/services/parser.js.map +1 -1
  62. package/dist/tools/handlers/fetch-links.tool.d.ts +6 -18
  63. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  64. package/dist/tools/handlers/fetch-links.tool.js +104 -79
  65. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  66. package/dist/tools/handlers/fetch-markdown.tool.d.ts +6 -10
  67. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  68. package/dist/tools/handlers/fetch-markdown.tool.js +83 -84
  69. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  70. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -12
  71. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  72. package/dist/tools/handlers/fetch-url.tool.js +51 -93
  73. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  74. package/dist/tools/handlers/fetch-urls.tool.d.ts +12 -0
  75. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
  76. package/dist/tools/handlers/fetch-urls.tool.js +184 -0
  77. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
  78. package/dist/tools/index.d.ts +0 -4
  79. package/dist/tools/index.d.ts.map +1 -1
  80. package/dist/tools/index.js +145 -15
  81. package/dist/tools/index.js.map +1 -1
  82. package/dist/tools/utils/common.d.ts +8 -0
  83. package/dist/tools/utils/common.d.ts.map +1 -0
  84. package/dist/tools/utils/common.js +35 -0
  85. package/dist/tools/utils/common.js.map +1 -0
  86. package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
  87. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
  88. package/dist/tools/utils/fetch-pipeline.js +78 -0
  89. package/dist/tools/utils/fetch-pipeline.js.map +1 -0
  90. package/dist/tools/utils/index.d.ts +4 -0
  91. package/dist/tools/utils/index.d.ts.map +1 -0
  92. package/dist/tools/utils/index.js +3 -0
  93. package/dist/tools/utils/index.js.map +1 -0
  94. package/dist/tools/utils/response-builder.d.ts +3 -0
  95. package/dist/tools/utils/response-builder.d.ts.map +1 -0
  96. package/dist/tools/utils/response-builder.js +24 -0
  97. package/dist/tools/utils/response-builder.js.map +1 -0
  98. package/dist/transformers/jsonl.transformer.d.ts +1 -1
  99. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  100. package/dist/transformers/jsonl.transformer.js +2 -1
  101. package/dist/transformers/jsonl.transformer.js.map +1 -1
  102. package/dist/transformers/markdown.transformer.d.ts +1 -1
  103. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  104. package/dist/transformers/markdown.transformer.js +99 -5
  105. package/dist/transformers/markdown.transformer.js.map +1 -1
  106. package/dist/types/content.types.d.ts +11 -11
  107. package/dist/types/content.types.d.ts.map +1 -1
  108. package/dist/types/index.d.ts +1 -2
  109. package/dist/types/index.d.ts.map +1 -1
  110. package/dist/types/index.js +1 -2
  111. package/dist/types/index.js.map +1 -1
  112. package/dist/types/schemas.d.ts +39 -12
  113. package/dist/types/schemas.d.ts.map +1 -1
  114. package/dist/utils/concurrency.d.ts +6 -0
  115. package/dist/utils/concurrency.d.ts.map +1 -0
  116. package/dist/utils/concurrency.js +38 -0
  117. package/dist/utils/concurrency.js.map +1 -0
  118. package/dist/utils/content-cleaner.d.ts +32 -0
  119. package/dist/utils/content-cleaner.d.ts.map +1 -0
  120. package/dist/utils/content-cleaner.js +238 -0
  121. package/dist/utils/content-cleaner.js.map +1 -0
  122. package/dist/utils/language-detector.d.ts +5 -0
  123. package/dist/utils/language-detector.d.ts.map +1 -0
  124. package/dist/utils/language-detector.js +50 -0
  125. package/dist/utils/language-detector.js.map +1 -0
  126. package/dist/utils/sanitizer.d.ts +0 -10
  127. package/dist/utils/sanitizer.d.ts.map +1 -1
  128. package/dist/utils/sanitizer.js +4 -12
  129. package/dist/utils/sanitizer.js.map +1 -1
  130. package/dist/utils/tool-error-handler.d.ts +1 -15
  131. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  132. package/dist/utils/tool-error-handler.js +34 -6
  133. package/dist/utils/tool-error-handler.js.map +1 -1
  134. package/dist/utils/url-validator.d.ts +0 -8
  135. package/dist/utils/url-validator.d.ts.map +1 -1
  136. package/dist/utils/url-validator.js +17 -31
  137. package/dist/utils/url-validator.js.map +1 -1
  138. package/package.json +81 -79
@@ -0,0 +1,194 @@
1
+ /**
2
+ * Card link extraction utilities for preserving card-style navigation
3
+ * from documentation sites before Readability strips them.
4
+ */
5
+ const NOISE_SELECTORS = 'style, svg, [class*="icon"], [aria-hidden="true"]'; // selector list of nodes cleanElement strips: <style>, <svg>, any element whose class attribute contains the substring "icon", and aria-hidden elements
6
+ /**
7
+ * Return a deep clone of `element` with every descendant matching NOISE_SELECTORS removed; the element passed in is not modified.
8
+ */
9
+ function cleanElement(element) {
10
+ const clone = element.cloneNode(true); // deep copy, so the removals below never touch the caller's node
11
+ clone.querySelectorAll(NOISE_SELECTORS).forEach((el) => {
12
+ el.remove();
13
+ });
14
+ return clone;
15
+ }
16
+ /**
17
+ * Derive a short title for a card-like link. Tries, in order: the text of a leaf <div>, a title-like element, then the link's own text. Returns the title string, or null when the fallback text is empty, a single character, or 100 or more characters long.
18
+ */
19
+ function extractCardTitle(link) {
20
+ const clone = cleanElement(link);
21
+ // Look for the first leaf div (one with no nested div), which typically contains the title in card layouts
22
+ for (const div of clone.querySelectorAll('div')) {
23
+ if (div.querySelector('div'))
24
+ continue; // Skip container divs
25
+ const text = div.textContent.trim();
26
+ if (text.length > 1 &&
27
+ text.length < 50 &&
28
+ !text.includes(' with ') &&
29
+ !text.includes('Use ')) { // heuristic: text containing ' with ' or 'Use ' is presumably a description sentence, not a title
30
+ return text;
31
+ }
32
+ }
33
+ // Look for structured title elements
34
+ const titleEl = clone.querySelector('[class*="title"], h2, h3, h4, h5, strong'); // first match in document order
35
+ if (titleEl) {
36
+ const title = titleEl.textContent.trim();
37
+ if (title.length > 1 && title.length < 100)
38
+ return title;
39
+ }
40
+ // Fall back to first meaningful text content
41
+ const text = clone.textContent.trim().replace(/\s+/g, ' '); // collapse every whitespace run (including newlines) to one space
42
+ if (!text || text.length <= 1 || text.length >= 100)
43
+ return null;
44
+ // Extract title part (first word/phrase before description)
45
+ const words = text.split(/(?=Use |Try |Learn |Get )/); // zero-width lookahead: splits before each keyword, so words[0] is the text preceding the first one
46
+ if (words.length > 1 && words[0])
47
+ return words[0].trim();
48
+ const firstLine = text
49
+ .split(/[.\n]/) // newlines were already collapsed to spaces above, so in practice this splits on '.'
50
+ .find((s) => s.trim().length > 1)
51
+ ?.trim();
52
+ return firstLine ?? text; // no segment longer than one character: return the whole normalized text
53
+ }
54
+ /**
55
+ * Derive a description for a card-like link: the text of the first <p>, description-classed or muted-classed element when it is 6 to 199 characters long; otherwise the tail of the link text starting at the first 'Use ', 'Try ', 'Learn ' or 'Get '. Returns null when neither applies.
56
+ */
57
+ function extractCardDescription(link) {
58
+ const clone = cleanElement(link);
59
+ const descEl = clone.querySelector('p, [class*="description"], [class*="muted"]');
60
+ if (descEl) {
61
+ const desc = descEl.textContent.trim();
62
+ if (desc.length > 5 && desc.length < 200)
63
+ return desc;
64
+ }
65
+ const text = clone.textContent.trim().replace(/\s+/g, ' '); // collapse whitespace runs so the regex below sees a single line
66
+ if (!text)
67
+ return null;
68
+ const descMatch = /(Use |Try |Learn |Get ).*$/.exec(text); // from the first keyword occurrence through the end of the text
69
+ if (descMatch && descMatch[0].length > 10) // ignore matches of 10 characters or fewer
70
+ return descMatch[0];
71
+ return null;
72
+ }
73
+ /**
74
+ * Create an <li> holding an <a href> whose text is `title`, optionally followed by a " - description" text node. Returns the new <li>; it is not attached to the document here.
75
+ * Builds real DOM nodes (an anchor element, not markdown text) so the href stays in the document; the description is skipped when empty or already contained in the title
76
+ */
77
+ function createLinkListItem(document, href, title, description) {
78
+ const li = document.createElement('li');
79
+ const link = document.createElement('a');
80
+ link.setAttribute('href', href);
81
+ link.textContent = title;
82
+ li.appendChild(link);
83
+ if (description && description !== title && !title.includes(description)) { // the !== check is already implied by the includes() check
84
+ li.appendChild(document.createTextNode(` - ${description}`));
85
+ }
86
+ return li;
87
+ }
88
+ /**
89
+ * Process custom <card> elements (used by MDX-based docs): builds one <ul data-preserved-cards="true"> from the cards that have both an href and a title, inserts it before the first matched card, then removes every matched card. Does nothing when no card yields a list item.
90
+ */
91
+ function processCustomCards(document) {
92
+ const customCards = document.querySelectorAll('card[href], card[title]'); // href OR title, so title-only cards are matched too
93
+ if (customCards.length === 0)
94
+ return;
95
+ const list = document.createElement('ul');
96
+ list.setAttribute('data-preserved-cards', 'true');
97
+ for (const card of customCards) {
98
+ const href = card.getAttribute('href');
99
+ const title = card.getAttribute('title') ?? card.textContent.trim(); // `??` only falls back when the attribute is absent; an empty title="" stays empty and the card is skipped below
100
+ if (href && title) {
101
+ const desc = card.querySelector('p')?.textContent.trim(); // undefined when the card has no <p>
102
+ list.appendChild(createLinkListItem(document, href, title, desc));
103
+ }
104
+ }
105
+ if (list.children.length > 0) {
106
+ const firstCard = customCards[0];
107
+ firstCard?.parentNode?.insertBefore(list, firstCard); // a single list at the first card's position, wherever the other cards were
108
+ customCards.forEach((card) => { // NOTE(review): this also removes matched cards that were not added to the list (e.g. title without href) - confirm dropping them is intended
109
+ card.remove();
110
+ });
111
+ }
112
+ }
113
+ /**
114
+ * Process card grids: any <div> with two or more direct-child <a href> links that all look like cards is replaced by a <div data-preserved-cards="true"> wrapping a <ul> of those links (title plus optional description).
115
+ * Uses the `:scope > a[href]` selector to pick direct-child links only; other children of a replaced div are not carried over
116
+ */
117
+ function processCardGrids(document) {
118
+ // Visit every div in the document; most are rejected by the direct-child link count below
119
+ for (const div of document.querySelectorAll('div')) {
120
+ // Use :scope > a[href] for direct child links only (more efficient than Array.from + filter)
121
+ const childLinks = div.querySelectorAll(':scope > a[href]');
122
+ if (childLinks.length < 2)
123
+ continue;
124
+ const looksLikeCards = Array.from(childLinks).every((link) => {
125
+ const hasStructuredContent = link.querySelector('svg, div, p, span'); // an Element or null; used for truthiness only
126
+ const hasReasonableText = link.textContent.trim().length > 3;
127
+ return hasStructuredContent && hasReasonableText;
128
+ });
129
+ if (!looksLikeCards)
130
+ continue;
131
+ const section = document.createElement('div');
132
+ section.setAttribute('data-preserved-cards', 'true');
133
+ const list = document.createElement('ul');
134
+ for (const link of childLinks) {
135
+ const href = link.getAttribute('href');
136
+ const title = extractCardTitle(link);
137
+ const desc = extractCardDescription(link);
138
+ if (href && title) { // links with an empty href or no extractable title are left out of the list
139
+ list.appendChild(createLinkListItem(document, href, title, desc));
140
+ }
141
+ }
142
+ if (list.children.length > 0) {
143
+ section.appendChild(list);
144
+ div.parentNode?.replaceChild(section, div); // swaps out the whole div; optional chaining skips a div that has no parent
145
+ }
146
+ }
147
+ }
148
+ /**
149
+ * Process containers whose class or data attribute marks them as card groups: each container that yields at least one link with an href and a title is replaced by a flat <ul data-preserved-cards="true"> of those links (titles only, no descriptions).
150
+ */
151
+ function processSemanticCards(document) {
152
+ const cardSelectors = [
153
+ '[class*="card-group"]',
154
+ '[class*="card-grid"]',
155
+ '[class*="cards"]',
156
+ '[data-cards]',
157
+ '[class*="link-card"]',
158
+ '[class*="feature-card"]',
159
+ ];
160
+ for (const selector of cardSelectors) {
161
+ try {
162
+ for (const container of document.querySelectorAll(selector)) {
163
+ const links = container.querySelectorAll('a[href]'); // all descendant links, not just direct children
164
+ if (links.length === 0)
165
+ continue;
166
+ const list = document.createElement('ul');
167
+ list.setAttribute('data-preserved-cards', 'true');
168
+ for (const link of links) {
169
+ const href = link.getAttribute('href');
170
+ const title = extractCardTitle(link);
171
+ if (href && title) {
172
+ list.appendChild(createLinkListItem(document, href, title)); // no description argument here, unlike processCardGrids
173
+ }
174
+ }
175
+ if (list.children.length > 0) {
176
+ container.parentNode?.replaceChild(list, container); // the container's non-link content is not carried over
177
+ }
178
+ }
179
+ }
180
+ catch {
181
+ // Selector might be invalid, skip it. NOTE(review): this also silently swallows any error thrown while processing a container - confirm that is intended
182
+ }
183
+ }
184
+ }
185
+ /**
186
+ * Pre-process HTML to preserve card links that Readability might strip.
187
+ * Converts card-like elements into simple link lists by mutating `document` in place, in this order: custom <card> tags, then card grids, then semantic card containers. Returns nothing.
188
+ */
189
+ export function preserveCardLinks(document) {
190
+ processCustomCards(document);
191
+ processCardGrids(document);
192
+ processSemanticCards(document);
193
+ }
194
+ //# sourceMappingURL=card-extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"card-extractor.js","sourceRoot":"","sources":["../../src/services/card-extractor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,eAAe,GAAG,mDAAmD,CAAC;AAE5E;;GAEG;AACH,SAAS,YAAY,CAAC,OAAgB;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,IAAI,CAAY,CAAC;IACjD,KAAK,CAAC,gBAAgB,CAAC,eAAe,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;QACrD,EAAE,CAAC,MAAM,EAAE,CAAC;IACd,CAAC,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAa;IACrC,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAEjC,kFAAkF;IAClF,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,gBAAgB,CAAC,KAAK,CAAC,EAAE,CAAC;QAChD,IAAI,GAAG,CAAC,aAAa,CAAC,KAAK,CAAC;YAAE,SAAS,CAAC,sBAAsB;QAE9D,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACpC,IACE,IAAI,CAAC,MAAM,GAAG,CAAC;YACf,IAAI,CAAC,MAAM,GAAG,EAAE;YAChB,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACxB,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACtB,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CACjC,0CAA0C,CAC3C,CAAC;IACF,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,KAAK,CAAC;IAC3D,CAAC;IAED,6CAA6C;IAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;IACtD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAEzD,MAAM,SAAS,GAAG,IAAI;SACnB,KAAK,CAAC,OAAO,CAAC;SACd,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;QACjC,EAAE,IAAI,EAAE,CAAC;IACX,OAAO,SAAS,IAAI,IAAI,CAAC;AAC3B,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,IAAa;IAC3C,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAEjC,MAAM,MAAM,GAAG,KAAK,CAAC,aAAa,CAChC,6CAA6C,CAC9C,CAAC;IACF,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,MAAM,GAAG
,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,4BAA4B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1D,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IAE/D,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CACzB,QAAkB,EAClB,IAAY,EACZ,KAAa,EACb,WAA2B;IAE3B,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;IACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAChC,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IACzB,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAErB,IAAI,WAAW,IAAI,WAAW,KAAK,KAAK,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QACzE,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,MAAM,WAAW,EAAE,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,QAAkB;IAC5C,MAAM,WAAW,GAAG,QAAQ,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CAAC;IACzE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAErC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;IAElD,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAEpE,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;YAClB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,WAAW,CAAC,IAAI,EAAE,CAAC;YACzD,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QACjC,SAAS,EAAE,UAAU,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QACrD,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3B,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,QAAkB;IAC1C,+EAA+E;IAC/E,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,gBAAgB,CAAC,KAAK,CAAC,EAAE,CAAC;QACnD,6FAA6F;QAC7F,MAAM,UAAU,GAA
G,GAAG,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;QAE5D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3D,MAAM,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,mBAAmB,CAAC,CAAC;YACrE,MAAM,iBAAiB,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;YAC7D,OAAO,oBAAoB,IAAI,iBAAiB,CAAC;QACnD,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc;YAAE,SAAS;QAE9B,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC9C,OAAO,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAE1C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,IAAI,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;YAE1C,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;gBAClB,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC1B,GAAG,CAAC,UAAU,EAAE,YAAY,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAkB;IAC9C,MAAM,aAAa,GAAG;QACpB,uBAAuB;QACvB,sBAAsB;QACtB,kBAAkB;QAClB,cAAc;QACd,sBAAsB;QACtB,yBAAyB;KAC1B,CAAC;IAEF,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;QACrC,IAAI,CAAC;YACH,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5D,MAAM,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;gBACpD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBAEjC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;gBAC1C,IAAI,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;gBAElD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;oBACvC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;oBAErC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;wBAClB,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;oBAC9D,CAAC;gBACH,CAAC;gBAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7B,SAAS,CAAC,UAAU,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBACtD,CAAC;YACH,CA
AC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qCAAqC;QACvC,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAkB;IAClD,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC7B,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC3B,oBAAoB,CAAC,QAAQ,CAAC,CAAC;AACjC,CAAC"}
@@ -1,22 +1,15 @@
1
- import type { ExtractedArticle } from '../types/index.js';
2
- /** Metadata extracted from HTML document (internal) */
3
- interface ExtractedMetadata {
4
- title?: string;
5
- description?: string;
6
- author?: string;
7
- }
8
- /** Combined extraction result (internal) */
9
- interface ExtractionResult {
10
- article: ExtractedArticle | null;
11
- metadata: ExtractedMetadata;
12
- }
1
+ import type { CheerioAPI } from 'cheerio';
2
+ import type { ExtractedMetadata, ExtractionResult } from '../config/types.js';
13
3
  /**
14
- * Extracts both article content and metadata from HTML in a single JSDOM parse.
15
- * This is more efficient than calling extractArticle and extractMetadata separately.
16
- * @param html - HTML string to extract content from
17
- * @param url - URL of the page (used for resolving relative links)
18
- * @returns Extraction result with article and metadata
4
+ * Extract metadata using Cheerio (fast, no full DOM)
5
+ * This avoids JSDOM overhead for simple meta tag extraction
19
6
  */
20
- export declare function extractContent(html: string, url: string): ExtractionResult;
21
- export {};
7
+ export declare function extractMetadataWithCheerio($: CheerioAPI): ExtractedMetadata;
8
+ /**
9
+ * Main extraction function - uses Cheerio for metadata (fast)
10
+ * and lazy-loads JSDOM only when article extraction is needed
11
+ */
12
+ export declare function extractContent(html: string, url: string, options?: {
13
+ extractArticle?: boolean;
14
+ }): ExtractionResult;
22
15
  //# sourceMappingURL=extractor.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAM1D,uDAAuD;AACvD,UAAU,iBAAiB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,4CAA4C;AAC5C,UAAU,gBAAgB;IACxB,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AA8DD;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,gBAAgB,CAwC1E"}
1
+ {"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAK1C,OAAO,KAAK,EAEV,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,oBAAoB,CAAC;AAgB5B;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,CAAC,EAAE,UAAU,GAAG,iBAAiB,CA4B3E;AAsCD;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAA6B,GAC/D,gBAAgB,CAsClB"}
@@ -1,65 +1,79 @@
1
- import { JSDOM } from 'jsdom';
1
+ import * as cheerio from 'cheerio';
2
+ import { JSDOM, VirtualConsole } from 'jsdom';
2
3
  import { Readability } from '@mozilla/readability';
4
+ import { preserveCardLinks } from './card-extractor.js';
3
5
  import { logError, logWarn } from './logger.js';
4
- // Maximum HTML size to process (10MB)
5
6
  const MAX_HTML_SIZE = 10 * 1024 * 1024;
6
- function getMetaContent(document, selectors) {
7
- for (const selector of selectors) {
8
- const content = document.querySelector(selector)?.getAttribute('content');
9
- if (content)
10
- return content;
11
- }
12
- return undefined;
13
- }
7
+ // Shared VirtualConsole to suppress JSDOM warnings/errors
8
+ const sharedVirtualConsole = new VirtualConsole();
9
+ sharedVirtualConsole.on('error', () => {
10
+ /* suppress JSDOM errors */
11
+ });
12
+ sharedVirtualConsole.on('warn', () => {
13
+ /* suppress JSDOM warnings */
14
+ });
14
15
  /**
15
- * Extracts metadata from a pre-parsed Document
16
+ * Extract metadata using Cheerio (fast, no full DOM)
17
+ * This avoids JSDOM overhead for simple meta tag extraction
16
18
  */
17
- function extractMetadataFromDocument(document) {
18
- const title = getMetaContent(document, [
19
+ export function extractMetadataWithCheerio($) {
20
+ const getMetaContent = (selectors) => {
21
+ for (const selector of selectors) {
22
+ const content = $(selector).attr('content');
23
+ if (content)
24
+ return content;
25
+ }
26
+ return undefined;
27
+ };
28
+ const title = getMetaContent([
19
29
  'meta[property="og:title"]',
20
30
  'meta[name="twitter:title"]',
21
31
  ]) ??
22
- document.querySelector('title')?.textContent ??
23
- undefined;
24
- const description = getMetaContent(document, [
32
+ ($('title').text() || undefined);
33
+ const description = getMetaContent([
25
34
  'meta[property="og:description"]',
26
35
  'meta[name="twitter:description"]',
27
36
  'meta[name="description"]',
28
37
  ]);
29
- const author = getMetaContent(document, [
38
+ const author = getMetaContent([
30
39
  'meta[name="author"]',
31
40
  'meta[property="article:author"]',
32
41
  ]);
33
42
  return { title, description, author };
34
43
  }
35
44
  /**
36
- * Extracts article content from a pre-parsed Document using Readability
45
+ * Extract article content using JSDOM + Readability
46
+ * Only called when options.extractArticle is truthy (lazy loading)
37
47
  */
38
- function extractArticleFromDocument(document) {
39
- // Clone the document since Readability mutates it
40
- const clonedDoc = document.cloneNode(true);
41
- const reader = new Readability(clonedDoc);
42
- const article = reader.parse();
43
- if (!article)
48
+ function extractArticleWithJsdom(html, url) {
49
+ try {
50
+ // Use shared VirtualConsole to reduce per-parse overhead
51
+ const dom = new JSDOM(html, { url, virtualConsole: sharedVirtualConsole });
52
+ const { document } = dom.window;
53
+ preserveCardLinks(document);
54
+ const reader = new Readability(document);
55
+ const article = reader.parse();
56
+ if (!article)
57
+ return null;
58
+ return {
59
+ title: article.title ?? undefined,
60
+ byline: article.byline ?? undefined,
61
+ content: article.content ?? '',
62
+ textContent: article.textContent ?? '',
63
+ excerpt: article.excerpt ?? undefined,
64
+ siteName: article.siteName ?? undefined,
65
+ };
66
+ }
67
+ catch (error) {
68
+ logError('Failed to extract article with JSDOM', error instanceof Error ? error : undefined);
44
69
  return null;
45
- return {
46
- title: article.title ?? undefined,
47
- byline: article.byline ?? undefined,
48
- content: article.content ?? '',
49
- textContent: article.textContent ?? '',
50
- excerpt: article.excerpt ?? undefined,
51
- siteName: article.siteName ?? undefined,
52
- };
70
+ }
53
71
  }
54
72
  /**
55
- * Extracts both article content and metadata from HTML in a single JSDOM parse.
56
- * This is more efficient than calling extractArticle and extractMetadata separately.
57
- * @param html - HTML string to extract content from
58
- * @param url - URL of the page (used for resolving relative links)
59
- * @returns Extraction result with article and metadata
73
+ * Main extraction function - uses Cheerio for metadata (fast)
74
+ * and lazy-loads JSDOM only when article extraction is needed
60
75
  */
61
- export function extractContent(html, url) {
62
- // Input validation
76
+ export function extractContent(html, url, options = { extractArticle: true }) {
63
77
  if (!html || typeof html !== 'string') {
64
78
  logWarn('extractContent called with invalid HTML input');
65
79
  return { article: null, metadata: {} };
@@ -68,7 +82,6 @@ export function extractContent(html, url) {
68
82
  logWarn('extractContent called with invalid URL');
69
83
  return { article: null, metadata: {} };
70
84
  }
71
- // Size validation to prevent memory issues
72
85
  let processedHtml = html;
73
86
  if (html.length > MAX_HTML_SIZE) {
74
87
  logWarn('HTML content exceeds maximum size for extraction, truncating', {
@@ -78,12 +91,13 @@ export function extractContent(html, url) {
78
91
  processedHtml = html.substring(0, MAX_HTML_SIZE);
79
92
  }
80
93
  try {
81
- const dom = new JSDOM(processedHtml, { url });
82
- const document = dom.window.document;
83
- // Extract metadata first (non-destructive)
84
- const metadata = extractMetadataFromDocument(document);
85
- // Extract article (uses cloned document since Readability mutates)
86
- const article = extractArticleFromDocument(document);
94
+ // Fast path: Extract metadata with Cheerio (no full DOM parsing)
95
+ const $ = cheerio.load(processedHtml);
96
+ const metadata = extractMetadataWithCheerio($);
97
+ // Lazy path: Only use JSDOM when article extraction is requested
98
+ const article = options.extractArticle
99
+ ? extractArticleWithJsdom(processedHtml, url)
100
+ : null;
87
101
  return { article, metadata };
88
102
  }
89
103
  catch (error) {
@@ -1 +1 @@
1
- {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,sCAAsC;AACtC,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAevC,SAAS,cAAc,CACrB,QAAkB,EAClB,SAAmB;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1E,IAAI,OAAO;YAAE,OAAO,OAAO,CAAC;IAC9B,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,2BAA2B,CAAC,QAAkB;IACrD,MAAM,KAAK,GACT,cAAc,CAAC,QAAQ,EAAE;QACvB,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC;QACF,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW;QAC5C,SAAS,CAAC;IAEZ,MAAM,WAAW,GAAG,cAAc,CAAC,QAAQ,EAAE;QAC3C,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE;QACtC,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CACjC,QAAkB;IAElB,kDAAkD;IAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAa,CAAC;IACvD,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,SAAS,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;IAE/B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,OAAO;QACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;QACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;QACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;QAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;QACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;QACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;KACxC,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,GAAW;IACtD,mBAAmB;IACnB,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,2CAA2C;IAC3C,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,8DAA8D
,EAAE;YACtE,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,aAAa,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QAErC,2CAA2C;QAC3C,MAAM,QAAQ,GAAG,2BAA2B,CAAC,QAAQ,CAAC,CAAC;QAEvD,mEAAmE;QACnE,MAAM,OAAO,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;QAErD,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,OAAO,CAAC;AAE9C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAQnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAEvC,0DAA0D;AAC1D,MAAM,oBAAoB,GAAG,IAAI,cAAc,EAAE,CAAC;AAClD,oBAAoB,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;IACpC,2BAA2B;AAC7B,CAAC,CAAC,CAAC;AACH,oBAAoB,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;IACnC,6BAA6B;AAC/B,CAAC,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,UAAU,0BAA0B,CAAC,CAAa;IACtD,MAAM,cAAc,GAAG,CAAC,SAAmB,EAAsB,EAAE;QACjE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC5C,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC;QAC9B,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC;IAEF,MAAM,KAAK,GACT,cAAc,CAAC;QACb,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC;QACF,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC,CAAC;IAEnC,MAAM,WAAW,GAAG,cAAc,CAAC;QACjC,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC;QAC5B,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;;GAGG;AACH,SAAS,uBAAuB,CAC9B,IAAY,EACZ,GAAW;IAEX,IAAI,CAAC;QACH,yDAAyD;QACzD,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,cAAc,EAAE,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,EAAE,QAAQ,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC;QAEhC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAE/B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;YACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;YACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;YAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;YACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;YACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sCAAsC,EACtC,KAAK,YAAY,KAAK,CAAC,CAAC,CAA
C,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,GAAW,EACX,UAAwC,EAAE,cAAc,EAAE,IAAI,EAAE;IAEhE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,8DAA8D,EAAE;YACtE,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,CAAC;QACH,iEAAiE;QACjE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,0BAA0B,CAAC,CAAC,CAAC,CAAC;QAE/C,iEAAiE;QACjE,MAAM,OAAO,GAAG,OAAO,CAAC,cAAc;YACpC,CAAC,CAAC,uBAAuB,CAAC,aAAa,EAAE,GAAG,CAAC;YAC7C,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
@@ -1,13 +1,15 @@
1
- /**
2
- * Destroys HTTP agents and closes all sockets
3
- * Should be called during graceful shutdown
4
- */
1
+ /** Options for fetch operations */
2
+ export interface FetchOptions {
3
+ /** Custom HTTP headers to include in the request */
4
+ customHeaders?: Record<string, string>;
5
+ /** AbortSignal for request cancellation */
6
+ signal?: AbortSignal;
7
+ /** Per-request timeout override in milliseconds */
8
+ timeout?: number;
9
+ }
5
10
  export declare function destroyAgents(): void;
6
- /**
7
- * Fetches URL with exponential backoff retry logic
8
- * @param url - URL to fetch
9
- * @param customHeaders - Optional custom headers
10
- * @param maxRetries - Maximum retry attempts (1-10, defaults to 3)
11
- */
12
- export declare function fetchUrlWithRetry(url: string, customHeaders?: Record<string, string>, maxRetries?: number): Promise<string>;
11
+ export declare function fetchUrlWithRetry(url: string, options?: FetchOptions, maxRetries?: number, skipCache?: boolean): Promise<{
12
+ html: string;
13
+ fromHtmlCache: boolean;
14
+ }>;
13
15
  //# sourceMappingURL=fetcher.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAyCA;;;GAGG;AACH,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AAqID;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACtC,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,CAAC,CA8BjB"}
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAqBA,mCAAmC;AACnC,MAAM,WAAW,YAAY;IAC3B,oDAAoD;IACpD,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,mDAAmD;IACnD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AA6DD,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AA4LD,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,YAAY,EACtB,UAAU,SAAI,EACd,SAAS,UAAQ,GAChB,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,OAAO,CAAA;CAAE,CAAC,CAoEnD"}