@j0hanz/superfetch 1.0.2 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +345 -57
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +256 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +6 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +7 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +75 -62
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +4 -12
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +22 -47
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +2 -10
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/cached-content.d.ts +5 -0
- package/dist/resources/cached-content.d.ts.map +1 -0
- package/dist/resources/cached-content.js +93 -0
- package/dist/resources/cached-content.js.map +1 -0
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +40 -5
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +20 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +128 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +194 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +12 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +60 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +13 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +143 -54
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +57 -27
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +6 -18
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +6 -10
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +83 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +6 -12
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +12 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +184 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +78 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +99 -5
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +6 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +38 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +238 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +4 -12
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +34 -6
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +81 -79
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Card link extraction utilities for preserving card-style navigation
|
|
3
|
+
* from documentation sites before Readability strips them.
|
|
4
|
+
*/
|
|
5
|
+
const NOISE_SELECTORS = 'style, svg, [class*="icon"], [aria-hidden="true"]';
|
|
6
|
+
/**
|
|
7
|
+
* Clean element by removing noise (styles, SVGs, icons)
|
|
8
|
+
*/
|
|
9
|
+
function cleanElement(element) {
|
|
10
|
+
const clone = element.cloneNode(true);
|
|
11
|
+
clone.querySelectorAll(NOISE_SELECTORS).forEach((el) => {
|
|
12
|
+
el.remove();
|
|
13
|
+
});
|
|
14
|
+
return clone;
|
|
15
|
+
}
|
|
16
|
+
/**
 * Derive a short title for a card-like link element.
 *
 * Works on a noise-free copy of `link` and tries three strategies in order:
 *   1. the trimmed text of the first leaf <div> (one containing no other <div>)
 *      that is 2-49 characters long and contains neither ' with ' nor 'Use ';
 *   2. the trimmed text (2-99 characters) of the first element matching a
 *      title-like class, h2-h5, or <strong>;
 *   3. the whitespace-collapsed text of the whole link (2-99 characters): the
 *      part before a "Use "/"Try "/"Learn "/"Get " phrase if there is one,
 *      otherwise the first '.'-delimited piece longer than one character.
 *
 * Returns the title string, or null when the link text is empty, a single
 * character, or 100+ characters and neither earlier strategy matched.
 */
function extractCardTitle(link) {
    const card = cleanElement(link);

    // Strategy 1: a short leaf <div> that does not read like a description.
    const isShortLabel = (label) =>
        label.length > 1 &&
        label.length < 50 &&
        !(label.includes(' with ') || label.includes('Use '));
    for (const candidate of card.querySelectorAll('div')) {
        const isLeaf = !candidate.querySelector('div');
        if (!isLeaf) {
            continue; // container div, its leaves are visited separately
        }
        const label = candidate.textContent.trim();
        if (isShortLabel(label)) {
            return label;
        }
    }

    // Strategy 2: an explicit title/heading element.
    const heading = card.querySelector('[class*="title"], h2, h3, h4, h5, strong');
    const headingText = heading ? heading.textContent.trim() : '';
    if (headingText.length > 1 && headingText.length < 100) {
        return headingText;
    }

    // Strategy 3: carve a title out of the flattened text of the whole link.
    const flattened = card.textContent.trim().replace(/\s+/g, ' ');
    if (flattened.length <= 1 || flattened.length >= 100) {
        return null;
    }
    const segments = flattened.split(/(?=Use |Try |Learn |Get )/);
    if (segments.length > 1 && segments[0]) {
        return segments[0].trim();
    }
    for (const piece of flattened.split(/[.\n]/)) {
        const trimmed = piece.trim();
        if (trimmed.length > 1) {
            return trimmed;
        }
    }
    return flattened;
}
|
|
54
|
+
/**
 * Derive a description for a card-like link element.
 *
 * Works on a noise-free copy of `link`. The trimmed text of the first <p> or
 * description/muted-classed element wins when it is 6-199 characters long.
 * Otherwise the whitespace-collapsed link text is searched for a trailing
 * phrase starting at the first "Use ", "Try ", "Learn " or "Get "; that phrase
 * is returned when it is longer than 10 characters.
 *
 * Returns the description string, or null when nothing suitable is found.
 */
function extractCardDescription(link) {
    const card = cleanElement(link);

    const marked = card.querySelector('p, [class*="description"], [class*="muted"]');
    const markedText = marked ? marked.textContent.trim() : '';
    if (markedText.length > 5 && markedText.length < 200) {
        return markedText;
    }

    // An empty string simply fails to match below and yields null.
    const flattened = card.textContent.trim().replace(/\s+/g, ' ');
    const tail = flattened.match(/(Use |Try |Learn |Get ).*$/);
    return tail !== null && tail[0].length > 10 ? tail[0] : null;
}
|
|
73
|
+
/**
|
|
74
|
+
* Create a list item with link and optional description
|
|
75
|
+
* Formats as markdown-style link to preserve href for AI parsing
|
|
76
|
+
*/
|
|
77
|
+
function createLinkListItem(document, href, title, description) {
|
|
78
|
+
const li = document.createElement('li');
|
|
79
|
+
const link = document.createElement('a');
|
|
80
|
+
link.setAttribute('href', href);
|
|
81
|
+
link.textContent = title;
|
|
82
|
+
li.appendChild(link);
|
|
83
|
+
if (description && description !== title && !title.includes(description)) {
|
|
84
|
+
li.appendChild(document.createTextNode(` - ${description}`));
|
|
85
|
+
}
|
|
86
|
+
return li;
|
|
87
|
+
}
|
|
88
|
+
/**
 * Convert custom <card> elements into a plain <ul> of links.
 *
 * Every <card> carrying an `href` or `title` attribute is considered. A card
 * is converted when it has a non-empty href and a usable title: the trimmed
 * `title` attribute, or the card's trimmed text when that attribute is missing
 * or blank. The card's first <p>, if any, supplies the description.
 *
 * The resulting list is tagged `data-preserved-cards="true"` and inserted
 * before the first converted card. Only converted cards are removed; cards
 * that could not be turned into a link (for example a title without an href)
 * stay in the document so their content is not silently dropped.
 */
function processCustomCards(document) {
    const customCards = document.querySelectorAll('card[href], card[title]');
    if (customCards.length === 0) {
        return;
    }

    const list = document.createElement('ul');
    list.setAttribute('data-preserved-cards', 'true');

    // Track which cards actually produced a list item; only those are removed.
    const converted = [];
    for (const card of customCards) {
        const href = card.getAttribute('href');
        // `||` (not `??`) so an empty/blank title attribute falls back to the text.
        const title = card.getAttribute('title')?.trim() || card.textContent.trim();
        if (!href || !title) {
            continue;
        }
        const desc = card.querySelector('p')?.textContent.trim();
        list.appendChild(createLinkListItem(document, href, title, desc));
        converted.push(card);
    }

    if (converted.length === 0) {
        return;
    }
    const firstConverted = converted[0];
    firstConverted.parentNode?.insertBefore(list, firstConverted);
    for (const card of converted) {
        card.remove();
    }
}
|
|
113
|
+
/**
 * Replace <div>s that act as card grids with a simple list of links.
 *
 * Every <div> in the document is inspected. A div counts as a card grid when
 * it has at least two direct-child <a href> elements and each of them contains
 * an svg/div/p/span descendant and more than three characters of trimmed text.
 * Such a div is replaced by `<div data-preserved-cards="true"><ul>…</ul></div>`
 * with one item per link for which both an href and a title could be obtained;
 * if no link yields an item, the div is left as it is.
 */
function processCardGrids(document) {
    const isCardLike = (anchor) =>
        anchor.querySelector('svg, div, p, span') !== null &&
        anchor.textContent.trim().length > 3;

    for (const grid of document.querySelectorAll('div')) {
        // `:scope >` restricts the match to direct children of this div.
        const anchors = Array.from(grid.querySelectorAll(':scope > a[href]'));
        if (anchors.length < 2 || !anchors.every(isCardLike)) {
            continue;
        }

        const items = document.createElement('ul');
        for (const anchor of anchors) {
            const href = anchor.getAttribute('href');
            const title = extractCardTitle(anchor);
            const desc = extractCardDescription(anchor);
            if (!href || !title) {
                continue;
            }
            items.appendChild(createLinkListItem(document, href, title, desc));
        }
        if (items.children.length === 0) {
            continue;
        }

        const wrapper = document.createElement('div');
        wrapper.setAttribute('data-preserved-cards', 'true');
        wrapper.appendChild(items);
        grid.parentNode?.replaceChild(wrapper, grid);
    }
}
|
|
148
|
+
/**
 * Replace containers that are marked up as card collections with link lists.
 *
 * For each known class/attribute selector, every matching container that has
 * at least one descendant <a href> is replaced by a
 * `<ul data-preserved-cards="true">` holding one item per link for which both
 * an href and a title could be obtained (no descriptions are added here).
 * Containers that yield no items are left untouched. Any exception raised
 * while handling a selector is swallowed and processing moves on to the next
 * selector.
 */
function processSemanticCards(document) {
    const containerSelectors = [
        '[class*="card-group"]',
        '[class*="card-grid"]',
        '[class*="cards"]',
        '[data-cards]',
        '[class*="link-card"]',
        '[class*="feature-card"]',
    ];

    containerSelectors.forEach((selector) => {
        try {
            document.querySelectorAll(selector).forEach((container) => {
                const anchors = container.querySelectorAll('a[href]');
                if (anchors.length === 0) {
                    return;
                }

                const items = document.createElement('ul');
                items.setAttribute('data-preserved-cards', 'true');
                anchors.forEach((anchor) => {
                    const href = anchor.getAttribute('href');
                    const title = extractCardTitle(anchor);
                    if (!href || !title) {
                        return;
                    }
                    items.appendChild(createLinkListItem(document, href, title));
                });

                if (items.children.length > 0) {
                    container.parentNode?.replaceChild(items, container);
                }
            });
        }
        catch {
            // Selector might be invalid, skip it
        }
    });
}
|
|
185
|
+
/**
 * Rewrite card-style navigation in `document` as plain link lists, in place.
 *
 * Runs three passes in a fixed order: custom <card> elements, then div-based
 * card grids, then containers with card-like class or data attributes.
 * Intended to run before Readability so the links are not stripped.
 */
export function preserveCardLinks(document) {
    const passes = [processCustomCards, processCardGrids, processSemanticCards];
    for (const runPass of passes) {
        runPass(document);
    }
}
|
|
194
|
+
//# sourceMappingURL=card-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"card-extractor.js","sourceRoot":"","sources":["../../src/services/card-extractor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,eAAe,GAAG,mDAAmD,CAAC;AAE5E;;GAEG;AACH,SAAS,YAAY,CAAC,OAAgB;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,IAAI,CAAY,CAAC;IACjD,KAAK,CAAC,gBAAgB,CAAC,eAAe,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;QACrD,EAAE,CAAC,MAAM,EAAE,CAAC;IACd,CAAC,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAa;IACrC,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAEjC,kFAAkF;IAClF,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,gBAAgB,CAAC,KAAK,CAAC,EAAE,CAAC;QAChD,IAAI,GAAG,CAAC,aAAa,CAAC,KAAK,CAAC;YAAE,SAAS,CAAC,sBAAsB;QAE9D,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACpC,IACE,IAAI,CAAC,MAAM,GAAG,CAAC;YACf,IAAI,CAAC,MAAM,GAAG,EAAE;YAChB,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACxB,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACtB,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CACjC,0CAA0C,CAC3C,CAAC;IACF,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,KAAK,CAAC;IAC3D,CAAC;IAED,6CAA6C;IAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;IACtD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAEzD,MAAM,SAAS,GAAG,IAAI;SACnB,KAAK,CAAC,OAAO,CAAC;SACd,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;QACjC,EAAE,IAAI,EAAE,CAAC;IACX,OAAO,SAAS,IAAI,IAAI,CAAC;AAC3B,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,IAAa;IAC3C,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAEjC,MAAM,MAAM,GAAG,KAAK,CAAC,aAAa,CAChC,6CAA6C,CAC9C,CAAC;IACF,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,MAAM,GAAG,C
AAC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,4BAA4B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1D,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IAE/D,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CACzB,QAAkB,EAClB,IAAY,EACZ,KAAa,EACb,WAA2B;IAE3B,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;IACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAChC,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IACzB,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAErB,IAAI,WAAW,IAAI,WAAW,KAAK,KAAK,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QACzE,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,MAAM,WAAW,EAAE,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,QAAkB;IAC5C,MAAM,WAAW,GAAG,QAAQ,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CAAC;IACzE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAErC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;IAElD,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAEpE,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;YAClB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,WAAW,CAAC,IAAI,EAAE,CAAC;YACzD,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QACjC,SAAS,EAAE,UAAU,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QACrD,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3B,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,QAAkB;IAC1C,+EAA+E;IAC/E,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,gBAAgB,CAAC,KAAK,CAAC,EAAE,CAAC;QACnD,6FAA6F;QAC7F,MAAM,UAAU,GAAG,
GAAG,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;QAE5D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3D,MAAM,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,mBAAmB,CAAC,CAAC;YACrE,MAAM,iBAAiB,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;YAC7D,OAAO,oBAAoB,IAAI,iBAAiB,CAAC;QACnD,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc;YAAE,SAAS;QAE9B,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC9C,OAAO,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAE1C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,IAAI,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;YAE1C,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;gBAClB,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC1B,GAAG,CAAC,UAAU,EAAE,YAAY,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAkB;IAC9C,MAAM,aAAa,GAAG;QACpB,uBAAuB;QACvB,sBAAsB;QACtB,kBAAkB;QAClB,cAAc;QACd,sBAAsB;QACtB,yBAAyB;KAC1B,CAAC;IAEF,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;QACrC,IAAI,CAAC;YACH,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5D,MAAM,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;gBACpD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBAEjC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;gBAC1C,IAAI,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;gBAElD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;oBACvC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;oBAErC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;wBAClB,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;oBAC9D,CAAC;gBACH,CAAC;gBAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7B,SAAS,CAAC,UAAU,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBACtD,CAAC;YACH,CAAC
;QACH,CAAC;QAAC,MAAM,CAAC;YACP,qCAAqC;QACvC,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAkB;IAClD,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC7B,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC3B,oBAAoB,CAAC,QAAQ,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -1,22 +1,15 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
3
|
-
interface ExtractedMetadata {
|
|
4
|
-
title?: string;
|
|
5
|
-
description?: string;
|
|
6
|
-
author?: string;
|
|
7
|
-
}
|
|
8
|
-
/** Combined extraction result (internal) */
|
|
9
|
-
interface ExtractionResult {
|
|
10
|
-
article: ExtractedArticle | null;
|
|
11
|
-
metadata: ExtractedMetadata;
|
|
12
|
-
}
|
|
1
|
+
import type { CheerioAPI } from 'cheerio';
|
|
2
|
+
import type { ExtractedMetadata, ExtractionResult } from '../config/types.js';
|
|
13
3
|
/**
|
|
14
|
-
*
|
|
15
|
-
* This
|
|
16
|
-
* @param html - HTML string to extract content from
|
|
17
|
-
* @param url - URL of the page (used for resolving relative links)
|
|
18
|
-
* @returns Extraction result with article and metadata
|
|
4
|
+
* Extract metadata using Cheerio (fast, no full DOM)
|
|
5
|
+
* This avoids JSDOM overhead for simple meta tag extraction
|
|
19
6
|
*/
|
|
20
|
-
export declare function
|
|
21
|
-
|
|
7
|
+
export declare function extractMetadataWithCheerio($: CheerioAPI): ExtractedMetadata;
|
|
8
|
+
/**
|
|
9
|
+
* Main extraction function - uses Cheerio for metadata (fast)
|
|
10
|
+
* and lazy-loads JSDOM only when article extraction is needed
|
|
11
|
+
*/
|
|
12
|
+
export declare function extractContent(html: string, url: string, options?: {
|
|
13
|
+
extractArticle?: boolean;
|
|
14
|
+
}): ExtractionResult;
|
|
22
15
|
//# sourceMappingURL=extractor.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAK1C,OAAO,KAAK,EAEV,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,oBAAoB,CAAC;AAgB5B;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,CAAC,EAAE,UAAU,GAAG,iBAAiB,CA4B3E;AAsCD;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAA6B,GAC/D,gBAAgB,CAsClB"}
|
|
@@ -1,65 +1,79 @@
|
|
|
1
|
-
import
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import { JSDOM, VirtualConsole } from 'jsdom';
|
|
2
3
|
import { Readability } from '@mozilla/readability';
|
|
4
|
+
import { preserveCardLinks } from './card-extractor.js';
|
|
3
5
|
import { logError, logWarn } from './logger.js';
|
|
4
|
-
// Maximum HTML size to process (10MB)
|
|
5
6
|
const MAX_HTML_SIZE = 10 * 1024 * 1024;
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
}
|
|
7
|
+
// Shared VirtualConsole to suppress JSDOM warnings/errors
|
|
8
|
+
const sharedVirtualConsole = new VirtualConsole();
|
|
9
|
+
sharedVirtualConsole.on('error', () => {
|
|
10
|
+
/* suppress JSDOM errors */
|
|
11
|
+
});
|
|
12
|
+
sharedVirtualConsole.on('warn', () => {
|
|
13
|
+
/* suppress JSDOM warnings */
|
|
14
|
+
});
|
|
14
15
|
/**
|
|
15
|
-
*
|
|
16
|
+
* Extract metadata using Cheerio (fast, no full DOM)
|
|
17
|
+
* This avoids JSDOM overhead for simple meta tag extraction
|
|
16
18
|
*/
|
|
17
|
-
function
|
|
18
|
-
const
|
|
19
|
+
export function extractMetadataWithCheerio($) {
|
|
20
|
+
const getMetaContent = (selectors) => {
|
|
21
|
+
for (const selector of selectors) {
|
|
22
|
+
const content = $(selector).attr('content');
|
|
23
|
+
if (content)
|
|
24
|
+
return content;
|
|
25
|
+
}
|
|
26
|
+
return undefined;
|
|
27
|
+
};
|
|
28
|
+
const title = getMetaContent([
|
|
19
29
|
'meta[property="og:title"]',
|
|
20
30
|
'meta[name="twitter:title"]',
|
|
21
31
|
]) ??
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const description = getMetaContent(document, [
|
|
32
|
+
($('title').text() || undefined);
|
|
33
|
+
const description = getMetaContent([
|
|
25
34
|
'meta[property="og:description"]',
|
|
26
35
|
'meta[name="twitter:description"]',
|
|
27
36
|
'meta[name="description"]',
|
|
28
37
|
]);
|
|
29
|
-
const author = getMetaContent(
|
|
38
|
+
const author = getMetaContent([
|
|
30
39
|
'meta[name="author"]',
|
|
31
40
|
'meta[property="article:author"]',
|
|
32
41
|
]);
|
|
33
42
|
return { title, description, author };
|
|
34
43
|
}
|
|
35
44
|
/**
|
|
36
|
-
*
|
|
45
|
+
* Extract article content using JSDOM + Readability
|
|
46
|
+
* Only called when extractMainContent is true (lazy loading)
|
|
37
47
|
*/
|
|
38
|
-
function
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
48
|
+
function extractArticleWithJsdom(html, url) {
|
|
49
|
+
try {
|
|
50
|
+
// Use shared VirtualConsole to reduce per-parse overhead
|
|
51
|
+
const dom = new JSDOM(html, { url, virtualConsole: sharedVirtualConsole });
|
|
52
|
+
const { document } = dom.window;
|
|
53
|
+
preserveCardLinks(document);
|
|
54
|
+
const reader = new Readability(document);
|
|
55
|
+
const article = reader.parse();
|
|
56
|
+
if (!article)
|
|
57
|
+
return null;
|
|
58
|
+
return {
|
|
59
|
+
title: article.title ?? undefined,
|
|
60
|
+
byline: article.byline ?? undefined,
|
|
61
|
+
content: article.content ?? '',
|
|
62
|
+
textContent: article.textContent ?? '',
|
|
63
|
+
excerpt: article.excerpt ?? undefined,
|
|
64
|
+
siteName: article.siteName ?? undefined,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
catch (error) {
|
|
68
|
+
logError('Failed to extract article with JSDOM', error instanceof Error ? error : undefined);
|
|
44
69
|
return null;
|
|
45
|
-
|
|
46
|
-
title: article.title ?? undefined,
|
|
47
|
-
byline: article.byline ?? undefined,
|
|
48
|
-
content: article.content ?? '',
|
|
49
|
-
textContent: article.textContent ?? '',
|
|
50
|
-
excerpt: article.excerpt ?? undefined,
|
|
51
|
-
siteName: article.siteName ?? undefined,
|
|
52
|
-
};
|
|
70
|
+
}
|
|
53
71
|
}
|
|
54
72
|
/**
|
|
55
|
-
*
|
|
56
|
-
*
|
|
57
|
-
* @param html - HTML string to extract content from
|
|
58
|
-
* @param url - URL of the page (used for resolving relative links)
|
|
59
|
-
* @returns Extraction result with article and metadata
|
|
73
|
+
* Main extraction function - uses Cheerio for metadata (fast)
|
|
74
|
+
* and lazy-loads JSDOM only when article extraction is needed
|
|
60
75
|
*/
|
|
61
|
-
export function extractContent(html, url) {
|
|
62
|
-
// Input validation
|
|
76
|
+
export function extractContent(html, url, options = { extractArticle: true }) {
|
|
63
77
|
if (!html || typeof html !== 'string') {
|
|
64
78
|
logWarn('extractContent called with invalid HTML input');
|
|
65
79
|
return { article: null, metadata: {} };
|
|
@@ -68,7 +82,6 @@ export function extractContent(html, url) {
|
|
|
68
82
|
logWarn('extractContent called with invalid URL');
|
|
69
83
|
return { article: null, metadata: {} };
|
|
70
84
|
}
|
|
71
|
-
// Size validation to prevent memory issues
|
|
72
85
|
let processedHtml = html;
|
|
73
86
|
if (html.length > MAX_HTML_SIZE) {
|
|
74
87
|
logWarn('HTML content exceeds maximum size for extraction, truncating', {
|
|
@@ -78,12 +91,13 @@ export function extractContent(html, url) {
|
|
|
78
91
|
processedHtml = html.substring(0, MAX_HTML_SIZE);
|
|
79
92
|
}
|
|
80
93
|
try {
|
|
81
|
-
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
94
|
+
// Fast path: Extract metadata with Cheerio (no full DOM parsing)
|
|
95
|
+
const $ = cheerio.load(processedHtml);
|
|
96
|
+
const metadata = extractMetadataWithCheerio($);
|
|
97
|
+
// Lazy path: Only use JSDOM when article extraction is requested
|
|
98
|
+
const article = options.extractArticle
|
|
99
|
+
? extractArticleWithJsdom(processedHtml, url)
|
|
100
|
+
: null;
|
|
87
101
|
return { article, metadata };
|
|
88
102
|
}
|
|
89
103
|
catch (error) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,OAAO,CAAC;AAE9C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAQnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAEvC,0DAA0D;AAC1D,MAAM,oBAAoB,GAAG,IAAI,cAAc,EAAE,CAAC;AAClD,oBAAoB,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;IACpC,2BAA2B;AAC7B,CAAC,CAAC,CAAC;AACH,oBAAoB,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;IACnC,6BAA6B;AAC/B,CAAC,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,UAAU,0BAA0B,CAAC,CAAa;IACtD,MAAM,cAAc,GAAG,CAAC,SAAmB,EAAsB,EAAE;QACjE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC5C,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC;QAC9B,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC;IAEF,MAAM,KAAK,GACT,cAAc,CAAC;QACb,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC;QACF,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC,CAAC;IAEnC,MAAM,WAAW,GAAG,cAAc,CAAC;QACjC,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC;QAC5B,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;;GAGG;AACH,SAAS,uBAAuB,CAC9B,IAAY,EACZ,GAAW;IAEX,IAAI,CAAC;QACH,yDAAyD;QACzD,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,cAAc,EAAE,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,EAAE,QAAQ,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC;QAEhC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAE/B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;YACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;YACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;YAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;YACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;YACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sCAAsC,EACtC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,
KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,GAAW,EACX,UAAwC,EAAE,cAAc,EAAE,IAAI,EAAE;IAEhE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,8DAA8D,EAAE;YACtE,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,CAAC;QACH,iEAAiE;QACjE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,0BAA0B,CAAC,CAAC,CAAC,CAAC;QAE/C,iEAAiE;QACjE,MAAM,OAAO,GAAG,OAAO,CAAC,cAAc;YACpC,CAAC,CAAC,uBAAuB,CAAC,aAAa,EAAE,GAAG,CAAC;YAC7C,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
|
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
/**
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
/** Options for fetch operations */
|
|
2
|
+
export interface FetchOptions {
|
|
3
|
+
/** Custom HTTP headers to include in the request */
|
|
4
|
+
customHeaders?: Record<string, string>;
|
|
5
|
+
/** AbortSignal for request cancellation */
|
|
6
|
+
signal?: AbortSignal;
|
|
7
|
+
/** Per-request timeout override in milliseconds */
|
|
8
|
+
timeout?: number;
|
|
9
|
+
}
|
|
5
10
|
export declare function destroyAgents(): void;
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
* @param maxRetries - Maximum retry attempts (1-10, defaults to 3)
|
|
11
|
-
*/
|
|
12
|
-
export declare function fetchUrlWithRetry(url: string, customHeaders?: Record<string, string>, maxRetries?: number): Promise<string>;
|
|
11
|
+
export declare function fetchUrlWithRetry(url: string, options?: FetchOptions, maxRetries?: number, skipCache?: boolean): Promise<{
|
|
12
|
+
html: string;
|
|
13
|
+
fromHtmlCache: boolean;
|
|
14
|
+
}>;
|
|
13
15
|
//# sourceMappingURL=fetcher.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAqBA,mCAAmC;AACnC,MAAM,WAAW,YAAY;IAC3B,oDAAoD;IACpD,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,mDAAmD;IACnD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AA6DD,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AA4LD,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,YAAY,EACtB,UAAU,SAAI,EACd,SAAS,UAAQ,GAChB,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,OAAO,CAAA;CAAE,CAAC,CAoEnD"}
|