@j0hanz/superfetch 1.0.6 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/README.md +228 -36
  2. package/dist/config/index.d.ts +10 -5
  3. package/dist/config/index.d.ts.map +1 -1
  4. package/dist/config/index.js +73 -19
  5. package/dist/config/index.js.map +1 -1
  6. package/dist/config/types.d.ts +98 -57
  7. package/dist/config/types.d.ts.map +1 -1
  8. package/dist/errors/app-error.d.ts +4 -28
  9. package/dist/errors/app-error.d.ts.map +1 -1
  10. package/dist/errors/app-error.js +10 -51
  11. package/dist/errors/app-error.js.map +1 -1
  12. package/dist/index.js +10 -55
  13. package/dist/index.js.map +1 -1
  14. package/dist/middleware/error-handler.d.ts +2 -2
  15. package/dist/middleware/error-handler.d.ts.map +1 -1
  16. package/dist/middleware/error-handler.js +12 -14
  17. package/dist/middleware/error-handler.js.map +1 -1
  18. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  19. package/dist/middleware/rate-limiter.js +0 -8
  20. package/dist/middleware/rate-limiter.js.map +1 -1
  21. package/dist/parsers/base-html-element-parser.d.ts +43 -0
  22. package/dist/parsers/base-html-element-parser.d.ts.map +1 -0
  23. package/dist/parsers/base-html-element-parser.js +59 -0
  24. package/dist/parsers/base-html-element-parser.js.map +1 -0
  25. package/dist/parsers/heading-element-parser.d.ts +14 -0
  26. package/dist/parsers/heading-element-parser.d.ts.map +1 -0
  27. package/dist/parsers/heading-element-parser.js +26 -0
  28. package/dist/parsers/heading-element-parser.js.map +1 -0
  29. package/dist/parsers/image-element-parser.d.ts +16 -0
  30. package/dist/parsers/image-element-parser.d.ts.map +1 -0
  31. package/dist/parsers/image-element-parser.js +33 -0
  32. package/dist/parsers/image-element-parser.js.map +1 -0
  33. package/dist/parsers/link-element-parser.d.ts +15 -0
  34. package/dist/parsers/link-element-parser.d.ts.map +1 -0
  35. package/dist/parsers/link-element-parser.js +28 -0
  36. package/dist/parsers/link-element-parser.js.map +1 -0
  37. package/dist/parsers/open-graph-parser.d.ts +17 -0
  38. package/dist/parsers/open-graph-parser.d.ts.map +1 -0
  39. package/dist/parsers/open-graph-parser.js +41 -0
  40. package/dist/parsers/open-graph-parser.js.map +1 -0
  41. package/dist/parsers/schema-org-parser.d.ts +17 -0
  42. package/dist/parsers/schema-org-parser.d.ts.map +1 -0
  43. package/dist/parsers/schema-org-parser.js +32 -0
  44. package/dist/parsers/schema-org-parser.js.map +1 -0
  45. package/dist/parsers/standard-meta-parser.d.ts +18 -0
  46. package/dist/parsers/standard-meta-parser.d.ts.map +1 -0
  47. package/dist/parsers/standard-meta-parser.js +32 -0
  48. package/dist/parsers/standard-meta-parser.js.map +1 -0
  49. package/dist/parsers/twitter-card-parser.d.ts +17 -0
  50. package/dist/parsers/twitter-card-parser.d.ts.map +1 -0
  51. package/dist/parsers/twitter-card-parser.js +41 -0
  52. package/dist/parsers/twitter-card-parser.js.map +1 -0
  53. package/dist/resources/cached-content.d.ts +0 -1
  54. package/dist/resources/cached-content.d.ts.map +1 -1
  55. package/dist/resources/cached-content.js +3 -9
  56. package/dist/resources/cached-content.js.map +1 -1
  57. package/dist/resources/index.d.ts.map +1 -1
  58. package/dist/resources/index.js +8 -8
  59. package/dist/resources/index.js.map +1 -1
  60. package/dist/server.d.ts.map +1 -1
  61. package/dist/server.js +10 -10
  62. package/dist/server.js.map +1 -1
  63. package/dist/services/cache.d.ts +0 -28
  64. package/dist/services/cache.d.ts.map +1 -1
  65. package/dist/services/cache.js +10 -173
  66. package/dist/services/cache.js.map +1 -1
  67. package/dist/services/extractor.d.ts +1 -11
  68. package/dist/services/extractor.d.ts.map +1 -1
  69. package/dist/services/extractor.js +86 -84
  70. package/dist/services/extractor.js.map +1 -1
  71. package/dist/services/fetcher.d.ts +2 -13
  72. package/dist/services/fetcher.d.ts.map +1 -1
  73. package/dist/services/fetcher.js +195 -211
  74. package/dist/services/fetcher.js.map +1 -1
  75. package/dist/services/logger.d.ts +5 -4
  76. package/dist/services/logger.d.ts.map +1 -1
  77. package/dist/services/logger.js +27 -42
  78. package/dist/services/logger.js.map +1 -1
  79. package/dist/services/parser.d.ts.map +1 -1
  80. package/dist/services/parser.js +35 -26
  81. package/dist/services/parser.js.map +1 -1
  82. package/dist/services/session-manager.d.ts +18 -0
  83. package/dist/services/session-manager.d.ts.map +1 -0
  84. package/dist/services/session-manager.js +73 -0
  85. package/dist/services/session-manager.js.map +1 -0
  86. package/dist/strategies/exponential-backoff-strategy.d.ts +13 -0
  87. package/dist/strategies/exponential-backoff-strategy.d.ts.map +1 -0
  88. package/dist/strategies/exponential-backoff-strategy.js +32 -0
  89. package/dist/strategies/exponential-backoff-strategy.js.map +1 -0
  90. package/dist/tools/handlers/fetch-links.tool.d.ts +2 -9
  91. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  92. package/dist/tools/handlers/fetch-links.tool.js +0 -1
  93. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  94. package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -2
  95. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  96. package/dist/tools/handlers/fetch-markdown.tool.js +23 -33
  97. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  98. package/dist/tools/handlers/fetch-url.tool.d.ts +2 -9
  99. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  100. package/dist/tools/handlers/fetch-url.tool.js +15 -20
  101. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  102. package/dist/tools/handlers/fetch-urls.tool.d.ts +2 -9
  103. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
  104. package/dist/tools/handlers/fetch-urls.tool.js +124 -105
  105. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
  106. package/dist/tools/index.d.ts.map +1 -1
  107. package/dist/tools/index.js +0 -4
  108. package/dist/tools/index.js.map +1 -1
  109. package/dist/tools/utils/common.d.ts +6 -7
  110. package/dist/tools/utils/common.d.ts.map +1 -1
  111. package/dist/tools/utils/common.js +8 -8
  112. package/dist/tools/utils/common.js.map +1 -1
  113. package/dist/tools/utils/fetch-pipeline.d.ts +8 -0
  114. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
  115. package/dist/tools/utils/fetch-pipeline.js +47 -79
  116. package/dist/tools/utils/fetch-pipeline.js.map +1 -1
  117. package/dist/transformers/jsonl.transformer.d.ts +1 -1
  118. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  119. package/dist/transformers/jsonl.transformer.js +15 -10
  120. package/dist/transformers/jsonl.transformer.js.map +1 -1
  121. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  122. package/dist/transformers/markdown.transformer.js +58 -62
  123. package/dist/transformers/markdown.transformer.js.map +1 -1
  124. package/dist/utils/concurrency.d.ts +2 -5
  125. package/dist/utils/concurrency.d.ts.map +1 -1
  126. package/dist/utils/concurrency.js +19 -19
  127. package/dist/utils/concurrency.js.map +1 -1
  128. package/dist/utils/content-cleaner.d.ts +0 -25
  129. package/dist/utils/content-cleaner.d.ts.map +1 -1
  130. package/dist/utils/content-cleaner.js +12 -187
  131. package/dist/utils/content-cleaner.js.map +1 -1
  132. package/dist/utils/html-truncator.d.ts +2 -0
  133. package/dist/utils/html-truncator.d.ts.map +1 -0
  134. package/dist/utils/html-truncator.js +14 -0
  135. package/dist/utils/html-truncator.js.map +1 -0
  136. package/dist/utils/language-detector.d.ts +0 -3
  137. package/dist/utils/language-detector.d.ts.map +1 -1
  138. package/dist/utils/language-detector.js +0 -11
  139. package/dist/utils/language-detector.js.map +1 -1
  140. package/dist/utils/sanitizer.d.ts.map +1 -1
  141. package/dist/utils/sanitizer.js +7 -5
  142. package/dist/utils/sanitizer.js.map +1 -1
  143. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  144. package/dist/utils/tool-error-handler.js +15 -42
  145. package/dist/utils/tool-error-handler.js.map +1 -1
  146. package/dist/utils/url-validator.d.ts +0 -6
  147. package/dist/utils/url-validator.d.ts.map +1 -1
  148. package/dist/utils/url-validator.js +12 -81
  149. package/dist/utils/url-validator.js.map +1 -1
  150. package/package.json +5 -6
@@ -1,78 +1,60 @@
1
- import * as cheerio from 'cheerio';
2
- import { JSDOM, VirtualConsole } from 'jsdom';
1
+ import { JSDOM } from 'jsdom';
3
2
  import { Readability } from '@mozilla/readability';
4
- import { config } from '../config/index.js';
5
- import { preserveCardLinks } from './card-extractor.js';
3
+ import { truncateHtml } from '../utils/html-truncator.js';
6
4
  import { logError, logWarn } from './logger.js';
7
- // Shared VirtualConsole to suppress JSDOM warnings/errors
8
- const sharedVirtualConsole = new VirtualConsole();
9
- sharedVirtualConsole.on('error', () => {
10
- /* suppress JSDOM errors */
11
- });
12
- sharedVirtualConsole.on('warn', () => {
13
- /* suppress JSDOM warnings */
14
- });
15
- /**
16
- * Extract metadata using Cheerio (fast, no full DOM)
17
- * This avoids JSDOM overhead for simple meta tag extraction
18
- */
19
- export function extractMetadataWithCheerio($) {
20
- const getMetaContent = (selectors) => {
21
- for (const selector of selectors) {
22
- const content = $(selector).attr('content');
23
- if (content)
24
- return content;
25
- }
26
- return undefined;
27
- };
28
- const title = getMetaContent([
29
- 'meta[property="og:title"]',
30
- 'meta[name="twitter:title"]',
31
- ]) ??
32
- ($('title').text() || undefined);
33
- const description = getMetaContent([
34
- 'meta[property="og:description"]',
35
- 'meta[name="twitter:description"]',
36
- 'meta[name="description"]',
37
- ]);
38
- const author = getMetaContent([
39
- 'meta[name="author"]',
40
- 'meta[property="article:author"]',
41
- ]);
42
- return { title, description, author };
5
+ function extractOpenGraph(document) {
6
+ const data = {};
7
+ const ogTags = document.querySelectorAll('meta[property^="og:"]');
8
+ for (const tag of ogTags) {
9
+ const property = tag.getAttribute('property');
10
+ const content = tag.getAttribute('content')?.trim();
11
+ if (!property || !content)
12
+ continue;
13
+ const key = property.replace('og:', '');
14
+ if (key === 'title')
15
+ data.title = content;
16
+ else if (key === 'description')
17
+ data.description = content;
18
+ }
19
+ return data;
43
20
  }
44
- /**
45
- * Extract article content using JSDOM + Readability
46
- * Only called when extractMainContent is true (lazy loading)
47
- */
48
- function extractArticleWithJsdom(html, url) {
49
- try {
50
- // Use shared VirtualConsole to reduce per-parse overhead
51
- const dom = new JSDOM(html, { url, virtualConsole: sharedVirtualConsole });
52
- const { document } = dom.window;
53
- preserveCardLinks(document);
54
- const reader = new Readability(document);
55
- const article = reader.parse();
56
- if (!article)
57
- return null;
58
- return {
59
- title: article.title ?? undefined,
60
- byline: article.byline ?? undefined,
61
- content: article.content ?? '',
62
- textContent: article.textContent ?? '',
63
- excerpt: article.excerpt ?? undefined,
64
- siteName: article.siteName ?? undefined,
65
- };
21
+ function extractTwitterCard(document) {
22
+ const data = {};
23
+ const twitterTags = document.querySelectorAll('meta[name^="twitter:"]');
24
+ for (const tag of twitterTags) {
25
+ const name = tag.getAttribute('name');
26
+ const content = tag.getAttribute('content')?.trim();
27
+ if (!name || !content)
28
+ continue;
29
+ const key = name.replace('twitter:', '');
30
+ if (key === 'title')
31
+ data.title = content;
32
+ else if (key === 'description')
33
+ data.description = content;
66
34
  }
67
- catch (error) {
68
- logError('Failed to extract article with JSDOM', error instanceof Error ? error : undefined);
69
- return null;
35
+ return data;
36
+ }
37
+ function extractStandardMeta(document) {
38
+ const data = {};
39
+ const metaTags = document.querySelectorAll('meta[name][content]');
40
+ for (const tag of metaTags) {
41
+ const name = tag.getAttribute('name');
42
+ const content = tag.getAttribute('content')?.trim();
43
+ if (!name || !content)
44
+ continue;
45
+ if (name === 'description')
46
+ data.description = content;
47
+ else if (name === 'author')
48
+ data.author = content;
49
+ }
50
+ if (!data.title) {
51
+ const titleEl = document.querySelector('title');
52
+ if (titleEl?.textContent)
53
+ data.title = titleEl.textContent.trim();
70
54
  }
55
+ return data;
71
56
  }
72
- /**
73
- * Main extraction function - uses Cheerio for metadata (fast)
74
- * and lazy-loads JSDOM only when article extraction is needed
75
- */
57
+ // Main extraction function
76
58
  export function extractContent(html, url, options = { extractArticle: true }) {
77
59
  if (!html || typeof html !== 'string') {
78
60
  logWarn('extractContent called with invalid HTML input');
@@ -82,22 +64,42 @@ export function extractContent(html, url, options = { extractArticle: true }) {
82
64
  logWarn('extractContent called with invalid URL');
83
65
  return { article: null, metadata: {} };
84
66
  }
85
- let processedHtml = html;
86
- if (html.length > config.constants.maxHtmlSize) {
87
- logWarn('HTML content exceeds maximum size for extraction, truncating', {
88
- size: html.length,
89
- maxSize: config.constants.maxHtmlSize,
90
- });
91
- processedHtml = html.substring(0, config.constants.maxHtmlSize);
92
- }
93
67
  try {
94
- // Fast path: Extract metadata with Cheerio (no full DOM parsing)
95
- const $ = cheerio.load(processedHtml);
96
- const metadata = extractMetadataWithCheerio($);
97
- // Lazy path: Only use JSDOM when article extraction is requested
98
- const article = options.extractArticle
99
- ? extractArticleWithJsdom(processedHtml, url)
100
- : null;
68
+ // Truncate HTML to improve performance
69
+ const processedHtml = truncateHtml(html);
70
+ // Parse HTML with JSDOM
71
+ const dom = new JSDOM(processedHtml, { url });
72
+ const { document } = dom.window;
73
+ const ogData = extractOpenGraph(document);
74
+ const twitterData = extractTwitterCard(document);
75
+ const standardData = extractStandardMeta(document);
76
+ const metadata = {
77
+ title: ogData.title ?? twitterData.title ?? standardData.title,
78
+ description: ogData.description ??
79
+ twitterData.description ??
80
+ standardData.description,
81
+ author: standardData.author,
82
+ };
83
+ let article = null;
84
+ if (options.extractArticle) {
85
+ try {
86
+ const reader = new Readability(document);
87
+ const parsed = reader.parse();
88
+ if (parsed) {
89
+ article = {
90
+ title: parsed.title ?? undefined,
91
+ byline: parsed.byline ?? undefined,
92
+ content: parsed.content ?? '',
93
+ textContent: parsed.textContent ?? '',
94
+ excerpt: parsed.excerpt ?? undefined,
95
+ siteName: parsed.siteName ?? undefined,
96
+ };
97
+ }
98
+ }
99
+ catch (error) {
100
+ logError('Failed to extract article with Readability', error instanceof Error ? error : undefined);
101
+ }
102
+ }
101
103
  return { article, metadata };
102
104
  }
103
105
  catch (error) {
@@ -1 +1 @@
1
- {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,OAAO,CAAC;AAE9C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAO5C,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,0DAA0D;AAC1D,MAAM,oBAAoB,GAAG,IAAI,cAAc,EAAE,CAAC;AAClD,oBAAoB,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;IACpC,2BAA2B;AAC7B,CAAC,CAAC,CAAC;AACH,oBAAoB,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;IACnC,6BAA6B;AAC/B,CAAC,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,UAAU,0BAA0B,CAAC,CAAa;IACtD,MAAM,cAAc,GAAG,CAAC,SAAmB,EAAsB,EAAE;QACjE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC5C,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC;QAC9B,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC;IAEF,MAAM,KAAK,GACT,cAAc,CAAC;QACb,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC;QACF,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC,CAAC;IAEnC,MAAM,WAAW,GAAG,cAAc,CAAC;QACjC,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC;QAC5B,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;;GAGG;AACH,SAAS,uBAAuB,CAC9B,IAAY,EACZ,GAAW;IAEX,IAAI,CAAC;QACH,yDAAyD;QACzD,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,cAAc,EAAE,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,EAAE,QAAQ,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC;QAEhC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAE/B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;YACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;YACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;YAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;YACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;YACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sCAAsC,EACtC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,GAAW,EACX,UAAwC,EAAE,cAAc,EAAE,IAAI,EAAE;IAEhE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QAC/C,OAAO,CAAC,8DAA8D,EAAE;YACtE,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC,WAAW;SACtC,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAClE,CAAC;IAED,IAAI,CAAC;QACH,iEAAiE;QACjE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,0BAA0B,CAAC,CAAC,CAAC,CAAC;QAE/C,iEAAiE;QACjE,MAAM,OAAO,GAAG,OAAO,CAAC,cAAc;YACpC,CAAC,CAAC,uBAAuB,CAAC,aAAa,EAAE,GAAG,CAAC;YAC7C,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAE9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAQnD,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAE1D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,SAAS,gBAAgB,CAAC,QAAkB;IAK1C,MAAM,IAAI,GAA6C,EAAE,CAAC;IAC1D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;IAElE,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,GAAG,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,CAAC;QACpD,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;YAAE,SAAS;QAEpC,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACxC,IAAI,GAAG,KAAK,OAAO;YAAE,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC;aACrC,IAAI,GAAG,KAAK,aAAa;YAAE,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC;IAC7D,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,kBAAkB,CAAC,QAAkB;IAI5C,MAAM,IAAI,GAA6C,EAAE,CAAC;IAC1D,MAAM,WAAW,GAAG,QAAQ,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC;IAExE,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC9B,MAAM,IAAI,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,CAAC;QACpD,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO;YAAE,SAAS;QAEhC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QACzC,IAAI,GAAG,KAAK,OAAO;YAAE,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC;aACrC,IAAI,GAAG,KAAK,aAAa;YAAE,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC;IAC7D,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,mBAAmB,CAAC,QAAkB;IAK7C,MAAM,IAAI,GAA8D,EAAE,CAAC;IAE3E,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,qBAAqB,CAAC,CAAC;IAClE,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,CAAC;QACpD,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO;YAAE,SAAS;QAEhC,IAAI,IAAI,KAAK,aAAa;YAAE,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC;aAClD,IAAI,IAAI,KAAK,QAAQ;YAAE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC;IACpD,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QAChB,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAChD,IAAI,OAAO,EAAE,WAAW;YAAE,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;IACpE,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,2BAA2B;AAC3B,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,GAAW,EACX,UAAwC,EAAE,cAAc,EAAE,IAAI,EAAE;IAEhE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC;QACH,uCAAuC;QACvC,MAAM,aAAa,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QACzC,wBAAwB;QACxB,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,aAAa,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAC9C,MAAM,EAAE,QAAQ,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC;QAChC,MAAM,MAAM,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,WAAW,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEnD,MAAM,QAAQ,GAAsB;YAClC,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,WAAW,CAAC,KAAK,IAAI,YAAY,CAAC,KAAK;YAC9D,WAAW,EACT,MAAM,CAAC,WAAW;gBAClB,WAAW,CAAC,WAAW;gBACvB,YAAY,CAAC,WAAW;YAC1B,MAAM,EAAE,YAAY,CAAC,MAAM;SAC5B,CAAC;QACF,IAAI,OAAO,GAA4B,IAAI,CAAC;QAC5C,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;YAC3B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC;gBACzC,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;gBAE9B,IAAI,MAAM,EAAE,CAAC;oBACX,OAAO,GAAG;wBACR,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,SAAS;wBAChC,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,SAAS;wBAClC,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;wBAC7B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,EAAE;wBACrC,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,SAAS;wBACpC,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,SAAS;qBACvC,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,QAAQ,CACN,4CAA4C,EAC5C,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
@@ -1,15 +1,4 @@
1
- /** Options for fetch operations */
2
- export interface FetchOptions {
3
- /** Custom HTTP headers to include in the request */
4
- customHeaders?: Record<string, string>;
5
- /** AbortSignal for request cancellation */
6
- signal?: AbortSignal;
7
- /** Per-request timeout override in milliseconds */
8
- timeout?: number;
9
- }
1
+ import type { FetchOptions } from '../config/types.js';
10
2
  export declare function destroyAgents(): void;
11
- export declare function fetchUrlWithRetry(url: string, options?: FetchOptions, maxRetries?: number, skipCache?: boolean): Promise<{
12
- html: string;
13
- fromHtmlCache: boolean;
14
- }>;
3
+ export declare function fetchUrlWithRetry(url: string, options?: FetchOptions, maxRetries?: number): Promise<string>;
15
4
  //# sourceMappingURL=fetcher.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAwBA,mCAAmC;AACnC,MAAM,WAAW,YAAY;IAC3B,oDAAoD;IACpD,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,mDAAmD;IACnD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAwED,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AA+MD,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,YAAY,EACtB,UAAU,SAAI,EACd,SAAS,UAAQ,GAChB,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,OAAO,CAAA;CAAE,CAAC,CAoEnD"}
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AA2UvD,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AAED,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,YAAY,EACtB,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,CAAC,CAsCjB"}