@staticn0va/wigolo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/LICENSE +74 -0
  2. package/README.md +272 -0
  3. package/dist/cache/db.d.ts +5 -0
  4. package/dist/cache/db.d.ts.map +1 -0
  5. package/dist/cache/db.js +97 -0
  6. package/dist/cache/db.js.map +1 -0
  7. package/dist/cache/store.d.ts +26 -0
  8. package/dist/cache/store.d.ts.map +1 -0
  9. package/dist/cache/store.js +214 -0
  10. package/dist/cache/store.js.map +1 -0
  11. package/dist/cli/daemon.d.ts +2 -0
  12. package/dist/cli/daemon.d.ts.map +1 -0
  13. package/dist/cli/daemon.js +5 -0
  14. package/dist/cli/daemon.js.map +1 -0
  15. package/dist/cli/health.d.ts +2 -0
  16. package/dist/cli/health.d.ts.map +1 -0
  17. package/dist/cli/health.js +5 -0
  18. package/dist/cli/health.js.map +1 -0
  19. package/dist/cli/index.d.ts +7 -0
  20. package/dist/cli/index.d.ts.map +1 -0
  21. package/dist/cli/index.js +9 -0
  22. package/dist/cli/index.js.map +1 -0
  23. package/dist/cli/warmup.d.ts +11 -0
  24. package/dist/cli/warmup.d.ts.map +1 -0
  25. package/dist/cli/warmup.js +107 -0
  26. package/dist/cli/warmup.js.map +1 -0
  27. package/dist/config.d.ts +41 -0
  28. package/dist/config.d.ts.map +1 -0
  29. package/dist/config.js +66 -0
  30. package/dist/config.js.map +1 -0
  31. package/dist/crawl/crawler.d.ts +18 -0
  32. package/dist/crawl/crawler.d.ts.map +1 -0
  33. package/dist/crawl/crawler.js +228 -0
  34. package/dist/crawl/crawler.js.map +1 -0
  35. package/dist/crawl/dedup.d.ts +15 -0
  36. package/dist/crawl/dedup.d.ts.map +1 -0
  37. package/dist/crawl/dedup.js +93 -0
  38. package/dist/crawl/dedup.js.map +1 -0
  39. package/dist/crawl/mapper.d.ts +17 -0
  40. package/dist/crawl/mapper.d.ts.map +1 -0
  41. package/dist/crawl/mapper.js +178 -0
  42. package/dist/crawl/mapper.js.map +1 -0
  43. package/dist/crawl/rate-limiter.d.ts +10 -0
  44. package/dist/crawl/rate-limiter.d.ts.map +1 -0
  45. package/dist/crawl/rate-limiter.js +72 -0
  46. package/dist/crawl/rate-limiter.js.map +1 -0
  47. package/dist/crawl/robots.d.ts +9 -0
  48. package/dist/crawl/robots.d.ts.map +1 -0
  49. package/dist/crawl/robots.js +63 -0
  50. package/dist/crawl/robots.js.map +1 -0
  51. package/dist/crawl/sitemap.d.ts +4 -0
  52. package/dist/crawl/sitemap.d.ts.map +1 -0
  53. package/dist/crawl/sitemap.js +38 -0
  54. package/dist/crawl/sitemap.js.map +1 -0
  55. package/dist/crawl/url-utils.d.ts +3 -0
  56. package/dist/crawl/url-utils.d.ts.map +1 -0
  57. package/dist/crawl/url-utils.js +41 -0
  58. package/dist/crawl/url-utils.js.map +1 -0
  59. package/dist/extraction/defuddle.d.ts +3 -0
  60. package/dist/extraction/defuddle.d.ts.map +1 -0
  61. package/dist/extraction/defuddle.js +26 -0
  62. package/dist/extraction/defuddle.js.map +1 -0
  63. package/dist/extraction/extract.d.ts +5 -0
  64. package/dist/extraction/extract.d.ts.map +1 -0
  65. package/dist/extraction/extract.js +83 -0
  66. package/dist/extraction/extract.js.map +1 -0
  67. package/dist/extraction/jsonld.d.ts +4 -0
  68. package/dist/extraction/jsonld.d.ts.map +1 -0
  69. package/dist/extraction/jsonld.js +64 -0
  70. package/dist/extraction/jsonld.js.map +1 -0
  71. package/dist/extraction/markdown.d.ts +10 -0
  72. package/dist/extraction/markdown.d.ts.map +1 -0
  73. package/dist/extraction/markdown.js +107 -0
  74. package/dist/extraction/markdown.js.map +1 -0
  75. package/dist/extraction/pipeline.d.ts +11 -0
  76. package/dist/extraction/pipeline.d.ts.map +1 -0
  77. package/dist/extraction/pipeline.js +95 -0
  78. package/dist/extraction/pipeline.js.map +1 -0
  79. package/dist/extraction/readability.d.ts +3 -0
  80. package/dist/extraction/readability.d.ts.map +1 -0
  81. package/dist/extraction/readability.js +32 -0
  82. package/dist/extraction/readability.js.map +1 -0
  83. package/dist/extraction/schema.d.ts +7 -0
  84. package/dist/extraction/schema.d.ts.map +1 -0
  85. package/dist/extraction/schema.js +86 -0
  86. package/dist/extraction/schema.js.map +1 -0
  87. package/dist/extraction/site-extractors/docs-generic.d.ts +3 -0
  88. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -0
  89. package/dist/extraction/site-extractors/docs-generic.js +104 -0
  90. package/dist/extraction/site-extractors/docs-generic.js.map +1 -0
  91. package/dist/extraction/site-extractors/github.d.ts +3 -0
  92. package/dist/extraction/site-extractors/github.d.ts.map +1 -0
  93. package/dist/extraction/site-extractors/github.js +107 -0
  94. package/dist/extraction/site-extractors/github.js.map +1 -0
  95. package/dist/extraction/site-extractors/mdn.d.ts +3 -0
  96. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -0
  97. package/dist/extraction/site-extractors/mdn.js +58 -0
  98. package/dist/extraction/site-extractors/mdn.js.map +1 -0
  99. package/dist/extraction/site-extractors/stackoverflow.d.ts +3 -0
  100. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -0
  101. package/dist/extraction/site-extractors/stackoverflow.js +88 -0
  102. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -0
  103. package/dist/extraction/trafilatura.d.ts +6 -0
  104. package/dist/extraction/trafilatura.d.ts.map +1 -0
  105. package/dist/extraction/trafilatura.js +105 -0
  106. package/dist/extraction/trafilatura.js.map +1 -0
  107. package/dist/fetch/auth.d.ts +8 -0
  108. package/dist/fetch/auth.d.ts.map +1 -0
  109. package/dist/fetch/auth.js +32 -0
  110. package/dist/fetch/auth.js.map +1 -0
  111. package/dist/fetch/browser-pool.d.ts +28 -0
  112. package/dist/fetch/browser-pool.d.ts.map +1 -0
  113. package/dist/fetch/browser-pool.js +138 -0
  114. package/dist/fetch/browser-pool.js.map +1 -0
  115. package/dist/fetch/content-check.d.ts +2 -0
  116. package/dist/fetch/content-check.d.ts.map +1 -0
  117. package/dist/fetch/content-check.js +62 -0
  118. package/dist/fetch/content-check.js.map +1 -0
  119. package/dist/fetch/http-client.d.ts +15 -0
  120. package/dist/fetch/http-client.d.ts.map +1 -0
  121. package/dist/fetch/http-client.js +146 -0
  122. package/dist/fetch/http-client.js.map +1 -0
  123. package/dist/fetch/router.d.ts +45 -0
  124. package/dist/fetch/router.d.ts.map +1 -0
  125. package/dist/fetch/router.js +89 -0
  126. package/dist/fetch/router.js.map +1 -0
  127. package/dist/index.d.ts +3 -0
  128. package/dist/index.d.ts.map +1 -0
  129. package/dist/index.js +22 -0
  130. package/dist/index.js.map +1 -0
  131. package/dist/logger.d.ts +10 -0
  132. package/dist/logger.d.ts.map +1 -0
  133. package/dist/logger.js +39 -0
  134. package/dist/logger.js.map +1 -0
  135. package/dist/search/dedup.d.ts +10 -0
  136. package/dist/search/dedup.d.ts.map +1 -0
  137. package/dist/search/dedup.js +35 -0
  138. package/dist/search/dedup.js.map +1 -0
  139. package/dist/search/engines/bing.d.ts +7 -0
  140. package/dist/search/engines/bing.d.ts.map +1 -0
  141. package/dist/search/engines/bing.js +48 -0
  142. package/dist/search/engines/bing.js.map +1 -0
  143. package/dist/search/engines/duckduckgo.d.ts +7 -0
  144. package/dist/search/engines/duckduckgo.d.ts.map +1 -0
  145. package/dist/search/engines/duckduckgo.js +50 -0
  146. package/dist/search/engines/duckduckgo.js.map +1 -0
  147. package/dist/search/engines/startpage.d.ts +7 -0
  148. package/dist/search/engines/startpage.d.ts.map +1 -0
  149. package/dist/search/engines/startpage.js +50 -0
  150. package/dist/search/engines/startpage.js.map +1 -0
  151. package/dist/search/filters.d.ts +16 -0
  152. package/dist/search/filters.d.ts.map +1 -0
  153. package/dist/search/filters.js +63 -0
  154. package/dist/search/filters.js.map +1 -0
  155. package/dist/search/flashrank.d.ts +12 -0
  156. package/dist/search/flashrank.d.ts.map +1 -0
  157. package/dist/search/flashrank.js +63 -0
  158. package/dist/search/flashrank.js.map +1 -0
  159. package/dist/search/query.d.ts +2 -0
  160. package/dist/search/query.d.ts.map +1 -0
  161. package/dist/search/query.js +41 -0
  162. package/dist/search/query.js.map +1 -0
  163. package/dist/search/rerank.d.ts +3 -0
  164. package/dist/search/rerank.d.ts.map +1 -0
  165. package/dist/search/rerank.js +40 -0
  166. package/dist/search/rerank.js.map +1 -0
  167. package/dist/search/searxng.d.ts +8 -0
  168. package/dist/search/searxng.d.ts.map +1 -0
  169. package/dist/search/searxng.js +87 -0
  170. package/dist/search/searxng.js.map +1 -0
  171. package/dist/search/validator.d.ts +6 -0
  172. package/dist/search/validator.d.ts.map +1 -0
  173. package/dist/search/validator.js +35 -0
  174. package/dist/search/validator.js.map +1 -0
  175. package/dist/searxng/bootstrap.d.ts +18 -0
  176. package/dist/searxng/bootstrap.d.ts.map +1 -0
  177. package/dist/searxng/bootstrap.js +136 -0
  178. package/dist/searxng/bootstrap.js.map +1 -0
  179. package/dist/searxng/docker.d.ts +9 -0
  180. package/dist/searxng/docker.d.ts.map +1 -0
  181. package/dist/searxng/docker.js +67 -0
  182. package/dist/searxng/docker.js.map +1 -0
  183. package/dist/searxng/process.d.ts +23 -0
  184. package/dist/searxng/process.d.ts.map +1 -0
  185. package/dist/searxng/process.js +188 -0
  186. package/dist/searxng/process.js.map +1 -0
  187. package/dist/server.d.ts +2 -0
  188. package/dist/server.d.ts.map +1 -0
  189. package/dist/server.js +311 -0
  190. package/dist/server.js.map +1 -0
  191. package/dist/tools/cache.d.ts +3 -0
  192. package/dist/tools/cache.d.ts.map +1 -0
  193. package/dist/tools/cache.js +50 -0
  194. package/dist/tools/cache.js.map +1 -0
  195. package/dist/tools/crawl.d.ts +6 -0
  196. package/dist/tools/crawl.d.ts.map +1 -0
  197. package/dist/tools/crawl.js +97 -0
  198. package/dist/tools/crawl.js.map +1 -0
  199. package/dist/tools/extract.d.ts +4 -0
  200. package/dist/tools/extract.d.ts.map +1 -0
  201. package/dist/tools/extract.js +69 -0
  202. package/dist/tools/extract.js.map +1 -0
  203. package/dist/tools/fetch.d.ts +4 -0
  204. package/dist/tools/fetch.d.ts.map +1 -0
  205. package/dist/tools/fetch.js +76 -0
  206. package/dist/tools/fetch.js.map +1 -0
  207. package/dist/tools/search.d.ts +4 -0
  208. package/dist/tools/search.d.ts.map +1 -0
  209. package/dist/tools/search.js +160 -0
  210. package/dist/tools/search.js.map +1 -0
  211. package/dist/types.d.ts +222 -0
  212. package/dist/types.d.ts.map +1 -0
  213. package/dist/types.js +2 -0
  214. package/dist/types.js.map +1 -0
  215. package/package.json +61 -0
@@ -0,0 +1,88 @@
1
+ import { parseHTML } from 'linkedom';
2
+ import TurndownService from 'turndown';
3
+ const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
4
/**
 * Read the vote score displayed inside a question or answer container.
 *
 * The `data-value` attribute of the `.js-vote-count` element is preferred;
 * when it is absent the element's trimmed text is used instead.
 *
 * @param el - Container element to search, or a nullish value.
 * @returns The parsed base-10 integer, or 0 when the container or counter is
 *   missing or the value is not numeric.
 */
function parseVotes(el) {
    const counter = el ? el.querySelector('.js-vote-count') : null;
    if (counter === null || counter === undefined) {
        return 0;
    }
    let raw = counter.getAttribute('data-value');
    if (raw === null || raw === undefined) {
        const text = counter.textContent;
        raw = text === null || text === undefined ? '0' : text.trim();
    }
    const parsed = Number.parseInt(raw, 10);
    // `|| 0` maps NaN (non-numeric text) to 0.
    return parsed || 0;
}
11
/**
 * Collect every answer found under `#answers` in document order.
 *
 * @param document - Parsed DOM document of the question page.
 * @returns One record per `.answer` element with its accepted flag (presence
 *   of the `accepted-answer` class), vote count, and the inner HTML of its
 *   body element (empty string when no body element matches).
 */
function parseAnswers(document) {
    return Array.from(document.querySelectorAll('#answers .answer'), (answerEl) => {
        const body = answerEl.querySelector('.s-prose, .js-post-body, .post-text');
        return {
            accepted: answerEl.classList.contains('accepted-answer'),
            votes: parseVotes(answerEl),
            bodyHtml: body ? body.innerHTML : '',
        };
    });
}
23
/**
 * Render a question and its answers as a single Markdown document.
 *
 * Layout: an H1 title, a "Tags | Votes" line, the question body, then each
 * answer behind a `---` rule. Accepted answers come first; the remaining
 * answers follow in descending vote order.
 *
 * @param title - Question title.
 * @param tags - Tag names, joined with ", ".
 * @param votes - Vote count of the question.
 * @param questionHtml - HTML of the question body, converted via turndown.
 * @param answers - Records of `{ accepted, votes, bodyHtml }`.
 * @returns The Markdown text; all parts are joined with a blank line.
 */
function buildMarkdown(title, tags, votes, questionHtml, answers) {
    const parts = [];
    parts.push(`# ${title}`);
    parts.push(`Tags: ${tags.join(', ')} | Votes: ${votes}`);
    parts.push('');
    parts.push(turndown.turndown(questionHtml).trim());

    // Partition in a single pass, keeping the original relative order.
    const acceptedFirst = [];
    const rest = [];
    for (const candidate of answers) {
        (candidate.accepted ? acceptedFirst : rest).push(candidate);
    }
    rest.sort((left, right) => right.votes - left.votes);

    for (const answer of acceptedFirst.concat(rest)) {
        const label = answer.accepted ? 'Accepted Answer' : 'Answer';
        const body = turndown.turndown(answer.bodyHtml).trim();
        parts.push('---', '', `## ${label} (Votes: ${answer.votes})`, '', body);
    }
    return parts.join('\n\n');
}
44
/**
 * Site-specific extractor for Stack Overflow / Stack Exchange question pages.
 */
export const stackoverflowExtractor = {
    name: 'stackoverflow',

    /**
     * Report whether `url` is hosted on stackoverflow.com, stackexchange.com,
     * or any subdomain of either. Unparseable URLs yield `false`.
     */
    canHandle(url) {
        let host;
        try {
            host = new URL(url).hostname;
        }
        catch {
            return false;
        }
        return ['stackoverflow.com', 'stackexchange.com'].some((domain) => host === domain || host.endsWith(`.${domain}`));
    },

    /**
     * Turn the HTML of a question page into a Markdown extraction result.
     *
     * @param html - Raw page HTML.
     * @param url - Page URL (not read by this implementation).
     * @returns The extraction result, or `null` when the HTML is empty or the
     *   page has no non-empty `.question-hyperlink` title or no question body.
     */
    extract(html, url) {
        if (!html) {
            return null;
        }
        const { document } = parseHTML(html);

        const heading = document.querySelector('.question-hyperlink');
        const title = heading ? (heading.textContent?.trim() ?? '') : '';
        if (!title) {
            return null;
        }

        const questionBody = document.querySelector('#question .s-prose, #question .js-post-body, #question .post-text');
        if (!questionBody) {
            return null;
        }
        const questionHtml = questionBody.innerHTML;

        // Keep only tags whose trimmed text is non-empty.
        const tags = [];
        for (const tagEl of Array.from(document.querySelectorAll('.js-post-tag-list-wrapper .post-tag, .post-taglist .post-tag'))) {
            const label = tagEl.textContent?.trim();
            if (label) {
                tags.push(label);
            }
        }

        const votes = parseVotes(document.querySelector('#question'));
        const answers = parseAnswers(document);

        return {
            title,
            markdown: buildMarkdown(title, tags, votes, questionHtml, answers),
            metadata: {},
            links: [],
            images: [],
            extractor: 'site-specific',
        };
    },
};
88
+ //# sourceMappingURL=stackoverflow.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC
,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC,mEAAmE,CAAC,CAAC;QACnH,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAEjC,MAAM,YAAY,GAAI,cAA0B,CAAC,SAAS,CAAC;QAE3D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE1F,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,UAAU,CAAC,UAA4B,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAE1E,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type { ExtractionResult } from '../types.js';
2
+ export declare function isTrafilaturaAvailable(): Promise<boolean>;
3
+ export declare function resetAvailabilityCache(): void;
4
+ export declare function runPythonWithStdin(script: string, stdin: string, timeoutMs: number): Promise<string>;
5
+ export declare function trafilaturaExtract(html: string, url: string): Promise<ExtractionResult | null>;
6
+ //# sourceMappingURL=trafilatura.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"trafilatura.d.ts","sourceRoot":"","sources":["../../src/extraction/trafilatura.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAoBpD,wBAAsB,sBAAsB,IAAI,OAAO,CAAC,OAAO,CAAC,CAW/D;AAED,wBAAgB,sBAAsB,IAAI,IAAI,CAE7C;AAED,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CAsCjB;AAED,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAuClC"}
@@ -0,0 +1,105 @@
1
+ // src/extraction/trafilatura.ts
2
+ import { spawn, execFile as execFileCb } from 'node:child_process';
3
+ import { promisify } from 'node:util';
4
+ import { createLogger } from '../logger.js';
5
+ const execFileAsync = promisify(execFileCb);
6
+ const log = createLogger('extract');
7
+ const MIN_CONTENT_THRESHOLD = 100; // Minimum `text.length` of the extracted text; trafilaturaExtract returns null for anything shorter.
8
+ const SUBPROCESS_TIMEOUT_MS = 15000; // Time limit (ms) passed to runPythonWithStdin for the extraction subprocess.
9
+ const AVAILABILITY_CHECK_TIMEOUT_MS = 5000; // Time limit (ms) for the `python3 -c "import trafilatura"` availability probe.
10
+ const TRAFILATURA_SCRIPT = `
11
+ import sys, json
12
+ from trafilatura import extract
13
+ html = sys.stdin.read()
14
+ result = extract(html, output_format='json', include_links=True, include_images=True, favor_precision=True)
15
+ print(result or '{}')
16
+ `.trim();
17
+ let availableCache = null;
18
/**
 * Check whether the `trafilatura` Python package can be imported by `python3`.
 *
 * The outcome of the first probe is memoized in `availableCache`; later calls
 * return it without spawning a process until the cache is reset.
 *
 * @returns `true` when `python3 -c "import trafilatura"` succeeds within the
 *   availability timeout, `false` on any failure.
 */
export async function isTrafilaturaAvailable() {
    if (availableCache !== null) {
        return availableCache;
    }
    let importable = true;
    try {
        await execFileAsync('python3', ['-c', 'import trafilatura'], {
            timeout: AVAILABILITY_CHECK_TIMEOUT_MS,
        });
    }
    catch {
        // Missing interpreter, missing package and timeout all count as unavailable.
        importable = false;
    }
    availableCache = importable;
    return importable;
}
32
+ export function resetAvailabilityCache() { // Forget the memoized probe result so the next isTrafilaturaAvailable() call runs the python3 check again.
33
+ availableCache = null;
34
+ }
35
/**
 * Run `python3 -c <script>`, feed `stdin` to the child, and collect its stdout.
 *
 * @param script - Python source handed to `python3 -c`.
 * @param stdin - Text written to the child's standard input; the stream is
 *   closed right after.
 * @param timeoutMs - Time limit in milliseconds. It is passed to `spawn` and
 *   also enforced by a local timer that kills the child.
 * @returns A promise resolving with everything the child wrote to stdout when
 *   it exits with code 0.
 *   The promise rejects with an `Error` when the child exits non-zero
 *   (`Python exited <code>: <stderr>`), is killed by a signal, exceeds the
 *   time limit (`Python timed out after <n>ms`), or cannot be spawned.
 */
export function runPythonWithStdin(script, stdin, timeoutMs) {
    return new Promise((resolve, reject) => {
        const proc = spawn('python3', ['-c', script], { timeout: timeoutMs });
        let stdout = '';
        let stderr = '';
        let settled = false;

        const timer = setTimeout(() => {
            proc.kill();
            finish(() => reject(new Error(`Python timed out after ${timeoutMs}ms`)));
        }, timeoutMs);

        // Settle exactly once and always release the timer, so a finished child
        // does not keep the event loop alive for the rest of the timeout.
        function finish(settle) {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            settle();
        }

        // Decode through the stream so multi-byte UTF-8 sequences that straddle
        // chunk boundaries are not corrupted.
        proc.stdout.setEncoding('utf8');
        proc.stderr.setEncoding('utf8');
        proc.stdout.on('data', (chunk) => {
            stdout += chunk;
        });
        proc.stderr.on('data', (chunk) => {
            stderr += chunk;
        });

        proc.on('close', (code, signal) => {
            if (signal) {
                finish(() => reject(new Error(`Python killed by signal ${signal}: ${stderr}`)));
            }
            else if (code === 0) {
                finish(() => resolve(stdout));
            }
            else {
                finish(() => reject(new Error(`Python exited ${code}: ${stderr}`)));
            }
        });
        proc.on('error', (err) => finish(() => reject(err)));

        // Without a listener, a write failure (e.g. EPIPE when the child exits
        // before reading its input, or a failed spawn) is thrown as an uncaught
        // exception. The 'close'/'error' handlers above report the real outcome.
        proc.stdin.on('error', () => { });
        proc.stdin.end(stdin);
    });
}
69
/**
 * Extract the main content of an HTML page with the Python `trafilatura`
 * package, run in a subprocess.
 *
 * @param html - Page HTML, piped to the Python script on stdin.
 * @param url - Page URL; only used as context in debug log entries.
 * @returns An extraction result whose `markdown` is trafilatura's `text`
 *   field, or `null` when the subprocess fails, the output is empty or not a
 *   JSON object, or the text is shorter than the minimum content threshold.
 */
export async function trafilaturaExtract(html, url) {
    const stringOr = (value, fallback) => (typeof value === 'string' ? value : fallback);
    try {
        const output = (await runPythonWithStdin(TRAFILATURA_SCRIPT, html, SUBPROCESS_TIMEOUT_MS)).trim();
        if (output === '' || output === 'null') {
            return null;
        }

        let doc;
        try {
            doc = JSON.parse(output);
        }
        catch {
            log.debug('Trafilatura output was not valid JSON', { url });
            return null;
        }
        if (typeof doc !== 'object' || doc === null) {
            return null;
        }

        const body = stringOr(doc.text, '');
        if (body.length < MIN_CONTENT_THRESHOLD) {
            return null;
        }

        return {
            title: stringOr(doc.title, ''),
            markdown: body,
            metadata: {
                author: stringOr(doc.author, undefined),
                date: stringOr(doc.date, undefined),
            },
            links: [],
            images: [],
            extractor: 'trafilatura',
        };
    }
    catch (err) {
        // Best effort: any subprocess failure is logged and reported as "no result".
        log.debug('Trafilatura extraction failed', { url, error: String(err) });
        return null;
    }
}
105
+ //# sourceMappingURL=trafilatura.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"trafilatura.js","sourceRoot":"","sources":["../../src/extraction/trafilatura.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,OAAO,EAAE,KAAK,EAAE,QAAQ,IAAI,UAAU,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,aAAa,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;AAC5C,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAEpC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAClC,MAAM,qBAAqB,GAAG,KAAK,CAAC;AACpC,MAAM,6BAA6B,GAAG,IAAI,CAAC;AAE3C,MAAM,kBAAkB,GAAG;;;;;;CAM1B,CAAC,IAAI,EAAE,CAAC;AAET,IAAI,cAAc,GAAmB,IAAI,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,sBAAsB;IAC1C,IAAI,cAAc,KAAK,IAAI;QAAE,OAAO,cAAc,CAAC;IACnD,IAAI,CAAC;QACH,MAAM,aAAa,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,oBAAoB,CAAC,EAAE;YAC3D,OAAO,EAAE,6BAA6B;SACvC,CAAC,CAAC;QACH,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,cAAc,GAAG,KAAK,CAAC;IACzB,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,sBAAsB;IACpC,cAAc,GAAG,IAAI,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,MAAc,EACd,KAAa,EACb,SAAiB;IAEjB,MAAM,OAAO,GAAG,EAAE,OAAO,EAAE,IAAuC,EAAE,CAAC;IAErE,MAAM,WAAW,GAAG,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC1D,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,CAAC;QACtE,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC;QACvB,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,IAAI,CAAC,MAAO,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE;YACpC,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzB,CAAC,CAAC,CAAC;QACH,IAAI,CAAC,MAAO,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE;YACpC,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzB,CAAC,CAAC,CAAC;QACH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;YAChC,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,MAAM,KAAK,MAAM,EAAE,CAAC,CAAC,CAAC;YACpE,CAAC;iBAAM,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACtB,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,KAAK,CAAC,iBAAiB,IAAI,KAAK,MAAM,EAAE,CAAC,CAAC,CAAC;YACxD,CAAC;QACH,CAAC,CAAC,CAAC;QACH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAEzB,IAAI,CAAC,
KAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACzB,IAAI,CAAC,KAAM,CAAC,GAAG,EAAE,CAAC;IACpB,CAAC,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAI,OAAO,CAAS,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CACvD,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC;QACxB,MAAM,CAAC,IAAI,KAAK,CAAC,0BAA0B,SAAS,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC,EAAE,SAAS,CAAC,CACd,CAAC;IAEF,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;AACrD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,GAAW;IAEX,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,kBAAkB,CACrC,kBAAkB,EAClB,IAAI,EACJ,qBAAqB,CACtB,CAAC;QAEF,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;QAC9B,IAAI,CAAC,OAAO,IAAI,OAAO,KAAK,MAAM;YAAE,OAAO,IAAI,CAAC;QAEhD,IAAI,MAA+B,CAAC;QACpC,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,GAAG,CAAC,KAAK,CAAC,uCAAuC,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAC5D,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC;QAEvD,MAAM,IAAI,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,IAAI,IAAI,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAErD,OAAO;YACL,KAAK,EAAE,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;YAC3D,QAAQ,EAAE,IAAI;YACd,QAAQ,EAAE;gBACR,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;gBACrE,IAAI,EAAE,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;aAChE;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,aAAa;SACzB,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,+BAA+B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxE,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { CDPSession } from '../types.js';
2
+ export interface AuthOptions { // Authentication source produced by getAuthOptions(); both fields are optional.
3
+ storageStatePath?: string; // Path of the configured auth-state file (config.authStatePath).
4
+ userDataDir?: string; // Temporary directory holding a copy of the configured Chrome profile.
5
+ }
6
+ export declare function getAuthOptions(): AuthOptions | null;
7
+ export declare function listSessions(): Promise<CDPSession[]>;
8
+ //# sourceMappingURL=auth.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"auth.d.ts","sourceRoot":"","sources":["../../src/fetch/auth.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAE9C,MAAM,WAAW,WAAW;IAC1B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,wBAAgB,cAAc,IAAI,WAAW,GAAG,IAAI,CAyBnD;AAED,wBAAsB,YAAY,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC,CAE1D"}
@@ -0,0 +1,32 @@
1
+ import { existsSync, cpSync, mkdtempSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ import { tmpdir } from 'node:os';
4
+ import { getConfig } from '../config.js';
5
+ import { createLogger } from '../logger.js';
6
/**
 * Resolve the browser authentication source from configuration.
 *
 * `config.authStatePath` takes precedence over `config.chromeProfilePath`.
 *
 * @returns `{ storageStatePath }` when an auth-state file is configured,
 *   `{ userDataDir }` pointing at a fresh temporary copy of the configured
 *   Chrome profile, or `null` when neither option is set.
 * @throws Error when `config.authStatePath` is set but the file does not exist.
 */
export function getAuthOptions() {
    const config = getConfig();
    const logger = createLogger('fetch');
    if (config.authStatePath) {
        if (!existsSync(config.authStatePath)) {
            throw new Error(`Auth state file not found: ${config.authStatePath}`);
        }
        return { storageStatePath: config.authStatePath };
    }
    if (config.chromeProfilePath) {
        const profileDir = config.chromeProfilePath;
        // NOTE(review): existsSync follows symlinks; if SingletonLock is a dangling
        // symlink on this platform the warning below will not fire — confirm.
        const lockFile = join(profileDir, 'SingletonLock');
        if (existsSync(lockFile)) {
            logger.warn('Chrome appears to be running (SingletonLock found) — close Chrome before using its profile', {
                profilePath: profileDir,
            });
        }
        // Chrome's process-singleton entries mark a profile as owned by a running
        // instance. Copying them would make the temporary profile look locked
        // too, so they are left out of the copy.
        const singletonEntries = new Set(['SingletonLock', 'SingletonCookie', 'SingletonSocket'].map((name) => join(profileDir, name)));
        const tempDir = mkdtempSync(join(tmpdir(), 'wigolo-chrome-'));
        cpSync(profileDir, tempDir, {
            recursive: true,
            filter: (src) => !singletonEntries.has(src),
        });
        logger.debug('copied Chrome profile to temp directory', { from: profileDir, to: tempDir });
        // NOTE(review): the temporary copy is not removed here; presumably the
        // caller is responsible for cleaning it up — confirm.
        return { userDataDir: tempDir };
    }
    return null;
}
29
/**
 * List known browser sessions.
 *
 * @returns A promise for the session list; this implementation always
 *   resolves with a new empty array.
 */
export async function listSessions() {
    const sessions = [];
    return sessions;
}
32
+ //# sourceMappingURL=auth.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"auth.js","sourceRoot":"","sources":["../../src/fetch/auth.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAC1D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAQ5C,MAAM,UAAU,cAAc;IAC5B,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IAErC,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC;QACxE,CAAC;QACD,OAAO,EAAE,gBAAgB,EAAE,MAAM,CAAC,aAAa,EAAE,CAAC;IACpD,CAAC;IAED,IAAI,MAAM,CAAC,iBAAiB,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,eAAe,CAAC,CAAC;QACjE,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,4FAA4F,EAAE;gBACxG,WAAW,EAAE,MAAM,CAAC,iBAAiB;aACtC,CAAC,CAAC;QACL,CAAC;QACD,MAAM,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,gBAAgB,CAAC,CAAC,CAAC;QAC9D,MAAM,CAAC,MAAM,CAAC,iBAAiB,EAAE,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE,EAAE,IAAI,EAAE,MAAM,CAAC,iBAAiB,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;QACzG,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC;IAClC,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,OAAO,EAAE,CAAC;AACZ,CAAC"}
@@ -0,0 +1,28 @@
1
+ import { type BrowserContext } from 'playwright';
2
+ import type { RawFetchResult, BrowserType } from '../types.js';
3
+ export interface BrowserFetchOptions {
4
+ timeoutMs?: number;
5
+ storageStatePath?: string;
6
+ userDataDir?: string;
7
+ headers?: Record<string, string>;
8
+ screenshot?: boolean;
9
+ }
10
+ export interface BrowserPoolOptions {
11
+ browserType?: BrowserType;
12
+ }
13
+ export declare class BrowserPool {
14
+ private browser;
15
+ private pool;
16
+ private activeCount;
17
+ private waitQueue;
18
+ private idleTimers;
19
+ private shutdownCalled;
20
+ private readonly browserType;
21
+ constructor(options?: BrowserPoolOptions);
22
+ private launchBrowser;
23
+ acquire(): Promise<BrowserContext>;
24
+ release(ctx: BrowserContext): void;
25
+ fetchWithBrowser(url: string, options?: BrowserFetchOptions): Promise<RawFetchResult>;
26
+ shutdown(): Promise<void>;
27
+ }
28
+ //# sourceMappingURL=browser-pool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"browser-pool.d.ts","sourceRoot":"","sources":["../../src/fetch/browser-pool.ts"],"names":[],"mappings":"AAAA,OAAO,EAA2C,KAAK,cAAc,EAAE,MAAM,YAAY,CAAC;AAG1F,OAAO,KAAK,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE/D,MAAM,WAAW,mBAAmB;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,kBAAkB;IACjC,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED,qBAAa,WAAW;IACtB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,IAAI,CAAwB;IACpC,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAA4C;IAC7D,OAAO,CAAC,UAAU,CAA4D;IAC9E,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;gBAE9B,OAAO,CAAC,EAAE,kBAAkB;YAI1B,aAAa;IAUrB,OAAO,IAAI,OAAO,CAAC,cAAc,CAAC;IAyBxC,OAAO,CAAC,GAAG,EAAE,cAAc,GAAG,IAAI;IAyB5B,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,cAAc,CAAC;IA+DzF,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;CAoBhC"}
@@ -0,0 +1,138 @@
1
+ import { chromium, firefox, webkit } from 'playwright';
2
+ import { getConfig } from '../config.js';
3
+ import { createLogger } from '../logger.js';
4
/**
 * Pool of Playwright browser contexts created from a single, lazily launched
 * headless browser.
 *
 * At most `config.maxBrowsers` contexts exist at a time. Released contexts are
 * kept for reuse and closed after `config.browserIdleTimeoutMs` of idleness.
 *
 * NOTE(review): a released context is handed out again as-is, so whatever a
 * previous page stored in it is presumably still there for the next fetch —
 * confirm this sharing is intended.
 */
export class BrowserPool {
    /** The launched browser, or null before first launch / after shutdown. */
    browser = null;
    /** In-flight launch promise, shared so concurrent callers start only one browser. */
    launching = null;
    /** Idle contexts available for reuse. */
    pool = [];
    /** Contexts created and not yet closed by the pool. */
    activeCount = 0;
    /** Parked acquire() callers as `{ resolve, reject }`, served first-in first-out. */
    waitQueue = [];
    /** Idle context -> timer that will close it. */
    idleTimers = new Map();
    shutdownCalled = false;
    browserType;
    /**
     * @param options Optional settings; `browserType` defaults to 'chromium'.
     */
    constructor(options) {
        this.browserType = options?.browserType ?? 'chromium';
    }
    /**
     * Returns the shared browser, launching it on first use.
     * Concurrent callers share one launch instead of each starting a browser.
     */
    async launchBrowser() {
        if (this.browser) {
            return this.browser;
        }
        if (!this.launching) {
            const launcher = this.browserType === 'firefox' ? firefox
                : this.browserType === 'webkit' ? webkit
                    : chromium;
            this.launching = launcher.launch({ headless: true })
                .then((browser) => {
                    this.browser = browser;
                    return browser;
                })
                .finally(() => {
                    this.launching = null;
                });
        }
        return this.launching;
    }
    /**
     * Obtains a context: an idle one if available, a new one while below
     * `config.maxBrowsers`, otherwise waits until another caller releases one.
     *
     * @returns A browser context that must be passed back to `release()`.
     * @throws Whatever launching the browser or creating the context throws;
     *   a waiting call rejects if the pool is shut down first.
     */
    async acquire() {
        const { maxBrowsers } = getConfig();
        const idle = this.pool.pop();
        if (idle !== undefined) {
            const timer = this.idleTimers.get(idle);
            if (timer !== undefined) {
                clearTimeout(timer);
                this.idleTimers.delete(idle);
            }
            return idle;
        }
        if (this.activeCount < maxBrowsers) {
            this.activeCount++;
            try {
                const browser = await this.launchBrowser();
                return await browser.newContext();
            }
            catch (err) {
                // Give the slot back; otherwise every failed launch permanently
                // shrinks the pool until all callers wait forever.
                this.activeCount = Math.max(0, this.activeCount - 1);
                // A caller may have parked while this slot was being created;
                // let it retry with the freed slot instead of waiting for a
                // release that may never come.
                const next = this.waitQueue.shift();
                if (next) {
                    this.acquire().then(next.resolve, next.reject);
                }
                throw err;
            }
        }
        return new Promise((resolve, reject) => {
            this.waitQueue.push({ resolve, reject });
        });
    }
    /**
     * Returns a context to the pool.
     *
     * The oldest waiting `acquire()` caller gets it directly. Otherwise it is
     * pooled and closed after `config.browserIdleTimeoutMs`. After `shutdown()`
     * it is closed immediately instead of being pooled.
     *
     * @param ctx Context previously obtained from `acquire()`.
     */
    release(ctx) {
        const next = this.waitQueue.shift();
        if (next) {
            next.resolve(ctx);
            return;
        }
        if (this.shutdownCalled) {
            // Pooling here would arm a fresh idle timer after shutdown cleared them.
            this.activeCount = Math.max(0, this.activeCount - 1);
            ctx.close().catch(() => { });
            return;
        }
        const { browserIdleTimeoutMs } = getConfig();
        this.pool.push(ctx);
        const timer = setTimeout(() => {
            const idx = this.pool.indexOf(ctx);
            if (idx !== -1) {
                this.pool.splice(idx, 1);
                this.idleTimers.delete(ctx);
                this.activeCount = Math.max(0, this.activeCount - 1);
                ctx.close().catch(() => { });
            }
        }, browserIdleTimeoutMs);
        this.idleTimers.set(ctx, timer);
    }
    /**
     * Loads `url` in a new page and returns the rendered HTML with response metadata.
     *
     * Navigation waits for 'domcontentloaded', then waits for 'networkidle'
     * up to `config.playwrightLoadTimeoutMs`; a networkidle timeout is ignored.
     *
     * NOTE(review): `options.storageStatePath` and `options.userDataDir` are not
     * read here — confirm where they are applied.
     *
     * @param url Address to load.
     * @param options `timeoutMs` overrides the navigation timeout, `headers` are
     *   sent as extra HTTP headers, `screenshot` adds a base64 full-page capture.
     * @returns url, finalUrl, html, contentType, statusCode, method 'playwright',
     *   headers and optional screenshot. statusCode stays 200 and headers stay
     *   empty when navigation yields no response object.
     * @throws Errors from acquiring a context, opening the page or navigating.
     */
    async fetchWithBrowser(url, options = {}) {
        const config = getConfig();
        const logger = createLogger('fetch');
        const navTimeoutMs = options.timeoutMs ?? config.playwrightNavTimeoutMs;
        const loadTimeoutMs = config.playwrightLoadTimeoutMs;
        const ctx = await this.acquire();
        let page;
        try {
            // Page setup is inside the try so a failure still releases the context.
            page = await ctx.newPage();
            if (options.headers) {
                await page.setExtraHTTPHeaders(options.headers);
            }
            let statusCode = 200;
            let contentType = '';
            let responseHeaders = {};
            let finalUrl = url;
            const response = await page.goto(url, {
                timeout: navTimeoutMs,
                waitUntil: 'domcontentloaded',
            });
            if (response) {
                statusCode = response.status();
                finalUrl = response.url();
                const rawHeaders = response.headers();
                responseHeaders = rawHeaders;
                contentType = rawHeaders['content-type'] ?? '';
            }
            try {
                await page.waitForLoadState('networkidle', { timeout: loadTimeoutMs });
            }
            catch {
                // networkidle timeout is non-fatal — page content is still usable
                logger.debug('networkidle timeout, using page content as-is', { url });
            }
            const html = await page.content();
            let screenshotBase64;
            if (options.screenshot) {
                const buf = await page.screenshot({ fullPage: true });
                screenshotBase64 = buf.toString('base64');
            }
            return {
                url,
                finalUrl,
                html,
                contentType,
                statusCode,
                method: 'playwright',
                headers: responseHeaders,
                screenshot: screenshotBase64,
            };
        }
        finally {
            try {
                if (page) {
                    // A failing close must neither mask the real error nor skip release().
                    await page.close().catch(() => { });
                }
            }
            finally {
                this.release(ctx);
            }
        }
    }
    /**
     * Stops the pool: clears idle timers, rejects callers still waiting in
     * `acquire()`, closes pooled contexts and closes the browser.
     * Only the first call does any work.
     */
    async shutdown() {
        if (this.shutdownCalled)
            return;
        this.shutdownCalled = true;
        for (const timer of this.idleTimers.values()) {
            clearTimeout(timer);
        }
        this.idleTimers.clear();
        // Nothing will be released to these callers any more; fail them
        // instead of leaving their promises pending forever.
        const waiters = this.waitQueue;
        this.waitQueue = [];
        for (const waiter of waiters) {
            waiter.reject(new Error('BrowserPool is shutting down'));
        }
        const idleContexts = this.pool;
        this.pool = [];
        await Promise.all(idleContexts.map((ctx) => ctx.close().catch(() => { })));
        if (this.launching) {
            // Wait for an in-flight launch so the browser it produces is closed too.
            await this.launching.catch(() => { });
        }
        if (this.browser) {
            await this.browser.close().catch(() => { });
            this.browser = null;
        }
        this.activeCount = 0;
    }
}
138
+ //# sourceMappingURL=browser-pool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"browser-pool.js","sourceRoot":"","sources":["../../src/fetch/browser-pool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAqC,MAAM,YAAY,CAAC;AAC1F,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAe5C,MAAM,OAAO,WAAW;IACd,OAAO,GAAmB,IAAI,CAAC;IAC/B,IAAI,GAAqB,EAAE,CAAC;IAC5B,WAAW,GAAG,CAAC,CAAC;IAChB,SAAS,GAAyC,EAAE,CAAC;IACrD,UAAU,GAAG,IAAI,GAAG,EAAiD,CAAC;IACtE,cAAc,GAAG,KAAK,CAAC;IACd,WAAW,CAAc;IAE1C,YAAY,OAA4B;QACtC,IAAI,CAAC,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,UAAU,CAAC;IACxD,CAAC;IAEO,KAAK,CAAC,aAAa;QACzB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO;gBACvD,CAAC,CAAC,IAAI,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM;oBACxC,CAAC,CAAC,QAAQ,CAAC;YACb,IAAI,CAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;QAEvC,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAG,CAAC;YAC7B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACvC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBACxB,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9B,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;QAED,IAAI,IAAI,CAAC,WAAW,GAAG,WAAW,EAAE,CAAC;YACnC,IAAI,CAAC,WAAW,EAAE,CAAC;YACnB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;YAC3C,OAAO,OAAO,CAAC,UAAU,EAAE,CAAC;QAC9B,CAAC;QAED,OAAO,IAAI,OAAO,CAAiB,CAAC,OAAO,EAAE,EAAE;YAC7C,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,CAAC,GAAmB;QACzB,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,MAAM,aAAa,GAAG,MAAM,CAAC,oBAAoB,CAAC;QAElD,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,EAAG,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,CAAC;YACb,OAAO;QACT,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpB,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE
;YAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACnC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;gBACf,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;gBACzB,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBAC5B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC;gBACrD,GAAG,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC,EAAE,aAAa,CAAC,CAAC;QAElB,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,GAAW,EAAE,UAA+B,EAAE;QACnE,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,YAAY,GAAG,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC,sBAAsB,CAAC;QACxE,MAAM,aAAa,GAAG,MAAM,CAAC,uBAAuB,CAAC;QAErD,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,OAAO,EAAE,CAAC;QAEjC,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACpB,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAClD,CAAC;QAED,IAAI,UAAU,GAAG,GAAG,CAAC;QACrB,IAAI,WAAW,GAAG,EAAE,CAAC;QACrB,IAAI,eAAe,GAA2B,EAAE,CAAC;QACjD,IAAI,QAAQ,GAAG,GAAG,CAAC;QAEnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,OAAO,EAAE,YAAY;gBACrB,SAAS,EAAE,kBAAkB;aAC9B,CAAC,CAAC;YAEH,IAAI,QAAQ,EAAE,CAAC;gBACb,UAAU,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAC/B,QAAQ,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;gBAC1B,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC;gBACtC,eAAe,GAAG,UAAU,CAAC;gBAC7B,WAAW,GAAG,UAAU,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;YACjD,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,CAAC,CAAC;YACzE,CAAC;YAAC,MAAM,CAAC;gBACP,kEAAkE;gBAClE,MAAM,CAAC,KAAK,CAAC,+CAA+C,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAElC,IAAI,gBAAoC,CAAC;YACzC,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;gBACtD,gBAAgB,GAAG,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC5C,CAAC;YAED,OAAO;gBACL,GAAG;gBACH,QAAQ;gBACR,IAAI;gBACJ,WAAW;gBACX,UAAU;gBACV,MAAM,EAAE,YAAY;gBACpB,O
AAO,EAAE,eAAe;gBACxB,UAAU,EAAE,gBAAgB;aAC7B,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACnB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ;QACZ,IAAI,IAAI,CAAC,cAAc;YAAE,OAAO;QAChC,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;QAE3B,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACxC,YAAY,CAAC,KAAK,CAAC,CAAC;QACtB,CAAC;QACD,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QAExB,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC;QAC1E,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;QACf,MAAM,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;QAEjC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAC3C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QAED,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;IACvB,CAAC;CACF"}
@@ -0,0 +1,2 @@
1
+ export declare function contentAppearsEmpty(html: string): boolean; // Heuristic: true when html has little visible text or looks like a script-rendered shell.
2
+ //# sourceMappingURL=content-check.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-check.d.ts","sourceRoot":"","sources":["../../src/fetch/content-check.ts"],"names":[],"mappings":"AAsDA,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAUzD"}
@@ -0,0 +1,62 @@
1
+ const VISIBLE_TEXT_THRESHOLD = 200; // Pages whose visible text is shorter than this many characters are treated as empty.
2
+ const SCRIPT_RATIO_THRESHOLD = 0.8; // Page is treated as empty when <script> markup exceeds this fraction of the body length.
3
/**
 * Removes every `<script>…</script>` and `<style>…</style>` element,
 * including its content, from an HTML string.
 *
 * Closing tags may contain whitespace before `>` (for example `</script >`),
 * which is valid HTML. The tag name must end at a word boundary, so elements
 * whose names merely start with "script" or "style" are left alone.
 *
 * @param html Raw HTML markup.
 * @returns The markup with script and style elements removed.
 */
function stripScriptsAndStyles(html) {
    return html
        .replace(/<script\b[\s\S]*?<\/script\s*>/gi, '')
        .replace(/<style\b[\s\S]*?<\/style\s*>/gi, '');
}
8
/**
 * Approximates the text a reader would see in an HTML document.
 *
 * Script and style elements are dropped, then HTML comments, then all
 * remaining tags; runs of whitespace collapse to a single space.
 * Comments are removed explicitly so commented-out markup does not count
 * as visible text.
 *
 * @param html Raw HTML markup.
 * @returns Trimmed, whitespace-normalised text content.
 */
function extractVisibleText(html) {
    return stripScriptsAndStyles(html)
        .replace(/<!--[\s\S]*?-->/g, ' ')
        .replace(/<[^>]+>/g, ' ')
        .replace(/\s+/g, ' ')
        .trim();
}
13
/**
 * Detects an empty single-page-app mount point: a `<div>` whose `id` is
 * `root`, `app` or `__next` and which contains nothing but whitespace.
 *
 * The `id` attribute must start right after whitespace or a closing quote,
 * so attributes that merely end in "id" (`data-testid="root"`,
 * `data-id="app"`) are not mistaken for it.
 *
 * @param html Raw HTML markup.
 * @returns True when such an empty mount element is present.
 */
function hasSpaShellIndicator(html) {
    const emptyMountPoint = /<div\s(?:[^>]*[\s"'])?id=["'](?:root|app|__next)["'][^>]*>\s*<\/div>/i;
    return emptyMountPoint.test(html);
}
21
/**
 * Reports a page that mentions `__NEXT_DATA__` and whose visible text
 * (scripts, styles and tags removed) is shorter than VISIBLE_TEXT_THRESHOLD.
 *
 * @param html Raw HTML markup.
 * @returns True only when both conditions hold.
 */
function hasNextData(html) {
    if (!html.includes('__NEXT_DATA__')) {
        return false;
    }
    const markupFree = stripScriptsAndStyles(html).replace(/<[^>]+>/g, ' ');
    const visibleLength = markupFree.replace(/\s+/g, ' ').trim().length;
    return visibleLength < VISIBLE_TEXT_THRESHOLD;
}
28
/**
 * Looks for a `<noscript>` block whose text mentions "javascript" or
 * "enable" (case-insensitive).
 *
 * @param html Raw HTML markup.
 * @returns True when at least one noscript block contains either word.
 */
function hasNoscriptRequired(html) {
    const blocks = html.match(/<noscript[^>]*>([\s\S]*?)<\/noscript>/gi) ?? [];
    for (const block of blocks) {
        // Only the text between tags counts, not attribute values.
        const text = block.replace(/<[^>]+>/g, '').toLowerCase();
        if (text.includes('javascript') || text.includes('enable')) {
            return true;
        }
    }
    return false;
}
37
/**
 * Checks whether `<script>` elements make up more than
 * SCRIPT_RATIO_THRESHOLD of the document body, measured in characters.
 * When no `<body>…</body>` pair is found the whole input is measured.
 *
 * @param html Raw HTML markup.
 * @returns True when the script share exceeds the threshold; false for an empty body.
 */
function hasHighScriptRatio(html) {
    const body = /<body[^>]*>([\s\S]*?)<\/body>/i.exec(html)?.[1] ?? html;
    if (body.length === 0) {
        return false;
    }
    let scriptChars = 0;
    for (const found of body.matchAll(/<script[\s\S]*?<\/script>/gi)) {
        scriptChars += found[0].length;
    }
    return scriptChars / body.length > SCRIPT_RATIO_THRESHOLD;
}
48
/**
 * Heuristically decides whether fetched HTML lacks usable content.
 *
 * A page counts as empty when its visible text is shorter than
 * VISIBLE_TEXT_THRESHOLD, or when any of the shell checks fires. The checks
 * run in this order: empty SPA mount point, `__NEXT_DATA__` page,
 * noscript notice, high script-to-body ratio.
 *
 * @param html Raw HTML markup.
 * @returns True when the page appears to have no real content.
 */
export function contentAppearsEmpty(html) {
    if (extractVisibleText(html).length < VISIBLE_TEXT_THRESHOLD) {
        return true;
    }
    const shellChecks = [
        hasSpaShellIndicator,
        hasNextData,
        hasNoscriptRequired,
        hasHighScriptRatio,
    ];
    return shellChecks.some((check) => check(html));
}
62
+ //# sourceMappingURL=content-check.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-check.js","sourceRoot":"","sources":["../../src/fetch/content-check.ts"],"names":[],"mappings":"AAAA,MAAM,sBAAsB,GAAG,GAAG,CAAC;AACnC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAEnC,SAAS,qBAAqB,CAAC,IAAY;IACzC,OAAO,IAAI;SACR,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC;SAC1C,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,MAAM,QAAQ,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACjD,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC5C,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAY;IACxC,MAAM,WAAW,GAAG;QAClB,2CAA2C;QAC3C,0CAA0C;QAC1C,6CAA6C;KAC9C,CAAC;IACF,OAAO,WAAW,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC9C,MAAM,cAAc,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,WAAW,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACxF,OAAO,WAAW,CAAC,MAAM,GAAG,sBAAsB,CAAC;AACrD,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAC9E,IAAI,CAAC,eAAe;QAAE,OAAO,KAAK,CAAC;IACnC,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACxD,OAAO,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAC/D,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAEpD,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,6BAA6B,CAAC,IAAI,EAAE,CAAC;IAC7E,MAAM,UAAU,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAE1C,MAAM,SAAS,GAAG,UAAU,CAAC,MAAM,CAAC;IACpC,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC;IAEpC,IAAI,QAAQ,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACjC,OAAO,SAAS,GAAG,QAAQ,GAAG,sBAAsB,CAAC;AACvD,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAC7C,IAAI
,WAAW,CAAC,MAAM,GAAG,sBAAsB;QAAE,OAAO,IAAI,CAAC;IAE7D,IAAI,oBAAoB,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5C,IAAI,WAAW,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACnC,IAAI,mBAAmB,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC3C,IAAI,kBAAkB,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAE1C,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -0,0 +1,15 @@
1
+ export interface HttpFetchOptions { // Optional settings accepted by httpFetch().
2
+ headers?: Record<string, string>; // presumably extra request headers — confirm in http-client.js
3
+ timeoutMs?: number; // presumably the request timeout in milliseconds — confirm in http-client.js
4
+ }
5
+ export interface HttpFetchResult { // Value httpFetch() resolves to.
6
+ url: string; // presumably the URL that was requested — confirm in http-client.js
7
+ finalUrl: string; // presumably the URL after redirects — confirm in http-client.js
8
+ html: string; // presumably the response body decoded as text — confirm in http-client.js
9
+ contentType: string; // presumably the response Content-Type header value — confirm in http-client.js
10
+ statusCode: number; // presumably the HTTP status code of the response — confirm in http-client.js
11
+ headers: Record<string, string>; // presumably the response headers — confirm in http-client.js
12
+ rawBuffer?: Buffer; // presumably the undecoded response bytes, when kept — confirm in http-client.js
13
+ }
14
+ export declare function httpFetch(url: string, options?: HttpFetchOptions): Promise<HttpFetchResult>; // Fetches url and resolves to an HttpFetchResult; options may be omitted. NOTE(review): implementation not shown here — presumably a plain HTTP request without a browser.
15
+ //# sourceMappingURL=http-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"http-client.d.ts","sourceRoot":"","sources":["../../src/fetch/http-client.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAmCD,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,OAAO,CAAC,eAAe,CAAC,CAyCrG"}