@staticn0va/wigolo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/LICENSE +74 -0
  2. package/README.md +272 -0
  3. package/dist/cache/db.d.ts +5 -0
  4. package/dist/cache/db.d.ts.map +1 -0
  5. package/dist/cache/db.js +97 -0
  6. package/dist/cache/db.js.map +1 -0
  7. package/dist/cache/store.d.ts +26 -0
  8. package/dist/cache/store.d.ts.map +1 -0
  9. package/dist/cache/store.js +214 -0
  10. package/dist/cache/store.js.map +1 -0
  11. package/dist/cli/daemon.d.ts +2 -0
  12. package/dist/cli/daemon.d.ts.map +1 -0
  13. package/dist/cli/daemon.js +5 -0
  14. package/dist/cli/daemon.js.map +1 -0
  15. package/dist/cli/health.d.ts +2 -0
  16. package/dist/cli/health.d.ts.map +1 -0
  17. package/dist/cli/health.js +5 -0
  18. package/dist/cli/health.js.map +1 -0
  19. package/dist/cli/index.d.ts +7 -0
  20. package/dist/cli/index.d.ts.map +1 -0
  21. package/dist/cli/index.js +9 -0
  22. package/dist/cli/index.js.map +1 -0
  23. package/dist/cli/warmup.d.ts +11 -0
  24. package/dist/cli/warmup.d.ts.map +1 -0
  25. package/dist/cli/warmup.js +107 -0
  26. package/dist/cli/warmup.js.map +1 -0
  27. package/dist/config.d.ts +41 -0
  28. package/dist/config.d.ts.map +1 -0
  29. package/dist/config.js +66 -0
  30. package/dist/config.js.map +1 -0
  31. package/dist/crawl/crawler.d.ts +18 -0
  32. package/dist/crawl/crawler.d.ts.map +1 -0
  33. package/dist/crawl/crawler.js +228 -0
  34. package/dist/crawl/crawler.js.map +1 -0
  35. package/dist/crawl/dedup.d.ts +15 -0
  36. package/dist/crawl/dedup.d.ts.map +1 -0
  37. package/dist/crawl/dedup.js +93 -0
  38. package/dist/crawl/dedup.js.map +1 -0
  39. package/dist/crawl/mapper.d.ts +17 -0
  40. package/dist/crawl/mapper.d.ts.map +1 -0
  41. package/dist/crawl/mapper.js +178 -0
  42. package/dist/crawl/mapper.js.map +1 -0
  43. package/dist/crawl/rate-limiter.d.ts +10 -0
  44. package/dist/crawl/rate-limiter.d.ts.map +1 -0
  45. package/dist/crawl/rate-limiter.js +72 -0
  46. package/dist/crawl/rate-limiter.js.map +1 -0
  47. package/dist/crawl/robots.d.ts +9 -0
  48. package/dist/crawl/robots.d.ts.map +1 -0
  49. package/dist/crawl/robots.js +63 -0
  50. package/dist/crawl/robots.js.map +1 -0
  51. package/dist/crawl/sitemap.d.ts +4 -0
  52. package/dist/crawl/sitemap.d.ts.map +1 -0
  53. package/dist/crawl/sitemap.js +38 -0
  54. package/dist/crawl/sitemap.js.map +1 -0
  55. package/dist/crawl/url-utils.d.ts +3 -0
  56. package/dist/crawl/url-utils.d.ts.map +1 -0
  57. package/dist/crawl/url-utils.js +41 -0
  58. package/dist/crawl/url-utils.js.map +1 -0
  59. package/dist/extraction/defuddle.d.ts +3 -0
  60. package/dist/extraction/defuddle.d.ts.map +1 -0
  61. package/dist/extraction/defuddle.js +26 -0
  62. package/dist/extraction/defuddle.js.map +1 -0
  63. package/dist/extraction/extract.d.ts +5 -0
  64. package/dist/extraction/extract.d.ts.map +1 -0
  65. package/dist/extraction/extract.js +83 -0
  66. package/dist/extraction/extract.js.map +1 -0
  67. package/dist/extraction/jsonld.d.ts +4 -0
  68. package/dist/extraction/jsonld.d.ts.map +1 -0
  69. package/dist/extraction/jsonld.js +64 -0
  70. package/dist/extraction/jsonld.js.map +1 -0
  71. package/dist/extraction/markdown.d.ts +10 -0
  72. package/dist/extraction/markdown.d.ts.map +1 -0
  73. package/dist/extraction/markdown.js +107 -0
  74. package/dist/extraction/markdown.js.map +1 -0
  75. package/dist/extraction/pipeline.d.ts +11 -0
  76. package/dist/extraction/pipeline.d.ts.map +1 -0
  77. package/dist/extraction/pipeline.js +95 -0
  78. package/dist/extraction/pipeline.js.map +1 -0
  79. package/dist/extraction/readability.d.ts +3 -0
  80. package/dist/extraction/readability.d.ts.map +1 -0
  81. package/dist/extraction/readability.js +32 -0
  82. package/dist/extraction/readability.js.map +1 -0
  83. package/dist/extraction/schema.d.ts +7 -0
  84. package/dist/extraction/schema.d.ts.map +1 -0
  85. package/dist/extraction/schema.js +86 -0
  86. package/dist/extraction/schema.js.map +1 -0
  87. package/dist/extraction/site-extractors/docs-generic.d.ts +3 -0
  88. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -0
  89. package/dist/extraction/site-extractors/docs-generic.js +104 -0
  90. package/dist/extraction/site-extractors/docs-generic.js.map +1 -0
  91. package/dist/extraction/site-extractors/github.d.ts +3 -0
  92. package/dist/extraction/site-extractors/github.d.ts.map +1 -0
  93. package/dist/extraction/site-extractors/github.js +107 -0
  94. package/dist/extraction/site-extractors/github.js.map +1 -0
  95. package/dist/extraction/site-extractors/mdn.d.ts +3 -0
  96. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -0
  97. package/dist/extraction/site-extractors/mdn.js +58 -0
  98. package/dist/extraction/site-extractors/mdn.js.map +1 -0
  99. package/dist/extraction/site-extractors/stackoverflow.d.ts +3 -0
  100. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -0
  101. package/dist/extraction/site-extractors/stackoverflow.js +88 -0
  102. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -0
  103. package/dist/extraction/trafilatura.d.ts +6 -0
  104. package/dist/extraction/trafilatura.d.ts.map +1 -0
  105. package/dist/extraction/trafilatura.js +105 -0
  106. package/dist/extraction/trafilatura.js.map +1 -0
  107. package/dist/fetch/auth.d.ts +8 -0
  108. package/dist/fetch/auth.d.ts.map +1 -0
  109. package/dist/fetch/auth.js +32 -0
  110. package/dist/fetch/auth.js.map +1 -0
  111. package/dist/fetch/browser-pool.d.ts +28 -0
  112. package/dist/fetch/browser-pool.d.ts.map +1 -0
  113. package/dist/fetch/browser-pool.js +138 -0
  114. package/dist/fetch/browser-pool.js.map +1 -0
  115. package/dist/fetch/content-check.d.ts +2 -0
  116. package/dist/fetch/content-check.d.ts.map +1 -0
  117. package/dist/fetch/content-check.js +62 -0
  118. package/dist/fetch/content-check.js.map +1 -0
  119. package/dist/fetch/http-client.d.ts +15 -0
  120. package/dist/fetch/http-client.d.ts.map +1 -0
  121. package/dist/fetch/http-client.js +146 -0
  122. package/dist/fetch/http-client.js.map +1 -0
  123. package/dist/fetch/router.d.ts +45 -0
  124. package/dist/fetch/router.d.ts.map +1 -0
  125. package/dist/fetch/router.js +89 -0
  126. package/dist/fetch/router.js.map +1 -0
  127. package/dist/index.d.ts +3 -0
  128. package/dist/index.d.ts.map +1 -0
  129. package/dist/index.js +22 -0
  130. package/dist/index.js.map +1 -0
  131. package/dist/logger.d.ts +10 -0
  132. package/dist/logger.d.ts.map +1 -0
  133. package/dist/logger.js +39 -0
  134. package/dist/logger.js.map +1 -0
  135. package/dist/search/dedup.d.ts +10 -0
  136. package/dist/search/dedup.d.ts.map +1 -0
  137. package/dist/search/dedup.js +35 -0
  138. package/dist/search/dedup.js.map +1 -0
  139. package/dist/search/engines/bing.d.ts +7 -0
  140. package/dist/search/engines/bing.d.ts.map +1 -0
  141. package/dist/search/engines/bing.js +48 -0
  142. package/dist/search/engines/bing.js.map +1 -0
  143. package/dist/search/engines/duckduckgo.d.ts +7 -0
  144. package/dist/search/engines/duckduckgo.d.ts.map +1 -0
  145. package/dist/search/engines/duckduckgo.js +50 -0
  146. package/dist/search/engines/duckduckgo.js.map +1 -0
  147. package/dist/search/engines/startpage.d.ts +7 -0
  148. package/dist/search/engines/startpage.d.ts.map +1 -0
  149. package/dist/search/engines/startpage.js +50 -0
  150. package/dist/search/engines/startpage.js.map +1 -0
  151. package/dist/search/filters.d.ts +16 -0
  152. package/dist/search/filters.d.ts.map +1 -0
  153. package/dist/search/filters.js +63 -0
  154. package/dist/search/filters.js.map +1 -0
  155. package/dist/search/flashrank.d.ts +12 -0
  156. package/dist/search/flashrank.d.ts.map +1 -0
  157. package/dist/search/flashrank.js +63 -0
  158. package/dist/search/flashrank.js.map +1 -0
  159. package/dist/search/query.d.ts +2 -0
  160. package/dist/search/query.d.ts.map +1 -0
  161. package/dist/search/query.js +41 -0
  162. package/dist/search/query.js.map +1 -0
  163. package/dist/search/rerank.d.ts +3 -0
  164. package/dist/search/rerank.d.ts.map +1 -0
  165. package/dist/search/rerank.js +40 -0
  166. package/dist/search/rerank.js.map +1 -0
  167. package/dist/search/searxng.d.ts +8 -0
  168. package/dist/search/searxng.d.ts.map +1 -0
  169. package/dist/search/searxng.js +87 -0
  170. package/dist/search/searxng.js.map +1 -0
  171. package/dist/search/validator.d.ts +6 -0
  172. package/dist/search/validator.d.ts.map +1 -0
  173. package/dist/search/validator.js +35 -0
  174. package/dist/search/validator.js.map +1 -0
  175. package/dist/searxng/bootstrap.d.ts +18 -0
  176. package/dist/searxng/bootstrap.d.ts.map +1 -0
  177. package/dist/searxng/bootstrap.js +136 -0
  178. package/dist/searxng/bootstrap.js.map +1 -0
  179. package/dist/searxng/docker.d.ts +9 -0
  180. package/dist/searxng/docker.d.ts.map +1 -0
  181. package/dist/searxng/docker.js +67 -0
  182. package/dist/searxng/docker.js.map +1 -0
  183. package/dist/searxng/process.d.ts +23 -0
  184. package/dist/searxng/process.d.ts.map +1 -0
  185. package/dist/searxng/process.js +188 -0
  186. package/dist/searxng/process.js.map +1 -0
  187. package/dist/server.d.ts +2 -0
  188. package/dist/server.d.ts.map +1 -0
  189. package/dist/server.js +311 -0
  190. package/dist/server.js.map +1 -0
  191. package/dist/tools/cache.d.ts +3 -0
  192. package/dist/tools/cache.d.ts.map +1 -0
  193. package/dist/tools/cache.js +50 -0
  194. package/dist/tools/cache.js.map +1 -0
  195. package/dist/tools/crawl.d.ts +6 -0
  196. package/dist/tools/crawl.d.ts.map +1 -0
  197. package/dist/tools/crawl.js +97 -0
  198. package/dist/tools/crawl.js.map +1 -0
  199. package/dist/tools/extract.d.ts +4 -0
  200. package/dist/tools/extract.d.ts.map +1 -0
  201. package/dist/tools/extract.js +69 -0
  202. package/dist/tools/extract.js.map +1 -0
  203. package/dist/tools/fetch.d.ts +4 -0
  204. package/dist/tools/fetch.d.ts.map +1 -0
  205. package/dist/tools/fetch.js +76 -0
  206. package/dist/tools/fetch.js.map +1 -0
  207. package/dist/tools/search.d.ts +4 -0
  208. package/dist/tools/search.d.ts.map +1 -0
  209. package/dist/tools/search.js +160 -0
  210. package/dist/tools/search.js.map +1 -0
  211. package/dist/types.d.ts +222 -0
  212. package/dist/types.d.ts.map +1 -0
  213. package/dist/types.js +2 -0
  214. package/dist/types.js.map +1 -0
  215. package/package.json +61 -0
@@ -0,0 +1,146 @@
1
+ import { getConfig } from '../config.js';
2
+ import { createLogger } from '../logger.js';
3
/** HTTP statuses treated as transient: a response carrying one of these is reported as a retryable failure. */
const RETRYABLE_STATUSES = new Set([429, 502, 503]);
/** Node network error codes that are worth another attempt. */
const RETRYABLE_ERROR_CODES = new Set(['ECONNRESET', 'ETIMEDOUT', 'ECONNREFUSED']);
/**
 * Statuses followed as redirects. Requests are issued with `redirect: 'manual'`,
 * so any redirect status missing here is returned to the caller as the final page.
 * 303 (See Other) is included: it is always followed with GET, which is the only
 * method this client issues.
 */
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
/** Desktop Chrome user agents picked from at random when no user agent is configured. */
const DEFAULT_USER_AGENTS = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
];
11
/**
 * Chooses the User-Agent value for an outgoing request.
 *
 * A truthy `config.userAgent` always wins; otherwise one entry of
 * DEFAULT_USER_AGENTS is picked uniformly at random.
 *
 * @param config - configuration object; only `userAgent` is read.
 * @returns the user agent string to send.
 */
function getRotatingUserAgent(config) {
    const configured = config.userAgent;
    if (!configured) {
        const pick = Math.floor(Math.random() * DEFAULT_USER_AGENTS.length);
        return DEFAULT_USER_AGENTS[pick];
    }
    return configured;
}
16
/**
 * Decides whether a thrown value represents a transient failure worth retrying.
 *
 * An error qualifies when it (or an error on its `cause` chain) either carries a
 * `code` listed in RETRYABLE_ERROR_CODES or is named `TimeoutError`
 * (what `AbortSignal.timeout` raises).
 *
 * The `cause` chain is inspected because Node's built-in fetch reports socket
 * failures as `TypeError: fetch failed` and puts the original system error,
 * with its `code`, on `err.cause`.
 *
 * @param err - any thrown value; non-Error values are never retryable.
 * @returns true when the failure looks transient.
 */
function isRetryableError(err) {
    // Bounded walk so a self-referencing `cause` cannot loop forever.
    const MAX_CAUSE_DEPTH = 5;
    let current = err;
    for (let depth = 0; depth < MAX_CAUSE_DEPTH && current instanceof Error; depth++) {
        const code = current.code;
        if (code && RETRYABLE_ERROR_CODES.has(code))
            return true;
        if (current.name === 'TimeoutError')
            return true;
        current = current.cause;
    }
    return false;
}
27
/**
 * Computes the delay before a retry: exponential backoff plus random jitter.
 *
 * @param attempt - zero-based retry index; the base delay is 500ms * 2^attempt.
 * @returns delay in milliseconds, base plus up to (but excluding) 500ms of jitter.
 */
function backoffMs(attempt) {
    const base = 500 * 2 ** attempt;
    const jitter = Math.random() * 500;
    return base + jitter;
}
30
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param ms - delay passed straight to setTimeout.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
33
/**
 * Fetches `url` over plain HTTP, retrying transient failures with backoff.
 *
 * Retry limit, default timeout and redirect limit come from getConfig();
 * `options.timeoutMs` overrides the configured timeout. A failure is retried
 * only when it is flagged retryable (HttpFetchError.retryable, or
 * isRetryableError for any other thrown value) and attempts remain.
 *
 * @param url - address to fetch.
 * @param options - optional `headers` and `timeoutMs`.
 * @returns whatever fetchWithRedirects resolves with.
 * @throws the last error once it is non-retryable or retries are exhausted.
 */
export async function httpFetch(url, options = {}) {
    const settings = getConfig();
    const log = createLogger('fetch');
    const retryLimit = settings.fetchMaxRetries;
    const timeoutMs = options.timeoutMs ?? settings.fetchTimeoutMs;
    const redirectLimit = settings.maxRedirects;
    let failure;
    let attempt = 0;
    while (attempt <= retryLimit) {
        // Every attempt after the first waits; the backoff index starts at 0.
        if (attempt > 0) {
            const delayMs = backoffMs(attempt - 1);
            log.debug('retrying after backoff', { attempt, delayMs, url });
            await sleep(delayMs);
        }
        try {
            return await fetchWithRedirects(url, options, timeoutMs, redirectLimit, log);
        }
        catch (err) {
            failure = err;
            const canRetry = err instanceof HttpFetchError ? err.retryable : isRetryableError(err);
            if (!canRetry || attempt >= retryLimit) {
                throw err;
            }
            log.warn('fetch failed, will retry', {
                attempt,
                url,
                error: err instanceof Error ? err.message : String(err),
            });
        }
        attempt += 1;
    }
    // Only reached when the loop never ran (retry limit below zero or not a number).
    throw failure;
}
68
/**
 * Error raised by the HTTP client for conditions it classifies itself
 * (redirect problems, retryable statuses).
 *
 * `retryable` tells the retry loop whether another attempt is worthwhile.
 */
class HttpFetchError extends Error {
    retryable;
    /**
     * @param message - human-readable description.
     * @param retryable - true when the failure is transient.
     */
    constructor(message, retryable) {
        super(message);
        Object.assign(this, { retryable, name: 'HttpFetchError' });
    }
}
76
/**
 * Best-effort release of a response body that will not be read.
 * Node's fetch keeps the connection tied to an unread body, so redirect and
 * retryable-status responses are cancelled explicitly instead of abandoned.
 */
async function discardResponseBody(response) {
    try {
        await response.body?.cancel();
    }
    catch {
        // The body may already be closed or errored; nothing left to release.
    }
}
/**
 * Performs one logical GET, following redirects manually.
 *
 * Redirects are handled here (`redirect: 'manual'`) so loops and the hop limit
 * can be enforced and the final URL reported. Each hop gets a fresh timeout of
 * `timeoutMs`.
 *
 * @param originalUrl - URL requested by the caller; echoed back as `url`.
 * @param options - caller options; `headers` are merged over the default User-Agent.
 * @param timeoutMs - per-hop timeout for AbortSignal.timeout.
 * @param maxRedirects - maximum number of redirects to follow.
 * @param logger - logger used for per-hop debug output.
 * @returns `{ url, finalUrl, html, contentType, statusCode, headers, rawBuffer }`;
 *   for `application/pdf` responses `html` is '' and `rawBuffer` holds the bytes.
 * @throws HttpFetchError (non-retryable) on a redirect loop, a redirect without
 *   `location`, or too many redirects; HttpFetchError (retryable) on a status in
 *   RETRYABLE_STATUSES; the underlying error, tagged with a `retryable`
 *   property, when the request itself fails.
 */
async function fetchWithRedirects(originalUrl, options, timeoutMs, maxRedirects, logger) {
    const visited = new Set();
    let currentUrl = originalUrl;
    let redirectCount = 0;
    while (true) {
        if (visited.has(currentUrl)) {
            throw new HttpFetchError(`Redirect loop detected at ${currentUrl}`, false);
        }
        visited.add(currentUrl);
        logger.debug('fetching', { url: currentUrl, attempt: redirectCount });
        const signal = AbortSignal.timeout(timeoutMs);
        let response;
        try {
            const ua = getRotatingUserAgent(getConfig());
            // Caller headers come last so they can override the default User-Agent.
            const mergedHeaders = { 'User-Agent': ua, ...options.headers };
            response = await fetch(currentUrl, {
                headers: mergedHeaders,
                redirect: 'manual',
                signal,
            });
        }
        catch (err) {
            const failure = err instanceof Error ? err : new Error(String(err));
            // Same classification the retry loop uses, kept in one place.
            throw Object.assign(failure, { retryable: isRetryableError(failure) });
        }
        if (REDIRECT_STATUSES.has(response.status)) {
            await discardResponseBody(response);
            const location = response.headers.get('location');
            if (!location) {
                throw new HttpFetchError(`Redirect with no location header at ${currentUrl}`, false);
            }
            redirectCount++;
            if (redirectCount > maxRedirects) {
                throw new HttpFetchError(`Too many redirects (>${maxRedirects}) from ${originalUrl}`, false);
            }
            // Resolve relative redirects
            currentUrl = new URL(location, currentUrl).toString();
            continue;
        }
        if (RETRYABLE_STATUSES.has(response.status)) {
            await discardResponseBody(response);
            throw new HttpFetchError(`HTTP ${response.status} from ${currentUrl}`, true);
        }
        const contentType = response.headers.get('content-type') ?? '';
        const headers = {};
        response.headers.forEach((value, key) => {
            headers[key] = value;
        });
        const isPdf = contentType.includes('application/pdf');
        let html;
        let rawBuffer;
        if (isPdf) {
            // PDFs are returned as raw bytes; there is no markup to expose.
            const arrayBuf = await response.arrayBuffer();
            rawBuffer = Buffer.from(arrayBuf);
            html = '';
        }
        else {
            html = await response.text();
        }
        return {
            url: originalUrl,
            finalUrl: currentUrl,
            html,
            contentType,
            statusCode: response.status,
            headers,
            rawBuffer,
        };
    }
}
146
+ //# sourceMappingURL=http-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"http-client.js","sourceRoot":"","sources":["../../src/fetch/http-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAiB5C,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACpD,MAAM,qBAAqB,GAAG,IAAI,GAAG,CAAC,CAAC,YAAY,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;AACnF,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAExD,MAAM,mBAAmB,GAAG;IAC1B,uHAAuH;IACvH,iHAAiH;IACjH,uGAAuG;CACxG,CAAC;AAEF,SAAS,oBAAoB,CAAC,MAAqC;IACjE,IAAI,MAAM,CAAC,SAAS;QAAE,OAAO,MAAM,CAAC,SAAS,CAAC;IAC9C,OAAO,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC;AACrF,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAY;IACpC,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAI,GAA6B,CAAC,IAAI,CAAC;QACjD,IAAI,IAAI,IAAI,qBAAqB,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QACzD,iEAAiE;QACjE,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc;YAAE,OAAO,IAAI,CAAC;IAC/C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,SAAS,CAAC,OAAe;IAChC,OAAO,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC;AAC1D,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,UAA4B,EAAE;IACzE,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,MAAM,CAAC,eAAe,CAAC;IAC1C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC,cAAc,CAAC;IAC7D,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;IAEzC,IAAI,SAAkB,CAAC;IAEvB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;QACvD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YAChB,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YACzE,MAAM,KAAK,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,CAA
C;YACvF,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,CAAC;YAEhB,IAAI,GAAG,YAAY,cAAc,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC;gBACpD,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,MAAM,SAAS,GAAG,GAAG,YAAY,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;YAExF,IAAI,CAAC,SAAS,IAAI,OAAO,IAAI,UAAU,EAAE,CAAC;gBACxC,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,0BAA0B,EAAE;gBACtC,OAAO;gBACP,GAAG;gBACH,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACxD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,SAAS,CAAC;AAClB,CAAC;AAED,MAAM,cAAe,SAAQ,KAAK;IACa;IAA7C,YAAY,OAAe,EAAkB,SAAkB;QAC7D,KAAK,CAAC,OAAO,CAAC,CAAC;QAD4B,cAAS,GAAT,SAAS,CAAS;QAE7D,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,KAAK,UAAU,kBAAkB,CAC/B,WAAmB,EACnB,OAAyB,EACzB,SAAiB,EACjB,YAAoB,EACpB,MAAuC;IAEvC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,IAAI,UAAU,GAAG,WAAW,CAAC;IAC7B,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,OAAO,IAAI,EAAE,CAAC;QACZ,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,cAAc,CAAC,6BAA6B,UAAU,EAAE,EAAE,KAAK,CAAC,CAAC;QAC7E,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAExB,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,EAAE,GAAG,EAAE,UAAU,EAAE,OAAO,EAAE,aAAa,EAAE,CAAC,CAAC;QAEtE,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAE9C,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,oBAAoB,CAAC,SAAS,EAAE,CAAC,CAAC;YAC7C,MAAM,aAAa,GAAG,EAAE,YAAY,EAAE,EAAE,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;YAC/D,QAAQ,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE;gBACjC,OAAO,EAAE,aAAa;gBACtB,QAAQ,EAAE,QAAQ;gBAClB,MAAM;aACP,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,SAAS,GAAG,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,CAAC;YACtE,MAAM,SAAS,GAAG,GAAG,YAAY,KAAK,IAAI,qBAAqB,CAAC,GAAG,CAAE,GAA6B,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;YAC/G,MAAM,SAAS,GAAG,SAAS,IAAI,SAAS,CAAC;YACzC,MAAM,MAAM,CAAC,MAAM,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC;QAC1F,CAAC;QAED,IAAI,iBAAiB,CAAC,GAAG,CAAC,QA
AQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3C,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YAClD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,MAAM,IAAI,cAAc,CAAC,uCAAuC,UAAU,EAAE,EAAE,KAAK,CAAC,CAAC;YACvF,CAAC;YAED,aAAa,EAAE,CAAC;YAChB,IAAI,aAAa,GAAG,YAAY,EAAE,CAAC;gBACjC,MAAM,IAAI,cAAc,CAAC,wBAAwB,YAAY,UAAU,WAAW,EAAE,EAAE,KAAK,CAAC,CAAC;YAC/F,CAAC;YAED,6BAA6B;YAC7B,UAAU,GAAG,IAAI,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC,QAAQ,EAAE,CAAC;YACtD,SAAS;QACX,CAAC;QAED,IAAI,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,cAAc,CAAC,QAAQ,QAAQ,CAAC,MAAM,SAAS,UAAU,EAAE,EAAE,IAAI,CAAC,CAAC;QAC/E,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,MAAM,OAAO,GAA2B,EAAE,CAAC;QAC3C,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;YACtC,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACvB,CAAC,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,WAAW,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACtD,IAAI,IAAY,CAAC;QACjB,IAAI,SAA6B,CAAC;QAElC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC9C,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAClC,IAAI,GAAG,EAAE,CAAC;QACZ,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;QAED,OAAO;YACL,GAAG,EAAE,WAAW;YAChB,QAAQ,EAAE,UAAU;YACpB,IAAI;YACJ,WAAW;YACX,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,OAAO;YACP,SAAS;SACV,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,45 @@
1
+ import type { RawFetchResult } from '../types.js';
2
/** Per-request options accepted by SmartRouter.fetch. */
+ export interface RouterFetchOptions {
3
// 'always' goes straight to the browser pool, 'never' uses only the HTTP client,
// 'auto' (the default in router.js) tries HTTP first and may fall back to the browser.
+ renderJs?: 'auto' | 'always' | 'never';
4
// When true the request is routed to the browser pool together with whatever getAuthOptions() returns.
+ useAuth?: boolean;
5
// Extra request headers, forwarded to whichever fetcher handles the request.
+ headers?: Record<string, string>;
6
// Forwarded to the browser pool only; the HTTP path never receives it.
+ screenshot?: boolean;
7
+ }
8
/**
 * Plain-HTTP fetcher contract that SmartRouter depends on.
 * The resolved object has the same fields SmartRouter copies into a
 * RawFetchResult (it adds `method: 'http'` itself).
 */
+ export interface HttpClient {
9
+ fetch(url: string, options?: {
10
+ headers?: Record<string, string>;
11
+ timeoutMs?: number;
12
+ }): Promise<{
13
+ url: string;
14
+ finalUrl: string;
15
+ html: string;
16
+ contentType: string;
17
+ statusCode: number;
18
+ headers: Record<string, string>;
19
// NOTE(review): in http-client.js this is filled only for application/pdf responses,
// with `html` left empty — confirm that is the implementation wired in here.
+ rawBuffer?: Buffer;
20
+ }>;
21
+ }
22
/**
 * Browser-based fetcher contract that SmartRouter depends on.
 * SmartRouter passes `headers` and `screenshot`, plus the spread result of
 * getAuthOptions() when auth is requested.
 */
+ export interface BrowserPoolInterface {
23
+ fetchWithBrowser(url: string, options?: {
24
+ headers?: Record<string, string>;
25
// NOTE(review): presumably supplied via getAuthOptions(); its shape is not visible here.
+ storageStatePath?: string;
26
+ userDataDir?: string;
27
+ screenshot?: boolean;
28
+ }): Promise<RawFetchResult>;
29
+ }
30
/** Routing state SmartRouter keeps per hostname. */
+ interface DomainStats {
31
// Incremented each time the HTTP attempt throws in 'auto' mode; router.js never resets it.
+ failureCount: number;
32
// Set once an empty/SPA-shell page is seen or failureCount reaches the configured threshold;
// later 'auto' requests for the hostname then go straight to the browser pool.
+ preferPlaywright: boolean;
33
+ }
34
/**
 * Chooses between the plain HTTP client and the browser pool for each request
 * and remembers, per hostname, when the browser should be preferred.
 */
+ export declare class SmartRouter {
35
+ private readonly httpClient;
36
+ private readonly browserPool;
37
+ private readonly domainMap;
38
+ constructor(httpClient: HttpClient, browserPool: BrowserPoolInterface);
39
// Fetches `url` through the route selected by `options` (see RouterFetchOptions).
+ fetch(url: string, options?: RouterFetchOptions): Promise<RawFetchResult>;
40
// Returns the stats recorded for a hostname, or undefined when none exist yet.
+ getDomainStats(domain: string): DomainStats | undefined;
41
+ private ensureStats;
42
+ private toRawFetchResult;
43
+ }
44
+ export {};
45
+ //# sourceMappingURL=router.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../../src/fetch/router.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;IACvC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,CACH,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,GACjE,OAAO,CAAC;QACT,GAAG,EAAE,MAAM,CAAC;QACZ,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAChC,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,oBAAoB;IACnC,gBAAgB,CACd,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,OAAO,CAAA;KAAE,GACpH,OAAO,CAAC,cAAc,CAAC,CAAC;CAC5B;AAED,UAAU,WAAW;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,OAAO,CAAC;CAC3B;AAED,qBAAa,WAAW;IAIpB,OAAO,CAAC,QAAQ,CAAC,UAAU;IAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW;IAJ9B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAkC;gBAGzC,UAAU,EAAE,UAAU,EACtB,WAAW,EAAE,oBAAoB;IAG9C,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,kBAAuB,GAAG,OAAO,CAAC,cAAc,CAAC;IA6DnF,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS;IAIvD,OAAO,CAAC,WAAW;IASnB,OAAO,CAAC,gBAAgB;CAczB"}
@@ -0,0 +1,89 @@
1
+ import { getConfig } from '../config.js';
2
+ import { createLogger } from '../logger.js';
3
+ import { contentAppearsEmpty } from './content-check.js';
4
+ import { getAuthOptions } from './auth.js';
5
/**
 * Routes each fetch either to the plain HTTP client or to the browser pool,
 * and remembers per hostname when the browser should be preferred.
 */
export class SmartRouter {
    httpClient;
    browserPool;
    /** hostname -> { failureCount, preferPlaywright } */
    domainMap = new Map();
    /**
     * @param httpClient - object exposing `fetch(url, { headers })`.
     * @param browserPool - object exposing `fetchWithBrowser(url, options)`.
     */
    constructor(httpClient, browserPool) {
        this.httpClient = httpClient;
        this.browserPool = browserPool;
    }
    /**
     * Fetches `url` using the route selected by `options`.
     *
     * - `renderJs: 'always'` or `useAuth: true` -> browser pool.
     * - `renderJs: 'never'` -> HTTP client only, no fallback.
     * - `renderJs: 'auto'` (default) -> HTTP first; falls back to the browser when the
     *   page looks empty or the hostname has reached `browserFallbackThreshold` failures.
     *   Either event marks the hostname so later 'auto' requests skip HTTP.
     *
     * @param url - absolute URL; `new URL(url)` throws if it cannot be parsed.
     * @param options - `{ renderJs, useAuth, headers, screenshot }`, all optional.
     * @returns the fetch result from whichever route served the request.
     * @throws the HTTP error when the HTTP attempt fails below the failure threshold.
     */
    async fetch(url, options = {}) {
        const { renderJs = 'auto', useAuth = false, headers, screenshot } = options;
        const config = getConfig();
        const logger = createLogger('fetch');
        const threshold = config.browserFallbackThreshold;
        const domain = new URL(url).hostname;
        // Always Playwright for auth or explicit override
        if (renderJs === 'always' || useAuth) {
            const authOptions = useAuth ? (getAuthOptions() ?? {}) : {};
            logger.debug('routing to playwright', { url, reason: useAuth ? 'auth' : 'render_js=always' });
            return this.browserPool.fetchWithBrowser(url, { headers, screenshot, ...authOptions });
        }
        // HTTP only, no fallback
        if (renderJs === 'never') {
            logger.debug('routing to http (never)', { url });
            const result = await this.httpClient.fetch(url, { headers });
            this.ensureStats(domain);
            return this.toRawFetchResult(result);
        }
        // auto: check if domain is already marked for Playwright
        const stats = this.ensureStats(domain);
        if (stats.preferPlaywright) {
            logger.debug('routing to playwright (domain marked)', { url, domain });
            return this.browserPool.fetchWithBrowser(url, { headers, screenshot });
        }
        // Try HTTP first
        try {
            const result = await this.httpClient.fetch(url, { headers });
            // Binary payloads (the HTTP client returns PDFs as `rawBuffer` with `html: ''`)
            // have no markup to inspect, so an empty `html` there must not be read as an
            // SPA shell and must not pin the whole hostname to the browser.
            const hasBinaryBody = result.rawBuffer != null;
            // Check for SPA shell / empty content
            if (!hasBinaryBody && contentAppearsEmpty(result.html)) {
                logger.info('SPA shell detected, marking domain for playwright', { url, domain });
                stats.preferPlaywright = true;
                return this.browserPool.fetchWithBrowser(url, { headers, screenshot });
            }
            return this.toRawFetchResult(result);
        }
        catch (err) {
            stats.failureCount++;
            logger.warn('http fetch failed', {
                url,
                domain,
                failureCount: stats.failureCount,
                error: err instanceof Error ? err.message : String(err),
            });
            if (stats.failureCount >= threshold) {
                logger.info('failure threshold reached, marking domain for playwright', { url, domain, threshold });
                stats.preferPlaywright = true;
                return this.browserPool.fetchWithBrowser(url, { headers, screenshot });
            }
            throw err;
        }
    }
    /** Returns the stats recorded for `domain`, or undefined when none exist yet. */
    getDomainStats(domain) {
        return this.domainMap.get(domain);
    }
    /** Returns the stats entry for `domain`, creating a zeroed one on first use. */
    ensureStats(domain) {
        let stats = this.domainMap.get(domain);
        if (!stats) {
            stats = { failureCount: 0, preferPlaywright: false };
            this.domainMap.set(domain, stats);
        }
        return stats;
    }
    /** Copies an HTTP client result into the router's result shape, tagging it `method: 'http'`. */
    toRawFetchResult(result) {
        return {
            url: result.url,
            finalUrl: result.finalUrl,
            html: result.html,
            contentType: result.contentType,
            statusCode: result.statusCode,
            method: 'http',
            headers: result.headers,
            rawBuffer: result.rawBuffer,
        };
    }
}
89
+ //# sourceMappingURL=router.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"router.js","sourceRoot":"","sources":["../../src/fetch/router.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAqC3C,MAAM,OAAO,WAAW;IAIH;IACA;IAJF,SAAS,GAAG,IAAI,GAAG,EAAuB,CAAC;IAE5D,YACmB,UAAsB,EACtB,WAAiC;QADjC,eAAU,GAAV,UAAU,CAAY;QACtB,gBAAW,GAAX,WAAW,CAAsB;IACjD,CAAC;IAEJ,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAA8B,EAAE;QACvD,MAAM,EAAE,QAAQ,GAAG,MAAM,EAAE,OAAO,GAAG,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;QAC5E,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,SAAS,GAAG,MAAM,CAAC,wBAAwB,CAAC;QAClD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAErC,kDAAkD;QAClD,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,EAAE,CAAC;YACrC,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5D,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,kBAAkB,EAAE,CAAC,CAAC;YAC9F,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,WAAW,EAAE,CAAC,CAAC;QACzF,CAAC;QAED,yBAAyB;QACzB,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;YACzB,MAAM,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAC7D,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;YACzB,OAAO,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAED,yDAAyD;QACzD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAEvC,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC3B,MAAM,CAAC,KAAK,CAAC,uCAAuC,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACvE,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;QACzE,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAE7D,sCAAsC;YACtC,IAAI,mBAAmB,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,CAAC,IAAI,CAAC,mDAAmD,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gB
AClF,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC9B,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,OAAO,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,KAAK,CAAC,YAAY,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC,mBAAmB,EAAE;gBAC/B,GAAG;gBACH,MAAM;gBACN,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACxD,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC;gBACpC,MAAM,CAAC,IAAI,CAAC,0DAA0D,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;gBACpG,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC9B,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAED,cAAc,CAAC,MAAc;QAC3B,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAEO,WAAW,CAAC,MAAc;QAChC,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,KAAK,GAAG,EAAE,YAAY,EAAE,CAAC,EAAE,gBAAgB,EAAE,KAAK,EAAE,CAAC;YACrD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,gBAAgB,CACtB,MAAgD;QAEhD,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,SAAS,EAAE,MAAM,CAAC,SAAS;SAC5B,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
package/dist/index.js ADDED
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env node
2
+ import { parseCommand } from './cli/index.js';
3
+ import { runWarmup } from './cli/warmup.js';
4
+ import { runDaemon } from './cli/daemon.js';
5
+ import { runHealthCheck } from './cli/health.js';
6
+ import { startServer } from './server.js';
7
// CLI entry point: parse argv, then hand off to the matching sub-command.
// Only `warmup` and `mcp` are awaited; `serve` and `health` are started without awaiting.
// A command matching none of the four names falls through and does nothing.
const { command, args } = parseCommand(process.argv.slice(2));
if (command === 'warmup') {
    await runWarmup(args);
}
else if (command === 'serve') {
    runDaemon(args);
}
else if (command === 'health') {
    runHealthCheck();
}
else if (command === 'mcp') {
    await startServer();
}
22
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AAE9D,QAAQ,OAAO,EAAE,CAAC;IAChB,KAAK,QAAQ;QACX,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM;IAER,KAAK,OAAO;QACV,SAAS,CAAC,IAAI,CAAC,CAAC;QAChB,MAAM;IAER,KAAK,QAAQ;QACX,cAAc,EAAE,CAAC;QACjB,MAAM;IAER,KAAK,KAAK;QACR,MAAM,WAAW,EAAE,CAAC;QACpB,MAAM;AACV,CAAC"}
@@ -0,0 +1,10 @@
1
+ type Module = 'fetch' | 'search' | 'crawl' | 'cache' | 'extract' | 'searxng' | 'server' | 'cli' | 'jsonld';
2
+ export interface Logger {
3
+ debug(msg: string, data?: Record<string, unknown>): void;
4
+ info(msg: string, data?: Record<string, unknown>): void;
5
+ warn(msg: string, data?: Record<string, unknown>): void;
6
+ error(msg: string, data?: Record<string, unknown>): void;
7
+ }
8
+ export declare function createLogger(module: Module): Logger;
9
+ export {};
10
+ //# sourceMappingURL=logger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAGA,KAAK,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;AAS3G,MAAM,WAAW,MAAM;IACrB,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxD,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC1D;AAmBD,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAiBnD"}
package/dist/logger.js ADDED
@@ -0,0 +1,39 @@
1
+ import { getConfig } from './config.js';
2
+ const LEVEL_PRIORITY = {
3
+ debug: 0,
4
+ info: 1,
5
+ warn: 2,
6
+ error: 3,
7
+ };
8
/**
 * Writes one log record to stderr as a single line of JSON.
 *
 * The record carries `ts` (ISO-8601 timestamp), `level`, `msg`, `module` and,
 * when a payload is supplied, `data` - in that key order.
 *
 * @param level  Severity label stored in the record.
 * @param module Name of the subsystem emitting the record.
 * @param msg    Human-readable message.
 * @param data   Optional structured payload.
 */
function writeJson(level, module, msg, data) {
    const record = {
        ts: new Date().toISOString(),
        level,
        msg,
        module,
    };
    let line;
    try {
        line = JSON.stringify(data ? { ...record, data } : record);
    }
    catch (err) {
        // JSON.stringify throws on circular structures and BigInt values.
        // A log call must never take down its caller, so emit the record
        // without the payload and note why the payload is missing.
        line = JSON.stringify({
            ...record,
            data_error: err instanceof Error ? err.message : String(err),
        });
    }
    process.stderr.write(line + '\n');
}
18
/**
 * Renders a single payload value for the text log format.
 *
 * Primitives, `null` and Error instances use their normal string form.
 * Other objects are JSON-encoded so nested data stays readable instead of
 * collapsing to "[object Object]". If encoding fails (for example on a
 * circular structure) the plain string form is used.
 */
function formatLogValue(value) {
    if (value === null || typeof value !== 'object' || value instanceof Error) {
        // String() also copes with Symbol values, which template literals reject.
        return String(value);
    }
    try {
        return JSON.stringify(value) ?? String(value);
    }
    catch {
        return String(value);
    }
}
/**
 * Writes one log record to stderr as a human-readable line:
 * `[<ISO timestamp>] <LEVEL padded to 5> [<module>] <msg> key=value ...`
 *
 * @param level  Severity label; upper-cased and right-padded to five characters.
 * @param module Name of the subsystem emitting the record.
 * @param msg    Human-readable message.
 * @param data   Optional payload, appended as space-separated `key=value` pairs.
 */
function writeText(level, module, msg, data) {
    const ts = new Date().toISOString();
    const dataStr = data
        ? ' ' + Object.entries(data).map(([k, v]) => `${k}=${formatLogValue(v)}`).join(' ')
        : '';
    process.stderr.write(`[${ts}] ${level.toUpperCase().padEnd(5)} [${module}] ${msg}${dataStr}\n`);
}
23
/**
 * Builds a logger bound to one module name.
 *
 * Configuration is read once, when the logger is created: `logLevel` sets the
 * minimum severity that is emitted and `logFormat` selects the writer
 * (`'json'` uses writeJson, anything else uses writeText).
 *
 * @param module Name attached to every record written by the returned logger.
 * @returns An object with `debug`, `info`, `warn` and `error` methods, each
 *          taking a message and an optional payload object.
 */
export function createLogger(module) {
    const config = getConfig();
    // An unrecognised level used to yield `undefined` here, which made every
    // `>=` comparison false and silently dropped all output, errors included.
    // Fall back to the 'info' threshold instead. `??` (not `||`) is required
    // because the priority of 'debug' is 0.
    const threshold = LEVEL_PRIORITY[config.logLevel] ?? LEVEL_PRIORITY.info;
    const emit = config.logFormat === 'json' ? writeJson : writeText;
    const log = (level, msg, data) => {
        if (LEVEL_PRIORITY[level] < threshold) {
            return;
        }
        emit(level, module, msg, data);
    };
    return {
        debug: (msg, data) => log('debug', msg, data),
        info: (msg, data) => log('info', msg, data),
        warn: (msg, data) => log('warn', msg, data),
        error: (msg, data) => log('error', msg, data),
    };
}
39
+ //# sourceMappingURL=logger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAKxC,MAAM,cAAc,GAA6B;IAC/C,KAAK,EAAE,CAAC;IACR,IAAI,EAAE,CAAC;IACP,IAAI,EAAE,CAAC;IACP,KAAK,EAAE,CAAC;CACT,CAAC;AASF,SAAS,SAAS,CAAC,KAAe,EAAE,MAAc,EAAE,GAAW,EAAE,IAA8B;IAC7F,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC;QAC1B,EAAE,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QAC5B,KAAK;QACL,GAAG;QACH,MAAM;QACN,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC1B,CAAC,CAAC;IACH,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,SAAS,CAAC,KAAe,EAAE,MAAc,EAAE,GAAW,EAAE,IAA8B;IAC7F,MAAM,EAAE,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9F,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,KAAK,CAAC,WAAW,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,KAAK,GAAG,GAAG,OAAO,IAAI,CAAC,CAAC;AAClG,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,MAAc;IACzC,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,KAAK,GAAG,MAAM,CAAC,SAAS,KAAK,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;IAElE,SAAS,GAAG,CAAC,KAAe,EAAE,GAAW,EAAE,IAA8B;QACvE,IAAI,cAAc,CAAC,KAAK,CAAC,IAAI,WAAW,EAAE,CAAC;YACzC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,OAAO;QACL,KAAK,EAAE,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC;QAC7C,IAAI,EAAE,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,CAAC;QAC3C,IAAI,EAAE,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,CAAC;QAC3C,KAAK,EAAE,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC;KAC9C,CAAC;AACJ,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { RawSearchResult } from '../types.js';
2
+ export interface MergedSearchResult {
3
+ title: string;
4
+ url: string;
5
+ snippet: string;
6
+ relevance_score: number;
7
+ engines: string[];
8
+ }
9
+ export declare function deduplicateResults(results: RawSearchResult[]): MergedSearchResult[];
10
+ //# sourceMappingURL=dedup.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dedup.d.ts","sourceRoot":"","sources":["../../src/search/dedup.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,kBAAkB,EAAE,CAkCnF"}
@@ -0,0 +1,35 @@
1
+ import { normalizeUrl } from '../cache/store.js';
2
/**
 * Collapses raw search results that point at the same page into one entry each.
 *
 * Results are keyed by `normalizeUrl(result.url)`; if normalisation throws, the
 * raw URL string is the key. For each key the merged entry keeps:
 *   - the `url` of the first result seen,
 *   - the highest `relevance_score` together with that result's `title`,
 *   - a non-empty `snippet` whenever any duplicate supplied one,
 *   - the list of distinct `engine` names, in first-seen order.
 *
 * @param results Raw per-engine results.
 * @returns Merged entries sorted by `relevance_score`, highest first.
 */
export function deduplicateResults(results) {
    const urlMap = new Map();
    for (const result of results) {
        let normalized;
        try {
            normalized = normalizeUrl(result.url);
        }
        catch {
            // Best effort: an unparseable URL is still deduplicated by exact string.
            normalized = result.url;
        }
        const existing = urlMap.get(normalized);
        if (!existing) {
            urlMap.set(normalized, {
                title: result.title,
                url: result.url,
                snippet: result.snippet,
                relevance_score: result.relevance_score,
                engines: [result.engine],
            });
            continue;
        }
        if (result.relevance_score > existing.relevance_score) {
            existing.relevance_score = result.relevance_score;
            existing.title = result.title;
            // Do not trade a usable snippet for an empty one just because the
            // better-ranked duplicate came without snippet text.
            existing.snippet = result.snippet || existing.snippet;
        }
        else if (!existing.snippet && result.snippet) {
            // Backfill a missing snippet from a lower-ranked duplicate.
            existing.snippet = result.snippet;
        }
        if (!existing.engines.includes(result.engine)) {
            existing.engines.push(result.engine);
        }
    }
    return [...urlMap.values()].sort((a, b) => b.relevance_score - a.relevance_score);
}
35
+ //# sourceMappingURL=dedup.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dedup.js","sourceRoot":"","sources":["../../src/search/dedup.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAWjD,MAAM,UAAU,kBAAkB,CAAC,OAA0B;IAC3D,MAAM,MAAM,GAAG,IAAI,GAAG,EAA8B,CAAC;IAErD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,UAAkB,CAAC;QACvB,IAAI,CAAC;YACH,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,UAAU,GAAG,MAAM,CAAC,GAAG,CAAC;QAC1B,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAExC,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,MAAM,CAAC,eAAe,GAAG,QAAQ,CAAC,eAAe,EAAE,CAAC;gBACtD,QAAQ,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe,CAAC;gBAClD,QAAQ,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;gBAC9B,QAAQ,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;YACpC,CAAC;YACD,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC9C,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE;gBACrB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,GAAG,EAAE,MAAM,CAAC,GAAG;gBACf,OAAO,EAAE,MAAM,CAAC,OAAO;gBACvB,eAAe,EAAE,MAAM,CAAC,eAAe;gBACvC,OAAO,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC;aACzB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC;AACpF,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { SearchEngine, SearchEngineOptions, RawSearchResult } from '../../types.js';
2
+ export declare class BingEngine implements SearchEngine {
3
+ name: string;
4
+ search(query: string, options?: SearchEngineOptions): Promise<RawSearchResult[]>;
5
+ parseResults(html: string, maxResults: number): RawSearchResult[];
6
+ }
7
+ //# sourceMappingURL=bing.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bing.d.ts","sourceRoot":"","sources":["../../../src/search/engines/bing.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAKzF,qBAAa,UAAW,YAAW,YAAY;IAC7C,IAAI,SAAU;IAER,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IAuB1F,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,eAAe,EAAE;CA4BlE"}
@@ -0,0 +1,48 @@
1
+ import { parseHTML } from 'linkedom';
2
+ import { createLogger } from '../../logger.js';
3
+ const log = createLogger('search');
4
/**
 * Resolves a Bing click-tracking link to the page it points at.
 *
 * Bing result anchors are commonly of the form
 * `https://www.bing.com/ck/a?...&u=a1<base64url of the target URL>`.
 * Returning that wrapper as the result URL hides the real destination and
 * prevents the same page found by another engine from being recognised as a
 * duplicate. Any href that does not match this shape, or whose payload does
 * not decode to an http(s) URL, is returned unchanged.
 */
function unwrapBingRedirect(href) {
    let parsed;
    try {
        parsed = new URL(href);
    }
    catch {
        return href;
    }
    if (!/(^|\.)bing\.com$/.test(parsed.hostname) || parsed.pathname !== '/ck/a') {
        return href;
    }
    const token = parsed.searchParams.get('u');
    if (!token || !token.startsWith('a1')) {
        return href;
    }
    const target = Buffer.from(token.slice(2), 'base64url').toString('utf8');
    return /^https?:\/\//i.test(target) ? target : href;
}
/**
 * Search engine adapter that scrapes the Bing HTML results page.
 */
export class BingEngine {
    name = 'bing';
    /**
     * Fetches the Bing results page for `query` and parses it.
     *
     * @param query   Search terms, sent as the `q` parameter.
     * @param options Optional `timeoutMs` (default 10000), `maxResults`
     *                (default 10) and `language` (sent as Accept-Language,
     *                default 'en-US,en;q=0.9').
     * @returns Parsed results, at most `maxResults` of them.
     * @throws Error when Bing answers with a non-2xx status; fetch and
     *         timeout failures propagate as raised by `fetch`.
     */
    async search(query, options = {}) {
        const timeoutMs = options.timeoutMs ?? 10000;
        const maxResults = options.maxResults ?? 10;
        const params = new URLSearchParams({ q: query });
        const url = `https://www.bing.com/search?${params}`;
        log.debug('scraping bing', { query });
        const response = await fetch(url, {
            signal: AbortSignal.timeout(timeoutMs),
            headers: {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
                'Accept-Language': options.language ?? 'en-US,en;q=0.9',
            },
        });
        if (!response.ok) {
            throw new Error(`Bing returned ${response.status}`);
        }
        const html = await response.text();
        return this.parseResults(html, maxResults);
    }
    /**
     * Extracts results from a Bing results page.
     *
     * Only the first `maxResults` `li.b_algo` items are inspected; an item is
     * skipped when it has no `h2 a` link with both an href and a title.
     * `relevance_score` falls linearly with the item's position on the page
     * (1 for the first item), and click-tracking hrefs are unwrapped.
     *
     * @param html       Raw HTML of the results page.
     * @param maxResults Upper bound on the number of items inspected.
     */
    parseResults(html, maxResults) {
        const { document } = parseHTML(html);
        const results = [];
        const items = document.querySelectorAll('li.b_algo');
        const total = Math.min(items.length, maxResults);
        for (let i = 0; i < total; i++) {
            const item = items[i];
            const link = item.querySelector('h2 a');
            const snippetEl = item.querySelector('.b_lineclamp2, .b_lineclamp3, .b_caption p');
            const href = link?.getAttribute('href');
            const title = link?.textContent?.trim();
            if (href && title) {
                results.push({
                    title,
                    url: unwrapBingRedirect(href),
                    snippet: snippetEl?.textContent?.trim() ?? '',
                    relevance_score: 1 - i / Math.max(items.length, 1),
                    engine: 'bing',
                });
            }
        }
        return results;
    }
}
48
+ //# sourceMappingURL=bing.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bing.js","sourceRoot":"","sources":["../../../src/search/engines/bing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAErC,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAE/C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,OAAO,UAAU;IACrB,IAAI,GAAG,MAAM,CAAC;IAEd,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAA+B,EAAE;QAC3D,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC;QAC7C,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC;QAE5C,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QACjD,MAAM,GAAG,GAAG,+BAA+B,MAAM,EAAE,CAAC;QAEpD,GAAG,CAAC,KAAK,CAAC,eAAe,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAEtC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC;YACtC,OAAO,EAAE;gBACP,YAAY,EAAE,uHAAuH;gBACrI,iBAAiB,EAAE,OAAO,CAAC,QAAQ,IAAI,gBAAgB;aACxD;SACF,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,iBAAiB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAEtE,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;IAC7C,CAAC;IAED,YAAY,CAAC,IAAY,EAAE,UAAkB;QAC3C,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,QAAQ,CAAC,gBAAgB,CAAC,WAAW,CAAC,CAAC;QACrD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAEjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YACxC,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,4CAA4C,CAAC,CAAC;YAEnF,MAAM,IAAI,GAAG,IAAI,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAExC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;gBAClB,OAAO,CAAC,IAAI,CAAC;oBACX,KAAK;oBACL,GAAG,EAAE,IAAI;oBACT,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE;oBAC7C,eAAe,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;oBAClD,MAAM,EAAE,MAAM;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
@@ -0,0 +1,7 @@
1
+ import type { SearchEngine, SearchEngineOptions, RawSearchResult } from '../../types.js';
2
+ export declare class DuckDuckGoEngine implements SearchEngine {
3
+ name: string;
4
+ search(query: string, options?: SearchEngineOptions): Promise<RawSearchResult[]>;
5
+ parseResults(html: string, maxResults: number): RawSearchResult[];
6
+ }
7
+ //# sourceMappingURL=duckduckgo.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"duckduckgo.d.ts","sourceRoot":"","sources":["../../../src/search/engines/duckduckgo.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAWzF,qBAAa,gBAAiB,YAAW,YAAY;IACnD,IAAI,SAAgB;IAEd,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IAoB1F,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,eAAe,EAAE;CA4BlE"}
@@ -0,0 +1,50 @@
1
+ import { parseHTML } from 'linkedom';
2
+ import { createLogger } from '../../logger.js';
3
+ const log = createLogger('search');
4
+ const USER_AGENTS = [
5
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
6
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
7
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0',
8
+ ];
9
/**
 * Resolves a DuckDuckGo redirect link to the page it points at.
 *
 * The non-JavaScript DuckDuckGo pages may emit result anchors of the form
 * `//duckduckgo.com/l/?uddg=<percent-encoded target>&rut=...`. Such hrefs are
 * protocol-relative, so they cannot be parsed as absolute URLs downstream,
 * and they hide the real destination from URL-based deduplication. Any href
 * that does not match this shape, or whose `uddg` value is not an http(s)
 * URL, is returned unchanged.
 */
function unwrapDuckDuckGoRedirect(href) {
    let parsed;
    try {
        // The base resolves protocol-relative and path-relative hrefs;
        // absolute hrefs ignore it.
        parsed = new URL(href, 'https://lite.duckduckgo.com/lite/');
    }
    catch {
        return href;
    }
    if (!/(^|\.)duckduckgo\.com$/.test(parsed.hostname) || parsed.pathname !== '/l/') {
        return href;
    }
    const target = parsed.searchParams.get('uddg');
    return target && /^https?:\/\//i.test(target) ? target : href;
}
/**
 * Search engine adapter that scrapes the DuckDuckGo "lite" HTML page.
 */
export class DuckDuckGoEngine {
    name = 'duckduckgo';
    /**
     * Fetches the DuckDuckGo lite results page for `query` and parses it.
     * The User-Agent header is picked at random from USER_AGENTS per request.
     *
     * @param query   Search terms, sent as the `q` parameter.
     * @param options Optional `timeoutMs` (default 10000) and `maxResults`
     *                (default 10).
     * @returns Parsed results, at most `maxResults` of them.
     * @throws Error when DuckDuckGo answers with a non-2xx status; fetch and
     *         timeout failures propagate as raised by `fetch`.
     */
    async search(query, options = {}) {
        const timeoutMs = options.timeoutMs ?? 10000;
        const maxResults = options.maxResults ?? 10;
        const params = new URLSearchParams({ q: query });
        const url = `https://lite.duckduckgo.com/lite/?${params}`;
        log.debug('scraping duckduckgo', { query });
        const response = await fetch(url, {
            signal: AbortSignal.timeout(timeoutMs),
            headers: { 'User-Agent': USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)] },
        });
        if (!response.ok) {
            throw new Error(`DDG returned ${response.status}`);
        }
        const html = await response.text();
        return this.parseResults(html, maxResults);
    }
    /**
     * Extracts results from a DuckDuckGo lite page.
     *
     * Links (`a.result-link`) and snippets (`.result-snippet`) are collected
     * separately and paired by index; a link without href or title is skipped.
     * `relevance_score` falls linearly with the link's position on the page
     * (1 for the first link), and redirect hrefs are unwrapped.
     *
     * NOTE(review): index pairing assumes every link has a matching snippet
     * element in the same order - confirm against the live page markup.
     *
     * @param html       Raw HTML of the results page.
     * @param maxResults Upper bound on the number of links inspected.
     */
    parseResults(html, maxResults) {
        const { document } = parseHTML(html);
        const results = [];
        const links = document.querySelectorAll('a.result-link');
        const snippets = document.querySelectorAll('.result-snippet');
        const total = Math.min(links.length, maxResults);
        for (let i = 0; i < total; i++) {
            const link = links[i];
            const snippet = snippets[i];
            const href = link?.getAttribute('href');
            const title = link?.textContent?.trim();
            if (href && title) {
                results.push({
                    title,
                    url: unwrapDuckDuckGoRedirect(href),
                    snippet: snippet?.textContent?.trim() ?? '',
                    relevance_score: 1 - i / Math.max(links.length, 1),
                    engine: 'duckduckgo',
                });
            }
        }
        return results;
    }
}
50
+ //# sourceMappingURL=duckduckgo.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"duckduckgo.js","sourceRoot":"","sources":["../../../src/search/engines/duckduckgo.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAErC,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAE/C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,WAAW,GAAG;IAClB,uHAAuH;IACvH,iHAAiH;IACjH,wEAAwE;CACzE,CAAC;AAEF,MAAM,OAAO,gBAAgB;IAC3B,IAAI,GAAG,YAAY,CAAC;IAEpB,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAA+B,EAAE;QAC3D,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC;QAC7C,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC;QAE5C,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QACjD,MAAM,GAAG,GAAG,qCAAqC,MAAM,EAAE,CAAC;QAE1D,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAE5C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC;YACtC,OAAO,EAAE,EAAE,YAAY,EAAE,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,EAAE;SACvF,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,gBAAgB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAErE,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;IAC7C,CAAC;IAED,YAAY,CAAC,IAAY,EAAE,UAAkB;QAC3C,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,QAAQ,CAAC,gBAAgB,CAAC,eAAe,CAAC,CAAC;QACzD,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,iBAAiB,CAAC,CAAC;QAE9D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAEjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,IAAI,GAAG,IAAI,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAExC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;gBAClB,OAAO,CAAC,IAAI,CAAC;oBACX,KAAK;oBACL,GAAG,EAAE,IAAI;oBACT,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE;oBAC3C,eAAe,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;oBAClD,MAAM,EAAE,YAAY;iBACrB
,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
@@ -0,0 +1,7 @@
1
+ import type { SearchEngine, SearchEngineOptions, RawSearchResult } from '../../types.js';
2
+ export declare class StartpageEngine implements SearchEngine {
3
+ name: string;
4
+ search(query: string, options?: SearchEngineOptions): Promise<RawSearchResult[]>;
5
+ parseResults(html: string, maxResults: number): RawSearchResult[];
6
+ }
7
+ //# sourceMappingURL=startpage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"startpage.d.ts","sourceRoot":"","sources":["../../../src/search/engines/startpage.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAKzF,qBAAa,eAAgB,YAAW,YAAY;IAClD,IAAI,SAAe;IAEb,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IAyB1F,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,eAAe,EAAE;CA4BlE"}