@staticn0va/wigolo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/LICENSE +74 -0
  2. package/README.md +272 -0
  3. package/dist/cache/db.d.ts +5 -0
  4. package/dist/cache/db.d.ts.map +1 -0
  5. package/dist/cache/db.js +97 -0
  6. package/dist/cache/db.js.map +1 -0
  7. package/dist/cache/store.d.ts +26 -0
  8. package/dist/cache/store.d.ts.map +1 -0
  9. package/dist/cache/store.js +214 -0
  10. package/dist/cache/store.js.map +1 -0
  11. package/dist/cli/daemon.d.ts +2 -0
  12. package/dist/cli/daemon.d.ts.map +1 -0
  13. package/dist/cli/daemon.js +5 -0
  14. package/dist/cli/daemon.js.map +1 -0
  15. package/dist/cli/health.d.ts +2 -0
  16. package/dist/cli/health.d.ts.map +1 -0
  17. package/dist/cli/health.js +5 -0
  18. package/dist/cli/health.js.map +1 -0
  19. package/dist/cli/index.d.ts +7 -0
  20. package/dist/cli/index.d.ts.map +1 -0
  21. package/dist/cli/index.js +9 -0
  22. package/dist/cli/index.js.map +1 -0
  23. package/dist/cli/warmup.d.ts +11 -0
  24. package/dist/cli/warmup.d.ts.map +1 -0
  25. package/dist/cli/warmup.js +107 -0
  26. package/dist/cli/warmup.js.map +1 -0
  27. package/dist/config.d.ts +41 -0
  28. package/dist/config.d.ts.map +1 -0
  29. package/dist/config.js +66 -0
  30. package/dist/config.js.map +1 -0
  31. package/dist/crawl/crawler.d.ts +18 -0
  32. package/dist/crawl/crawler.d.ts.map +1 -0
  33. package/dist/crawl/crawler.js +228 -0
  34. package/dist/crawl/crawler.js.map +1 -0
  35. package/dist/crawl/dedup.d.ts +15 -0
  36. package/dist/crawl/dedup.d.ts.map +1 -0
  37. package/dist/crawl/dedup.js +93 -0
  38. package/dist/crawl/dedup.js.map +1 -0
  39. package/dist/crawl/mapper.d.ts +17 -0
  40. package/dist/crawl/mapper.d.ts.map +1 -0
  41. package/dist/crawl/mapper.js +178 -0
  42. package/dist/crawl/mapper.js.map +1 -0
  43. package/dist/crawl/rate-limiter.d.ts +10 -0
  44. package/dist/crawl/rate-limiter.d.ts.map +1 -0
  45. package/dist/crawl/rate-limiter.js +72 -0
  46. package/dist/crawl/rate-limiter.js.map +1 -0
  47. package/dist/crawl/robots.d.ts +9 -0
  48. package/dist/crawl/robots.d.ts.map +1 -0
  49. package/dist/crawl/robots.js +63 -0
  50. package/dist/crawl/robots.js.map +1 -0
  51. package/dist/crawl/sitemap.d.ts +4 -0
  52. package/dist/crawl/sitemap.d.ts.map +1 -0
  53. package/dist/crawl/sitemap.js +38 -0
  54. package/dist/crawl/sitemap.js.map +1 -0
  55. package/dist/crawl/url-utils.d.ts +3 -0
  56. package/dist/crawl/url-utils.d.ts.map +1 -0
  57. package/dist/crawl/url-utils.js +41 -0
  58. package/dist/crawl/url-utils.js.map +1 -0
  59. package/dist/extraction/defuddle.d.ts +3 -0
  60. package/dist/extraction/defuddle.d.ts.map +1 -0
  61. package/dist/extraction/defuddle.js +26 -0
  62. package/dist/extraction/defuddle.js.map +1 -0
  63. package/dist/extraction/extract.d.ts +5 -0
  64. package/dist/extraction/extract.d.ts.map +1 -0
  65. package/dist/extraction/extract.js +83 -0
  66. package/dist/extraction/extract.js.map +1 -0
  67. package/dist/extraction/jsonld.d.ts +4 -0
  68. package/dist/extraction/jsonld.d.ts.map +1 -0
  69. package/dist/extraction/jsonld.js +64 -0
  70. package/dist/extraction/jsonld.js.map +1 -0
  71. package/dist/extraction/markdown.d.ts +10 -0
  72. package/dist/extraction/markdown.d.ts.map +1 -0
  73. package/dist/extraction/markdown.js +107 -0
  74. package/dist/extraction/markdown.js.map +1 -0
  75. package/dist/extraction/pipeline.d.ts +11 -0
  76. package/dist/extraction/pipeline.d.ts.map +1 -0
  77. package/dist/extraction/pipeline.js +95 -0
  78. package/dist/extraction/pipeline.js.map +1 -0
  79. package/dist/extraction/readability.d.ts +3 -0
  80. package/dist/extraction/readability.d.ts.map +1 -0
  81. package/dist/extraction/readability.js +32 -0
  82. package/dist/extraction/readability.js.map +1 -0
  83. package/dist/extraction/schema.d.ts +7 -0
  84. package/dist/extraction/schema.d.ts.map +1 -0
  85. package/dist/extraction/schema.js +86 -0
  86. package/dist/extraction/schema.js.map +1 -0
  87. package/dist/extraction/site-extractors/docs-generic.d.ts +3 -0
  88. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -0
  89. package/dist/extraction/site-extractors/docs-generic.js +104 -0
  90. package/dist/extraction/site-extractors/docs-generic.js.map +1 -0
  91. package/dist/extraction/site-extractors/github.d.ts +3 -0
  92. package/dist/extraction/site-extractors/github.d.ts.map +1 -0
  93. package/dist/extraction/site-extractors/github.js +107 -0
  94. package/dist/extraction/site-extractors/github.js.map +1 -0
  95. package/dist/extraction/site-extractors/mdn.d.ts +3 -0
  96. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -0
  97. package/dist/extraction/site-extractors/mdn.js +58 -0
  98. package/dist/extraction/site-extractors/mdn.js.map +1 -0
  99. package/dist/extraction/site-extractors/stackoverflow.d.ts +3 -0
  100. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -0
  101. package/dist/extraction/site-extractors/stackoverflow.js +88 -0
  102. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -0
  103. package/dist/extraction/trafilatura.d.ts +6 -0
  104. package/dist/extraction/trafilatura.d.ts.map +1 -0
  105. package/dist/extraction/trafilatura.js +105 -0
  106. package/dist/extraction/trafilatura.js.map +1 -0
  107. package/dist/fetch/auth.d.ts +8 -0
  108. package/dist/fetch/auth.d.ts.map +1 -0
  109. package/dist/fetch/auth.js +32 -0
  110. package/dist/fetch/auth.js.map +1 -0
  111. package/dist/fetch/browser-pool.d.ts +28 -0
  112. package/dist/fetch/browser-pool.d.ts.map +1 -0
  113. package/dist/fetch/browser-pool.js +138 -0
  114. package/dist/fetch/browser-pool.js.map +1 -0
  115. package/dist/fetch/content-check.d.ts +2 -0
  116. package/dist/fetch/content-check.d.ts.map +1 -0
  117. package/dist/fetch/content-check.js +62 -0
  118. package/dist/fetch/content-check.js.map +1 -0
  119. package/dist/fetch/http-client.d.ts +15 -0
  120. package/dist/fetch/http-client.d.ts.map +1 -0
  121. package/dist/fetch/http-client.js +146 -0
  122. package/dist/fetch/http-client.js.map +1 -0
  123. package/dist/fetch/router.d.ts +45 -0
  124. package/dist/fetch/router.d.ts.map +1 -0
  125. package/dist/fetch/router.js +89 -0
  126. package/dist/fetch/router.js.map +1 -0
  127. package/dist/index.d.ts +3 -0
  128. package/dist/index.d.ts.map +1 -0
  129. package/dist/index.js +22 -0
  130. package/dist/index.js.map +1 -0
  131. package/dist/logger.d.ts +10 -0
  132. package/dist/logger.d.ts.map +1 -0
  133. package/dist/logger.js +39 -0
  134. package/dist/logger.js.map +1 -0
  135. package/dist/search/dedup.d.ts +10 -0
  136. package/dist/search/dedup.d.ts.map +1 -0
  137. package/dist/search/dedup.js +35 -0
  138. package/dist/search/dedup.js.map +1 -0
  139. package/dist/search/engines/bing.d.ts +7 -0
  140. package/dist/search/engines/bing.d.ts.map +1 -0
  141. package/dist/search/engines/bing.js +48 -0
  142. package/dist/search/engines/bing.js.map +1 -0
  143. package/dist/search/engines/duckduckgo.d.ts +7 -0
  144. package/dist/search/engines/duckduckgo.d.ts.map +1 -0
  145. package/dist/search/engines/duckduckgo.js +50 -0
  146. package/dist/search/engines/duckduckgo.js.map +1 -0
  147. package/dist/search/engines/startpage.d.ts +7 -0
  148. package/dist/search/engines/startpage.d.ts.map +1 -0
  149. package/dist/search/engines/startpage.js +50 -0
  150. package/dist/search/engines/startpage.js.map +1 -0
  151. package/dist/search/filters.d.ts +16 -0
  152. package/dist/search/filters.d.ts.map +1 -0
  153. package/dist/search/filters.js +63 -0
  154. package/dist/search/filters.js.map +1 -0
  155. package/dist/search/flashrank.d.ts +12 -0
  156. package/dist/search/flashrank.d.ts.map +1 -0
  157. package/dist/search/flashrank.js +63 -0
  158. package/dist/search/flashrank.js.map +1 -0
  159. package/dist/search/query.d.ts +2 -0
  160. package/dist/search/query.d.ts.map +1 -0
  161. package/dist/search/query.js +41 -0
  162. package/dist/search/query.js.map +1 -0
  163. package/dist/search/rerank.d.ts +3 -0
  164. package/dist/search/rerank.d.ts.map +1 -0
  165. package/dist/search/rerank.js +40 -0
  166. package/dist/search/rerank.js.map +1 -0
  167. package/dist/search/searxng.d.ts +8 -0
  168. package/dist/search/searxng.d.ts.map +1 -0
  169. package/dist/search/searxng.js +87 -0
  170. package/dist/search/searxng.js.map +1 -0
  171. package/dist/search/validator.d.ts +6 -0
  172. package/dist/search/validator.d.ts.map +1 -0
  173. package/dist/search/validator.js +35 -0
  174. package/dist/search/validator.js.map +1 -0
  175. package/dist/searxng/bootstrap.d.ts +18 -0
  176. package/dist/searxng/bootstrap.d.ts.map +1 -0
  177. package/dist/searxng/bootstrap.js +136 -0
  178. package/dist/searxng/bootstrap.js.map +1 -0
  179. package/dist/searxng/docker.d.ts +9 -0
  180. package/dist/searxng/docker.d.ts.map +1 -0
  181. package/dist/searxng/docker.js +67 -0
  182. package/dist/searxng/docker.js.map +1 -0
  183. package/dist/searxng/process.d.ts +23 -0
  184. package/dist/searxng/process.d.ts.map +1 -0
  185. package/dist/searxng/process.js +188 -0
  186. package/dist/searxng/process.js.map +1 -0
  187. package/dist/server.d.ts +2 -0
  188. package/dist/server.d.ts.map +1 -0
  189. package/dist/server.js +311 -0
  190. package/dist/server.js.map +1 -0
  191. package/dist/tools/cache.d.ts +3 -0
  192. package/dist/tools/cache.d.ts.map +1 -0
  193. package/dist/tools/cache.js +50 -0
  194. package/dist/tools/cache.js.map +1 -0
  195. package/dist/tools/crawl.d.ts +6 -0
  196. package/dist/tools/crawl.d.ts.map +1 -0
  197. package/dist/tools/crawl.js +97 -0
  198. package/dist/tools/crawl.js.map +1 -0
  199. package/dist/tools/extract.d.ts +4 -0
  200. package/dist/tools/extract.d.ts.map +1 -0
  201. package/dist/tools/extract.js +69 -0
  202. package/dist/tools/extract.js.map +1 -0
  203. package/dist/tools/fetch.d.ts +4 -0
  204. package/dist/tools/fetch.d.ts.map +1 -0
  205. package/dist/tools/fetch.js +76 -0
  206. package/dist/tools/fetch.js.map +1 -0
  207. package/dist/tools/search.d.ts +4 -0
  208. package/dist/tools/search.d.ts.map +1 -0
  209. package/dist/tools/search.js +160 -0
  210. package/dist/tools/search.js.map +1 -0
  211. package/dist/types.d.ts +222 -0
  212. package/dist/types.d.ts.map +1 -0
  213. package/dist/types.js +2 -0
  214. package/dist/types.js.map +1 -0
  215. package/package.json +61 -0
@@ -0,0 +1,50 @@
1
+ import { parseHTML } from 'linkedom';
2
+ import { createLogger } from '../../logger.js';
3
+ const log = createLogger('search');
4
/**
 * Search engine adapter that scrapes the Startpage HTML results page.
 */
export class StartpageEngine {
    name = 'startpage';

    /**
     * Runs `query` against Startpage and returns the parsed hits.
     *
     * The search parameters are sent both in the URL query string and as the
     * form-encoded POST body.
     *
     * @param query - Search text.
     * @param options - Optional `timeoutMs` (default 10000) and `maxResults` (default 10).
     * @returns Results parsed from the response HTML, at most `maxResults` of them.
     * @throws Error when the response status is not OK; fetch and timeout
     *   errors propagate unchanged.
     */
    async search(query, options = {}) {
        const timeoutMs = options.timeoutMs ?? 10000;
        const maxResults = options.maxResults ?? 10;
        const params = new URLSearchParams({ query, cat: 'web' });
        const url = `https://www.startpage.com/sp/search?${params}`;
        log.debug('scraping startpage', { query });
        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
                'Content-Type': 'application/x-www-form-urlencoded',
            },
            body: params.toString(),
            signal: AbortSignal.timeout(timeoutMs),
        });
        if (!response.ok)
            throw new Error(`Startpage returned ${response.status}`);
        const html = await response.text();
        return this.parseResults(html, maxResults);
    }

    /**
     * Extracts result entries from a Startpage results page.
     *
     * @param html - Raw HTML of the results page.
     * @param maxResults - Maximum number of results to return.
     * @returns Entries with `title`, `url`, `snippet`, a position-based
     *   `relevance_score` (1 for the first item on the page, decreasing
     *   linearly) and `engine: 'startpage'`.
     */
    parseResults(html, maxResults) {
        const { document } = parseHTML(html);
        const results = [];
        const items = document.querySelectorAll('.w-gl__result');
        const positions = Math.max(items.length, 1);
        // Keep scanning past items that lack a link or title so that they do
        // not use up the maxResults budget; stop once enough valid entries
        // have been collected.
        for (let i = 0; i < items.length && results.length < maxResults; i++) {
            const item = items[i];
            const link = item.querySelector('.w-gl__result-title, a.w-gl__result-url');
            const snippetEl = item.querySelector('.w-gl__description');
            const href = link?.getAttribute('href');
            const title = link?.textContent?.trim();
            if (href && title) {
                results.push({
                    title,
                    url: href,
                    snippet: snippetEl?.textContent?.trim() ?? '',
                    // Score reflects the item's position on the page.
                    relevance_score: 1 - i / positions,
                    engine: 'startpage',
                });
            }
        }
        return results;
    }
}
50
+ //# sourceMappingURL=startpage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"startpage.js","sourceRoot":"","sources":["../../../src/search/engines/startpage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAErC,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAE/C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,OAAO,eAAe;IAC1B,IAAI,GAAG,WAAW,CAAC;IAEnB,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAA+B,EAAE;QAC3D,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC;QAC7C,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC;QAE5C,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1D,MAAM,GAAG,GAAG,uCAAuC,MAAM,EAAE,CAAC;QAE5D,GAAG,CAAC,KAAK,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAE3C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,YAAY,EAAE,uHAAuH;gBACrI,cAAc,EAAE,mCAAmC;aACpD;YACD,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE;YACvB,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC;SACvC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,sBAAsB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAE3E,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;IAC7C,CAAC;IAED,YAAY,CAAC,IAAY,EAAE,UAAkB;QAC3C,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,QAAQ,CAAC,gBAAgB,CAAC,eAAe,CAAC,CAAC;QACzD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAEjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,yCAAyC,CAAC,CAAC;YAC3E,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,oBAAoB,CAAC,CAAC;YAE3D,MAAM,IAAI,GAAG,IAAI,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAExC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;gBAClB,OAAO,CAAC,IAAI,CAAC;oBACX,KAAK;oBACL,GAAG,EAAE,IAAI;oBACT,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE;oBAC7C,eAAe,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;oBAClD,MAAM,EAAE,WAAW;iBACpB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,
CAAC;IACjB,CAAC;CACF"}
@@ -0,0 +1,16 @@
1
/**
 * Keeps results whose URL hostname equals, or is a subdomain of, an entry in
 * `includeDomains` (when given) and matches no entry in `excludeDomains`.
 * Results whose URL cannot be parsed are kept only when no include list is given.
 */
+ export declare function filterByDomains<T extends {
2
+ url: string;
3
+ }>(results: T[], includeDomains?: string[], excludeDomains?: string[]): T[];
4
/** Validates the date arguments; the current implementation returns `results` unchanged in every case. */
+ export declare function filterByDateRange<T>(results: T[], fromDate?: string, toDate?: string): T[];
5
/** The current implementation ignores `_category` and returns `results` unchanged. */
+ export declare function filterByCategory<T>(results: T[], _category?: string): T[];
6
/** Options accepted by `applyAllFilters`; each field is forwarded to the matching filter function. */
+ export interface FilterOptions {
7
+ includeDomains?: string[];
8
+ excludeDomains?: string[];
9
+ fromDate?: string;
10
+ toDate?: string;
11
+ category?: string;
12
+ }
13
/** Applies the domain, date-range and category filters, in that order. */
+ export declare function applyAllFilters<T extends {
14
+ url: string;
15
+ }>(results: T[], options: FilterOptions): T[];
16
+ //# sourceMappingURL=filters.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"filters.d.ts","sourceRoot":"","sources":["../../src/search/filters.ts"],"names":[],"mappings":"AAiBA,wBAAgB,eAAe,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EACvD,OAAO,EAAE,CAAC,EAAE,EACZ,cAAc,CAAC,EAAE,MAAM,EAAE,EACzB,cAAc,CAAC,EAAE,MAAM,EAAE,GACxB,CAAC,EAAE,CAgBL;AAOD,wBAAgB,iBAAiB,CAAC,CAAC,EACjC,OAAO,EAAE,CAAC,EAAE,EACZ,QAAQ,CAAC,EAAE,MAAM,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,CAAC,EAAE,CAYL;AAED,wBAAgB,gBAAgB,CAAC,CAAC,EAChC,OAAO,EAAE,CAAC,EAAE,EACZ,SAAS,CAAC,EAAE,MAAM,GACjB,CAAC,EAAE,CAGL;AAED,MAAM,WAAW,aAAa;IAC5B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,eAAe,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EACvD,OAAO,EAAE,CAAC,EAAE,EACZ,OAAO,EAAE,aAAa,GACrB,CAAC,EAAE,CAKL"}
@@ -0,0 +1,63 @@
1
/**
 * Extracts the lower-cased hostname from a URL string.
 *
 * @param url - Absolute URL.
 * @returns The hostname, or '' when `url` cannot be parsed.
 */
function getDomain(url) {
    try {
        return new URL(url).hostname.toLowerCase();
    }
    catch {
        return '';
    }
}

/**
 * Canonicalises a user-supplied domain entry so it can be compared with a
 * hostname.
 *
 * Besides lower-casing and dropping trailing slashes, this also tolerates
 * surrounding whitespace, a leading scheme ("https://example.com/") and a
 * leading wildcard or dot ("*.example.com", ".example.com"). A path, if
 * present, is left in place, so such an entry still matches nothing.
 *
 * @param domain - Entry from an include/exclude list.
 * @returns The normalised domain; may be '' for blank entries.
 */
function normalizeDomain(domain) {
    return domain
        .trim()
        .toLowerCase()
        .replace(/^[a-z][a-z0-9+.-]*:\/\//, '')
        .replace(/\/+$/, '')
        .replace(/^(\*\.|\.)+/, '');
}

/**
 * Tests whether `hostname` is `domain` itself or one of its subdomains.
 *
 * @param hostname - Lower-cased hostname of a result URL.
 * @param domain - Raw entry from an include/exclude list.
 * @returns True on an exact or subdomain match.
 */
function domainMatches(hostname, domain) {
    const normalized = normalizeDomain(domain);
    // A blank entry must never match (it would otherwise match any hostname
    // that ends with a dot).
    if (!normalized)
        return false;
    return hostname === normalized || hostname.endsWith('.' + normalized);
}

/**
 * Filters search results by the hostname of their `url`.
 *
 * @param results - Items carrying a `url` string.
 * @param includeDomains - When non-empty, only results matching one of these
 *   domains (or their subdomains) are kept.
 * @param excludeDomains - Results matching one of these domains (or their
 *   subdomains) are dropped.
 * @returns The original array when both lists are empty or missing;
 *   otherwise a new filtered array. Results with an unparseable URL are kept
 *   only when no include list is active.
 */
export function filterByDomains(results, includeDomains, excludeDomains) {
    if (!includeDomains?.length && !excludeDomains?.length)
        return results;
    return results.filter((r) => {
        const hostname = getDomain(r.url);
        if (!hostname) {
            // Unparseable URL: cannot satisfy an include list, and cannot be
            // matched by an exclude list either.
            return !includeDomains?.length;
        }
        if (includeDomains?.length) {
            if (!includeDomains.some((d) => domainMatches(hostname, d)))
                return false;
        }
        if (excludeDomains?.length) {
            if (excludeDomains.some((d) => domainMatches(hostname, d)))
                return false;
        }
        return true;
    });
}
35
/**
 * Checks that `dateStr` is a real calendar date written as YYYY-MM-DD.
 *
 * The parsed value is formatted back and compared with the input so that
 * dates some engines silently roll over (for example "2024-02-30") are
 * rejected rather than accepted.
 *
 * @param dateStr - Candidate date string.
 * @returns True only for a well-formed, existing calendar date.
 */
function isValidIsoDate(dateStr) {
    if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr))
        return false;
    const parsed = new Date(dateStr);
    if (Number.isNaN(parsed.getTime()))
        return false;
    // Date-only ISO strings are parsed as UTC, and toISOString() is UTC too.
    return parsed.toISOString().slice(0, 10) === dateStr;
}

/**
 * Date-range filter for search results.
 *
 * The arguments are validated, but every path currently returns `results`
 * unchanged: the results carry no reliable date to compare against.
 *
 * @param results - Search results.
 * @param fromDate - Optional lower bound, YYYY-MM-DD.
 * @param toDate - Optional upper bound, YYYY-MM-DD.
 * @returns The same `results` array.
 */
export function filterByDateRange(results, fromDate, toDate) {
    if (!fromDate && !toDate)
        return results;
    if (fromDate && !isValidIsoDate(fromDate))
        return results;
    if (toDate && !isValidIsoDate(toDate))
        return results;
    if (fromDate && toDate && new Date(fromDate) > new Date(toDate))
        return results;
    // Date filtering is best-effort on direct scraping engines.
    // SearXNG handles dates natively via time_range. For fallback engines,
    // snippet text doesn't reliably contain dates, so we keep all results.
    return results;
}
53
/**
 * Category filter placeholder: hands back `results` untouched.
 *
 * @param results - Search results.
 * @param _category - Requested category; unused here.
 * @returns The same `results` array.
 */
export function filterByCategory(results, _category) {
    // Category filtering is handled by SearXNG natively.
    return results;
}
57
/**
 * Runs the domain, date-range and category filters one after another.
 *
 * @param results - Search results carrying a `url`.
 * @param options - Filter settings; each field is forwarded to its filter.
 * @returns The results that pass all three filters.
 */
export function applyAllFilters(results, options) {
    const { includeDomains, excludeDomains, fromDate, toDate, category } = options;
    const domainFiltered = filterByDomains(results, includeDomains, excludeDomains);
    const dateFiltered = filterByDateRange(domainFiltered, fromDate, toDate);
    return filterByCategory(dateFiltered, category);
}
63
+ //# sourceMappingURL=filters.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"filters.js","sourceRoot":"","sources":["../../src/search/filters.ts"],"names":[],"mappings":"AAAA,SAAS,SAAS,CAAC,GAAW;IAC5B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,MAAc;IACrC,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;AAClD,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB,EAAE,MAAc;IACrD,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;IAC3C,OAAO,QAAQ,KAAK,UAAU,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,UAAU,CAAC,CAAC;AACxE,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAY,EACZ,cAAyB,EACzB,cAAyB;IAEzB,IAAI,CAAC,cAAc,EAAE,MAAM,IAAI,CAAC,cAAc,EAAE,MAAM;QAAE,OAAO,OAAO,CAAC;IAEvE,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QAC1B,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,OAAO,CAAC,cAAc,EAAE,MAAM,CAAC;QACjC,CAAC;QACD,IAAI,cAAc,EAAE,MAAM,EAAE,CAAC;YAC3B,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;gBAAE,OAAO,KAAK,CAAC;QAC5E,CAAC;QACD,IAAI,cAAc,EAAE,MAAM,EAAE,CAAC;YAC3B,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;gBAAE,OAAO,KAAK,CAAC;QAC3E,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,cAAc,CAAC,OAAe;IACrC,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC;IACjC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,qBAAqB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;AACzE,CAAC;AAED,MAAM,UAAU,iBAAiB,CAC/B,OAAY,EACZ,QAAiB,EACjB,MAAe;IAEf,IAAI,CAAC,QAAQ,IAAI,CAAC,MAAM;QAAE,OAAO,OAAO,CAAC;IAEzC,IAAI,QAAQ,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAC;IAC1D,IAAI,MAAM,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC;QAAE,OAAO,OAAO,CAAC;IAEtD,IAAI,QAAQ,IAAI,MAAM,IAAI,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,OAAO,CAAC;IAEhF,4DAA4D;IAC5D,uEAAuE;IACvE,uEAAuE;IACvE,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,OAAY,EACZ,SAAkB;IAElB,qDAAqD;IACrD,OAAO,OAAO,CAAC;AACjB,CAAC;AAUD,MAAM,UAAU,eAAe,CAC7B,OAAY,EACZ,OAAsB;IAEtB,IAAI,QAAQ,GAAG,
eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,cAAc,EAAE,OAAO,CAAC,cAAc,CAAC,CAAC;IACxF,QAAQ,GAAG,iBAAiB,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACzE,QAAQ,GAAG,gBAAgB,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACxD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,12 @@
1
/** A text passage to rerank; `index` is echoed back in the matching `RerankResult`. */
+ export interface RerankPassage {
2
+ text: string;
3
+ index: number;
4
+ }
5
/** A reranked passage: the `index` of the passage and its score. */
+ export interface RerankResult {
6
+ index: number;
7
+ score: number;
8
+ }
9
/** Probes `python3 -c "import flashrank"` once and caches the outcome. */
+ export declare function isFlashRankAvailable(): Promise<boolean>;
10
/** Clears the cached availability result so the next check probes again. */
+ export declare function resetAvailabilityCache(): void;
11
/**
 * Reranks `passages` against `query` through a Python subprocess.
 * Resolves to `[]` for an empty passage list and to `null` when the
 * subprocess fails or prints something other than a JSON array.
 */
+ export declare function flashRankRerank(query: string, passages: RerankPassage[], model?: string): Promise<RerankResult[] | null>;
12
+ //# sourceMappingURL=flashrank.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"flashrank.d.ts","sourceRoot":"","sources":["../../src/search/flashrank.ts"],"names":[],"mappings":"AAQA,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAkBD,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,OAAO,CAAC,CAS7D;AAED,wBAAgB,sBAAsB,IAAI,IAAI,CAE7C;AAMD,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,aAAa,EAAE,EACzB,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,EAAE,GAAG,IAAI,CAAC,CA0BhC"}
@@ -0,0 +1,63 @@
1
+ import { execFile as execFileCb } from 'node:child_process';
2
+ import { promisify } from 'node:util';
3
+ import { runPythonWithStdin } from '../extraction/trafilatura.js';
4
+ import { createLogger } from '../logger.js';
5
+ const execFileAsync = promisify(execFileCb);
6
+ const log = createLogger('search');
7
// Python program handed to runPythonWithStdin. It reads a JSON object with
// `query`, `passages` and an optional `model` from stdin, reranks the passages
// with flashrank, and prints a JSON array of {index, score} objects.
+ const FLASHRANK_SCRIPT = `
8
+ import sys, json
9
+ from flashrank import Ranker, RerankRequest
10
+ data = json.loads(sys.stdin.read())
11
+ ranker = Ranker(model_name=data.get('model', 'ms-marco-MiniLM-L-12-v2'))
12
+ passages = [{"id": p["index"], "text": p["text"]} for p in data["passages"]]
13
+ req = RerankRequest(query=data["query"], passages=passages)
14
+ results = ranker.rerank(req)
15
+ print(json.dumps([{"index": int(r.metadata["id"] if hasattr(r, "metadata") else r["id"]), "score": float(r.score if hasattr(r, "score") else r["score"])} for r in results]))
16
+ `.trim();
17
// Timeout passed to runPythonWithStdin for a rerank run (presumably milliseconds — confirm against runPythonWithStdin).
+ const SUBPROCESS_TIMEOUT_MS = 30000;
18
// `timeout` option (milliseconds) for the execFile call that probes for flashrank.
+ const AVAILABILITY_CHECK_TIMEOUT_MS = 5000;
19
// Cached probe outcome: null until isFlashRankAvailable() has run, and again after resetAvailabilityCache().
+ let availableCache = null;
20
/**
 * Reports whether the `flashrank` Python package can be imported.
 *
 * The probe (`python3 -c "import flashrank"`) runs only while no outcome is
 * cached; its result is stored in `availableCache` and reused afterwards.
 *
 * @returns True when the import succeeded, false when the probe failed or
 *   timed out.
 */
export async function isFlashRankAvailable() {
    if (availableCache !== null)
        return availableCache;
    let importable = true;
    try {
        await execFileAsync('python3', ['-c', 'import flashrank'], { timeout: AVAILABILITY_CHECK_TIMEOUT_MS });
    }
    catch {
        // Any failure (missing interpreter, missing package, timeout) counts
        // as "not available".
        importable = false;
    }
    availableCache = importable;
    return importable;
}
32
/**
 * Forgets the cached availability outcome, so the next call to
 * isFlashRankAvailable() runs the probe again.
 */
export function resetAvailabilityCache() {
    availableCache = null;
}
35
/**
 * Clamps a reranker score into the closed range [0, 1].
 *
 * @param score - Raw score from the reranker output.
 * @returns The clamped score; 0 when `score` is missing or not a number, so
 *   that NaN never reaches callers that compare scores.
 */
function clampScore(score) {
    const value = Number(score);
    if (Number.isNaN(value))
        return 0;
    return Math.max(0, Math.min(1, value));
}
38
/**
 * Reranks `passages` against `query` by running FLASHRANK_SCRIPT in a Python
 * subprocess.
 *
 * @param query - Search query the passages are scored against.
 * @param passages - Passages with `text` and an `index` that is echoed back.
 * @param model - Optional model name; defaults to 'ms-marco-MiniLM-L-12-v2'.
 * @returns `[]` for an empty passage list; otherwise the `{ index, score }`
 *   entries printed by the script with scores passed through clampScore.
 *   Returns `null` when the subprocess fails or its output is not a
 *   well-formed ranking, so the caller can fall back.
 */
export async function flashRankRerank(query, passages, model) {
    if (passages.length === 0)
        return [];
    try {
        const input = JSON.stringify({
            query,
            passages,
            model: model ?? 'ms-marco-MiniLM-L-12-v2',
        });
        const stdout = await runPythonWithStdin(FLASHRANK_SCRIPT, input, SUBPROCESS_TIMEOUT_MS);
        const parsed = JSON.parse(stdout.trim());
        if (!Array.isArray(parsed)) {
            log.warn('FlashRank returned non-array output', { output: stdout.slice(0, 200) });
            return null;
        }
        // The subprocess output is untrusted: every entry must be an object
        // that refers to one of the passages that was actually sent.
        const knownIndexes = new Set(passages.map((p) => p.index));
        const wellFormed = parsed.every((r) => r !== null && typeof r === 'object' && knownIndexes.has(r.index));
        if (!wellFormed) {
            log.warn('FlashRank returned malformed entries', { output: stdout.slice(0, 200) });
            return null;
        }
        return parsed.map((r) => ({
            index: r.index,
            score: clampScore(r.score),
        }));
    }
    catch (err) {
        log.warn('FlashRank reranking failed, falling back to position scores', { error: String(err) });
        return null;
    }
}
63
+ //# sourceMappingURL=flashrank.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"flashrank.js","sourceRoot":"","sources":["../../src/search/flashrank.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,aAAa,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;AAC5C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAYnC,MAAM,gBAAgB,GAAG;;;;;;;;;CASxB,CAAC,IAAI,EAAE,CAAC;AAET,MAAM,qBAAqB,GAAG,KAAK,CAAC;AACpC,MAAM,6BAA6B,GAAG,IAAI,CAAC;AAE3C,IAAI,cAAc,GAAmB,IAAI,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,oBAAoB;IACxC,IAAI,cAAc,KAAK,IAAI;QAAE,OAAO,cAAc,CAAC;IACnD,IAAI,CAAC;QACH,MAAM,aAAa,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,kBAAkB,CAAC,EAAE,EAAE,OAAO,EAAE,6BAA6B,EAAE,CAAC,CAAC;QACvG,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,cAAc,GAAG,KAAK,CAAC;IACzB,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,sBAAsB;IACpC,cAAc,GAAG,IAAI,CAAC;AACxB,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;AACzC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAa,EACb,QAAyB,EACzB,KAAc;IAEd,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC;YAC3B,KAAK;YACL,QAAQ;YACR,KAAK,EAAE,KAAK,IAAI,yBAAyB;SAC1C,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,gBAAgB,EAAE,KAAK,EAAE,qBAAqB,CAAC,CAAC;QACxF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;QAEzC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,GAAG,CAAC,IAAI,CAAC,qCAAqC,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;YAClF,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAmC,EAAE,EAAE,CAAC,CAAC;YAC1D,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC;SAC3B,CAAC,CAAC,CAAC;IACN,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,6DAA6D,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChG,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Splits a long query into shorter sub-queries. A query of at most 200
 * characters is returned as a single-element array.
 */
+ export declare function decomposeQuery(query: string): string[];
2
+ //# sourceMappingURL=query.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../../src/search/query.ts"],"names":[],"mappings":"AAGA,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAsBtD"}
@@ -0,0 +1,41 @@
1
/** Queries (and sub-queries) up to this many characters are left intact. */
const MAX_QUERY_LENGTH = 200;
/** Clause delimiters, tried in this order while any part is still too long. */
const CLAUSE_SEPARATORS = ['. ', '? ', '! ', '; ', ', and ', ', or ', ' - ', ' — '];

/**
 * Breaks an over-long query into sub-queries.
 *
 * A query of at most MAX_QUERY_LENGTH characters is returned unchanged as a
 * single-element array. Longer queries are split at clause separators, one
 * separator at a time, and any part that is still too long is finally split
 * between words.
 *
 * @param query - The full search query.
 * @returns The sub-queries, in their original order. A part can exceed
 *   MAX_QUERY_LENGTH only when it is a single token without whitespace.
 */
export function decomposeQuery(query) {
    if (query.length <= MAX_QUERY_LENGTH) {
        return [query];
    }
    let parts = [query];
    for (const sep of CLAUSE_SEPARATORS) {
        // Stop as soon as every part fits; remaining separators are not needed.
        if (parts.every(p => p.length <= MAX_QUERY_LENGTH))
            break;
        parts = parts.flatMap(p => {
            if (p.length <= MAX_QUERY_LENGTH)
                return [p];
            return p.split(sep).map(s => s.trim()).filter(Boolean);
        });
    }
    parts = parts.flatMap(p => {
        if (p.length <= MAX_QUERY_LENGTH)
            return [p];
        return splitAtWordBoundary(p);
    });
    return parts;
}

/**
 * Greedily packs the words of `text` into chunks of at most
 * MAX_QUERY_LENGTH characters.
 *
 * Any run of whitespace (spaces, tabs, newlines) counts as a word boundary,
 * so multi-line queries are split as well; words within a chunk are joined
 * by a single space.
 *
 * @param text - Text to split.
 * @returns The chunks in order; a single word longer than the limit becomes
 *   its own (oversize) chunk.
 */
function splitAtWordBoundary(text) {
    const chunks = [];
    let current = '';
    for (const word of text.split(/\s+/).filter(Boolean)) {
        if (current && current.length + word.length + 1 > MAX_QUERY_LENGTH) {
            chunks.push(current);
            current = word;
        }
        else {
            current = current ? `${current} ${word}` : word;
        }
    }
    if (current)
        chunks.push(current);
    return chunks;
}
41
+ //# sourceMappingURL=query.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query.js","sourceRoot":"","sources":["../../src/search/query.ts"],"names":[],"mappings":"AAAA,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,MAAM,iBAAiB,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;AAEpF,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,IAAI,KAAK,CAAC,MAAM,IAAI,gBAAgB,EAAE,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,CAAC;IACjB,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC;IAEpB,KAAK,MAAM,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACpC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,gBAAgB,CAAC;YAAE,MAAM;QAE1D,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YACxB,IAAI,CAAC,CAAC,MAAM,IAAI,gBAAgB;gBAAE,OAAO,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;QACxB,IAAI,CAAC,CAAC,MAAM,IAAI,gBAAgB;YAAE,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7C,OAAO,mBAAmB,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC9B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,gBAAgB,IAAI,OAAO,EAAE,CAAC;YACnE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAClD,CAAC;IACH,CAAC;IACD,IAAI,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAElC,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { MergedSearchResult } from './dedup.js';
2
/**
 * Reranks `results` with the configured reranker and drops entries scoring
 * below the configured relevance threshold. An empty list is returned as-is.
 */
+ export declare function rerankResults(query: string, results: MergedSearchResult[]): Promise<MergedSearchResult[]>;
3
+ //# sourceMappingURL=rerank.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rerank.d.ts","sourceRoot":"","sources":["../../src/search/rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAOrD,wBAAsB,aAAa,CACjC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,kBAAkB,EAAE,GAC5B,OAAO,CAAC,kBAAkB,EAAE,CAAC,CAgC/B"}
@@ -0,0 +1,40 @@
1
+ import { flashRankRerank, isFlashRankAvailable } from './flashrank.js';
2
+ import { getConfig } from '../config.js';
3
+ import { createLogger } from '../logger.js';
4
+ const log = createLogger('search');
5
/**
 * Reorders search results with the configured reranker and applies the
 * configured relevance threshold.
 *
 * With `config.reranker === 'flashrank'` and FlashRank importable, each
 * result's title and snippet are scored against `query`; the results come
 * back in the reranker's order with `relevance_score` replaced by its score.
 * In every other case the results pass through in their existing order.
 *
 * @param query - The search query.
 * @param results - Merged search results.
 * @returns The (possibly reordered) results that meet the threshold.
 */
export async function rerankResults(query, results) {
    const config = getConfig();
    if (results.length === 0)
        return results;
    if (config.reranker === 'flashrank') {
        if (await isFlashRankAvailable()) {
            const passages = results.map((r, i) => ({
                text: `${r.title}\n${r.snippet}`,
                index: i,
            }));
            const ranked = await flashRankRerank(query, passages, config.rerankerModel);
            if (ranked) {
                // Ignore entries that do not point at an existing result;
                // spreading a missing result would produce an object holding
                // nothing but a relevance_score.
                const reordered = ranked
                    .filter((r) => Number.isInteger(r.index) && r.index >= 0 && r.index < results.length)
                    .map((r) => ({
                    ...results[r.index],
                    relevance_score: r.score,
                }));
                if (reordered.length > 0) {
                    return applyThreshold(reordered, config.relevanceThreshold);
                }
                log.warn('FlashRank returned no usable rankings, using passthrough');
            }
            else {
                log.debug('FlashRank returned null, using passthrough');
            }
        }
        else {
            log.warn('FlashRank configured but not installed. Run: wigolo warmup --reranker');
        }
    }
    else if (config.reranker !== 'none') {
        log.warn('Unknown reranker configured, passing through', { reranker: config.reranker });
    }
    log.debug('Rerank passthrough', { count: results.length });
    return applyThreshold(results, config.relevanceThreshold);
}
35
/**
 * Drops results scoring below `threshold`.
 *
 * @param results - Results carrying a `relevance_score`.
 * @param threshold - Minimum score to keep; a falsy or non-positive value
 *   turns the filter off.
 * @returns `results` itself when the filter is off, otherwise a new array of
 *   the results whose `relevance_score` is at least `threshold`.
 */
function applyThreshold(results, threshold) {
    const cutoffActive = Boolean(threshold) && !(threshold <= 0);
    return cutoffActive
        ? results.filter(({ relevance_score: score }) => score >= threshold)
        : results;
}
40
+ //# sourceMappingURL=rerank.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rerank.js","sourceRoot":"","sources":["../../src/search/rerank.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AACvE,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,OAA6B;IAE7B,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAE3B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAEzC,IAAI,MAAM,CAAC,QAAQ,KAAK,WAAW,EAAE,CAAC;QACpC,IAAI,MAAM,oBAAoB,EAAE,EAAE,CAAC;YACjC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtC,IAAI,EAAE,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,OAAO,EAAE;gBAChC,KAAK,EAAE,CAAC;aACT,CAAC,CAAC,CAAC;YAEJ,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,KAAK,EAAE,QAAQ,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC;YAC5E,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBACnC,GAAG,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;oBACnB,eAAe,EAAE,CAAC,CAAC,KAAK;iBACzB,CAAC,CAAC,CAAC;gBAEJ,OAAO,cAAc,CAAC,SAAS,EAAE,MAAM,CAAC,kBAAkB,CAAC,CAAC;YAC9D,CAAC;YAED,GAAG,CAAC,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAC1D,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAAC,uEAAuE,CAAC,CAAC;QACpF,CAAC;IACH,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;QACtC,GAAG,CAAC,IAAI,CAAC,8CAA8C,EAAE,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC1F,CAAC;IAED,GAAG,CAAC,KAAK,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3D,OAAO,cAAc,CAAC,OAAO,EAAE,MAAM,CAAC,kBAAkB,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,cAAc,CACrB,OAA6B,EAC7B,SAAiB;IAEjB,IAAI,CAAC,SAAS,IAAI,SAAS,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IACjD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,IAAI,SAAS,CAAC,CAAC;AAC/D,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { SearchEngine, SearchEngineOptions, RawSearchResult } from '../types.js';
2
/**
 * Type declaration for the SearXNG-backed implementation of `SearchEngine`.
 */
+ export declare class SearxngClient implements SearchEngine {
3
/** Base URL handed to the constructor; not accessible outside the class. */
+ private readonly baseUrl;
4
/** Engine identifier exposed to callers. */
+ name: string;
5
+ constructor(baseUrl: string);
6
/**
 * Runs `query` against the configured instance.
 * `options` may be omitted. Resolves to the raw result list.
 */
+ search(query: string, options?: SearchEngineOptions): Promise<RawSearchResult[]>;
7
+ }
8
+ //# sourceMappingURL=searxng.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searxng.d.ts","sourceRoot":"","sources":["../../src/search/searxng.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AA2CtF,qBAAa,aAAc,YAAW,YAAY;IAGpC,OAAO,CAAC,QAAQ,CAAC,OAAO;IAFpC,IAAI,SAAa;gBAEY,OAAO,EAAE,MAAM;IAEtC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;CAyD3F"}
@@ -0,0 +1,87 @@
1
+ import { getConfig } from '../config.js';
2
+ import { createLogger } from '../logger.js';
3
+ const log = createLogger('search');
4
// Translates the caller-facing search category (key) into the value sent as
// the `categories` query parameter of the SearXNG request.
// `code` and `docs` intentionally share the `it` category.
+ const CATEGORY_MAP = {
5
+ general: 'general',
6
+ news: 'news',
7
+ code: 'it',
8
+ docs: 'it',
9
+ papers: 'science',
10
+ images: 'images',
11
+ };
12
/**
 * Converts a date span into one of the coarse `time_range` buckets
 * (`'day'`, `'week'`, `'month'`, `'year'`).
 *
 * The span is measured from `fromDate` to `toDate`, or to the current time
 * when `toDate` is not given, and rounded to whole days. An inverted span
 * (negative day count) falls into the `'day'` bucket.
 *
 * @param fromDate Start of the span; anything `new Date()` accepts.
 * @param toDate Optional end of the span; defaults to "now".
 * @returns The bucket name, or `null` when `fromDate` is missing or either
 *          date cannot be parsed.
 */
function computeTimeRange(fromDate, toDate) {
  if (!fromDate) {
    return null;
  }

  const startMs = new Date(fromDate).getTime();
  if (isNaN(startMs)) {
    return null;
  }

  const endMs = (toDate ? new Date(toDate) : new Date()).getTime();
  if (isNaN(endMs)) {
    return null;
  }

  const msPerDay = 1000 * 60 * 60 * 24;
  const spanDays = Math.round((endMs - startMs) / msPerDay);

  // Buckets in ascending width; the first one wide enough wins.
  const buckets = [
    { limit: 1, label: 'day' },
    { limit: 7, label: 'week' },
    { limit: 30, label: 'month' },
  ];
  for (const bucket of buckets) {
    if (spanDays <= bucket.limit) {
      return bucket.label;
    }
  }
  return 'year';
}
30
+ export class SearxngClient {
31
+ baseUrl;
32
+ name = 'searxng';
33
+ constructor(baseUrl) {
34
+ this.baseUrl = baseUrl;
35
+ }
36
+ async search(query, options = {}) {
37
+ const config = getConfig();
38
+ const timeoutMs = options.timeoutMs ?? config.searxngQueryTimeoutMs;
39
+ const maxResults = options.maxResults ?? 10;
40
+ // Build query with domain site: operators
41
+ let queryStr = query;
42
+ if (options.includeDomains?.length) {
43
+ // Best-effort: site: syntax works on Google/Bing but not all SearXNG engines.
44
+ // Post-filter in filters.ts handles the gap for engines that ignore it.
45
+ const siteFilter = options.includeDomains.map(d => `site:${d}`).join(' OR ');
46
+ queryStr = options.includeDomains.length === 1
47
+ ? `${query} site:${options.includeDomains[0]}`
48
+ : `${query} (${siteFilter})`;
49
+ }
50
+ // exclude_domains are NOT passed to SearXNG — handled by post-filter
51
+ const params = new URLSearchParams({
52
+ q: queryStr,
53
+ format: 'json',
54
+ pageno: '1',
55
+ });
56
+ if (options.timeRange)
57
+ params.set('time_range', options.timeRange);
58
+ if (options.language)
59
+ params.set('language', options.language);
60
+ // Category pass-through
61
+ if (options.category) {
62
+ params.set('categories', CATEGORY_MAP[options.category] ?? 'general');
63
+ }
64
+ // Date range -> time_range bucket (SearXNG doesn't support arbitrary dates)
65
+ if (!options.timeRange && (options.fromDate || options.toDate)) {
66
+ const range = computeTimeRange(options.fromDate, options.toDate);
67
+ if (range)
68
+ params.set('time_range', range);
69
+ }
70
+ const url = `${this.baseUrl}/search?${params}`;
71
+ log.debug('querying searxng', { query: queryStr, url });
72
+ const response = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });
73
+ if (!response.ok) {
74
+ throw new Error(`SearXNG returned ${response.status}`);
75
+ }
76
+ const data = (await response.json());
77
+ const total = data.results.length;
78
+ return data.results.slice(0, maxResults).map((r, i) => ({
79
+ title: r.title,
80
+ url: r.url,
81
+ snippet: r.content,
82
+ relevance_score: r.score != null ? Math.min(r.score, 1) : 1 - i / Math.max(total, 1),
83
+ engine: 'searxng',
84
+ }));
85
+ }
86
+ }
87
+ //# sourceMappingURL=searxng.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searxng.js","sourceRoot":"","sources":["../../src/search/searxng.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,YAAY,GAA2B;IAC3C,OAAO,EAAE,SAAS;IAClB,IAAI,EAAE,MAAM;IACZ,IAAI,EAAE,IAAI;IACV,IAAI,EAAE,IAAI;IACV,MAAM,EAAE,SAAS;IACjB,MAAM,EAAE,QAAQ;CACjB,CAAC;AAEF,SAAS,gBAAgB,CAAC,QAAiB,EAAE,MAAe;IAC1D,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;IACnD,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IACtF,IAAI,QAAQ,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,QAAQ,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IACjC,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,OAAO,CAAC;IACnC,OAAO,MAAM,CAAC;AAChB,CAAC;AAiBD,MAAM,OAAO,aAAa;IAGK;IAF7B,IAAI,GAAG,SAAS,CAAC;IAEjB,YAA6B,OAAe;QAAf,YAAO,GAAP,OAAO,CAAQ;IAAG,CAAC;IAEhD,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAA+B,EAAE;QAC3D,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC,qBAAqB,CAAC;QACpE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC;QAE5C,0CAA0C;QAC1C,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,IAAI,OAAO,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;YACnC,8EAA8E;YAC9E,wEAAwE;YACxE,MAAM,UAAU,GAAG,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC7E,QAAQ,GAAG,OAAO,CAAC,cAAc,CAAC,MAAM,KAAK,CAAC;gBAC5C,CAAC,CAAC,GAAG,KAAK,SAAS,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE;gBAC9C,CAAC,CAAC,GAAG,KAAK,KAAK,UAAU,GAAG,CAAC;QACjC,CAAC;QACD,qEAAqE;QAErE,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;YACjC,CAAC,EAAE,QAAQ;YACX,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,GAAG;SACZ,CAAC,CAAC;QAEH,IAAI,OAAO,CAAC,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,OAAO,CAAC,SAAS
,CAAC,CAAC;QACnE,IAAI,OAAO,CAAC,QAAQ;YAAE,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE/D,wBAAwB;QACxB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,YAAY,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,SAAS,CAAC,CAAC;QACxE,CAAC;QAED,4EAA4E;QAC5E,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAC/D,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YACjE,IAAI,KAAK;gBAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,MAAM,EAAE,CAAC;QAC/C,GAAG,CAAC,KAAK,CAAC,kBAAkB,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC;QAExD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;QAE9E,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAuB,CAAC;QAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;QAElC,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACtD,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,eAAe,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;YACpF,MAAM,EAAE,SAAS;SAClB,CAAC,CAAC,CAAC;IACN,CAAC;CACF"}
@@ -0,0 +1,6 @@
1
/**
 * Type declaration for the link validator.
 * Takes any result objects that expose a `url` string and resolves to an
 * array of the same element type. `options.maxConcurrent` is optional.
 */
+ export declare function validateLinks<T extends {
2
+ url: string;
3
+ }>(results: T[], options?: {
4
+ maxConcurrent?: number;
5
+ }): Promise<T[]>;
6
+ //# sourceMappingURL=validator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/search/validator.ts"],"names":[],"mappings":"AAKA,wBAAsB,aAAa,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EAC3D,OAAO,EAAE,CAAC,EAAE,EACZ,OAAO,CAAC,EAAE;IAAE,aAAa,CAAC,EAAE,MAAM,CAAA;CAAE,GACnC,OAAO,CAAC,CAAC,EAAE,CAAC,CAgCd"}
@@ -0,0 +1,35 @@
1
+ import { getConfig } from '../config.js';
2
+ import { createLogger } from '../logger.js';
3
+ const log = createLogger('search');
4
+ export async function validateLinks(results, options) {
5
+ const config = getConfig();
6
+ if (!config.validateLinks)
7
+ return results;
8
+ const maxConcurrent = options?.maxConcurrent ?? 5;
9
+ const timeoutMs = config.validateTimeoutMs;
10
+ const valid = [];
11
+ for (let i = 0; i < results.length; i += maxConcurrent) {
12
+ const batch = results.slice(i, i + maxConcurrent);
13
+ const checks = batch.map(async (result) => {
14
+ try {
15
+ const response = await fetch(result.url, {
16
+ method: 'HEAD',
17
+ redirect: 'follow',
18
+ signal: AbortSignal.timeout(timeoutMs),
19
+ });
20
+ return { result, ok: response.status < 400 };
21
+ }
22
+ catch {
23
+ log.debug('link validation failed', { url: result.url });
24
+ return { result, ok: false };
25
+ }
26
+ });
27
+ const batchResults = await Promise.all(checks);
28
+ for (const { result, ok } of batchResults) {
29
+ if (ok)
30
+ valid.push(result);
31
+ }
32
+ }
33
+ return valid;
34
+ }
35
+ //# sourceMappingURL=validator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/search/validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,OAAY,EACZ,OAAoC;IAEpC,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAE3B,IAAI,CAAC,MAAM,CAAC,aAAa;QAAE,OAAO,OAAO,CAAC;IAE1C,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC;IAClD,MAAM,SAAS,GAAG,MAAM,CAAC,iBAAiB,CAAC;IAC3C,MAAM,KAAK,GAAQ,EAAE,CAAC;IAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,aAAa,EAAE,CAAC;QACvD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,CAAC;QAClD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAuC,EAAE;YAC7E,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,GAAG,EAAE;oBACvC,MAAM,EAAE,MAAM;oBACd,QAAQ,EAAE,QAAQ;oBAClB,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC;iBACvC,CAAC,CAAC;gBACH,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC/C,CAAC;YAAC,MAAM,CAAC;gBACP,GAAG,CAAC,KAAK,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;gBACzD,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC;YAC/B,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/C,KAAK,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,YAAY,EAAE,CAAC;YAC1C,IAAI,EAAE;gBAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -0,0 +1,18 @@
1
/**
 * State record exchanged through `getBootstrapState` / `setBootstrapState`.
 * NOTE(review): field meanings below are inferred from names only; confirm
 * against src/searxng/bootstrap.ts.
 */
+ export interface BootstrapState {
2
/** One of four fixed phases; the only required field. */
+ status: 'downloading' | 'ready' | 'failed' | 'no_runtime';
3
/** Optional; presumably the location of the SearXNG install. */
+ searxngPath?: string;
4
/** Optional; presumably a failure description when `status` is 'failed'. */
+ error?: string;
5
+ }
6
/**
 * Value that `resolveSearchBackend()` resolves to.
 * NOTE(review): which optional field accompanies which `type` is not visible
 * in this declaration; confirm against src/searxng/bootstrap.ts.
 */
+ export interface BackendResolution {
7
/** Selected backend kind; the only required field. */
+ type: 'external' | 'native' | 'docker' | 'scraping';
8
+ url?: string;
9
+ searxngPath?: string;
10
+ }
11
// Declarations only; the implementations live in src/searxng/bootstrap.ts.
// NOTE(review): the descriptions below are inferred from names and
// signatures — confirm against the implementation.

// Synchronous check; presumably probes for a usable Python runtime.
+ export declare function checkPythonAvailable(): boolean;
12
// Synchronous check; presumably probes for a usable Docker installation.
+ export declare function checkDockerAvailable(): boolean;
13
// Returns the stored state for `dataDir`, or null when none is available.
+ export declare function getBootstrapState(dataDir: string): BootstrapState | null;
14
// Stores `state` for `dataDir`; returns nothing.
+ export declare function setBootstrapState(dataDir: string, state: BootstrapState): void;
15
// Produces settings text for the given port; presumably a SearXNG settings file.
+ export declare function generateSettings(port: number): string;
16
// Asynchronously decides which search backend to use.
+ export declare function resolveSearchBackend(): Promise<BackendResolution>;
17
// Asynchronous setup rooted at `dataDir`; resolves with no value.
+ export declare function bootstrapNativeSearxng(dataDir: string): Promise<void>;
18
+ //# sourceMappingURL=bootstrap.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bootstrap.d.ts","sourceRoot":"","sources":["../../src/searxng/bootstrap.ts"],"names":[],"mappings":"AAQA,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,aAAa,GAAG,OAAO,GAAG,QAAQ,GAAG,YAAY,CAAC;IAC1D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,QAAQ,GAAG,UAAU,CAAC;IACpD,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,wBAAgB,oBAAoB,IAAI,OAAO,CAO9C;AAED,wBAAgB,oBAAoB,IAAI,OAAO,CAO9C;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,cAAc,GAAG,IAAI,CAQxE;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,cAAc,GAAG,IAAI,CAG9E;AAED,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAoCrD;AAED,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,iBAAiB,CAAC,CAyCvE;AAED,wBAAsB,sBAAsB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAyB3E"}