@staticn0va/wigolo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/LICENSE +74 -0
  2. package/README.md +272 -0
  3. package/dist/cache/db.d.ts +5 -0
  4. package/dist/cache/db.d.ts.map +1 -0
  5. package/dist/cache/db.js +97 -0
  6. package/dist/cache/db.js.map +1 -0
  7. package/dist/cache/store.d.ts +26 -0
  8. package/dist/cache/store.d.ts.map +1 -0
  9. package/dist/cache/store.js +214 -0
  10. package/dist/cache/store.js.map +1 -0
  11. package/dist/cli/daemon.d.ts +2 -0
  12. package/dist/cli/daemon.d.ts.map +1 -0
  13. package/dist/cli/daemon.js +5 -0
  14. package/dist/cli/daemon.js.map +1 -0
  15. package/dist/cli/health.d.ts +2 -0
  16. package/dist/cli/health.d.ts.map +1 -0
  17. package/dist/cli/health.js +5 -0
  18. package/dist/cli/health.js.map +1 -0
  19. package/dist/cli/index.d.ts +7 -0
  20. package/dist/cli/index.d.ts.map +1 -0
  21. package/dist/cli/index.js +9 -0
  22. package/dist/cli/index.js.map +1 -0
  23. package/dist/cli/warmup.d.ts +11 -0
  24. package/dist/cli/warmup.d.ts.map +1 -0
  25. package/dist/cli/warmup.js +107 -0
  26. package/dist/cli/warmup.js.map +1 -0
  27. package/dist/config.d.ts +41 -0
  28. package/dist/config.d.ts.map +1 -0
  29. package/dist/config.js +66 -0
  30. package/dist/config.js.map +1 -0
  31. package/dist/crawl/crawler.d.ts +18 -0
  32. package/dist/crawl/crawler.d.ts.map +1 -0
  33. package/dist/crawl/crawler.js +228 -0
  34. package/dist/crawl/crawler.js.map +1 -0
  35. package/dist/crawl/dedup.d.ts +15 -0
  36. package/dist/crawl/dedup.d.ts.map +1 -0
  37. package/dist/crawl/dedup.js +93 -0
  38. package/dist/crawl/dedup.js.map +1 -0
  39. package/dist/crawl/mapper.d.ts +17 -0
  40. package/dist/crawl/mapper.d.ts.map +1 -0
  41. package/dist/crawl/mapper.js +178 -0
  42. package/dist/crawl/mapper.js.map +1 -0
  43. package/dist/crawl/rate-limiter.d.ts +10 -0
  44. package/dist/crawl/rate-limiter.d.ts.map +1 -0
  45. package/dist/crawl/rate-limiter.js +72 -0
  46. package/dist/crawl/rate-limiter.js.map +1 -0
  47. package/dist/crawl/robots.d.ts +9 -0
  48. package/dist/crawl/robots.d.ts.map +1 -0
  49. package/dist/crawl/robots.js +63 -0
  50. package/dist/crawl/robots.js.map +1 -0
  51. package/dist/crawl/sitemap.d.ts +4 -0
  52. package/dist/crawl/sitemap.d.ts.map +1 -0
  53. package/dist/crawl/sitemap.js +38 -0
  54. package/dist/crawl/sitemap.js.map +1 -0
  55. package/dist/crawl/url-utils.d.ts +3 -0
  56. package/dist/crawl/url-utils.d.ts.map +1 -0
  57. package/dist/crawl/url-utils.js +41 -0
  58. package/dist/crawl/url-utils.js.map +1 -0
  59. package/dist/extraction/defuddle.d.ts +3 -0
  60. package/dist/extraction/defuddle.d.ts.map +1 -0
  61. package/dist/extraction/defuddle.js +26 -0
  62. package/dist/extraction/defuddle.js.map +1 -0
  63. package/dist/extraction/extract.d.ts +5 -0
  64. package/dist/extraction/extract.d.ts.map +1 -0
  65. package/dist/extraction/extract.js +83 -0
  66. package/dist/extraction/extract.js.map +1 -0
  67. package/dist/extraction/jsonld.d.ts +4 -0
  68. package/dist/extraction/jsonld.d.ts.map +1 -0
  69. package/dist/extraction/jsonld.js +64 -0
  70. package/dist/extraction/jsonld.js.map +1 -0
  71. package/dist/extraction/markdown.d.ts +10 -0
  72. package/dist/extraction/markdown.d.ts.map +1 -0
  73. package/dist/extraction/markdown.js +107 -0
  74. package/dist/extraction/markdown.js.map +1 -0
  75. package/dist/extraction/pipeline.d.ts +11 -0
  76. package/dist/extraction/pipeline.d.ts.map +1 -0
  77. package/dist/extraction/pipeline.js +95 -0
  78. package/dist/extraction/pipeline.js.map +1 -0
  79. package/dist/extraction/readability.d.ts +3 -0
  80. package/dist/extraction/readability.d.ts.map +1 -0
  81. package/dist/extraction/readability.js +32 -0
  82. package/dist/extraction/readability.js.map +1 -0
  83. package/dist/extraction/schema.d.ts +7 -0
  84. package/dist/extraction/schema.d.ts.map +1 -0
  85. package/dist/extraction/schema.js +86 -0
  86. package/dist/extraction/schema.js.map +1 -0
  87. package/dist/extraction/site-extractors/docs-generic.d.ts +3 -0
  88. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -0
  89. package/dist/extraction/site-extractors/docs-generic.js +104 -0
  90. package/dist/extraction/site-extractors/docs-generic.js.map +1 -0
  91. package/dist/extraction/site-extractors/github.d.ts +3 -0
  92. package/dist/extraction/site-extractors/github.d.ts.map +1 -0
  93. package/dist/extraction/site-extractors/github.js +107 -0
  94. package/dist/extraction/site-extractors/github.js.map +1 -0
  95. package/dist/extraction/site-extractors/mdn.d.ts +3 -0
  96. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -0
  97. package/dist/extraction/site-extractors/mdn.js +58 -0
  98. package/dist/extraction/site-extractors/mdn.js.map +1 -0
  99. package/dist/extraction/site-extractors/stackoverflow.d.ts +3 -0
  100. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -0
  101. package/dist/extraction/site-extractors/stackoverflow.js +88 -0
  102. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -0
  103. package/dist/extraction/trafilatura.d.ts +6 -0
  104. package/dist/extraction/trafilatura.d.ts.map +1 -0
  105. package/dist/extraction/trafilatura.js +105 -0
  106. package/dist/extraction/trafilatura.js.map +1 -0
  107. package/dist/fetch/auth.d.ts +8 -0
  108. package/dist/fetch/auth.d.ts.map +1 -0
  109. package/dist/fetch/auth.js +32 -0
  110. package/dist/fetch/auth.js.map +1 -0
  111. package/dist/fetch/browser-pool.d.ts +28 -0
  112. package/dist/fetch/browser-pool.d.ts.map +1 -0
  113. package/dist/fetch/browser-pool.js +138 -0
  114. package/dist/fetch/browser-pool.js.map +1 -0
  115. package/dist/fetch/content-check.d.ts +2 -0
  116. package/dist/fetch/content-check.d.ts.map +1 -0
  117. package/dist/fetch/content-check.js +62 -0
  118. package/dist/fetch/content-check.js.map +1 -0
  119. package/dist/fetch/http-client.d.ts +15 -0
  120. package/dist/fetch/http-client.d.ts.map +1 -0
  121. package/dist/fetch/http-client.js +146 -0
  122. package/dist/fetch/http-client.js.map +1 -0
  123. package/dist/fetch/router.d.ts +45 -0
  124. package/dist/fetch/router.d.ts.map +1 -0
  125. package/dist/fetch/router.js +89 -0
  126. package/dist/fetch/router.js.map +1 -0
  127. package/dist/index.d.ts +3 -0
  128. package/dist/index.d.ts.map +1 -0
  129. package/dist/index.js +22 -0
  130. package/dist/index.js.map +1 -0
  131. package/dist/logger.d.ts +10 -0
  132. package/dist/logger.d.ts.map +1 -0
  133. package/dist/logger.js +39 -0
  134. package/dist/logger.js.map +1 -0
  135. package/dist/search/dedup.d.ts +10 -0
  136. package/dist/search/dedup.d.ts.map +1 -0
  137. package/dist/search/dedup.js +35 -0
  138. package/dist/search/dedup.js.map +1 -0
  139. package/dist/search/engines/bing.d.ts +7 -0
  140. package/dist/search/engines/bing.d.ts.map +1 -0
  141. package/dist/search/engines/bing.js +48 -0
  142. package/dist/search/engines/bing.js.map +1 -0
  143. package/dist/search/engines/duckduckgo.d.ts +7 -0
  144. package/dist/search/engines/duckduckgo.d.ts.map +1 -0
  145. package/dist/search/engines/duckduckgo.js +50 -0
  146. package/dist/search/engines/duckduckgo.js.map +1 -0
  147. package/dist/search/engines/startpage.d.ts +7 -0
  148. package/dist/search/engines/startpage.d.ts.map +1 -0
  149. package/dist/search/engines/startpage.js +50 -0
  150. package/dist/search/engines/startpage.js.map +1 -0
  151. package/dist/search/filters.d.ts +16 -0
  152. package/dist/search/filters.d.ts.map +1 -0
  153. package/dist/search/filters.js +63 -0
  154. package/dist/search/filters.js.map +1 -0
  155. package/dist/search/flashrank.d.ts +12 -0
  156. package/dist/search/flashrank.d.ts.map +1 -0
  157. package/dist/search/flashrank.js +63 -0
  158. package/dist/search/flashrank.js.map +1 -0
  159. package/dist/search/query.d.ts +2 -0
  160. package/dist/search/query.d.ts.map +1 -0
  161. package/dist/search/query.js +41 -0
  162. package/dist/search/query.js.map +1 -0
  163. package/dist/search/rerank.d.ts +3 -0
  164. package/dist/search/rerank.d.ts.map +1 -0
  165. package/dist/search/rerank.js +40 -0
  166. package/dist/search/rerank.js.map +1 -0
  167. package/dist/search/searxng.d.ts +8 -0
  168. package/dist/search/searxng.d.ts.map +1 -0
  169. package/dist/search/searxng.js +87 -0
  170. package/dist/search/searxng.js.map +1 -0
  171. package/dist/search/validator.d.ts +6 -0
  172. package/dist/search/validator.d.ts.map +1 -0
  173. package/dist/search/validator.js +35 -0
  174. package/dist/search/validator.js.map +1 -0
  175. package/dist/searxng/bootstrap.d.ts +18 -0
  176. package/dist/searxng/bootstrap.d.ts.map +1 -0
  177. package/dist/searxng/bootstrap.js +136 -0
  178. package/dist/searxng/bootstrap.js.map +1 -0
  179. package/dist/searxng/docker.d.ts +9 -0
  180. package/dist/searxng/docker.d.ts.map +1 -0
  181. package/dist/searxng/docker.js +67 -0
  182. package/dist/searxng/docker.js.map +1 -0
  183. package/dist/searxng/process.d.ts +23 -0
  184. package/dist/searxng/process.d.ts.map +1 -0
  185. package/dist/searxng/process.js +188 -0
  186. package/dist/searxng/process.js.map +1 -0
  187. package/dist/server.d.ts +2 -0
  188. package/dist/server.d.ts.map +1 -0
  189. package/dist/server.js +311 -0
  190. package/dist/server.js.map +1 -0
  191. package/dist/tools/cache.d.ts +3 -0
  192. package/dist/tools/cache.d.ts.map +1 -0
  193. package/dist/tools/cache.js +50 -0
  194. package/dist/tools/cache.js.map +1 -0
  195. package/dist/tools/crawl.d.ts +6 -0
  196. package/dist/tools/crawl.d.ts.map +1 -0
  197. package/dist/tools/crawl.js +97 -0
  198. package/dist/tools/crawl.js.map +1 -0
  199. package/dist/tools/extract.d.ts +4 -0
  200. package/dist/tools/extract.d.ts.map +1 -0
  201. package/dist/tools/extract.js +69 -0
  202. package/dist/tools/extract.js.map +1 -0
  203. package/dist/tools/fetch.d.ts +4 -0
  204. package/dist/tools/fetch.d.ts.map +1 -0
  205. package/dist/tools/fetch.js +76 -0
  206. package/dist/tools/fetch.js.map +1 -0
  207. package/dist/tools/search.d.ts +4 -0
  208. package/dist/tools/search.d.ts.map +1 -0
  209. package/dist/tools/search.js +160 -0
  210. package/dist/tools/search.js.map +1 -0
  211. package/dist/types.d.ts +222 -0
  212. package/dist/types.d.ts.map +1 -0
  213. package/dist/types.js +2 -0
  214. package/dist/types.js.map +1 -0
  215. package/package.json +61 -0
@@ -0,0 +1,76 @@
1
+ import { extractContent } from '../extraction/pipeline.js';
2
+ import { getCachedContent, cacheContent, isExpired } from '../cache/store.js';
3
+ import { extractSection } from '../extraction/markdown.js';
4
+ import { createLogger } from '../logger.js';
5
+ const log = createLogger('fetch');
6
/**
 * Build a fetch-tool response from a cached row.
 *
 * `cached.metadata`, `cached.links` and `cached.images` are stored as JSON
 * text and are decoded here. When `input.section` is set, the cached markdown
 * is narrowed with `extractSection` and the match flag is reported as
 * `metadata.section_matched`. When `input.max_chars` is a positive number the
 * markdown is cut to that many characters.
 *
 * @param {object} cached - Cache row (`url`, `title`, `markdown`, and the JSON text columns).
 * @param {object} input - Fetch request (`section`, `section_index`, `max_chars` are read).
 * @returns {object} Response object with `cached: true`.
 */
function formatCachedResponse(cached, input) {
  // A blank or corrupt JSON column must not fail the whole request: the
  // markdown itself is still usable, so fall back to an empty value instead.
  const parseOr = (text, fallback) => {
    if (!text) {
      return fallback;
    }
    try {
      return JSON.parse(text) ?? fallback;
    } catch {
      return fallback;
    }
  };

  let markdown = cached.markdown;
  let sectionMatched;
  if (input.section) {
    const result = extractSection(markdown, input.section, input.section_index);
    markdown = result.content;
    sectionMatched = result.matched;
  }

  // Only a positive limit truncates; a negative value would otherwise make
  // slice() drop characters from the end of the text.
  if (input.max_chars > 0 && markdown.length > input.max_chars) {
    markdown = markdown.slice(0, input.max_chars);
  }

  return {
    url: cached.url,
    title: cached.title,
    markdown,
    metadata: {
      ...parseOr(cached.metadata, {}),
      ...(sectionMatched !== undefined ? { section_matched: sectionMatched } : {}),
    },
    links: parseOr(cached.links, []),
    images: parseOr(cached.images, []),
    cached: true,
  };
}
30
/**
 * Fetch-tool entry point.
 *
 * Serves an unexpired cache row when one exists; otherwise fetches the URL
 * through `router`, runs `extractContent` on the result, stores it with
 * `cacheContent`, and returns the extraction. Never throws: any failure is
 * logged and returned as an empty result carrying an `error` message.
 *
 * @param {object} input - Fetch request (`url`, `render_js`, `use_auth`, `headers`,
 *   `screenshot`, `max_chars`, `section`, `section_index`).
 * @param {object} router - Object exposing `fetch(url, options)`.
 * @returns {Promise<object>} Fetch response; `cached` tells which path produced it.
 */
export async function handleFetch(input, router) {
  try {
    // formatCachedResponse never returns a screenshot, so a request that asks
    // for one has to go to the network instead of being answered from cache.
    if (!input.screenshot) {
      const cached = getCachedContent(input.url);
      if (cached && !isExpired(cached)) {
        log.info('Serving from cache', { url: input.url });
        return formatCachedResponse(cached, input);
      }
    }

    const raw = await router.fetch(input.url, {
      renderJs: input.render_js ?? 'auto',
      useAuth: input.use_auth ?? false,
      headers: input.headers,
      screenshot: input.screenshot,
    });

    const extraction = await extractContent(raw.html, raw.finalUrl, {
      maxChars: input.max_chars,
      section: input.section,
      sectionIndex: input.section_index,
      contentType: raw.contentType,
      pdfBuffer: raw.rawBuffer,
    });

    // NOTE(review): the extraction stored here was produced with the caller's
    // max_chars/section options — confirm cacheContent does not persist a
    // truncated or section-only markdown for later full-page requests.
    try {
      cacheContent(raw, extraction);
    } catch (cacheErr) {
      // Caching is best-effort: the content was fetched successfully, so a
      // cache write failure must not turn the response into an error.
      log.warn('failed to cache fetched content', { url: raw.finalUrl, error: String(cacheErr) });
    }

    return {
      url: raw.finalUrl,
      title: extraction.title,
      markdown: extraction.markdown,
      metadata: extraction.metadata,
      links: extraction.links,
      images: extraction.images,
      screenshot: raw.screenshot,
      cached: false,
    };
  } catch (err) {
    log.error('Fetch failed', { url: input.url, error: String(err) });
    return {
      url: input.url,
      title: '',
      markdown: '',
      metadata: {},
      links: [],
      images: [],
      cached: false,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
76
+ //# sourceMappingURL=fetch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch.js","sourceRoot":"","sources":["../../src/tools/fetch.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;AAElC,SAAS,oBAAoB,CAAC,MAAqB,EAAE,KAAiB;IACpE,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAC/B,IAAI,cAAmC,CAAC;IAExC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QAClB,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,aAAa,CAAC,CAAC;QAC5E,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC;QAC1B,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC;IAClC,CAAC;IAED,IAAI,KAAK,CAAC,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;QACzD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAChD,CAAC;IAED,OAAO;QACL,GAAG,EAAE,MAAM,CAAC,GAAG;QACf,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,QAAQ;QACR,QAAQ,EAAE;YACR,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,IAAI,IAAI,CAAC;YACtC,GAAG,CAAC,cAAc,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC7E;QACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC;QACvC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC;QACzC,MAAM,EAAE,IAAI;KACb,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,KAAiB,EACjB,MAAmB;IAEnB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,gBAAgB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3C,IAAI,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;YACnD,OAAO,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE;YACxC,QAAQ,EAAE,KAAK,CAAC,SAAS,IAAI,MAAM;YACnC,OAAO,EAAE,KAAK,CAAC,QAAQ,IAAI,KAAK;YAChC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,UAAU,EAAE,KAAK,CAAC,UAAU;SAC7B,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,MAAM,cAAc,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,QAAQ,EAAE;YAC9D,QAAQ,EAAE,KAAK,CAAC,SAAS;YACzB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,YAAY,EAAE,KAAK,CAAC,aAAa;YACjC,WAAW,EAAE,GAAG,CAAC,WAAW;YAC5B,SA
AS,EAAE,GAAG,CAAC,SAAS;SACzB,CAAC,CAAC;QAEH,YAAY,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAE9B,OAAO;YACL,GAAG,EAAE,GAAG,CAAC,QAAQ;YACjB,KAAK,EAAE,UAAU,CAAC,KAAK;YACvB,QAAQ,EAAE,UAAU,CAAC,QAAQ;YAC7B,QAAQ,EAAE,UAAU,CAAC,QAAQ;YAC7B,KAAK,EAAE,UAAU,CAAC,KAAK;YACvB,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,OAAO;YACL,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE,EAAE;YACZ,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,4 @@
1
+ import type { SearchInput, SearchOutput, SearchEngine } from '../types.js';
2
+ import type { SmartRouter } from '../fetch/router.js';
3
+ export declare function handleSearch(input: SearchInput, engines: SearchEngine[], router: SmartRouter): Promise<SearchOutput>;
4
+ //# sourceMappingURL=search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/tools/search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAoB,YAAY,EAAmB,MAAM,aAAa,CAAC;AAC9G,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAkBtD,wBAAsB,YAAY,CAChC,KAAK,EAAE,WAAW,EAClB,OAAO,EAAE,YAAY,EAAE,EACvB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,YAAY,CAAC,CA4HvB"}
@@ -0,0 +1,160 @@
1
+ import { deduplicateResults } from '../search/dedup.js';
2
+ import { decomposeQuery } from '../search/query.js';
3
+ import { validateLinks } from '../search/validator.js';
4
+ import { rerankResults } from '../search/rerank.js';
5
+ import { applyAllFilters } from '../search/filters.js';
6
+ import { extractContent } from '../extraction/pipeline.js';
7
+ import { cacheSearchResults, getCachedSearchResults } from '../cache/store.js';
8
+ import { getConfig } from '../config.js';
9
+ import { createLogger } from '../logger.js';
10
+ const log = createLogger('search');
11
+ const DEFAULT_MAX_RESULTS = 5;
12
+ const MAX_RESULTS_CAP = 20;
13
+ const DEFAULT_CONTENT_MAX_CHARS = 30000;
14
+ const DEFAULT_MAX_TOTAL_CHARS = 50000;
15
/**
 * Search-tool entry point.
 *
 * Runs every sub-query from `decomposeQuery` against each selected engine in
 * parallel, then passes the combined hits through `deduplicateResults`,
 * `applyAllFilters`, `rerankResults` and `validateLinks`, keeps the first
 * `max_results`, optionally fetches page content for them, and stores the
 * results with `cacheSearchResults`.
 *
 * Engine failures are logged and collected; they are only reported (as
 * `error`) when no engine returned anything.
 *
 * @param {object} input - Search request (`query` plus optional limits and filters).
 * @param {Array<object>} engines - Engines exposing `name` and `search(query, options)`.
 * @param {object} router - Passed to `fetchContentForResults` for page fetches.
 * @returns {Promise<object>} `{ results, query, engines_used, total_time_ms, error? }`.
 */
export async function handleSearch(input, engines, router) {
  const start = Date.now();
  const config = getConfig();

  // Clamp to [1, MAX_RESULTS_CAP]. Without the lower bound a 0 yields no
  // results and a negative value makes slice(0, maxResults) drop from the end.
  const maxResults = Math.max(1, Math.min(input.max_results ?? DEFAULT_MAX_RESULTS, MAX_RESULTS_CAP));
  const includeContent = input.include_content ?? true;
  const contentMaxChars = input.content_max_chars ?? DEFAULT_CONTENT_MAX_CHARS;
  const maxTotalChars = input.max_total_chars ?? DEFAULT_MAX_TOTAL_CHARS;
  const totalTimeoutMs = config.searchTotalTimeoutMs;
  const fetchTimeoutMs = config.searchFetchTimeoutMs;

  // The content-free shape of a result. Used for fresh results and for cache
  // hits, so both paths return the same fields.
  const toLeanResult = (r) => ({
    title: r.title,
    url: r.url,
    snippet: r.snippet,
    relevance_score: r.relevance_score,
  });

  // NOTE(review): the lookup uses the query text only; domain/date/category
  // filters and search_engines are not part of it here — confirm the cache
  // layer accounts for them.
  const cached = getCachedSearchResults(input.query);
  if (cached && !includeContent) {
    log.info('serving search results from cache', { query: input.query });
    return {
      // Cached rows were written after the content fetch mutated them, so
      // strip markdown_content / fetch_failed / content_truncated: this caller
      // asked for no content.
      results: cached.results.slice(0, maxResults).map(toLeanResult),
      query: input.query,
      engines_used: cached.engines_used,
      total_time_ms: Date.now() - start,
    };
  }

  let activeEngines = engines;
  if (input.search_engines && input.search_engines.length > 0) {
    activeEngines = engines.filter((e) => input.search_engines.includes(e.name));
    if (activeEngines.length === 0) {
      log.warn('no engines matched search_engines filter, using all', { requested: input.search_engines });
      activeEngines = engines;
    }
  }

  const subQueries = decomposeQuery(input.query);
  log.debug('query decomposition', { original: input.query, parts: subQueries.length });

  const allRaw = [];
  const enginesUsed = new Set();
  const errors = [];

  // Increase overfetch when domain filters are active (more results will be filtered out)
  const hasFilterAttrition = !!(input.include_domains?.length || input.exclude_domains?.length);
  const overfetchFactor = hasFilterAttrition ? 3 : 2;

  const searchPromises = activeEngines.flatMap((engine) => subQueries.map(async (query) => {
    try {
      const results = await engine.search(query, {
        maxResults: maxResults * overfetchFactor,
        timeRange: input.time_range,
        language: input.language,
        includeDomains: input.include_domains,
        excludeDomains: input.exclude_domains,
        fromDate: input.from_date,
        toDate: input.to_date,
        category: input.category,
      });
      for (const r of results) {
        allRaw.push(r);
        // Only engines that actually contributed a hit are reported as used.
        enginesUsed.add(engine.name);
      }
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      log.warn('engine search failed', { engine: engine.name, query, error: msg });
      errors.push(`${engine.name}: ${msg}`);
    }
  }));
  await Promise.allSettled(searchPromises);

  if (allRaw.length === 0) {
    return {
      results: [],
      query: input.query,
      engines_used: [...enginesUsed],
      total_time_ms: Date.now() - start,
      error: errors.length > 0 ? errors.join('; ') : 'No results found',
    };
  }

  let merged = deduplicateResults(allRaw);
  // Post-filter: domain + date + category (Slice 7) — runs before rerank so
  // the reranker only scores results that pass filters (Slice 9 interaction).
  merged = applyAllFilters(merged, {
    includeDomains: input.include_domains,
    excludeDomains: input.exclude_domains,
    fromDate: input.from_date,
    toDate: input.to_date,
    category: input.category,
  });
  merged = await rerankResults(input.query, merged);
  merged = await validateLinks(merged);
  merged = merged.slice(0, maxResults);

  const results = merged.map(toLeanResult);

  if (includeContent && results.length > 0) {
    // Mutates the result objects in place (markdown_content, fetch_failed, ...).
    await fetchContentForResults(results, router, {
      contentMaxChars,
      maxTotalChars,
      fetchTimeoutMs,
      totalDeadline: start + totalTimeoutMs,
    });
  }

  try {
    cacheSearchResults(input.query, results, [...enginesUsed]);
  } catch (err) {
    // Best-effort: a cache write failure must not fail the search.
    log.warn('failed to cache search results', { error: String(err) });
  }

  return {
    results,
    query: input.query,
    engines_used: [...enginesUsed],
    total_time_ms: Date.now() - start,
  };
}
123
// v1: sequential fetch for correct budget enforcement. v2: parallel fetch then apply budget in relevance order.
/**
 * Fetch and extract page content for each search result, in order, mutating
 * the result objects in place.
 *
 * Per result, one of the following is set:
 *  - `markdown_content` (plus `content_truncated` when cut to fit the budget),
 *  - `content_truncated = true` alone when the character budget is already spent,
 *  - `fetch_failed` with `'total_timeout'`, `'timeout'`, or the error message.
 *
 * @param {Array<object>} results - Results to enrich; each must have a `url`.
 * @param {object} router - Object exposing `fetch(url, options)`.
 * @param {object} ctx - `{ contentMaxChars, maxTotalChars, fetchTimeoutMs, totalDeadline }`;
 *   `totalDeadline` is compared against `Date.now()`.
 * @returns {Promise<void>}
 */
async function fetchContentForResults(results, router, ctx) {
  let totalCharsUsed = 0;

  for (const result of results) {
    const now = Date.now();
    if (now >= ctx.totalDeadline) {
      result.fetch_failed = 'total_timeout';
      continue;
    }
    if (totalCharsUsed >= ctx.maxTotalChars) {
      result.content_truncated = true;
      continue;
    }

    // Never wait past the overall deadline: cap the per-fetch timeout by the
    // time that is left, and report which limit was the binding one.
    const msUntilDeadline = ctx.totalDeadline - now;
    const cappedByDeadline = msUntilDeadline < ctx.fetchTimeoutMs;
    const timeoutMs = cappedByDeadline ? msUntilDeadline : ctx.fetchTimeoutMs;

    try {
      let timer;
      let raw;
      try {
        raw = await Promise.race([
          router.fetch(result.url, { renderJs: 'auto' }),
          new Promise((_, reject) => {
            timer = setTimeout(
              () => reject(new Error(cappedByDeadline ? 'total_timeout' : 'timeout')),
              timeoutMs,
            );
          }),
        ]);
      } finally {
        // Always release the timer; otherwise every completed fetch leaves a
        // pending timeout behind for up to fetchTimeoutMs.
        clearTimeout(timer);
      }

      const extraction = await extractContent(raw.html, raw.finalUrl, {
        maxChars: ctx.contentMaxChars,
        contentType: raw.contentType,
      });

      let content = extraction.markdown;
      const remaining = ctx.maxTotalChars - totalCharsUsed;
      if (content.length > remaining) {
        content = content.slice(0, remaining);
        result.content_truncated = true;
      }
      totalCharsUsed += content.length;
      result.markdown_content = content;
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      log.debug('content fetch failed', { url: result.url, error: msg });
      result.fetch_failed = msg;
    }
  }
}
160
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/tools/search.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAA2B,MAAM,oBAAoB,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAC/E,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAC9B,MAAM,eAAe,GAAG,EAAE,CAAC;AAC3B,MAAM,yBAAyB,GAAG,KAAK,CAAC;AACxC,MAAM,uBAAuB,GAAG,KAAK,CAAC;AAEtC,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAkB,EAClB,OAAuB,EACvB,MAAmB;IAEnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAE3B,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,IAAI,mBAAmB,EAAE,eAAe,CAAC,CAAC;IACvF,MAAM,cAAc,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;IACrD,MAAM,eAAe,GAAG,KAAK,CAAC,iBAAiB,IAAI,yBAAyB,CAAC;IAC7E,MAAM,aAAa,GAAG,KAAK,CAAC,eAAe,IAAI,uBAAuB,CAAC;IACvE,MAAM,cAAc,GAAG,MAAM,CAAC,oBAAoB,CAAC;IACnD,MAAM,cAAc,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAEnD,MAAM,MAAM,GAAG,sBAAsB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACnD,IAAI,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAC9B,GAAG,CAAC,IAAI,CAAC,mCAAmC,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;QACtE,OAAO;YACL,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC;YAC5C,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAClC,CAAC;IACJ,CAAC;IAED,IAAI,aAAa,GAAG,OAAO,CAAC;IAC5B,IAAI,KAAK,CAAC,cAAc,IAAI,KAAK,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5D,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,cAAe,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAC5E,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,GAAG,CAAC,IAAI,CAAC,qDAAqD,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC;YACrG,aAAa,GAAG,OAAO,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,
CAAC,CAAC;IAC/C,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,QAAQ,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAEtF,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;IACtC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,wFAAwF;IACxF,MAAM,kBAAkB,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,eAAe,EAAE,MAAM,IAAI,KAAK,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC;IAC9F,MAAM,eAAe,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEnD,MAAM,cAAc,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CACpD,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC7B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE;gBACzC,UAAU,EAAE,UAAU,GAAG,eAAe;gBACxC,SAAS,EAAE,KAAK,CAAC,UAAU;gBAC3B,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,cAAc,EAAE,KAAK,CAAC,eAAe;gBACrC,cAAc,EAAE,KAAK,CAAC,eAAe;gBACrC,QAAQ,EAAE,KAAK,CAAC,SAAS;gBACzB,MAAM,EAAE,KAAK,CAAC,OAAO;gBACrB,QAAQ,EAAE,KAAK,CAAC,QAAQ;aACzB,CAAC,CAAC;YACH,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACxB,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBACf,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,GAAG,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAC7E,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,IAAI,KAAK,GAAG,EAAE,CAAC,CAAC;QACxC,CAAC;IACH,CAAC,CAAC,CACH,CAAC;IAEF,MAAM,OAAO,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;IAEzC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO;YACL,OAAO,EAAE,EAAE;YACX,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,YAAY,EAAE,CAAC,GAAG,WAAW,CAAC;YAC9B,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;YACjC,KAAK,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB;SAClE,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAExC,0EAA0E;IAC1E,4EAA4E;IAC5E,MAAM,GAAG,eAAe,CAAC,MAAM,EAAE;QAC/B,cAAc,EAAE,KAAK,CAAC,eAAe;QACrC,cAAc,EAAE,KAAK,CAAC,eAAe;QACrC,QAAQ,EAAE,KAAK,CAAC,SAAS;QACzB,MAAM,EAAE,KAAK,CAAC,OAAO;QACrB,QAAQ,EAAE,KAAK,CAAC,QAAQ;KAC
zB,CAAC,CAAC;IAEH,MAAM,GAAG,MAAM,aAAa,CAAC,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAClD,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,CAAC;IAErC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IAErC,MAAM,OAAO,GAAuB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACnD,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,eAAe,EAAE,CAAC,CAAC,eAAe;KACnC,CAAC,CAAC,CAAC;IAEJ,IAAI,cAAc,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzC,MAAM,sBAAsB,CAAC,OAAO,EAAE,MAAM,EAAE;YAC5C,eAAe;YACf,aAAa;YACb,cAAc;YACd,aAAa,EAAE,KAAK,GAAG,cAAc;SACtC,CAAC,CAAC;IACL,CAAC;IAED,IAAI,CAAC;QACH,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC;IAC7D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,gCAAgC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,OAAO;QACL,OAAO;QACP,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAE,CAAC,GAAG,WAAW,CAAC;QAC9B,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAClC,CAAC;AACJ,CAAC;AASD,gHAAgH;AAChH,KAAK,UAAU,sBAAsB,CACnC,OAA2B,EAC3B,MAAmB,EACnB,GAAiB;IAEjB,IAAI,cAAc,GAAG,CAAC,CAAC;IAEvB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,GAAG,CAAC,aAAa,EAAE,CAAC;YACpC,MAAM,CAAC,YAAY,GAAG,eAAe,CAAC;YACtC,SAAS;QACX,CAAC;QAED,IAAI,cAAc,IAAI,GAAG,CAAC,aAAa,EAAE,CAAC;YACxC,MAAM,CAAC,iBAAiB,GAAG,IAAI,CAAC;YAChC,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC;gBAC7B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;gBAC9C,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAC/B,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,cAAc,CAAC,CACnE;aACF,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,MAAM,cAAc,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,QAAQ,EAAE;gBAC9D,QAAQ,EAAE,GAAG,CAAC,eAAe;gBAC7B,WAAW,EAAE,GAAG,CAAC,WAAW;aAC7B,CAAC,CAAC;YAEH,IAAI,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC;YAElC,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,GAAG,cAAc,CAAC;YACrD,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;gBAC/B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;gBACtC,MAAM,CAAC
,iBAAiB,GAAG,IAAI,CAAC;YAClC,CAAC;YAED,cAAc,IAAI,OAAO,CAAC,MAAM,CAAC;YACjC,MAAM,CAAC,gBAAgB,GAAG,OAAO,CAAC;QACpC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,GAAG,CAAC,KAAK,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YACnE,MAAM,CAAC,YAAY,GAAG,GAAG,CAAC;QAC5B,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,222 @@
1
+ import type { JsonSchema } from './extraction/schema.js';
2
+ export interface FetchInput {
3
+ url: string;
4
+ render_js?: 'auto' | 'always' | 'never';
5
+ use_auth?: boolean;
6
+ max_chars?: number;
7
+ section?: string;
8
+ section_index?: number;
9
+ screenshot?: boolean;
10
+ headers?: Record<string, string>;
11
+ }
12
+ export interface FetchOutput {
13
+ url: string;
14
+ title: string;
15
+ markdown: string;
16
+ metadata: {
17
+ description?: string;
18
+ author?: string;
19
+ date?: string;
20
+ language?: string;
21
+ section_matched?: boolean;
22
+ };
23
+ links: string[];
24
+ images: string[];
25
+ screenshot?: string;
26
+ cached: boolean;
27
+ error?: string;
28
+ }
29
+ export interface RawFetchResult {
30
+ url: string;
31
+ finalUrl: string;
32
+ html: string;
33
+ contentType: string;
34
+ statusCode: number;
35
+ method: 'http' | 'playwright';
36
+ headers: Record<string, string>;
37
+ rawBuffer?: Buffer;
38
+ screenshot?: string;
39
+ }
40
+ export interface ExtractionResult {
41
+ title: string;
42
+ markdown: string;
43
+ metadata: {
44
+ description?: string;
45
+ author?: string;
46
+ date?: string;
47
+ language?: string;
48
+ };
49
+ links: string[];
50
+ images: string[];
51
+ extractor: ExtractorType;
52
+ }
53
+ export type ExtractorType = 'defuddle' | 'readability' | 'turndown' | 'site-specific' | 'trafilatura';
54
+ export type BrowserType = 'chromium' | 'firefox' | 'webkit';
55
+ export interface CDPSession {
56
+ id: string;
57
+ url: string;
58
+ title: string;
59
+ webSocketDebuggerUrl: string;
60
+ }
61
+ export interface CachedContent {
62
+ id: number;
63
+ url: string;
64
+ normalizedUrl: string;
65
+ title: string;
66
+ markdown: string;
67
+ rawHtml: string;
68
+ metadata: string;
69
+ links: string;
70
+ images: string;
71
+ fetchMethod: 'http' | 'playwright';
72
+ extractorUsed: ExtractorType;
73
+ contentHash: string;
74
+ fetchedAt: string;
75
+ expiresAt: string | null;
76
+ }
77
+ export interface Extractor {
78
+ name: string;
79
+ canHandle(url: string, html?: string): boolean;
80
+ extract(html: string, url: string): ExtractionResult | null;
81
+ }
82
+ export interface SearchInput {
83
+ query: string;
84
+ max_results?: number;
85
+ include_content?: boolean;
86
+ content_max_chars?: number;
87
+ max_total_chars?: number;
88
+ time_range?: 'day' | 'week' | 'month' | 'year';
89
+ search_engines?: string[];
90
+ language?: string;
91
+ include_domains?: string[];
92
+ exclude_domains?: string[];
93
+ from_date?: string;
94
+ to_date?: string;
95
+ category?: 'general' | 'news' | 'code' | 'docs' | 'papers' | 'images';
96
+ }
97
+ export interface SearchResultItem {
98
+ title: string;
99
+ url: string;
100
+ snippet: string;
101
+ markdown_content?: string;
102
+ fetch_failed?: string;
103
+ content_truncated?: boolean;
104
+ relevance_score: number;
105
+ }
106
+ export interface SearchOutput {
107
+ results: SearchResultItem[];
108
+ query: string;
109
+ engines_used: string[];
110
+ total_time_ms: number;
111
+ error?: string;
112
+ }
113
+ export interface RawSearchResult {
114
+ title: string;
115
+ url: string;
116
+ snippet: string;
117
+ relevance_score: number;
118
+ engine: string;
119
+ }
120
+ export interface SearchEngineOptions {
121
+ maxResults?: number;
122
+ timeRange?: string;
123
+ language?: string;
124
+ timeoutMs?: number;
125
+ includeDomains?: string[];
126
+ excludeDomains?: string[];
127
+ fromDate?: string;
128
+ toDate?: string;
129
+ category?: 'general' | 'news' | 'code' | 'docs' | 'papers' | 'images';
130
+ }
131
+ export interface SearchEngine {
132
+ name: string;
133
+ search(query: string, options?: SearchEngineOptions): Promise<RawSearchResult[]>;
134
+ }
135
+ export interface CrawlInput {
136
+ url: string;
137
+ max_depth?: number;
138
+ max_pages?: number;
139
+ strategy?: 'bfs' | 'dfs' | 'sitemap' | 'map';
140
+ include_patterns?: string[];
141
+ exclude_patterns?: string[];
142
+ use_auth?: boolean;
143
+ extract_links?: boolean;
144
+ max_total_chars?: number;
145
+ }
146
+ export interface CrawlResultItem {
147
+ url: string;
148
+ title: string;
149
+ markdown: string;
150
+ depth: number;
151
+ }
152
+ export interface LinkEdge {
153
+ from: string;
154
+ to: string;
155
+ }
156
+ export interface CrawlOutput {
157
+ pages: CrawlResultItem[];
158
+ total_found: number;
159
+ crawled: number;
160
+ links?: LinkEdge[];
161
+ error?: string;
162
+ }
163
+ export interface MapOutput {
164
+ urls: string[];
165
+ total_found: number;
166
+ sitemap_found: boolean;
167
+ error?: string;
168
+ }
169
+ export interface CacheInput {
170
+ query?: string;
171
+ url_pattern?: string;
172
+ since?: string;
173
+ clear?: boolean;
174
+ stats?: boolean;
175
+ }
176
+ export interface CacheResultItem {
177
+ url: string;
178
+ title: string;
179
+ markdown: string;
180
+ fetched_at: string;
181
+ }
182
+ export interface CacheStats {
183
+ total_urls: number;
184
+ total_size_mb: number;
185
+ oldest: string;
186
+ newest: string;
187
+ }
188
+ export interface CacheOutput {
189
+ results?: CacheResultItem[];
190
+ stats?: CacheStats;
191
+ cleared?: number;
192
+ error?: string;
193
+ }
194
+ export interface ExtractInput {
195
+ url?: string;
196
+ html?: string;
197
+ mode?: 'selector' | 'tables' | 'metadata' | 'schema';
198
+ css_selector?: string;
199
+ multiple?: boolean;
200
+ schema?: JsonSchema;
201
+ }
202
+ export interface MetadataData {
203
+ title?: string;
204
+ description?: string;
205
+ author?: string;
206
+ date?: string;
207
+ keywords?: string[];
208
+ og_image?: string;
209
+ jsonld?: Record<string, unknown>[];
210
+ }
211
+ export interface TableData {
212
+ caption?: string;
213
+ headers: string[];
214
+ rows: Array<Record<string, string>>;
215
+ }
216
+ export interface ExtractOutput {
217
+ data: string | string[] | TableData[] | MetadataData | Record<string, unknown>;
218
+ source_url?: string;
219
+ mode: 'selector' | 'tables' | 'metadata' | 'schema';
220
+ error?: string;
221
+ }
222
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEzD,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;IACxC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE;QACR,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,eAAe,CAAC,EAAE,OAAO,CAAC;KAC3B,CAAC;IACF,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,YAAY,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE;QACR,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,SAAS,EAAE,aAAa,CAAC;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,aAAa,GAAG,UAAU,GAAG,eAAe,GAAG,aAAa,CAAC;AAEtG,MAAM,MAAM,WAAW,GAAG,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;AAE5D,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,GAAG,EAAE,MAAM,CAAC;IACZ,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;
IACd,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,YAAY,CAAC;IACnC,aAAa,EAAE,aAAa,CAAC;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IAC/C,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAAC;CAC7D;AAID,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;IAC/C,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,QAAQ,CAAC;CACvE;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IAClC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,QAAQ,CAAC;CACvE;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC;CAClF;AAID,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,
MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,SAAS,GAAG,KAAK,CAAC;IAC7C,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,eAAe,EAAE,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,QAAQ,EAAE,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,OAAO,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,CAAC,EAAE,eAAe,EAAE,CAAC;IAC5B,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,MAAM,WAAW,YAAY;IAC3B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,UAAU,GAAG,QAAQ,GAAG,UAAU,GAAG,QAAQ,CAAC;IACrD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,UAAU,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,M
AAM,EAAE,CAAC;IAClB,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACrC;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,EAAE,GAAG,YAAY,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,UAAU,GAAG,QAAQ,CAAC;IACpD,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
package/package.json ADDED
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "@staticn0va/wigolo",
3
+ "version": "0.1.0",
4
+ "description": "Local-first web search MCP server for AI coding agents",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "bin": {
8
+ "wigolo": "dist/index.js"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "README.md",
13
+ "LICENSE"
14
+ ],
15
+ "keywords": [
16
+ "mcp",
17
+ "web-search",
18
+ "web-scraping",
19
+ "ai-agent",
20
+ "playwright",
21
+ "searxng",
22
+ "local-first",
23
+ "content-extraction"
24
+ ],
25
+ "scripts": {
26
+ "build": "tsc",
27
+ "dev": "tsx src/index.ts",
28
+ "test": "vitest run",
29
+ "test:watch": "vitest",
30
+ "test:unit": "vitest run tests/unit",
31
+ "test:integration": "vitest run tests/integration",
32
+ "test:e2e": "vitest run tests/e2e",
33
+ "lint": "tsc --noEmit"
34
+ },
35
+ "license": "BUSL-1.1",
36
+ "repository": {
37
+ "type": "git",
38
+ "url": "https://github.com/KnockOutEZ/wigolo"
39
+ },
40
+ "engines": {
41
+ "node": ">=20"
42
+ },
43
+ "dependencies": {
44
+ "@modelcontextprotocol/sdk": "^1.29.0",
45
+ "@mozilla/readability": "^0.6.0",
46
+ "better-sqlite3": "^12.8.0",
47
+ "defuddle": "^0.16.0",
48
+ "linkedom": "^0.18.12",
49
+ "pdf-parse": "^2.4.5",
50
+ "playwright": "^1.59.1",
51
+ "turndown": "^7.2.4"
52
+ },
53
+ "devDependencies": {
54
+ "@types/better-sqlite3": "^7.6.13",
55
+ "@types/node": "^25.6.0",
56
+ "@types/turndown": "^5.0.6",
57
+ "tsx": "^4.21.0",
58
+ "typescript": "^6.0.2",
59
+ "vitest": "^4.1.4"
60
+ }
61
+ }