@staticn0va/wigolo 0.6.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/dist/cache/store.d.ts +9 -1
  2. package/dist/cache/store.d.ts.map +1 -1
  3. package/dist/cache/store.js +30 -4
  4. package/dist/cache/store.js.map +1 -1
  5. package/dist/cli/doctor.d.ts.map +1 -1
  6. package/dist/cli/doctor.js +56 -2
  7. package/dist/cli/doctor.js.map +1 -1
  8. package/dist/cli/status.js +1 -1
  9. package/dist/cli/status.js.map +1 -1
  10. package/dist/cli/tui/hooks/useInstall.js +1 -1
  11. package/dist/cli/tui/hooks/useInstall.js.map +1 -1
  12. package/dist/cli/tui/hooks/useVerify.js +1 -1
  13. package/dist/cli/tui/hooks/useVerify.js.map +1 -1
  14. package/dist/cli/tui/status-format.d.ts +1 -1
  15. package/dist/cli/tui/status-format.d.ts.map +1 -1
  16. package/dist/cli/tui/status-format.js +1 -1
  17. package/dist/cli/tui/status-format.js.map +1 -1
  18. package/dist/cli/tui/status-python.d.ts +1 -1
  19. package/dist/cli/tui/status-python.d.ts.map +1 -1
  20. package/dist/cli/tui/status-python.js +17 -1
  21. package/dist/cli/tui/status-python.js.map +1 -1
  22. package/dist/cli/tui/verify-suggestions.d.ts +1 -1
  23. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -1
  24. package/dist/cli/tui/verify-suggestions.js +3 -3
  25. package/dist/cli/tui/verify-suggestions.js.map +1 -1
  26. package/dist/cli/tui/verify.d.ts +2 -2
  27. package/dist/cli/tui/verify.d.ts.map +1 -1
  28. package/dist/cli/tui/verify.js +32 -6
  29. package/dist/cli/tui/verify.js.map +1 -1
  30. package/dist/cli/warmup.d.ts.map +1 -1
  31. package/dist/cli/warmup.js +16 -12
  32. package/dist/cli/warmup.js.map +1 -1
  33. package/dist/config.d.ts +6 -1
  34. package/dist/config.d.ts.map +1 -1
  35. package/dist/config.js +15 -2
  36. package/dist/config.js.map +1 -1
  37. package/dist/crawl/dedup.d.ts +1 -0
  38. package/dist/crawl/dedup.d.ts.map +1 -1
  39. package/dist/crawl/dedup.js +47 -1
  40. package/dist/crawl/dedup.js.map +1 -1
  41. package/dist/extraction/boilerplate.d.ts +15 -0
  42. package/dist/extraction/boilerplate.d.ts.map +1 -0
  43. package/dist/extraction/boilerplate.js +49 -0
  44. package/dist/extraction/boilerplate.js.map +1 -0
  45. package/dist/extraction/defuddle.d.ts.map +1 -1
  46. package/dist/extraction/defuddle.js +7 -3
  47. package/dist/extraction/defuddle.js.map +1 -1
  48. package/dist/extraction/jsonld.js +1 -1
  49. package/dist/extraction/jsonld.js.map +1 -1
  50. package/dist/extraction/lang-hints.d.ts +2 -0
  51. package/dist/extraction/lang-hints.d.ts.map +1 -0
  52. package/dist/extraction/lang-hints.js +28 -0
  53. package/dist/extraction/lang-hints.js.map +1 -0
  54. package/dist/extraction/llm/anthropic.d.ts +3 -0
  55. package/dist/extraction/llm/anthropic.d.ts.map +1 -0
  56. package/dist/extraction/llm/anthropic.js +33 -0
  57. package/dist/extraction/llm/anthropic.js.map +1 -0
  58. package/dist/extraction/llm/cache.d.ts +5 -0
  59. package/dist/extraction/llm/cache.d.ts.map +1 -0
  60. package/dist/extraction/llm/cache.js +35 -0
  61. package/dist/extraction/llm/cache.js.map +1 -0
  62. package/dist/extraction/llm/gemini.d.ts +3 -0
  63. package/dist/extraction/llm/gemini.d.ts.map +1 -0
  64. package/dist/extraction/llm/gemini.js +35 -0
  65. package/dist/extraction/llm/gemini.js.map +1 -0
  66. package/dist/extraction/llm/groq.d.ts +3 -0
  67. package/dist/extraction/llm/groq.d.ts.map +1 -0
  68. package/dist/extraction/llm/groq.js +63 -0
  69. package/dist/extraction/llm/groq.js.map +1 -0
  70. package/dist/extraction/llm/hash.d.ts +3 -0
  71. package/dist/extraction/llm/hash.d.ts.map +1 -0
  72. package/dist/extraction/llm/hash.js +22 -0
  73. package/dist/extraction/llm/hash.js.map +1 -0
  74. package/dist/extraction/llm/openai.d.ts +3 -0
  75. package/dist/extraction/llm/openai.d.ts.map +1 -0
  76. package/dist/extraction/llm/openai.js +38 -0
  77. package/dist/extraction/llm/openai.js.map +1 -0
  78. package/dist/extraction/llm/select.d.ts +5 -0
  79. package/dist/extraction/llm/select.d.ts.map +1 -0
  80. package/dist/extraction/llm/select.js +27 -0
  81. package/dist/extraction/llm/select.js.map +1 -0
  82. package/dist/extraction/llm/types.d.ts +24 -0
  83. package/dist/extraction/llm/types.d.ts.map +1 -0
  84. package/dist/extraction/llm/types.js +2 -0
  85. package/dist/extraction/llm/types.js.map +1 -0
  86. package/dist/extraction/llm/validate.d.ts +6 -0
  87. package/dist/extraction/llm/validate.d.ts.map +1 -0
  88. package/dist/extraction/llm/validate.js +63 -0
  89. package/dist/extraction/llm/validate.js.map +1 -0
  90. package/dist/extraction/llm-fallback.d.ts +17 -0
  91. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  92. package/dist/extraction/llm-fallback.js +129 -0
  93. package/dist/extraction/llm-fallback.js.map +1 -0
  94. package/dist/extraction/markdown.d.ts +9 -0
  95. package/dist/extraction/markdown.d.ts.map +1 -1
  96. package/dist/extraction/markdown.js +52 -3
  97. package/dist/extraction/markdown.js.map +1 -1
  98. package/dist/extraction/pipeline.d.ts.map +1 -1
  99. package/dist/extraction/pipeline.js +17 -5
  100. package/dist/extraction/pipeline.js.map +1 -1
  101. package/dist/extraction/readability.d.ts.map +1 -1
  102. package/dist/extraction/readability.js +2 -3
  103. package/dist/extraction/readability.js.map +1 -1
  104. package/dist/extraction/schema.d.ts +12 -0
  105. package/dist/extraction/schema.d.ts.map +1 -1
  106. package/dist/extraction/schema.js +81 -11
  107. package/dist/extraction/schema.js.map +1 -1
  108. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  109. package/dist/extraction/site-extractors/docs-generic.js +2 -3
  110. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  111. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  112. package/dist/extraction/site-extractors/github.js +4 -5
  113. package/dist/extraction/site-extractors/github.js.map +1 -1
  114. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  115. package/dist/extraction/site-extractors/mdn.js +2 -3
  116. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  117. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  118. package/dist/extraction/site-extractors/stackoverflow.js +3 -4
  119. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  120. package/dist/extraction/structured-data.d.ts +4 -0
  121. package/dist/extraction/structured-data.d.ts.map +1 -0
  122. package/dist/extraction/structured-data.js +203 -0
  123. package/dist/extraction/structured-data.js.map +1 -0
  124. package/dist/fetch/router.d.ts +2 -1
  125. package/dist/fetch/router.d.ts.map +1 -1
  126. package/dist/fetch/router.js +19 -1
  127. package/dist/fetch/router.js.map +1 -1
  128. package/dist/instructions.d.ts +7 -7
  129. package/dist/instructions.d.ts.map +1 -1
  130. package/dist/instructions.js +43 -36
  131. package/dist/instructions.js.map +1 -1
  132. package/dist/logger.d.ts +1 -1
  133. package/dist/logger.d.ts.map +1 -1
  134. package/dist/research/brief.js +1 -1
  135. package/dist/research/brief.js.map +1 -1
  136. package/dist/search/evidence.d.ts +25 -0
  137. package/dist/search/evidence.d.ts.map +1 -0
  138. package/dist/search/evidence.js +260 -0
  139. package/dist/search/evidence.js.map +1 -0
  140. package/dist/search/highlights.d.ts +11 -2
  141. package/dist/search/highlights.d.ts.map +1 -1
  142. package/dist/search/highlights.js +131 -48
  143. package/dist/search/highlights.js.map +1 -1
  144. package/dist/search/multi-query.d.ts +1 -0
  145. package/dist/search/multi-query.d.ts.map +1 -1
  146. package/dist/search/multi-query.js +13 -0
  147. package/dist/search/multi-query.js.map +1 -1
  148. package/dist/search/rerank.d.ts +3 -2
  149. package/dist/search/rerank.d.ts.map +1 -1
  150. package/dist/search/rerank.js +16 -44
  151. package/dist/search/rerank.js.map +1 -1
  152. package/dist/search/reranker/download.d.ts +9 -0
  153. package/dist/search/reranker/download.d.ts.map +1 -0
  154. package/dist/search/reranker/download.js +77 -0
  155. package/dist/search/reranker/download.js.map +1 -0
  156. package/dist/search/reranker/models.d.ts +14 -0
  157. package/dist/search/reranker/models.d.ts.map +1 -0
  158. package/dist/search/reranker/models.js +37 -0
  159. package/dist/search/reranker/models.js.map +1 -0
  160. package/dist/search/reranker/onnx.d.ts +13 -0
  161. package/dist/search/reranker/onnx.d.ts.map +1 -0
  162. package/dist/search/reranker/onnx.js +70 -0
  163. package/dist/search/reranker/onnx.js.map +1 -0
  164. package/dist/search/reranker/recency-boost.d.ts +3 -0
  165. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  166. package/dist/search/reranker/recency-boost.js +12 -0
  167. package/dist/search/reranker/recency-boost.js.map +1 -0
  168. package/dist/search/reranker/recency.d.ts +3 -0
  169. package/dist/search/reranker/recency.d.ts.map +1 -0
  170. package/dist/search/reranker/recency.js +26 -0
  171. package/dist/search/reranker/recency.js.map +1 -0
  172. package/dist/search/reranker/tokenizer.d.ts +30 -0
  173. package/dist/search/reranker/tokenizer.d.ts.map +1 -0
  174. package/dist/search/reranker/tokenizer.js +49 -0
  175. package/dist/search/reranker/tokenizer.js.map +1 -0
  176. package/dist/search/tokens.d.ts +3 -0
  177. package/dist/search/tokens.d.ts.map +1 -0
  178. package/dist/search/tokens.js +38 -0
  179. package/dist/search/tokens.js.map +1 -0
  180. package/dist/search/truncate.d.ts +4 -0
  181. package/dist/search/truncate.d.ts.map +1 -1
  182. package/dist/search/truncate.js +13 -0
  183. package/dist/search/truncate.js.map +1 -1
  184. package/dist/server/tool-schemas.d.ts +503 -0
  185. package/dist/server/tool-schemas.d.ts.map +1 -0
  186. package/dist/server/tool-schemas.js +425 -0
  187. package/dist/server/tool-schemas.js.map +1 -0
  188. package/dist/server.d.ts.map +1 -1
  189. package/dist/server.js +1 -326
  190. package/dist/server.js.map +1 -1
  191. package/dist/tools/agent.d.ts.map +1 -1
  192. package/dist/tools/agent.js +36 -0
  193. package/dist/tools/agent.js.map +1 -1
  194. package/dist/tools/crawl.d.ts.map +1 -1
  195. package/dist/tools/crawl.js +37 -2
  196. package/dist/tools/crawl.js.map +1 -1
  197. package/dist/tools/extract.d.ts.map +1 -1
  198. package/dist/tools/extract.js +19 -3
  199. package/dist/tools/extract.js.map +1 -1
  200. package/dist/tools/fetch.d.ts.map +1 -1
  201. package/dist/tools/fetch.js +44 -7
  202. package/dist/tools/fetch.js.map +1 -1
  203. package/dist/tools/find-similar.d.ts.map +1 -1
  204. package/dist/tools/find-similar.js +32 -1
  205. package/dist/tools/find-similar.js.map +1 -1
  206. package/dist/tools/research.d.ts.map +1 -1
  207. package/dist/tools/research.js +34 -1
  208. package/dist/tools/research.js.map +1 -1
  209. package/dist/tools/search.d.ts.map +1 -1
  210. package/dist/tools/search.js +97 -53
  211. package/dist/tools/search.js.map +1 -1
  212. package/dist/types.d.ts +65 -1
  213. package/dist/types.d.ts.map +1 -1
  214. package/dist/types.js +1 -1
  215. package/dist/types.js.map +1 -1
  216. package/dist/util/mode.d.ts +4 -0
  217. package/dist/util/mode.d.ts.map +1 -0
  218. package/dist/util/mode.js +13 -0
  219. package/dist/util/mode.js.map +1 -0
  220. package/package.json +9 -1
  221. package/dist/search/flashrank.d.ts +0 -12
  222. package/dist/search/flashrank.d.ts.map +0 -1
  223. package/dist/search/flashrank.js +0 -64
  224. package/dist/search/flashrank.js.map +0 -1
@@ -0,0 +1,203 @@
1
+ import { parseHTML } from 'linkedom';
2
+ import { createLogger } from '../logger.js';
3
+ const log = createLogger('structured-data');
4
/**
 * Schema.org type names singled out by this module.
 *
 * The set is not consulted by the extraction functions in this file; it is
 * re-exported at the bottom of the module as `KNOWN_SCHEMA_TYPES`.
 */
const KNOWN_TYPES = new Set(['Article', 'Product', 'Recipe', 'BreadcrumbList', 'Organization', 'Person']);
12
/**
 * Extract every structured-data record embedded in an HTML document.
 *
 * The markup is parsed once and handed to the three format-specific
 * extractors. Their results are concatenated in a fixed order: JSON-LD
 * first, then microdata, then RDFa.
 *
 * @param {string} html - Raw HTML markup; a falsy value yields an empty list.
 * @returns {Array<object>} Records produced by the three extractors, in order.
 */
export function extractStructuredData(html) {
  if (!html) {
    return [];
  }
  const { document } = parseHTML(html);
  return [
    ...extractJsonLdBlocks(document),
    ...extractMicrodataBlocks(document),
    ...extractRdfaBlocks(document),
  ];
}
22
/**
 * Collect one record per typed node found in the document's JSON-LD scripts.
 *
 * Each `<script type="application/ld+json">` is parsed independently. A block
 * that is empty or fails to parse is skipped (a parse failure is logged) so
 * one malformed script cannot hide the others.
 *
 * @param {Document} doc - Parsed document to scan.
 * @returns {Array<{provenance: 'json-ld', type: string, fields: object}>}
 *   One record per node that carries a usable `@type`. `fields` holds every
 *   node key that does not start with `@`.
 */
function extractJsonLdBlocks(doc) {
  const out = [];
  for (const script of doc.querySelectorAll('script[type="application/ld+json"]')) {
    const text = script.textContent?.trim();
    if (!text) {
      continue;
    }
    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch (err) {
      log.warn('Failed to parse JSON-LD block', { error: String(err) });
      continue;
    }
    for (const node of flattenJsonLd(parsed)) {
      const type = normalizeType(node['@type']);
      if (!type) {
        continue;
      }
      // Object.fromEntries defines own data properties. A plain `fields[k] = v`
      // would hit the `__proto__` setter when the page supplies a "__proto__"
      // key (JSON.parse keeps it as an own key), silently swapping the
      // prototype of `fields` instead of recording the value.
      const fields = Object.fromEntries(
        Object.entries(node).filter(([key]) => !key.startsWith('@')),
      );
      out.push({ provenance: 'json-ld', type, fields });
    }
  }
  return out;
}
52
/**
 * Reduce a parsed JSON-LD payload to a flat list of node objects.
 *
 * Arrays are flattened recursively, and an object whose `@graph` is an array
 * is replaced by the flattened contents of that graph. Any other object is
 * returned as a single-element list. Primitives and `null` contribute nothing.
 *
 * @param {unknown} value - Result of `JSON.parse` on a JSON-LD script.
 * @returns {Array<object>} Node objects in encounter order.
 */
function flattenJsonLd(value) {
  if (value === null || typeof value !== 'object') {
    return [];
  }
  // An array is its own member list; an object may delegate to its @graph.
  const members = Array.isArray(value) ? value : value['@graph'];
  if (Array.isArray(members)) {
    return members.flatMap((member) => flattenJsonLd(member));
  }
  return [value];
}
62
/**
 * Reduce a JSON-LD `@type` value to a bare type name.
 *
 * Accepts an absolute IRI (`https://schema.org/Article`), a compact IRI
 * (`schema:Article`), or a plain term (`Article`), and returns the part after
 * the last `/` or `:`. Splitting on `:` as well as `/` matches how the RDFa
 * extractor in this file derives type names, so the same vocabulary term
 * yields the same name whichever syntax the page uses.
 *
 * @param {unknown} raw - The `@type` value: a string, an array of candidates,
 *   or anything else.
 * @returns {string|null} The first non-empty type name, or `null` when none
 *   can be derived.
 */
function normalizeType(raw) {
  if (typeof raw === 'string') {
    const tail = raw.split(/[:/]/).pop();
    // An IRI ending in a separator leaves an empty tail; report it as no type.
    return tail || null;
  }
  if (Array.isArray(raw)) {
    for (const candidate of raw) {
      const name = normalizeType(candidate);
      if (name) {
        return name;
      }
    }
  }
  return null;
}
76
/**
 * Build one record per top-level microdata item in the document.
 *
 * Only `[itemscope]` elements without an `[itemscope]` ancestor are read
 * here; nested items are handled while the enclosing item is read. Items for
 * which no record can be built are left out.
 *
 * @param {Document} doc - Parsed document to scan.
 * @returns {Array<object>} Microdata records in document order.
 */
function extractMicrodataBlocks(doc) {
  return Array.from(doc.querySelectorAll('[itemscope]'))
    .filter((el) => !hasItemscopeAncestor(el))
    .map((el) => readMicrodataNode(el))
    .filter(Boolean);
}
89
/**
 * Report whether any ancestor of `el` carries the `itemscope` attribute.
 *
 * The element itself is not inspected, only its chain of parent elements.
 *
 * @param {Element} el - Element whose ancestors are checked.
 * @returns {boolean} `true` when an enclosing microdata item exists.
 */
function hasItemscopeAncestor(el) {
  for (let ancestor = el.parentElement; ancestor; ancestor = ancestor.parentElement) {
    if (ancestor.hasAttribute('itemscope')) {
      return true;
    }
  }
  return false;
}
98
/**
 * Turn a top-level `[itemscope]` element into a microdata record.
 *
 * `itemtype` is a whitespace-separated list of URLs, so the attribute is
 * trimmed and only its first token is used; the record type is the last path
 * segment of that URL. Taking the first token keeps stray whitespace out of
 * the type name and matches how the RDFa extractor in this file picks among
 * several declared types.
 *
 * @param {Element} el - Element carrying `itemscope`.
 * @returns {{provenance: 'microdata', type: string, fields: object}|null}
 *   The record, or `null` when the element declares no usable `itemtype`.
 */
function readMicrodataNode(el) {
  const firstType = (el.getAttribute('itemtype') ?? '').trim().split(/\s+/)[0];
  const type = firstType.split('/').pop();
  if (!type) {
    return null;
  }
  const fields = {};
  // Descendant itemprops are gathered here; nested itemscopes become nested objects.
  collectItemprops(el, fields);
  return { provenance: 'microdata', type, fields };
}
108
/**
 * Gather the microdata properties that belong to `root` into `target`.
 *
 * Descendants are visited in document (tree) order, so a property that occurs
 * several times has its values recorded in the order they appear on the page.
 * `itemprop` is a whitespace-separated list of names; the element's value is
 * recorded under each of them.
 *
 * An element's value is the object built from its own properties when it is a
 * nested item (`itemscope`). Otherwise it is the first present of its
 * `content`, `href` or `src` attribute, falling back to its trimmed text.
 *
 * @param {Element} root - Item element whose descendants are scanned.
 * @param {object} target - Object that receives the properties (mutated).
 * @returns {void}
 */
function collectItemprops(root, target) {
  // Explicit stack: children are pushed in reverse so pop() yields tree order.
  const pending = Array.from(root.children).reverse();
  while (pending.length) {
    const el = pending.pop();
    const isItem = el.hasAttribute('itemscope');
    const names = (el.getAttribute('itemprop') ?? '').split(/\s+/).filter(Boolean);
    if (names.length > 0) {
      let value;
      if (isItem) {
        value = {};
        collectItemprops(el, value);
      } else {
        value =
          el.getAttribute('content') ??
          el.getAttribute('href') ??
          el.getAttribute('src') ??
          (el.textContent ?? '').trim();
      }
      for (const name of names) {
        mergeProp(target, name, value);
      }
    }
    // Always stop at any itemscope: it is an independent item, with or without
    // an itemprop. Descending would leak its properties into this record.
    if (isItem) {
      continue;
    }
    const kids = Array.from(el.children);
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }
  }
}
138
/**
 * Record `value` under `prop` on `target`, accumulating repeats into an array.
 *
 * The first value for a property is stored as-is. A second value turns the
 * entry into a two-element array, and later values are appended to it.
 *
 * Only own properties count as "already present". Property names come from
 * page markup, so a name such as `constructor` or `toString` must not be
 * confused with a member inherited from `Object.prototype`, and `__proto__`
 * must be stored as data rather than routed through the prototype setter.
 *
 * @param {object} target - Object receiving the property (mutated).
 * @param {string} prop - Property name.
 * @param {unknown} value - Value to store or append.
 * @returns {void}
 */
function mergeProp(target, prop, value) {
  const isOwn = Object.prototype.hasOwnProperty.call(target, prop);
  if (!isOwn || target[prop] === undefined) {
    // defineProperty creates a plain data property even for "__proto__".
    Object.defineProperty(target, prop, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    return;
  }
  const existing = target[prop];
  if (Array.isArray(existing)) {
    existing.push(value);
    return;
  }
  target[prop] = [existing, value];
}
149
/**
 * Build one record per top-level RDFa resource in the document.
 *
 * Only `[typeof]` elements without a `[typeof]` ancestor are read here;
 * nested typed resources are handled while the enclosing resource is read.
 * `typeof` is a whitespace-separated list: the attribute is trimmed, its first
 * token is used, and the record type is the part after the last `:` or `/`.
 * Trimming matters because a leading space would otherwise make the first
 * token empty and the whole resource would be dropped.
 *
 * @param {Document} doc - Parsed document to scan.
 * @returns {Array<{provenance: 'rdfa', type: string, fields: object}>}
 *   RDFa records in document order.
 */
function extractRdfaBlocks(doc) {
  const out = [];
  const tops = Array.from(doc.querySelectorAll('[typeof]')).filter(
    (el) => !hasTypeofAncestor(el),
  );
  for (const el of tops) {
    const firstType = (el.getAttribute('typeof') ?? '').trim().split(/\s+/)[0];
    const type = firstType.split(/[:/]/).pop();
    if (!type) {
      continue;
    }
    const fields = {};
    collectRdfaProps(el, fields);
    out.push({ provenance: 'rdfa', type, fields });
  }
  return out;
}
164
/**
 * Report whether any ancestor of `el` carries the RDFa `typeof` attribute.
 *
 * The element itself is not inspected, only its chain of parent elements.
 *
 * @param {Element} el - Element whose ancestors are checked.
 * @returns {boolean} `true` when an enclosing typed resource exists.
 */
function hasTypeofAncestor(el) {
  for (let parent = el.parentElement; parent; parent = parent.parentElement) {
    if (parent.hasAttribute('typeof')) {
      return true;
    }
  }
  return false;
}
173
/**
 * Gather the RDFa properties that belong to `root` into `target`.
 *
 * Descendants are visited in document (tree) order, so a property that occurs
 * several times has its values recorded in the order they appear on the page.
 * `property` is a whitespace-separated list of terms or IRIs; the element's
 * value is recorded under the local name of each one (the part after the last
 * `:` or `/`, or the whole token when that part is empty).
 *
 * An element's value is the object built from its own properties when it is a
 * nested typed resource (`typeof`). Otherwise it is the first present of its
 * `content`, `href` or `resource` attribute, falling back to its trimmed text.
 *
 * @param {Element} root - Resource element whose descendants are scanned.
 * @param {object} target - Object that receives the properties (mutated).
 * @returns {void}
 */
function collectRdfaProps(root, target) {
  // Explicit stack: children are pushed in reverse so pop() yields tree order.
  const pending = Array.from(root.children).reverse();
  while (pending.length) {
    const el = pending.pop();
    const isResource = el.hasAttribute('typeof');
    const tokens = (el.getAttribute('property') ?? '').split(/\s+/).filter(Boolean);
    if (tokens.length > 0) {
      let value;
      if (isResource) {
        value = {};
        collectRdfaProps(el, value);
      } else {
        value =
          el.getAttribute('content') ??
          el.getAttribute('href') ??
          el.getAttribute('resource') ??
          (el.textContent ?? '').trim();
      }
      for (const token of tokens) {
        const propName = token.split(/[:/]/).pop() || token;
        mergeProp(target, propName, value);
      }
    }
    // Always stop at any nested typeof, regardless of property: independent item.
    if (isResource) {
      continue;
    }
    const kids = Array.from(el.children);
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }
  }
}
202
/**
 * Public name for the module's set of recognised schema.org type names.
 *
 * This exports the same Set instance as `KNOWN_TYPES`, not a copy, so any
 * mutation by a consumer is visible to every other holder of the set.
 */
export const KNOWN_SCHEMA_TYPES = KNOWN_TYPES;
203
+ //# sourceMappingURL=structured-data.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structured-data.js","sourceRoot":"","sources":["../../src/extraction/structured-data.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAG5C,MAAM,GAAG,GAAG,YAAY,CAAC,iBAAiB,CAAC,CAAC;AAE5C,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC;IAC1B,SAAS;IACT,SAAS;IACT,QAAQ;IACR,gBAAgB;IAChB,cAAc;IACd,QAAQ;CACT,CAAC,CAAC;AAEH,MAAM,UAAU,qBAAqB,CAAC,IAAY;IAChD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,GAAG,CAAC,IAAI,CAAC,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC;IACtC,GAAG,CAAC,IAAI,CAAC,GAAG,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC;IACzC,GAAG,CAAC,IAAI,CAAC,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;IACpC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAa;IACxC,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,MAAM,OAAO,GAAG,GAAG,CAAC,gBAAgB,CAAC,oCAAoC,CAAC,CAAC;IAC3E,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;QACxC,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,+BAA+B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAClE,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC;YACzC,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1C,IAAI,CAAC,IAAI;gBAAE,SAAS;YACpB,MAAM,MAAM,GAA4B,EAAE,CAAC;YAC3C,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1C,IAAI,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAChC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAChB,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IACnD,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAC9D,MAAM,GAAG,GAAG,KAAgC,CAAC;IAC7C,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAA
C,QAAQ,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAC9E,OAAO,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,SAAS,aAAa,CAAC,GAAY;IACjC,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAG,CAAC;QACnC,OAAO,IAAI,IAAI,IAAI,CAAC;IACtB,CAAC;IACD,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACvB,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YACpB,MAAM,IAAI,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;YAC9B,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,sBAAsB,CAAC,GAAa;IAC3C,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,2EAA2E;IAC3E,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC,CAAC;IAC5D,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,CAAC;IAC3D,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,iBAAiB,CAAC,EAAE,CAAC,CAAC;QACnC,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,oBAAoB,CAAC,EAAW;IACvC,IAAI,GAAG,GAAG,EAAE,CAAC,aAAa,CAAC;IAC3B,OAAO,GAAG,EAAE,CAAC;QACX,IAAI,GAAG,CAAC,YAAY,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAC/C,GAAG,GAAG,GAAG,CAAC,aAAa,CAAC;IAC1B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,EAAW;IACpC,MAAM,QAAQ,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;IACnD,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACxD,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,4FAA4F;IAC5F,gBAAgB,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;IAC7B,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;AACnD,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAa,EAAE,MAA+B;IACtE,MAAM,KAAK,GAAc,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,KAAK,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,EAAE,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;QAC1B,MAAM,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QACzC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,KAAc,CAAC;YACnB,IAAI,EAAE,CAAC,YAAY,CAAC,WAAW,CAAC,EAAE,CAAC;gBACjC,MAAM,MAAM,GAA4B,EAAE,CAAC;gBAC3C,gBA
AgB,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;gBAC7B,KAAK,GAAG,MAAM,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,KAAK;oBACH,EAAE,CAAC,YAAY,CAAC,SAAS,CAAC;wBAC1B,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC;wBACvB,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC;wBACtB,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAClC,CAAC;YACD,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;QACjC,CAAC;QACD,yEAAyE;QACzE,uEAAuE;QACvE,qCAAqC;QACrC,IAAI,EAAE,CAAC,YAAY,CAAC,WAAW,CAAC;YAAE,SAAS;QAC3C,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAAC,MAA+B,EAAE,IAAY,EAAE,KAAc;IAC9E,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;QACrB,OAAO;IACT,CAAC;IACD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAe,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACxC,OAAO;IACT,CAAC;IACD,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAa;IACtC,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;IACzD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC;IACxD,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,MAAM,QAAQ,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACjD,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;QACjE,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,MAAM,GAA4B,EAAE,CAAC;QAC3C,gBAAgB,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QAC7B,GAAG,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IACjD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,iBAAiB,CAAC,EAAW;IACpC,IAAI,GAAG,GAAG,EAAE,CAAC,aAAa,CAAC;IAC3B,OAAO,GAAG,EAAE,CAAC;QACX,IAAI,GAAG,CAAC,YAAY,CAAC,QAAQ,CAAC;YAAE,OAAO,IAAI,CAAC;QAC5C,GAAG,GAAG,GAAG,CAAC,aAAa,CAAC;IAC1B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAa,EAAE,MAA+B;IACtE,MAAM,KAAK,GAAc,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,KAAK,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,E
AAE,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;QAC1B,MAAM,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QACzC,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC;YAClD,IAAI,KAAc,CAAC;YACnB,IAAI,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC9B,MAAM,MAAM,GAA4B,EAAE,CAAC;gBAC3C,gBAAgB,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;gBAC7B,KAAK,GAAG,MAAM,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,KAAK;oBACH,EAAE,CAAC,YAAY,CAAC,SAAS,CAAC;wBAC1B,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC;wBACvB,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC;wBAC3B,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAClC,CAAC;YACD,SAAS,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;QACrC,CAAC;QACD,+EAA+E;QAC/E,IAAI,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC;YAAE,SAAS;QACxC,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAwB,WAAW,CAAC"}
@@ -1,4 +1,4 @@
1
- import type { RawFetchResult, BrowserAction } from '../types.js';
1
+ import type { RawFetchResult, BrowserAction, Mode } from '../types.js';
2
2
  export interface RouterFetchOptions {
3
3
  renderJs?: 'auto' | 'always' | 'never';
4
4
  useAuth?: boolean;
@@ -6,6 +6,7 @@ export interface RouterFetchOptions {
6
6
  screenshot?: boolean;
7
7
  actions?: BrowserAction[];
8
8
  force_refresh?: boolean;
9
+ mode?: Mode;
9
10
  }
10
11
  export interface HttpClient {
11
12
  fetch(url: string, options?: {
@@ -1 +1 @@
1
- {"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../../src/fetch/router.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAEjE,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;IACvC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,OAAO,CAAC,EAAE,aAAa,EAAE,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,CACH,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,GACjE,OAAO,CAAC;QACT,GAAG,EAAE,MAAM,CAAC;QACZ,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAChC,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,oBAAoB;IACnC,gBAAgB,CACd,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,aAAa,EAAE,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GAChK,OAAO,CAAC,cAAc,CAAC,CAAC;CAC5B;AAED,UAAU,WAAW;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,OAAO,CAAC;CAC3B;AAED,qBAAa,WAAW;IAIpB,OAAO,CAAC,QAAQ,CAAC,UAAU;IAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW;IAJ9B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAkC;gBAGzC,UAAU,EAAE,UAAU,EACtB,WAAW,EAAE,oBAAoB;IAG9C,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,kBAAuB,GAAG,OAAO,CAAC,cAAc,CAAC;IAoEnF,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS;IAIvD,OAAO,CAAC,WAAW;IASnB,OAAO,CAAC,gBAAgB;CAczB"}
1
+ {"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../../src/fetch/router.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAEvE,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;IACvC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,OAAO,CAAC,EAAE,aAAa,EAAE,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,CACH,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,GACjE,OAAO,CAAC;QACT,GAAG,EAAE,MAAM,CAAC;QACZ,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAChC,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,oBAAoB;IACnC,gBAAgB,CACd,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,aAAa,EAAE,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GAChK,OAAO,CAAC,cAAc,CAAC,CAAC;CAC5B;AAED,UAAU,WAAW;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,OAAO,CAAC;CAC3B;AAED,qBAAa,WAAW;IAIpB,OAAO,CAAC,QAAQ,CAAC,UAAU;IAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW;IAJ9B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAkC;gBAGzC,UAAU,EAAE,UAAU,EACtB,WAAW,EAAE,oBAAoB;IAG9C,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,kBAAuB,GAAG,OAAO,CAAC,cAAc,CAAC;IAuFnF,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS;IAIvD,OAAO,CAAC,WAAW;IASnB,OAAO,CAAC,gBAAgB;CAczB"}
@@ -11,11 +11,29 @@ export class SmartRouter {
11
11
  this.browserPool = browserPool;
12
12
  }
13
13
  async fetch(url, options = {}) {
14
- const { renderJs = 'auto', useAuth = false, headers, screenshot, actions } = options;
14
+ const { renderJs = 'auto', useAuth = false, headers, screenshot, actions, mode } = options;
15
15
  const config = getConfig();
16
16
  const logger = createLogger('fetch');
17
17
  const threshold = config.browserFallbackThreshold;
18
18
  const domain = new URL(url).hostname;
19
+ // Fast mode: HTTP-only with tight timeout, never escalates to a browser.
20
+ if (mode === 'fast') {
21
+ if (actions && actions.length > 0) {
22
+ logger.warn('mode=fast ignores browser actions; switch to balanced/deep to execute them', {
23
+ url,
24
+ actionCount: actions.length,
25
+ });
26
+ }
27
+ logger.debug('routing to http (fast)', { url });
28
+ const result = await this.httpClient.fetch(url, {
29
+ headers,
30
+ timeoutMs: config.fastTimeoutMs,
31
+ });
32
+ this.ensureStats(domain);
33
+ const raw = this.toRawFetchResult(result);
34
+ raw.jsRequired = contentAppearsEmpty(result.html);
35
+ return raw;
36
+ }
19
37
  // Actions always force Playwright --- actions need a live browser page
20
38
  if (actions && actions.length > 0) {
21
39
  const authOptions = useAuth ? (await getAuthOptions() ?? {}) : {};
@@ -1 +1 @@
1
- {"version":3,"file":"router.js","sourceRoot":"","sources":["../../src/fetch/router.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAuC3C,MAAM,OAAO,WAAW;IAIH;IACA;IAJF,SAAS,GAAG,IAAI,GAAG,EAAuB,CAAC;IAE5D,YACmB,UAAsB,EACtB,WAAiC;QADjC,eAAU,GAAV,UAAU,CAAY;QACtB,gBAAW,GAAX,WAAW,CAAsB;IACjD,CAAC;IAEJ,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAA8B,EAAE;QACvD,MAAM,EAAE,QAAQ,GAAG,MAAM,EAAE,OAAO,GAAG,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;QACrF,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,SAAS,GAAG,MAAM,CAAC,wBAAwB,CAAC;QAClD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAErC,uEAAuE;QACvE,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClC,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClE,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,iBAAiB,EAAE,CAAC,CAAC;YAC1E,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,WAAW,EAAE,CAAC,CAAC;QAClG,CAAC;QAED,kDAAkD;QAClD,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,EAAE,CAAC;YACrC,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClE,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,kBAAkB,EAAE,CAAC,CAAC;YAC9F,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,WAAW,EAAE,CAAC,CAAC;QACzF,CAAC;QAED,yBAAyB;QACzB,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;YACzB,MAAM,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAC7D,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;YACzB,OAAO,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAED,yDAAyD;QACzD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAEvC,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC3B,MAAM,CAAC,KAAK,CAAC,uCAAuC,EAAE,E
AAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACvE,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;QACzE,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAE7D,sCAAsC;YACtC,IAAI,mBAAmB,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,CAAC,IAAI,CAAC,mDAAmD,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gBAClF,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC9B,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,OAAO,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,KAAK,CAAC,YAAY,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC,mBAAmB,EAAE;gBAC/B,GAAG;gBACH,MAAM;gBACN,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACxD,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC;gBACpC,MAAM,CAAC,IAAI,CAAC,0DAA0D,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;gBACpG,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC9B,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAED,cAAc,CAAC,MAAc;QAC3B,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAEO,WAAW,CAAC,MAAc;QAChC,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,KAAK,GAAG,EAAE,YAAY,EAAE,CAAC,EAAE,gBAAgB,EAAE,KAAK,EAAE,CAAC;YACrD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,gBAAgB,CACtB,MAAgD;QAEhD,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,SAAS,EAAE,MAAM,CAAC,SAAS;SAC5B,CAAC;IACJ,CAAC;CACF"}
1
+ {"version":3,"file":"router.js","sourceRoot":"","sources":["../../src/fetch/router.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAwC3C,MAAM,OAAO,WAAW;IAIH;IACA;IAJF,SAAS,GAAG,IAAI,GAAG,EAAuB,CAAC;IAE5D,YACmB,UAAsB,EACtB,WAAiC;QADjC,eAAU,GAAV,UAAU,CAAY;QACtB,gBAAW,GAAX,WAAW,CAAsB;IACjD,CAAC;IAEJ,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAA8B,EAAE;QACvD,MAAM,EAAE,QAAQ,GAAG,MAAM,EAAE,OAAO,GAAG,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,OAAO,CAAC;QAC3F,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,SAAS,GAAG,MAAM,CAAC,wBAAwB,CAAC;QAClD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAErC,yEAAyE;QACzE,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,CAAC,IAAI,CAAC,4EAA4E,EAAE;oBACxF,GAAG;oBACH,WAAW,EAAE,OAAO,CAAC,MAAM;iBAC5B,CAAC,CAAC;YACL,CAAC;YACD,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAChD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE;gBAC9C,OAAO;gBACP,SAAS,EAAE,MAAM,CAAC,aAAa;aAChC,CAAC,CAAC;YACH,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;YAC1C,GAAG,CAAC,UAAU,GAAG,mBAAmB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAClD,OAAO,GAAG,CAAC;QACb,CAAC;QAED,uEAAuE;QACvE,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClC,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClE,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,iBAAiB,EAAE,CAAC,CAAC;YAC1E,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,WAAW,EAAE,CAAC,CAAC;QAClG,CAAC;QAED,kDAAkD;QAClD,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,EAAE,CAAC;YACrC,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClE,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,M
AAM,CAAC,CAAC,CAAC,kBAAkB,EAAE,CAAC,CAAC;YAC9F,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,WAAW,EAAE,CAAC,CAAC;QACzF,CAAC;QAED,yBAAyB;QACzB,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;YACzB,MAAM,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAC7D,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;YACzB,OAAO,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAED,yDAAyD;QACzD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAEvC,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC3B,MAAM,CAAC,KAAK,CAAC,uCAAuC,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACvE,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;QACzE,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAE7D,sCAAsC;YACtC,IAAI,mBAAmB,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,CAAC,IAAI,CAAC,mDAAmD,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gBAClF,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC9B,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,OAAO,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,KAAK,CAAC,YAAY,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC,mBAAmB,EAAE;gBAC/B,GAAG;gBACH,MAAM;gBACN,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACxD,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC;gBACpC,MAAM,CAAC,IAAI,CAAC,0DAA0D,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;gBACpG,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC;gBAC9B,OAAO,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAED,cAAc,CAAC,MAAc;QAC3B,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAEO,WAAW,CAAC,MAAc;QAChC,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,KAAK,GAAG,EAAE,YAAY,EAAE,CAAC,EAA
E,gBAAgB,EAAE,KAAK,EAAE,CAAC;YACrD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,gBAAgB,CACtB,MAAgD;QAEhD,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,SAAS,EAAE,MAAM,CAAC,SAAS;SAC5B,CAAC;IACJ,CAAC;CACF"}
@@ -14,16 +14,16 @@
14
14
  * Parameter schemas (types, enums, required/optional) belong on the JSON
15
15
  * Schema, not here. Installation/configuration is for humans, not LLMs.
16
16
  */
17
- export declare const WIGOLO_INSTRUCTIONS = "Wigolo is a local-first web access layer: search the open web, fetch pages, crawl sites, extract structured data, find related content, run multi-step research, and execute agent-driven data gathering. All results land in a local knowledge cache that persists across sessions.\n\n## Host-LLM synthesis pattern (read this first)\n\nWigolo has no internal LLM. It returns *structured evidence* so YOU (the host LLM) write the final answer. Fold structure into your reply:\n\n- `search` `format: \"highlights\"` \u2192 ML-scored passages + `citations`. Quote [N].\n- `research` \u2192 `brief` with `topics`, `highlights`, `key_findings`, `sections` when sampling unavailable. Use `sections.overview.cross_references` for corroborated findings, `sections.gaps` for coverage limits, `sections.comparison` for entity-vs-entity analysis. `query_type` indicates decomposition strategy used.\n- `find_similar` \u2192 `cold_start` string when local signals are weak. Pass to user verbatim.\n- `extract` `mode: \"structured\"` \u2192 tables + definitions + jsonld + chart_hints + key_value_pairs in one call.\n- `fetch` metadata \u2192 `og_type`, `canonical_url`, `og_image` when present.\n\n## When to use which tool\n\n- `search` -- you need information on a topic but do not have a URL yet. Pass a query string or an array of 3-5 semantically varied keyword forms for broader coverage.\n- `fetch` -- you already have a specific URL to read.\n- `crawl` -- you need multiple pages from the same site (docs, wikis, references).\n- `cache` -- you want to know if the content is already on disk from an earlier read.\n- `extract` -- you need specific data points (tables, metadata, schema-shaped fields) rather than a whole page as markdown.\n- `find_similar` -- you have a URL or concept and want related content from the cache or web. 
Useful for \"more like this\" discovery.\n- `research` -- you have a complex question that needs multi-step investigation: question decomposition, parallel search, source synthesis into a report. Set `depth` to control thoroughness.\n- `agent` -- you need to gather structured or unstructured data from multiple sources based on a natural-language prompt. Provides full step transparency.\n\n## Routing by intent\n\n| Intent | Tool | Key parameters |\n|--------|------|----------------|\n| Documentation lookup | `search` | `include_domains: [\"react.dev\", \"nextjs.org\"]` -- scope to the project's official site, do not rely on `category: \"docs\"` alone |\n| Error debugging | `search` | exact error string as query, `category: \"code\"` (no domain scoping -- errors appear everywhere) |\n| Library research | `crawl` | seed URL of docs site, `strategy: \"sitemap\"`, then `cache` for later queries |\n| Related content | `find_similar` | `url` of a known good page, or `concept` as free text |\n| Direct quote | `search` | `format: \"highlights\"` returns ML-scored passages with citations; cite [N] in your reply |\n| Direct answer | `search` | `format: \"answer\"` if client supports sampling, else falls back to `highlights` (not plain context) |\n| Comprehensive research | `research` | `depth: \"comprehensive\"`, optional `include_domains` to scope |\n| Data gathering | `agent` | natural-language `prompt`, optional `schema` for structured output |\n| Structured extraction | `extract` | `mode: \"structured\"` (tables + dl + JSON-LD + chart hints + kv pairs), or `mode: \"schema\"` with a JSON Schema |\n| Site inventory | `crawl` | `strategy: \"map\"` for URL-only discovery, no content fetched |\n\n## Rapidly changing content\n\nFor news, prices, status pages, or release notes, bypass the cache with `force_refresh: true`:\n\n search({ query: \"...\", force_refresh: true })\n fetch({ url: \"...\", force_refresh: true })\n\nFor docs, tutorials, and reference pages, let the cache 
work -- much faster.\n\n## Check the cache before going to the network\n\nBefore every `search` or `fetch`, consider a `cache` call. Pages read this session or earlier return instantly with full markdown -- no network. `research` and `agent` check the cache internally.\n\n## Multi-query search strategy\n\nFor broad queries, pass an array of 3-5 semantically varied keyword forms rather than one natural-language question. Example: instead of \"how does React handle state management\", pass `[\"react state management\", \"useState useReducer\", \"react hooks state\", \"react context vs redux\"]`. Sub-queries are deduplicated automatically.\n\n## Pick the right strategy\n\n- For docs sites, prefer `crawl` with `strategy: \"sitemap\"` -- faster and more complete than BFS.\n- For URL discovery only, use `crawl` with `strategy: \"map\"` -- URLs only, no content. Follow with targeted `fetch` calls.\n- For structured data (prices, specs, table rows), use `extract` with `mode: \"schema\"` or `mode: \"tables\"`. Use `fetch` only when you want the whole page as markdown.\n- For multi-source synthesis, use `research` instead of chaining `search` + `fetch` manually.\n- For natural-language data gathering, use `agent` with optional `schema`.\n- `crawl` accepts regex `include_patterns` and `exclude_patterns` to stay inside a section of a large site.\n\n## Scope searches by domain\n\nFor library/framework/SDK queries, **always pass `include_domains`** with official sites. Unscoped queries return generic noise. `category: \"docs\"` alone returns generic portals -- pair with `include_domains` or omit. Skip domain scoping for error strings, broad exploration, and news.\n\n## Performance\n\n- `max_results: 3` for focused lookups; `5` default; `10+` only for broad research.\n- `max_content_chars: 3000` on `search` or `fetch` smart-truncates each result's markdown at a paragraph/heading boundary with a `[... content truncated]` marker. Keeps context compact for AI agents. 
Prefer this over raw `max_chars` slicing.\n- `fetch` with `section: \"Heading Name\"` returns content under that heading -- cheaper than the whole page.\n- Repeated fetches of the same URL are free (local cache).\n- `research` with `depth: \"quick\"` (~15s) suits most factual questions; reserve `\"comprehensive\"` for deep investigation.\n- `agent` respects `max_pages` (default 10) and `max_time_ms` (default 60s).\n\n## Extras\n\n- Localhost URLs (`localhost:3000`, `127.0.0.1:8080`) work for local dev servers.\n- `use_auth: true` on `fetch`/`crawl` reuses browser session for logged-in pages.\n- `cache` supports full-text search syntax (`AND`, `OR`, `NOT`, `\"phrase\"`).\n- `research`/`agent` use MCP sampling when supported; fall back to structured data for host-LLM synthesis.";
17
+ export declare const WIGOLO_INSTRUCTIONS = "Wigolo is a local-first web access layer: search the open web, fetch pages, crawl sites, extract structured data, find related content, run multi-step research, and execute agent-driven data gathering. All results land in a local knowledge cache that persists across sessions.\n\n## Host-LLM synthesis pattern (read this first)\n\nWigolo has no internal LLM. It returns *structured evidence* so YOU (the host LLM) write the final answer. Fold structure into your reply:\n\n- `search` \u2192 evidence (title/url/section_heading/excerpt/score/citation_id/source_span) + citations. Quote [N] or {citation_id}.\n- `format: 'answer'|'stream_answer'` \u2192 LLM synthesis when sampling supported; else evidence fallback.\n- `max_tokens_out` caps total output (cl100k-base, ~5-15% drift on non-OpenAI). `include_full_markdown: true` restores full body. `citation_format`: `'numbered'`|`'json'`|`'anthropic_tags'`.\n- `research` \u2192 `brief` with `topics`, `highlights`, `key_findings`, `sections` when sampling unavailable. Use `sections.overview.cross_references` for corroborated findings, `sections.gaps` for coverage limits, `sections.comparison` for entity-vs-entity analysis. `query_type` indicates decomposition strategy used.\n- `find_similar` \u2192 `cold_start` string when local signals are weak. Pass to user verbatim.\n- `extract` `mode: \"structured\"` \u2192 tables + definitions + jsonld + chart_hints + key_value_pairs in one call.\n- `fetch` metadata \u2192 `og_type`, `canonical_url`, `og_image` when present.\n\n## When to use which tool\n\n- `search` -- you need information on a topic but do not have a URL yet. 
Pass a query string or an array of 3-5 semantically varied keyword forms for broader coverage.\n- `fetch` -- you already have a specific URL to read.\n- `crawl` -- you need multiple pages from the same site (docs, wikis, references).\n- `cache` -- you want to know if the content is already on disk from an earlier read.\n- `extract` -- you need specific data points (tables, metadata, schema-shaped fields) rather than a whole page as markdown.\n- `find_similar` -- you have a URL or concept and want related content from the cache or web. Useful for \"more like this\" discovery.\n- `research` -- you have a complex question that needs multi-step investigation: question decomposition, parallel search, source synthesis into a report. Set `depth` to control thoroughness.\n- `agent` -- you need to gather structured or unstructured data from multiple sources based on a natural-language prompt. Provides full step transparency.\n\n## Routing by intent\n\n| Intent | Tool | Key parameters |\n|--------|------|----------------|\n| Documentation lookup | `search` | `include_domains: [\"react.dev\", \"nextjs.org\"]` -- scope to the project's official site, do not rely on `category: \"docs\"` alone |\n| Error debugging | `search` | exact error string as query, `category: \"code\"` (no domain scoping -- errors appear everywhere) |\n| Library research | `crawl` | seed URL of docs site, `strategy: \"sitemap\"`, then `cache` for later queries |\n| Related content | `find_similar` | `url` of a known good page, or `concept` as free text |\n| Evidence excerpt | `search` | default output; cite [N] or {citation_id} from each evidence item |\n| Direct answer | `search` | `format: \"answer\"` if client supports sampling, else falls back to evidence |\n| Comprehensive research | `research` | `depth: \"comprehensive\"`, optional `include_domains` to scope |\n| Data gathering | `agent` | natural-language `prompt`, optional `schema` for structured output |\n| Structured extraction | `extract` | 
`mode: \"structured\"` (tables + dl + JSON-LD + chart hints + kv pairs), or `mode: \"schema\"` with a JSON Schema |\n| Site inventory | `crawl` | `strategy: \"map\"` for URL-only discovery, no content fetched |\n\n## Rapidly changing content\n\nFor news, prices, status pages, or release notes, bypass the cache with `force_refresh: true`:\n\n search({ query: \"...\", force_refresh: true })\n fetch({ url: \"...\", force_refresh: true })\n\nFor docs, tutorials, and reference pages, let the cache work -- much faster.\n\n## Check the cache before going to the network\n\nBefore every `search` or `fetch`, consider a `cache` call. Pages read this session or earlier return instantly with full markdown -- no network. `research` and `agent` check the cache internally.\n\n## Multi-query search strategy\n\nFor broad queries, pass an array of 3-5 semantically varied keyword forms rather than one natural-language question. Example: instead of \"how does React handle state management\", pass `[\"react state management\", \"useState useReducer\", \"react hooks state\", \"react context vs redux\"]`. Sub-queries are deduplicated automatically.\n\n## Pick the right strategy\n\n- For docs sites, prefer `crawl` with `strategy: \"sitemap\"` -- faster and more complete than BFS.\n- For URL discovery only, use `crawl` with `strategy: \"map\"` -- URLs only, no content. Follow with targeted `fetch` calls.\n- For structured data (prices, specs, table rows), use `extract` with `mode: \"schema\"` or `mode: \"tables\"`. Use `fetch` only when you want the whole page as markdown.\n- For multi-source synthesis, use `research` instead of chaining `search` + `fetch` manually.\n- For natural-language data gathering, use `agent` with optional `schema`.\n- `crawl` accepts regex `include_patterns` and `exclude_patterns` to stay inside a section of a large site.\n\n## Scope searches by domain\n\nFor library/framework/SDK queries, **always pass `include_domains`** with official sites. 
Unscoped queries return generic noise. `category: \"docs\"` alone returns generic portals -- pair with `include_domains` or omit. Skip domain scoping for error strings, broad exploration, and news.\n\n## Performance\n\n- `max_results: 3` for focused lookups; `5` default; `10+` only for broad research.\n- `max_tokens_out` caps total response size (cl100k-base BPE); prefer this over `max_chars` for budget-aware agents. When both are set, `max_tokens_out` wins.\n- `max_content_chars: 3000` remains a legitimate per-page budget \u2014 smart-truncates each result's markdown at a paragraph/heading boundary with a `[... content truncated]` marker.\n- `fetch` with `section: \"Heading Name\"` returns content under that heading -- cheaper than the whole page.\n- Repeated fetches of the same URL are free (local cache).\n- `research` with `depth: \"quick\"` (~15s) suits most factual questions; reserve `\"comprehensive\"` for deep investigation.\n- `agent` respects `max_pages` (default 10) and `max_time_ms` (default 60s).\n\n## Extras\n\n- Localhost URLs (`localhost:3000`, `127.0.0.1:8080`) work for local dev servers.\n- `use_auth: true` on `fetch`/`crawl` reuses browser session for logged-in pages.\n- `cache` supports full-text search syntax (`AND`, `OR`, `NOT`, `\"phrase\"`).\n- `research`/`agent` use MCP sampling when supported; fall back to structured data for host-LLM synthesis.";
18
18
  export declare const TOOL_DESCRIPTIONS: {
19
- readonly fetch: "Fetch a single URL and return clean markdown. Use when you have a specific URL to read. Automatically detects if JavaScript rendering is needed.\n\nKey parameters:\n- section: extract content under a specific heading (e.g., section: \"API Reference\") -- faster than reading the whole page\n- max_content_chars: smart-truncate markdown at a paragraph/heading boundary with a `[... content truncated]` marker (e.g., 3000 for compact context). Preferred over max_chars for AI agents.\n- use_auth: true to use stored browser session for authenticated/private pages\n- render_js: \"auto\" (default, detects JS need), \"always\" (force browser), \"never\" (HTTP only, fastest)\n- headers: custom HTTP headers if needed\n- force_refresh: true to bypass cache and fetch fresh content from the network\n\nReturns title, markdown, links, images, metadata (og_image, og_type, canonical_url, keywords). Decorative images filtered, relative URLs resolved. Cached locally; repeat fetches are instant. Localhost URLs work.\n\nUse force_refresh: true for frequently changing content. Default serves from cache.";
20
- readonly search: "Search the web and return full markdown content from top results. Returns extracted page content, not just snippets.\n\nKey parameters:\n- query: string or string[] array (3-5 keyword variants for broader coverage; deduplicated automatically)\n- include_domains/exclude_domains: scope to specific sites. ALWAYS scope library/framework queries.\n- category: \"general\" | \"news\" | \"code\" | \"docs\" | \"papers\" — coarse filter, pair with include_domains.\n- from_date/to_date: ISO YYYY-MM-DD for time-bounded queries\n- max_results: default 5; use 3 for focused, 10+ for research\n- format: \"full\" (default), \"context\", \"highlights\" (ML-scored passages + [N] citations), \"answer\" (sampling synthesis; falls back to highlights), \"stream_answer\"\n- max_highlights: cap highlights count (default 10)\n- max_content_chars: smart-truncate markdown at paragraph boundary (e.g., 3000)\n- force_refresh: true to bypass all caches\n\n\"answer\" falls back to \"highlights\" when sampling unsupported (most clients). Results include title, URL, relevance_score, and full markdown_content. Cache serves previously fetched pages instantly.";
21
- readonly crawl: "Crawl a website starting from a URL and return content from multiple pages. Use for indexing documentation sites, wikis, or any multi-page resource.\n\nKey parameters:\n- strategy: \"bfs\" (breadth-first, default), \"dfs\" (depth-first), \"sitemap\" (use sitemap.xml -- fastest for doc sites), \"map\" (URL discovery only, no content -- fastest for scoping a site)\n- max_depth: how many links deep to follow (default 2)\n- max_pages: maximum pages to fetch (default 20)\n- include_patterns/exclude_patterns: regex filters on URLs\n\nReturns an array of pages with title, markdown, and depth. Content is deduplicated across pages (repeated nav/headers/footers stripped). All pages are cached for later cache queries.";
19
+ readonly fetch: "Fetch a single URL and return clean markdown. Use when you have a specific URL to read. Automatically detects if JavaScript rendering is needed.\n\nKey parameters:\n- section: extract content under a specific heading (e.g., section: \"API Reference\") -- faster than reading the whole page\n- max_content_chars: smart-truncate markdown at a paragraph/heading boundary with a `[... content truncated]` marker (e.g., 3000 for compact context). Preferred over max_chars for AI agents.\n- max_tokens_out: token-budget cap on total output (cl100k-base BPE). Takes precedence over max_chars when both are set.\n- include_full_markdown: default false. Set true to include the full markdown body in addition to evidence excerpts.\n- citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.\n- use_auth: true to use stored browser session for authenticated/private pages\n- render_js: \"auto\" (default, detects JS need), \"always\" (force browser), \"never\" (HTTP only, fastest)\n- headers: custom HTTP headers if needed\n- force_refresh: true to bypass cache and fetch fresh content from the network\n- mode: 'fast' | 'balanced' (default) | 'deep'. fast=HTTP-only, accepts cache up to 24h stale. deep=full render + freshness.\n\nReturns title, markdown, links, images, metadata (og_image, og_type, canonical_url, keywords). Cached locally; repeat fetches are instant. Localhost URLs work.";
20
+ readonly search: "Search the web and return scored evidence excerpts (title/url/section_heading/excerpt/score/citation_id/source_span) plus citations. Default shape is evidence-only no full markdown body.\n\nKey parameters:\n- query: string or string[] array (3-5 keyword variants; deduplicated automatically)\n- include_domains/exclude_domains: scope to specific sites. ALWAYS scope library/framework queries.\n- category: \"general\" | \"news\" | \"code\" | \"docs\" | \"papers\" — coarse filter, pair with include_domains.\n- from_date/to_date: ISO YYYY-MM-DD for time-bounded queries\n- max_results: default 5; use 3 for focused, 10+ for research\n- format: omit for default evidence shape. 'answer'/'stream_answer' = sampling synthesis (falls back to evidence). Retired values 'full'/'context'/'highlights' reject with a migration error.\n- max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).\n- include_full_markdown: true to restore full markdown body alongside evidence (default false).\n- citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.\n- max_content_chars: smart-truncate per-page markdown at paragraph boundary (e.g., 3000)\n- force_refresh: true to bypass all caches\n- mode: 'fast' | 'balanced' (default) | 'deep'. fast=single-engine, no rerank, 24h-stale cache. deep=multi-query expansion + full-body top-K.\n\nQuote [N] or {citation_id} from the evidence list.";
21
+ readonly crawl: "Crawl a website starting from a URL and return content from multiple pages. Use for indexing documentation sites, wikis, or any multi-page resource.\n\nKey parameters:\n- strategy: \"bfs\" (breadth-first, default), \"dfs\" (depth-first), \"sitemap\" (use sitemap.xml -- fastest for doc sites), \"map\" (URL discovery only, no content -- fastest for scoping a site)\n- max_depth: how many links deep to follow (default 2)\n- max_pages: maximum pages to fetch (default 20)\n- include_patterns/exclude_patterns: regex filters on URLs\n- max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).\n- include_full_markdown: default false — pages return evidence excerpts; set true for full bodies.\n- citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.\n\nReturns an array of pages with title, evidence, and depth. Content is deduplicated across pages. All pages are cached for later cache queries.";
22
22
  readonly cache: "Search previously fetched content without hitting the network. Use before searching the web -- if relevant content was already fetched or crawled, this returns it instantly.\n\nKey parameters:\n- query: full-text search over cached markdown and titles (supports AND, OR, NOT, \"phrase match\")\n- url_pattern: glob filter on URLs (e.g., \"*example.com*\")\n- since: ISO date -- only results cached after this date\n- stats: true to get cache size, entry count, oldest/newest dates\n- clear: true to delete matching entries\n\nReturns matching cached pages with full markdown content. Cache persists across sessions locally.";
23
23
  readonly extract: "Extract structured data from a URL or raw HTML. Use when you need specific data points, tables, or metadata rather than full page markdown.\n\nKey parameters:\n- mode: \"selector\" (CSS selector -> text), \"tables\" (HTML tables only), \"metadata\" (title/author/date/description/og_* + JSON-LD), \"schema\" (JSON Schema -> heuristic field extraction), \"structured\" (ONE-SHOT: tables + <dl> definitions + JSON-LD + chart hints from SVG/figure + microdata/data-attr/grid key-value pairs)\n- css_selector: required for mode=\"selector\" -- any valid CSS selector\n- schema: for mode=\"schema\", a JSON Schema object describing the fields to extract\n- multiple: true to return array of all matches (mode=\"selector\" only)\n\nPrefer mode=\"structured\" over chaining multiple extract calls — it returns every structured pattern on the page in one response:\n { tables, definitions, jsonld, chart_hints, key_value_pairs }\n\nchart_hints surfaces SVG titles, aria-labels, and figcaptions — host LLMs use these to describe data visualizations even when the underlying data is rendered by JavaScript.\n\nFor mode=\"tables\", returns array of table objects with headers and row data. For mode=\"schema\", pass { price: \"string\", name: \"string\" } and get structured fields extracted from the page.";
24
- readonly find_similar: "Find content related to a URL or concept. Use when you have a known-good page or topic and want to discover similar resources from the cache or web.\n\nKey parameters:\n- url: a URL to find content similar to. The page's content and embeddings are used for similarity matching.\n- concept: free-text description of what you want similar content for. Use when you do not have a specific URL.\n- max_results: number of similar items to return (default 5)\n- include_cached: true (default) to search the local cache first, false to skip cache and search the web only\n- threshold: minimum similarity score (0-1, default 0.5) -- higher values return fewer, more relevant results\n\nProvide either url or concept (not both). Results are fused from three signals via 3-way RRF: keyword match, semantic embeddings, and (if local hits are sparse) a live web search. Each result carries `match_signals` with `embedding_rank`, `fts5_rank`, and `fused_score` so you can explain ranking to the user.\n\nThe response may include a `cold_start` string when local signals are weak (empty cache, embeddings unavailable, < 20 cached pages). Pass this verbatim to the user — it explains why results came from web search and how to warm the cache.\n\nReturns results array, method used (\"hybrid\" | \"embedding\" | \"fts5\" | \"search\"), cache_hits, search_hits, embedding_available, and total_time_ms.";
25
- readonly research: "Run multi-step research on a complex question. Decomposes the question into sub-queries, searches in parallel, fetches top sources, and synthesizes a report with citations.\n\nKey parameters:\n- question: the research question to investigate\n- depth: \"quick\" (~15s, 2 sub-queries, 5-8 sources), \"standard\" (~40s, 4 sub-queries, 10-15 sources, default), \"comprehensive\" (~80s, 7 sub-queries, 20-25 sources)\n- max_sources: override the default source count for the chosen depth\n- include_domains/exclude_domains: scope research to specific sites\n- schema: optional JSON Schema -- if provided, the report is structured to extract fields matching the schema\n- stream: true to receive progress notifications as each research phase completes\n\nThe pipeline: (1) decompose question into sub-queries, (2) parallel search across sub-queries, (3) fetch and extract top unique sources, (4) synthesize report with citations from all sources, (5) optionally structure report fields if schema is provided.\n\nUses MCP requestSampling for intelligent decomposition and synthesis when available. Without sampling support (the common case), the output includes a `brief` with:\n - `topics`, `highlights` (ML-scored), `key_findings` (per-source, by relevance)\n - `query_type`: \"comparison\" | \"how-to\" | \"concept\" | \"general\"\n - `sections.overview`: top findings + cross_references (corroborated by 2+ sources)\n - `sections.comparison`: entities + comparison_points (comparison queries only)\n - `sections.gaps`: sub-queries with limited source coverage\n\nBuild your report: overview from key_findings, cross-referenced findings first (most reliable), per-topic sections, comparison table if present, then gaps and sources.\n\nReturns report (markdown), citations array, sources with full content, sub_queries used, depth level, total_time_ms, sampling_supported flag, and optional brief.";
26
- readonly agent: "Execute a natural-language data gathering task. Plans search queries and URLs from a prompt, executes them in parallel, and synthesizes results. Full step transparency.\n\nKey parameters:\n- prompt: natural-language description of what data to gather (e.g., \"find pricing for the top 5 CRM tools\")\n- urls: optional array of specific URLs to include in the gathering\n- schema: optional JSON Schema -- if provided, extracts structured data matching the schema from each page and merges results\n- max_pages: maximum pages to fetch (default 10)\n- max_time_ms: maximum execution time in milliseconds (default 60000)\n- stream: true to receive progress notifications as each step completes\n\nThe pipeline: (1) plan -- interpret prompt to determine search queries and URLs to visit, (2) execute -- run searches and fetch URLs in parallel within budget, (3) extract -- if schema provided, apply schema extraction to each page and merge, (4) synthesize -- produce natural-language or structured result.\n\nThe steps array in the output provides full transparency into every action taken (plan, search, fetch, extract, synthesize) with timing. This differentiates from black-box alternatives.\n\nUses MCP requestSampling for planning and synthesis. Without sampling support, uses keyword extraction for planning and returns raw content.\n\nReturns result (string or structured object), sources array, pages_fetched count, steps array with action/detail/time_ms, total_time_ms, and sampling_supported flag.";
24
+ readonly find_similar: "Find content related to a URL or concept. Use when you have a known-good page or topic and want to discover similar resources from the cache or web.\n\nKey parameters:\n- url: a URL to find content similar to. The page's content and embeddings are used for similarity matching.\n- concept: free-text description of what you want similar content for. Use when you do not have a specific URL.\n- max_results: number of similar items to return (default 5)\n- include_cached: true (default) to search the local cache first, false to skip cache and search the web only\n- threshold: minimum similarity score (0-1, default 0.5)\n- max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).\n- include_full_markdown: default false — results return evidence excerpts; set true for full bodies.\n- citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.\n\nProvide either url or concept. Results fuse three signals via 3-way RRF: keyword match, semantic embeddings, and (if local hits sparse) live web search. Each result carries `match_signals` with `embedding_rank`, `fts5_rank`, and `fused_score`.\n\nThe response may include a `cold_start` string when local signals are weak. Pass this verbatim to the user.\n\nReturns results array, method used (\"hybrid\" | \"embedding\" | \"fts5\" | \"search\"), cache_hits, search_hits, embedding_available, and total_time_ms.";
25
+ readonly research: "Run multi-step research on a complex question. Decomposes the question into sub-queries, searches in parallel, fetches top sources, and synthesizes a report with citations.\n\nKey parameters:\n- question: the research question to investigate\n- depth: \"quick\" (~15s, 2 sub-queries, 5-8 sources), \"standard\" (~40s, 4 sub-queries, 10-15 sources, default), \"comprehensive\" (~80s, 7 sub-queries, 20-25 sources)\n- max_sources: override the default source count for the chosen depth\n- include_domains/exclude_domains: scope research to specific sites\n- schema: optional JSON Schema -- structures the report to extract matching fields\n- stream: true to receive progress notifications as each phase completes\n- max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).\n- include_full_markdown: default false — sources return evidence excerpts; set true for full bodies.\n- citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.\n\nReturns report (markdown with [N] citations), citations array, sources, sub_queries, depth, total_time_ms, sampling_supported, and brief (topics, highlights, key_findings, sections.overview/comparison/gaps).";
26
+ readonly agent: "Execute a natural-language data gathering task. Plans search queries and URLs from a prompt, executes them in parallel, and synthesizes results. Full step transparency.\n\nKey parameters:\n- prompt: natural-language description of what data to gather (e.g., \"find pricing for the top 5 CRM tools\")\n- urls: optional array of specific URLs to include in the gathering\n- schema: optional JSON Schema -- if provided, extracts structured data matching the schema from each page and merges results\n- max_pages: maximum pages to fetch (default 10)\n- max_time_ms: maximum execution time in milliseconds (default 60000)\n- stream: true to receive progress notifications as each step completes\n- max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).\n- include_full_markdown: default false — pages return evidence excerpts; set true for full bodies.\n- citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.\n\nPipeline: (1) plan, (2) execute search+fetch in parallel within budget, (3) optional schema extraction, (4) synthesize. The steps array exposes every action with timing.\n\nUses MCP requestSampling for planning and synthesis. Without sampling support, uses keyword extraction.\n\nReturns result, sources array, pages_fetched count, steps array, total_time_ms, sampling_supported.";
27
27
  };
28
28
  export type ToolName = keyof typeof TOOL_DESCRIPTIONS;
29
29
  //# sourceMappingURL=instructions.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"instructions.d.ts","sourceRoot":"","sources":["../src/instructions.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,eAAO,MAAM,mBAAmB,u8MAkF+E,CAAC;AAEhH,eAAO,MAAM,iBAAiB;;;;;;;;;CAyHpB,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG,MAAM,OAAO,iBAAiB,CAAC"}
1
+ {"version":3,"file":"instructions.d.ts","sourceRoot":"","sources":["../src/instructions.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,eAAO,MAAM,mBAAmB,01NAqF+E,CAAC;AAEhH,eAAO,MAAM,iBAAiB;;;;;;;;;CA6HpB,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG,MAAM,OAAO,iBAAiB,CAAC"}
@@ -20,7 +20,9 @@ export const WIGOLO_INSTRUCTIONS = `Wigolo is a local-first web access layer: se
20
20
 
21
21
  Wigolo has no internal LLM. It returns *structured evidence* so YOU (the host LLM) write the final answer. Fold structure into your reply:
22
22
 
23
- - \`search\` \`format: "highlights"\` → ML-scored passages + \`citations\`. Quote [N].
23
+ - \`search\` → evidence (title/url/section_heading/excerpt/score/citation_id/source_span) + citations. Quote [N] or {citation_id}.
24
+ - \`format: 'answer'|'stream_answer'\` → LLM synthesis when sampling supported; else evidence fallback.
25
+ - \`max_tokens_out\` caps total output (cl100k-base, ~5-15% drift on non-OpenAI). \`include_full_markdown: true\` restores full body. \`citation_format\`: \`'numbered'\`|\`'json'\`|\`'anthropic_tags'\`.
24
26
  - \`research\` → \`brief\` with \`topics\`, \`highlights\`, \`key_findings\`, \`sections\` when sampling unavailable. Use \`sections.overview.cross_references\` for corroborated findings, \`sections.gaps\` for coverage limits, \`sections.comparison\` for entity-vs-entity analysis. \`query_type\` indicates decomposition strategy used.
25
27
  - \`find_similar\` → \`cold_start\` string when local signals are weak. Pass to user verbatim.
26
28
  - \`extract\` \`mode: "structured"\` → tables + definitions + jsonld + chart_hints + key_value_pairs in one call.
@@ -45,8 +47,8 @@ Wigolo has no internal LLM. It returns *structured evidence* so YOU (the host LL
45
47
  | Error debugging | \`search\` | exact error string as query, \`category: "code"\` (no domain scoping -- errors appear everywhere) |
46
48
  | Library research | \`crawl\` | seed URL of docs site, \`strategy: "sitemap"\`, then \`cache\` for later queries |
47
49
  | Related content | \`find_similar\` | \`url\` of a known good page, or \`concept\` as free text |
48
- | Direct quote | \`search\` | \`format: "highlights"\` returns ML-scored passages with citations; cite [N] in your reply |
49
- | Direct answer | \`search\` | \`format: "answer"\` if client supports sampling, else falls back to \`highlights\` (not plain context) |
50
+ | Evidence excerpt | \`search\` | default output; cite [N] or {citation_id} from each evidence item |
51
+ | Direct answer | \`search\` | \`format: "answer"\` if client supports sampling, else falls back to evidence |
50
52
  | Comprehensive research | \`research\` | \`depth: "comprehensive"\`, optional \`include_domains\` to scope |
51
53
  | Data gathering | \`agent\` | natural-language \`prompt\`, optional \`schema\` for structured output |
52
54
  | Structured extraction | \`extract\` | \`mode: "structured"\` (tables + dl + JSON-LD + chart hints + kv pairs), or \`mode: "schema"\` with a JSON Schema |
@@ -85,7 +87,8 @@ For library/framework/SDK queries, **always pass \`include_domains\`** with offi
85
87
  ## Performance
86
88
 
87
89
  - \`max_results: 3\` for focused lookups; \`5\` default; \`10+\` only for broad research.
88
- - \`max_content_chars: 3000\` on \`search\` or \`fetch\` smart-truncates each result's markdown at a paragraph/heading boundary with a \`[... content truncated]\` marker. Keeps context compact for AI agents. Prefer this over raw \`max_chars\` slicing.
90
+ - \`max_tokens_out\` caps total response size (cl100k-base BPE); prefer this over \`max_chars\` for budget-aware agents. When both are set, \`max_tokens_out\` wins.
91
+ - \`max_content_chars: 3000\` remains a legitimate per-page budget — smart-truncates each result's markdown at a paragraph/heading boundary with a \`[... content truncated]\` marker.
89
92
  - \`fetch\` with \`section: "Heading Name"\` returns content under that heading -- cheaper than the whole page.
90
93
  - Repeated fetches of the same URL are free (local cache).
91
94
  - \`research\` with \`depth: "quick"\` (~15s) suits most factual questions; reserve \`"comprehensive"\` for deep investigation.
@@ -103,28 +106,33 @@ export const TOOL_DESCRIPTIONS = {
103
106
  Key parameters:
104
107
  - section: extract content under a specific heading (e.g., section: "API Reference") -- faster than reading the whole page
105
108
  - max_content_chars: smart-truncate markdown at a paragraph/heading boundary with a \`[... content truncated]\` marker (e.g., 3000 for compact context). Preferred over max_chars for AI agents.
109
+ - max_tokens_out: token-budget cap on total output (cl100k-base BPE). Takes precedence over max_chars when both are set.
110
+ - include_full_markdown: default false. Set true to include the full markdown body in addition to evidence excerpts.
111
+ - citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.
106
112
  - use_auth: true to use stored browser session for authenticated/private pages
107
113
  - render_js: "auto" (default, detects JS need), "always" (force browser), "never" (HTTP only, fastest)
108
114
  - headers: custom HTTP headers if needed
109
115
  - force_refresh: true to bypass cache and fetch fresh content from the network
116
+ - mode: 'fast' | 'balanced' (default) | 'deep'. fast=HTTP-only, accepts cache up to 24h stale. deep=full render + freshness.
110
117
 
111
- Returns title, markdown, links, images, metadata (og_image, og_type, canonical_url, keywords). Decorative images filtered, relative URLs resolved. Cached locally; repeat fetches are instant. Localhost URLs work.
112
-
113
- Use force_refresh: true for frequently changing content. Default serves from cache.`,
114
- search: `Search the web and return full markdown content from top results. Returns extracted page content, not just snippets.
118
+ Returns title, markdown, links, images, metadata (og_image, og_type, canonical_url, keywords). Cached locally; repeat fetches are instant. Localhost URLs work.`,
119
+ search: `Search the web and return scored evidence excerpts (title/url/section_heading/excerpt/score/citation_id/source_span) plus citations. Default shape is evidence-only — no full markdown body.
115
120
 
116
121
  Key parameters:
117
- - query: string or string[] array (3-5 keyword variants for broader coverage; deduplicated automatically)
122
+ - query: string or string[] array (3-5 keyword variants; deduplicated automatically)
118
123
  - include_domains/exclude_domains: scope to specific sites. ALWAYS scope library/framework queries.
119
124
  - category: "general" | "news" | "code" | "docs" | "papers" — coarse filter, pair with include_domains.
120
125
  - from_date/to_date: ISO YYYY-MM-DD for time-bounded queries
121
126
  - max_results: default 5; use 3 for focused, 10+ for research
122
- - format: "full" (default), "context", "highlights" (ML-scored passages + [N] citations), "answer" (sampling synthesis; falls back to highlights), "stream_answer"
123
- - max_highlights: cap highlights count (default 10)
124
- - max_content_chars: smart-truncate markdown at paragraph boundary (e.g., 3000)
127
+ - format: omit for default evidence shape. 'answer'/'stream_answer' = sampling synthesis (falls back to evidence). Retired values 'full'/'context'/'highlights' reject with a migration error.
128
+ - max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).
129
+ - include_full_markdown: true to restore full markdown body alongside evidence (default false).
130
+ - citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.
131
+ - max_content_chars: smart-truncate per-page markdown at paragraph boundary (e.g., 3000)
125
132
  - force_refresh: true to bypass all caches
133
+ - mode: 'fast' | 'balanced' (default) | 'deep'. fast=single-engine, no rerank, 24h-stale cache. deep=multi-query expansion + full-body top-K.
126
134
 
127
- "answer" falls back to "highlights" when sampling unsupported (most clients). Results include title, URL, relevance_score, and full markdown_content. Cache serves previously fetched pages instantly.`,
135
+ Quote [N] or {citation_id} from the evidence list.`,
128
136
  crawl: `Crawl a website starting from a URL and return content from multiple pages. Use for indexing documentation sites, wikis, or any multi-page resource.
129
137
 
130
138
  Key parameters:
@@ -132,8 +140,11 @@ Key parameters:
132
140
  - max_depth: how many links deep to follow (default 2)
133
141
  - max_pages: maximum pages to fetch (default 20)
134
142
  - include_patterns/exclude_patterns: regex filters on URLs
143
+ - max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).
144
+ - include_full_markdown: default false — pages return evidence excerpts; set true for full bodies.
145
+ - citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.
135
146
 
136
- Returns an array of pages with title, markdown, and depth. Content is deduplicated across pages (repeated nav/headers/footers stripped). All pages are cached for later cache queries.`,
147
+ Returns an array of pages with title, evidence, and depth. Content is deduplicated across pages. All pages are cached for later cache queries.`,
137
148
  cache: `Search previously fetched content without hitting the network. Use before searching the web -- if relevant content was already fetched or crawled, this returns it instantly.
138
149
 
139
150
  Key parameters:
@@ -165,11 +176,14 @@ Key parameters:
165
176
  - concept: free-text description of what you want similar content for. Use when you do not have a specific URL.
166
177
  - max_results: number of similar items to return (default 5)
167
178
  - include_cached: true (default) to search the local cache first, false to skip cache and search the web only
168
- - threshold: minimum similarity score (0-1, default 0.5) -- higher values return fewer, more relevant results
179
+ - threshold: minimum similarity score (0-1, default 0.5)
180
+ - max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).
181
+ - include_full_markdown: default false — results return evidence excerpts; set true for full bodies.
182
+ - citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.
169
183
 
170
- Provide either url or concept (not both). Results are fused from three signals via 3-way RRF: keyword match, semantic embeddings, and (if local hits are sparse) a live web search. Each result carries \`match_signals\` with \`embedding_rank\`, \`fts5_rank\`, and \`fused_score\` so you can explain ranking to the user.
184
+ Provide either url or concept. Results fuse three signals via 3-way RRF: keyword match, semantic embeddings, and (if local hits sparse) live web search. Each result carries \`match_signals\` with \`embedding_rank\`, \`fts5_rank\`, and \`fused_score\`.
171
185
 
172
- The response may include a \`cold_start\` string when local signals are weak (empty cache, embeddings unavailable, < 20 cached pages). Pass this verbatim to the user — it explains why results came from web search and how to warm the cache.
186
+ The response may include a \`cold_start\` string when local signals are weak. Pass this verbatim to the user.
173
187
 
174
188
  Returns results array, method used ("hybrid" | "embedding" | "fts5" | "search"), cache_hits, search_hits, embedding_available, and total_time_ms.`,
175
189
  research: `Run multi-step research on a complex question. Decomposes the question into sub-queries, searches in parallel, fetches top sources, and synthesizes a report with citations.
@@ -179,21 +193,13 @@ Key parameters:
179
193
  - depth: "quick" (~15s, 2 sub-queries, 5-8 sources), "standard" (~40s, 4 sub-queries, 10-15 sources, default), "comprehensive" (~80s, 7 sub-queries, 20-25 sources)
180
194
  - max_sources: override the default source count for the chosen depth
181
195
  - include_domains/exclude_domains: scope research to specific sites
182
- - schema: optional JSON Schema -- if provided, the report is structured to extract fields matching the schema
183
- - stream: true to receive progress notifications as each research phase completes
184
-
185
- The pipeline: (1) decompose question into sub-queries, (2) parallel search across sub-queries, (3) fetch and extract top unique sources, (4) synthesize report with citations from all sources, (5) optionally structure report fields if schema is provided.
186
-
187
- Uses MCP requestSampling for intelligent decomposition and synthesis when available. Without sampling support (the common case), the output includes a \`brief\` with:
188
- - \`topics\`, \`highlights\` (ML-scored), \`key_findings\` (per-source, by relevance)
189
- - \`query_type\`: "comparison" | "how-to" | "concept" | "general"
190
- - \`sections.overview\`: top findings + cross_references (corroborated by 2+ sources)
191
- - \`sections.comparison\`: entities + comparison_points (comparison queries only)
192
- - \`sections.gaps\`: sub-queries with limited source coverage
196
+ - schema: optional JSON Schema -- structures the report to extract matching fields
197
+ - stream: true to receive progress notifications as each phase completes
198
+ - max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).
199
+ - include_full_markdown: default false — sources return evidence excerpts; set true for full bodies.
200
+ - citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.
193
201
 
194
- Build your report: overview from key_findings, cross-referenced findings first (most reliable), per-topic sections, comparison table if present, then gaps and sources.
195
-
196
- Returns report (markdown), citations array, sources with full content, sub_queries used, depth level, total_time_ms, sampling_supported flag, and optional brief.`,
202
+ Returns report (markdown with [N] citations), citations array, sources, sub_queries, depth, total_time_ms, sampling_supported, and brief (topics, highlights, key_findings, sections.overview/comparison/gaps).`,
197
203
  agent: `Execute a natural-language data gathering task. Plans search queries and URLs from a prompt, executes them in parallel, and synthesizes results. Full step transparency.
198
204
 
199
205
  Key parameters:
@@ -203,13 +209,14 @@ Key parameters:
203
209
  - max_pages: maximum pages to fetch (default 10)
204
210
  - max_time_ms: maximum execution time in milliseconds (default 60000)
205
211
  - stream: true to receive progress notifications as each step completes
212
+ - max_tokens_out: token-budget cap on total output (cl100k-base; wins over max_chars).
213
+ - include_full_markdown: default false — pages return evidence excerpts; set true for full bodies.
214
+ - citation_format: 'numbered' (default) | 'json' | 'anthropic_tags'.
206
215
 
207
- The pipeline: (1) plan -- interpret prompt to determine search queries and URLs to visit, (2) execute -- run searches and fetch URLs in parallel within budget, (3) extract -- if schema provided, apply schema extraction to each page and merge, (4) synthesize -- produce natural-language or structured result.
208
-
209
- The steps array in the output provides full transparency into every action taken (plan, search, fetch, extract, synthesize) with timing. This differentiates from black-box alternatives.
216
+ Pipeline: (1) plan, (2) execute search+fetch in parallel within budget, (3) optional schema extraction, (4) synthesize. The steps array exposes every action with timing.
210
217
 
211
- Uses MCP requestSampling for planning and synthesis. Without sampling support, uses keyword extraction for planning and returns raw content.
218
+ Uses MCP requestSampling for planning and synthesis. Without sampling support, uses keyword extraction.
212
219
 
213
- Returns result (string or structured object), sources array, pages_fetched count, steps array with action/detail/time_ms, total_time_ms, and sampling_supported flag.`,
220
+ Returns result, sources array, pages_fetched count, steps array, total_time_ms, sampling_supported.`,
214
221
  };
215
222
  //# sourceMappingURL=instructions.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"instructions.js","sourceRoot":"","sources":["../src/instructions.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,MAAM,CAAC,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;+GAkF4E,CAAC;AAEhH,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,KAAK,EAAE;;;;;;;;;;;;oFAY2E;IAElF,MAAM,EAAE;;;;;;;;;;;;;uMAa6L;IAErM,KAAK,EAAE;;;;;;;;uLAQ8K;IAErL,KAAK,EAAE;;;;;;;;;kGASyF;IAEhG,OAAO,EAAE;;;;;;;;;;;;;4LAaiL;IAE1L,YAAY,EAAE;;;;;;;;;;;;;kJAakI;IAEhJ,QAAQ,EAAE;;;;;;;;;;;;;;;;;;;;;kKAqBsJ;IAEhK,KAAK,EAAE;;;;;;;;;;;;;;;;sKAgB6J;CAC5J,CAAC"}
1
+ {"version":3,"file":"instructions.js","sourceRoot":"","sources":["../src/instructions.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,MAAM,CAAC,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;+GAqF4E,CAAC;AAEhH,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,KAAK,EAAE;;;;;;;;;;;;;;gKAcuJ;IAE9J,MAAM,EAAE;;;;;;;;;;;;;;;;mDAgByC;IAEjD,KAAK,EAAE;;;;;;;;;;;+IAWsI;IAE7I,KAAK,EAAE;;;;;;;;;kGASyF;IAEhG,OAAO,EAAE;;;;;;;;;;;;;4LAaiL;IAE1L,YAAY,EAAE;;;;;;;;;;;;;;;;kJAgBkI;IAEhJ,QAAQ,EAAE;;;;;;;;;;;;;gNAaoM;IAE9M,KAAK,EAAE;;;;;;;;;;;;;;;;;oGAiB2F;CAC1F,CAAC"}
package/dist/logger.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- type Module = 'fetch' | 'search' | 'crawl' | 'cache' | 'extract' | 'searxng' | 'server' | 'cli' | 'jsonld' | 'repl' | 'embedding' | 'research' | 'agent';
1
+ type Module = 'fetch' | 'search' | 'crawl' | 'cache' | 'extract' | 'searxng' | 'server' | 'cli' | 'jsonld' | 'repl' | 'embedding' | 'research' | 'agent' | 'structured-data' | 'reranker';
2
2
  export interface Logger {
3
3
  debug(msg: string, data?: Record<string, unknown>): void;
4
4
  info(msg: string, data?: Record<string, unknown>): void;
@@ -1 +1 @@
1
- {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAKA,KAAK,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,OAAO,CAAC;AASzJ,MAAM,WAAW,MAAM;IACrB,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxD,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC1D;AA+CD,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAiBnD"}
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAKA,KAAK,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,OAAO,GAAG,iBAAiB,GAAG,UAAU,CAAC;AAS1L,MAAM,WAAW,MAAM;IACrB,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACxD,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC1D;AA+CD,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAiBnD"}
@@ -9,7 +9,7 @@ const MIN_PHRASE_LEN = 4;
9
9
  // shape to produce the final report without needing to re-read raw sources.
10
10
  export async function buildResearchBrief(question, sources, subQueries, perSourceCharCap, totalSourcesCharCap, queryType = 'general', comparisonEntities = []) {
11
11
  const fetched = sources.filter((s) => s.fetched && s.markdown_content.length > 0);
12
- // Highlights reuse the FlashRank-or-paragraph scorer so briefs align with
12
+ // Highlights reuse the ONNX-reranker-or-paragraph scorer so briefs align with
13
13
  // whatever format='highlights' produces for single-query searches.
14
14
  const searchItems = fetched.map((s) => ({
15
15
  title: s.title,