@apmantza/greedysearch-pi 1.9.2 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +132 -2
  2. package/README.md +82 -47
  3. package/bin/cdp.mjs +1153 -1108
  4. package/bin/launch.mjs +9 -0
  5. package/bin/search.mjs +318 -81
  6. package/extractors/bing-copilot.mjs +48 -18
  7. package/extractors/chatgpt.mjs +553 -0
  8. package/extractors/common.mjs +213 -22
  9. package/extractors/consensus.mjs +655 -0
  10. package/extractors/consent.mjs +182 -18
  11. package/extractors/gemini.mjs +350 -217
  12. package/extractors/google-ai.mjs +129 -128
  13. package/extractors/logically.mjs +629 -0
  14. package/extractors/perplexity.mjs +547 -217
  15. package/extractors/selectors.mjs +3 -2
  16. package/extractors/semantic-scholar.mjs +219 -0
  17. package/package.json +8 -4
  18. package/skills/greedy-search/skill.md +20 -12
  19. package/src/fetcher.mjs +23 -1
  20. package/src/formatters/results.ts +185 -128
  21. package/src/search/browser-lifecycle.mjs +27 -5
  22. package/src/search/challenge-detect.mjs +205 -0
  23. package/src/search/chrome.mjs +653 -590
  24. package/src/search/constants.mjs +155 -39
  25. package/src/search/engines.mjs +114 -76
  26. package/src/search/fetch-source.mjs +566 -451
  27. package/src/search/pdf.mjs +68 -0
  28. package/src/search/progress.mjs +145 -0
  29. package/src/search/recovery.mjs +73 -45
  30. package/src/search/research.mjs +1419 -62
  31. package/src/search/scale-aware.mjs +93 -0
  32. package/src/search/simple-research.mjs +520 -0
  33. package/src/search/sources.mjs +52 -22
  34. package/src/search/synthesis-runner.mjs +105 -26
  35. package/src/search/synthesis.mjs +286 -246
  36. package/src/tools/greedy-search-handler.ts +129 -59
  37. package/src/tools/shared.ts +312 -186
  38. package/src/types.ts +110 -104
  39. package/test.mjs +537 -18
@@ -43,8 +43,9 @@ export const SELECTORS = {
43
43
  gemini: {
44
44
  input: "rich-textarea .ql-editor",
45
45
  // Language-agnostic: use Material icon data attributes (work across locales)
46
- copyButton: 'button:has(mat-icon[data-mat-icon-name="content_copy"])',
47
- sendButton: 'button:has(mat-icon[data-mat-icon-name="send"]), .send-button',
46
+ copyButton: 'button:has(mat-icon[data-mat-icon-name="copy"])',
47
+ sendButton:
48
+ 'button:has(mat-icon[data-mat-icon-name="arrow_upward"]), [data-test-id="send-button"], .send-button',
48
49
  sourcesSidebarButton: "button.legacy-sources-sidebar-button",
49
50
  sourcesExclude: ["gemini.google", "gstatic", "google.com/search"],
50
51
  citationButtonPattern: 'button[aria-label*="citation from"]',
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/semantic-scholar.mjs
4
+ // Search Semantic Scholar without API keys and return paper/PDF sources for
5
+ // GreedySearch's source fetcher and research synthesizer.
6
+
7
+ import {
8
+ buildEnvelope,
9
+ cdp,
10
+ formatAnswer,
11
+ getOrOpenTab,
12
+ handleError,
13
+ logStage,
14
+ outputJson,
15
+ parseArgs,
16
+ prepareArgs,
17
+ validateQuery,
18
+ waitForSelector,
19
+ } from "./common.mjs";
20
+
21
// Usage text handed to validateQuery() in main().
const USAGE =
  'Usage: node extractors/semantic-scholar.mjs "<query>" [--tab <prefix>]\n';
// Selector main() waits on before extraction; the page-side script in
// extractPapers() queries the same row selector.
const RESULT_SELECTOR = ".cl-paper-row[data-paper-id]";
24
+
25
/**
 * Build the Semantic Scholar search-page URL for a query.
 *
 * @param {string} query - Raw search text; falsy values are treated as "".
 * @returns {string} Search URL sorted by relevance, with the query URL-encoded.
 */
function semanticScholarSearchUrl(query) {
  // Semantic Scholar docs note hyphenated terms can reduce matches, so every
  // hyphen becomes a space before the text is encoded.
  const spaced = String(query || "").split("-").join(" ");
  const params = `q=${encodeURIComponent(spaced)}&sort=relevance`;
  return `https://www.semanticscholar.org/search?${params}`;
}
30
+
31
/**
 * Best-effort dismissal of a cookie/consent dialog on the given tab.
 *
 * Evaluates a page-side script that clicks the first element matching one of
 * a fixed list of selectors. A rejected cdp() call is deliberately swallowed
 * (mapped to null) so a missing banner never aborts the search.
 *
 * @param {*} tab - Tab handle forwarded unchanged to cdp().
 * @returns {Promise<void>}
 */
async function dismissCookieBanner(tab) {
  const clickFirstMatch = String.raw`
    (() => {
      const selectors = [
        '.osano-cm-dialog__close',
        '.osano-cm-denyAll',
        '.osano-cm-accept-all',
        'button[aria-label*="Close" i]',
      ];
      for (const selector of selectors) {
        const btn = document.querySelector(selector);
        if (btn) { btn.click(); return selector; }
      }
      return null;
    })()
  `;
  const pending = cdp(["eval", tab, clickFirstMatch]);
  await pending.catch(() => null);
}
52
+
53
/**
 * Scrape paper rows from the Semantic Scholar results page loaded in `tab`.
 *
 * A page-side script collects, per result row: rank, paper id, title, URLs
 * (paper page, external links, and a direct PDF link when one is detectable),
 * authors, field, venue, date, TLDR text and citation count. It returns them
 * as a JSON string, which is parsed here.
 *
 * @param {*} tab - Tab handle forwarded unchanged to cdp().
 * @param {{limit?: number}} [options] - `limit` caps how many rows are read
 *   (default 8).
 * @returns {Promise<object[]>} Parsed paper records; [] when the page result
 *   is not valid JSON or does not decode to an array.
 */
async function extractPapers(tab, { limit = 8 } = {}) {
  const raw = await cdp([
    "eval",
    tab,
    String.raw`
      ((limit) => {
        function clean(value) {
          return String(value || '').replace(/\s+/g, ' ').trim();
        }
        function absolutize(href) {
          // An empty relative reference resolves to the base URL itself, which
          // would make the search page masquerade as a paper/source URL.
          if (!String(href || '').trim()) return '';
          try { return new URL(href, location.href).href; } catch { return ''; }
        }
        function isDirectPdf(url) {
          return /\.pdf(?:[?#]|$)/i.test(url) || /\/pdf\//i.test(url);
        }
        const rows = Array.from(document.querySelectorAll('.cl-paper-row[data-paper-id]')).slice(0, limit);
        return JSON.stringify(rows.map((row, index) => {
          const titleLink = row.querySelector('a[data-test-id="title-link"][href], a[href*="/paper/"][href]');
          const paperUrl = absolutize(titleLink?.getAttribute('href') || '');
          const title = clean(titleLink?.innerText || row.querySelector('.cl-paper-title')?.innerText || '');
          const authors = Array.from(row.querySelectorAll('[data-test-id="author-list"] a, .cl-paper-authors a'))
            .map((a) => clean(a.innerText))
            .filter(Boolean)
            .slice(0, 8);
          const field = clean(row.querySelector('.cl-paper-fos')?.innerText || '');
          const venue = clean(row.querySelector('[data-test-id="normalized-venue-link"], .cl-paper-venue')?.innerText || '');
          const date = clean(row.querySelector('.cl-paper-pubdates')?.innerText || '');
          const tldrNode = row.querySelector('.tldr-abstract-replacement');
          let tldr = clean(tldrNode?.innerText || '');
          tldr = tldr.replace(/^TLDR\s*/i, '').replace(/\s*Expand$/i, '').trim();
          const citationNode = row.querySelector('[data-test-id="total-citations-stat"]');
          const citationLabel = citationNode?.getAttribute('aria-label') || citationNode?.innerText || '';
          const citationMatch = clean(citationLabel).match(/[\d,]+/);
          const citationCount = citationMatch ? Number.parseInt(citationMatch[0].replace(/,/g, ''), 10) : null;
          const externalLinks = Array.from(row.querySelectorAll('a[data-test-id="paper-link"][href], a.cl-paper-view-paper[href]'))
            .map((a) => ({
              url: absolutize(a.getAttribute('href')),
              label: clean(a.innerText),
            }))
            .filter((link) => link.url);
          const directPdf = externalLinks.find((link) => isDirectPdf(link.url));
          const primaryExternal = directPdf || externalLinks[0] || null;
          const sourceUrl = primaryExternal?.url || paperUrl;
          return {
            rank: index + 1,
            paperId: row.getAttribute('data-paper-id') || '',
            title,
            url: sourceUrl,
            semanticScholarUrl: paperUrl,
            pdfUrl: directPdf?.url || '',
            externalUrl: primaryExternal?.url || '',
            externalLabel: primaryExternal?.label || '',
            authors,
            field,
            venue,
            date,
            tldr,
            citationCount,
          };
        }));
      })(${limit})
    `,
  ]);
  try {
    const parsed = JSON.parse(raw);
    // Callers immediately use array methods on the result, so anything that
    // decodes to a non-array (null, an error object, a string) becomes [].
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
}
122
+
123
/**
 * Render extracted papers as a numbered plain-text list.
 *
 * Each entry is "<rank>. <title>", followed when available by an em-dash and
 * authors / venue / date / citation count joined with " · ", plus an optional
 * TLDR line.
 *
 * @param {object[]} papers - Records in the shape produced by extractPapers().
 * @returns {string} Entries separated by blank lines, or a fixed notice when
 *   the list is empty.
 */
function formatPaperSummary(papers) {
  if (!papers.length) return "Semantic Scholar returned no paper results.";

  const renderEntry = (paper) => {
    // Only the metadata pieces that are actually present are listed.
    const details = [
      ...(paper.authors?.length ? [paper.authors.join(", ")] : []),
      ...(paper.venue ? [paper.venue] : []),
      ...(paper.date ? [paper.date] : []),
      ...(Number.isFinite(paper.citationCount)
        ? [`${paper.citationCount.toLocaleString()} citations`]
        : []),
    ];

    let entry = `${paper.rank}. ${paper.title}`;
    if (details.length) entry += ` — ${details.join(" · ")}`;
    if (paper.tldr) entry += `\n TLDR: ${paper.tldr}`;
    return entry;
  };

  return papers.map((paper) => renderEntry(paper)).join("\n\n");
}
140
+
141
/**
 * CLI entry point: run one Semantic Scholar search and emit the result as JSON.
 *
 * Flow: parse args -> open/reuse a tab -> navigate to the search URL -> try to
 * dismiss the cookie banner -> wait for result rows -> extract papers ->
 * output answer, sources and raw paper records together with an envelope.
 * Any error thrown inside the try block is routed to handleError() along with
 * an envelope built from the current `env` state.
 */
async function main() {
  const args = await prepareArgs(process.argv.slice(2));
  validateQuery(args, USAGE);
  const { query, tabPrefix, short } = parseArgs(args);
  const startTime = Date.now();
  const mode =
    process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
  // Diagnostic state; spread into buildEnvelope() on both success and failure.
  const env = {
    engine: "semantic-scholar",
    mode,
    blockedBy: null,
    verificationResult: null,
    inputReady: null,
  };

  try {
    if (!tabPrefix) await cdp(["list"]);
    const tab = await getOrOpenTab(tabPrefix);
    logStage(env, "nav", startTime);
    // 25000 is presumably a timeout in ms for the navigation — confirm in cdp().
    await cdp(["nav", tab, semanticScholarSearchUrl(query)], 25000);
    // Fixed 800 ms pause after navigation before touching the page.
    await new Promise((r) => setTimeout(r, 800));

    logStage(env, "consent", startTime);
    await dismissCookieBanner(tab);

    logStage(env, "results-wait", startTime);
    // 15000 / 500 are presumably timeout and poll interval in ms — confirm
    // against waitForSelector() in common.mjs.
    const inputReady = await waitForSelector(tab, RESULT_SELECTOR, 15000, 500);
    env.inputReady = inputReady;
    if (!inputReady) {
      // No result rows: read the page text to distinguish a verification wall
      // from a plain "no results" page. A failed read counts as empty text.
      const body = await cdp([
        "eval",
        tab,
        "document.body?.innerText || ''",
      ]).catch(() => "");
      if (/captcha|cloudflare|verify|robot|blocked/i.test(body)) {
        env.blockedBy = "verification";
        env.verificationResult = "needs-human";
        throw new Error(
          "Semantic Scholar verification required — please solve it in the visible browser window",
        );
      }
      throw new Error("Semantic Scholar results not found");
    }

    logStage(env, "extract", startTime);
    // Short mode reads fewer rows (5 instead of 8).
    const papers = await extractPapers(tab, { limit: short ? 5 : 8 });
    // Only papers with both a title and a URL become sources; `papers` itself
    // is still output unfiltered below.
    const sources = papers
      .filter((paper) => paper.title && paper.url)
      .map((paper) => ({
        title: paper.pdfUrl ? `${paper.title} (PDF)` : paper.title,
        url: paper.url,
        semanticScholarUrl: paper.semanticScholarUrl,
        paperId: paper.paperId,
        citationCount: paper.citationCount,
        venue: paper.venue,
        // NOTE(review): `date` is the raw publication-date text scraped from
        // the row, not necessarily a bare year — confirm consumers of `year`.
        year: paper.date,
      }));
    const answer = formatPaperSummary(papers);
    const durationMs = Date.now() - startTime;
    outputJson({
      answer: formatAnswer(answer, short),
      sources,
      query,
      url: semanticScholarSearchUrl(query),
      papers,
      _envelope: buildEnvelope({ ...env, durationMs }),
    });
  } catch (error) {
    handleError(
      error,
      buildEnvelope({
        ...env,
        durationMs: Date.now() - startTime,
      }),
    );
  }
}

main();
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@apmantza/greedysearch-pi",
3
- "version": "1.9.2",
4
- "description": "Headless multi-engine AI search (Perplexity, Bing Copilot, Google AI) via browser automation -- NO API KEYS needed. Extracts answers with sources, optional synthesis. Grounded AI answers from real browser interactions.",
3
+ "version": "2.1.2",
4
+ "description": "Headless multi-engine AI search (Perplexity, Google AI, ChatGPT, Gemini) via browser automation. NO API KEYS needed. Grounded all-engine search fetches sources by default, with optional configurable synthesis and deep research.",
5
5
  "type": "module",
6
6
  "keywords": [
7
7
  "pi-package"
@@ -15,11 +15,14 @@
15
15
  "license": "MIT",
16
16
  "scripts": {
17
17
  "test": "node test.mjs",
18
+ "test:unit": "node test.mjs unit",
18
19
  "test:quick": "node test.mjs quick",
19
20
  "test:smoke": "node test.mjs smoke",
20
21
  "test:bash": "./test.sh",
21
22
  "test:bash:quick": "./test.sh quick",
22
- "test:bash:smoke": "./test.sh smoke"
23
+ "test:bash:smoke": "./test.sh smoke",
24
+ "lint": "node scripts/lint.mjs",
25
+ "check:lockfile": "node scripts/check-lockfile.mjs"
23
26
  },
24
27
  "engines": {
25
28
  "node": ">=20.11.0"
@@ -46,7 +49,8 @@
46
49
  "dependencies": {
47
50
  "@mozilla/readability": "^0.6.0",
48
51
  "@sinclair/typebox": "^0.34.48",
49
- "jsdom": "^24.0.0",
52
+ "jsdom": "^29.1.1",
53
+ "pdf-parse": "^2.4.5",
50
54
  "turndown": "^7.1.2"
51
55
  },
52
56
  "peerDependencies": {
@@ -1,12 +1,20 @@
1
- ---
2
- name: greedy-search
3
- description: Web search via Perplexity, Bing, Google AI & Gemini. Current docs, recent changes, dependency choices. NOT codebase search.
4
- ---
5
-
6
- `greedy_search({ query, engine: "all"|"perplexity"|"bing"|"google"|"gemini", depth: "fast"|"standard"|"deep"|"research", breadth: 1-5, iterations: 1-3, maxSources: 3-12, visible: bool })`
7
-
8
- **Depth:** `fast`(15-30s, no synthesis) · `standard`(30-90s, all+synthesis+sources) · `deep`(60-180s, stronger grounding) · `research`(slowest, iterative planning+follow-ups+learning extraction; uses breadth/iterations/maxSources)
9
-
10
- **Auto-recovery:** Headless default. Bing/Perplexity auto-retry visible on CF block. Manual CAPTCHA visible stays open; solve then rerun.
11
-
12
- **CDP safety:** Use `bin/cdp-greedy.mjs` only. Never raw `bin/cdp.mjs`.
1
+ ---
2
+ name: greedy-search
3
+ description: Web search plus opt-in deep research via Perplexity, Google AI, ChatGPT, Gemini, Semantic Scholar, and Logically. Grounded all-engine search fetches sources by default; synthesis is optional and configurable; deep research runs as a separate workflow. Configurable via ~/.pi/greedyconfig. Bing Copilot is available for signed-in users. Use for current docs, recent changes, dependency choices. NOT codebase search.
4
+ ---
5
+
6
+ `greedy_search({ query, engine: "all"|"perplexity"|"google"|"chatgpt"|"gemini"|"semantic-scholar"|"logically"|"bing", synthesize?: bool, synthesizer?: "gemini"|"chatgpt", depth?: "research", breadth: 1-5, iterations: 1-3, maxSources: 3-12, researchOutDir?: string, writeResearchBundle?: bool, visible: bool })`
7
+
8
+ **Modes:** individual engine search · grounded `engine:"all"` search with fetched sources · optional `synthesize:true` using the configured synthesizer over all-engine results · `depth:"research"` for the iterative deep-research workflow.
9
+
10
+ **Config:** `~/.pi/greedyconfig` defaults to `{ "engines": ["perplexity", "google", "chatgpt", "gemini"], "synthesizer": "gemini" }`. `semantic-scholar` and `logically` are opt-in academic/research engines — add them to `engines` only when you want academic paper discovery or research-assistant workflows in the normal all-search fan-out. Without explicit opt-in, `engine:"all"` excludes them because their results are noisy for casual web search; they shine in `depth:"research"` mode. Any configured engine can participate in `engine:"all"`; deep research child searches reuse the same configured `engines` list and stdin-safe query passing. Normal all-search synthesis remains controlled separately by `synthesizer`; research planning/final synthesis uses Gemini.
11
+
12
+ **Compatibility:** legacy `depth:"fast"|"standard"|"deep"` is still accepted. `fast` skips source fetching; `standard`/`deep` alias `synthesize:true`. Prefer `synthesize:true`, optional `synthesizer`, and `depth:"research"` going forward.
13
+
14
+ **Research output:** `depth:"research"` writes a dataroom-style bundle by default under `.pi/greedysearch-research/<timestamp>_<query>/` with `STATUS.md`, `OUTLINE.md`, `reports/SUMMARY.md`, `reports/CLAIMS.md`, `reports/GAPS.md`, `sources/`, and `data/manifest.json`. Pass `researchOutDir` to choose the directory or `writeResearchBundle:false` to disable disk output.
15
+
16
+ **Scale-aware research:** When `breadth` and `iterations` are not explicitly set, the classifier auto-detects query complexity. Simple queries ("what is X") use a fast single-pass path (~70% faster). Moderate queries get tighter breadth/iterations. Complex queries use the full loop. Explicit `breadth`/`iterations` always override the classifier.
17
+
18
+ **Auto-recovery:** Headless default. Bing/Perplexity auto-retry visible on CF block. Manual CAPTCHA → visible stays open; solve then rerun.
19
+
20
+ **CDP safety:** Use `bin/cdp-greedy.mjs` only. Never raw `bin/cdp.mjs`.
package/src/fetcher.mjs CHANGED
@@ -63,6 +63,10 @@ const PRIVATE_URL_PATTERNS = [
63
63
  * @param {string} url - URL to check
64
64
  * @returns {{blocked: boolean, reason?: string}}
65
65
  */
66
/**
 * Build a headers object from DEFAULT_HEADERS with caller overrides applied.
 *
 * NOTE(review): the JSDoc block sitting directly above this function in the
 * file (`@param {string} url`, `@returns {{blocked, reason}}`) describes
 * isPrivateUrl(), which now follows this function — consider moving that
 * comment back next to isPrivateUrl().
 *
 * @param {object} [overrides] - Entries that replace or extend the defaults.
 * @returns {object} A new object; override keys win on conflict.
 */
export function defaultFetchHeaders(overrides = {}) {
  return { ...DEFAULT_HEADERS, ...overrides };
}
69
+
66
70
  export function isPrivateUrl(url) {
67
71
  try {
68
72
  const parsed = new URL(url);
@@ -191,6 +195,21 @@ export async function fetchSourceHttp(url, options = {}) {
191
195
  const finalUrl = response.url;
192
196
  const lastModified = response.headers.get("last-modified") || "";
193
197
 
198
+ // SSRF defense: re-validate the post-redirect finalUrl. A malicious
199
+ // server could redirect our fetch to a private IP, bypassing the
200
+ // initial isPrivateUrl() check on the original URL.
201
+ const finalPrivateCheck = isPrivateUrl(finalUrl);
202
+ if (finalPrivateCheck.blocked) {
203
+ return {
204
+ ok: false,
205
+ url,
206
+ finalUrl,
207
+ status: response.status,
208
+ error: `Blocked: ${finalPrivateCheck.reason}`,
209
+ needsBrowser: false,
210
+ };
211
+ }
212
+
194
213
  // Handle raw text/plain from GitHub (raw file content)
195
214
  let isRawGitHub = false;
196
215
  try {
@@ -594,7 +613,10 @@ export function checkContentQuality(extracted) {
594
613
  desc: "access denied in content",
595
614
  },
596
615
  {
597
- check: () => /^\s{0,10}sign\s{1,5}in\s{0,10}$|^\s{0,10}log\s{1,5}in\s{0,10}$/im.test(markdown),
616
+ check: () =>
617
+ /^\s{0,10}sign\s{1,5}in\s{0,10}$|^\s{0,10}log\s{1,5}in\s{0,10}$/im.test(
618
+ markdown,
619
+ ),
598
620
  desc: "login form only",
599
621
  },
600
622
  ];
@@ -1,128 +1,185 @@
1
- /**
2
- * Search results formatters
3
- * Extracted from index.ts
4
- */
5
-
6
- import { formatEngineName } from "../utils/helpers.js";
7
- import { renderSynthesis } from "./synthesis.js";
8
-
9
- /**
10
- * Format search results based on engine type
11
- */
12
- export function formatResults(
13
- engine: string,
14
- data: Record<string, unknown>,
15
- ): string {
16
- const lines: string[] = [];
17
-
18
- if (engine === "all") {
19
- return formatAllEnginesResult(data, lines);
20
- }
21
-
22
- return formatSingleEngineResult(data, lines);
23
- }
24
-
25
- /**
26
- * Format multi-engine results with synthesis
27
- */
28
- function formatAllEnginesResult(
29
- data: Record<string, unknown>,
30
- lines: string[],
31
- ): string {
32
- const synthesis = data._synthesis as Record<string, unknown> | undefined;
33
- const dedupedSources = data._sources as
34
- | Array<Record<string, unknown>>
35
- | undefined;
36
- const needsHuman = data._needsHumanVerification as
37
- | Record<string, unknown>
38
- | undefined;
39
-
40
- if (needsHuman) {
41
- const engines = Array.isArray(needsHuman.engines)
42
- ? needsHuman.engines.join(", ")
43
- : "one or more engines";
44
- lines.push("## Manual verification required");
45
- lines.push(
46
- String(
47
- needsHuman.message ||
48
- "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
49
- ),
50
- );
51
- lines.push(`Engines: ${engines}`);
52
- lines.push("");
53
- }
54
-
55
- // If we have a synthesis answer, render it
56
- if (synthesis?.answer) {
57
- renderSynthesis(lines, synthesis, dedupedSources || [], 6);
58
- lines.push("*Synthesized from Perplexity, Bing Copilot, and Google AI*\n");
59
- return lines.join("\n").trim();
60
- }
61
-
62
- // Fallback: render individual engine results
63
- for (const [eng, result] of Object.entries(data)) {
64
- if (eng.startsWith("_")) continue;
65
- lines.push(`\n## ${formatEngineName(eng)}`);
66
- formatEngineResult(result as Record<string, unknown>, lines, 3);
67
- }
68
-
69
- return lines.join("\n").trim();
70
- }
71
-
72
- /**
73
- * Format single engine result
74
- */
75
- function formatSingleEngineResult(
76
- data: Record<string, unknown>,
77
- lines: string[],
78
- ): string {
79
- const needsHuman = data._needsHumanVerification as
80
- | Record<string, unknown>
81
- | undefined;
82
- if (needsHuman) {
83
- const engines = Array.isArray(needsHuman.engines)
84
- ? needsHuman.engines.join(", ")
85
- : "this engine";
86
- lines.push("## Manual verification required");
87
- lines.push(
88
- String(
89
- needsHuman.message ||
90
- "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
91
- ),
92
- );
93
- lines.push(`Engines: ${engines}`);
94
- lines.push("");
95
- }
96
- formatEngineResult(data, lines, 5);
97
- return lines.join("\n").trim();
98
- }
99
-
100
- /**
101
- * Format a single engine's result (answer + sources)
102
- */
103
- function formatEngineResult(
104
- data: Record<string, unknown>,
105
- lines: string[],
106
- maxSources: number,
107
- ): void {
108
- if (data.error) {
109
- lines.push(`Error: ${data.error}`);
110
- return;
111
- }
112
-
113
- if (data.answer) {
114
- lines.push(String(data.answer));
115
- }
116
-
117
- const sources = data.sources as Array<Record<string, string>> | undefined;
118
- if (Array.isArray(sources) && sources.length > 0) {
119
- lines.push("\nSources:");
120
- for (const s of sources.slice(0, maxSources)) {
121
- lines.push(`- [${s.title || s.url}](${s.url})`);
122
- }
123
- }
124
- }
125
-
126
- /**
127
- * Format deep research results with confidence metrics
128
- */
1
+ /**
2
+ * Search results formatters
3
+ * Extracted from index.ts
4
+ */
5
+
6
+ import { formatEngineName } from "../utils/helpers.js";
7
+ import { renderSynthesis } from "./synthesis.js";
8
+
9
/**
 * Maximum line length for any text passed to the TUI. Lines longer than
 * this are truncated with an ellipsis. The TUI's Text.render wraps at the
 * terminal width, but it crashes with
 * "Rendered line N exceeds terminal width (W > W-4)"
 * when a single line is wider than its own internal render width. Long
 * lines (e.g. a markdown table row inside a chatgpt synthesis answer) that
 * don't have a \n break would otherwise produce this crash. The safety
 * net below trims individual lines before they reach the TUI.
 */
const MAX_LINE_WIDTH = 800;

/**
 * Truncate every line of `text` that exceeds MAX_LINE_WIDTH.
 *
 * Length is measured in UTF-16 code units. An over-long line is cut so that,
 * with the trailing "…", it is at most MAX_LINE_WIDTH units long. The cut
 * never lands between the two halves of a surrogate pair, so the output
 * cannot contain a lone surrogate introduced by truncation.
 *
 * @param text - Newline-separated text.
 * @returns The same text with over-long lines shortened; other lines untouched.
 */
function _truncateLongLines(text: string): string {
  return text
    .split("\n")
    .map((line) => {
      if (line.length <= MAX_LINE_WIDTH) return line;
      let cut = MAX_LINE_WIDTH - 1;
      // If the last kept unit is a high surrogate, its low half would be
      // sliced off — drop the whole character instead.
      const lastKept = line.charCodeAt(cut - 1);
      if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
      return `${line.slice(0, cut)}…`;
    })
    .join("\n");
}
30
+
31
/**
 * Render a search payload as display text.
 *
 * Picks the multi-engine layout when `engine` is "all" and the single-engine
 * layout otherwise, then passes the rendered text through
 * _truncateLongLines().
 *
 * @param engine - Engine key; "all" selects the multi-engine layout.
 * @param data - Result payload to render.
 * @returns The formatted text.
 */
export function formatResults(
  engine: string,
  data: Record<string, unknown>,
): string {
  const buffer: string[] = [];
  const rendered =
    engine === "all"
      ? formatAllEnginesResult(data, buffer)
      : formatSingleEngineResult(data, buffer);
  return _truncateLongLines(rendered);
}
46
+
47
/**
 * Format a multi-engine payload.
 *
 * Output order: an optional "manual verification required" notice, then
 * either the synthesized answer (preceded by a research header when the run
 * was iterative research) or, when no synthesis answer exists, one section
 * per engine. Keys starting with "_" are metadata and never rendered as
 * engines.
 *
 * @param data - Payload keyed by engine name plus "_"-prefixed metadata.
 * @param lines - Output buffer; appended to in place.
 * @returns The buffer joined with newlines and trimmed.
 */
function formatAllEnginesResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  type Dict = Record<string, unknown>;
  const verification = data._needsHumanVerification as Dict | undefined;
  const synthesis = data._synthesis as Dict | undefined;
  const research = data._research as Dict | undefined;
  const sources = (data._sources as Dict[] | undefined) || [];

  if (verification) {
    const affected = Array.isArray(verification.engines)
      ? verification.engines.join(", ")
      : "one or more engines";
    const notice = String(
      verification.message ||
        "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
    );
    lines.push(
      "## Manual verification required",
      notice,
      `Engines: ${affected}`,
      "",
    );
  }

  // Fallback: without a synthesis answer, render each engine separately.
  if (!synthesis?.answer) {
    Object.entries(data)
      .filter(([key]) => !key.startsWith("_"))
      .forEach(([key, value]) => {
        lines.push(`\n## ${formatEngineName(key)}`);
        formatEngineResult(value as Dict, lines, 3);
      });
    return lines.join("\n").trim();
  }

  const isIterative = research?.mode === "iterative";
  if (research && isIterative) renderResearchHeader(lines, research);
  renderSynthesis(lines, synthesis, sources, 6);

  const synthesizedBy = String(
    synthesis.synthesizedBy || "configured synthesizer",
  );
  const footer = isIterative
    ? "*Research mode: iterative planning, source fetching, citation audit, and bundle output*\n"
    : `*Synthesized by ${synthesizedBy} from multi-engine results and fetched sources*\n`;
  lines.push(footer);
  return lines.join("\n").trim();
}
102
+
103
/**
 * Append a "## Research Run" summary block to `lines`.
 *
 * Always emits the heading, a status line and a trailing blank line. The stop
 * reason, evidence/question counters and bundle directory are added only when
 * the corresponding fields exist on `research`.
 *
 * @param lines - Output buffer; appended to in place.
 * @param research - Research metadata; reads `floor`, `manifest`, `bundle`.
 */
function renderResearchHeader(
  lines: string[],
  research: Record<string, unknown>,
): void {
  type Dict = Record<string, unknown>;
  const { floor, bundle, manifest } = research as {
    floor?: Dict;
    bundle?: Dict;
    manifest?: Dict;
  };
  const metrics = floor?.metrics as Dict | undefined;

  const status = floor?.floorMet ? "floor met" : "partial / floor unmet";
  lines.push("## Research Run", `- Status: ${status}`);

  if (manifest?.terminationReason) {
    lines.push(`- Stop reason: ${String(manifest.terminationReason)}`);
  }

  if (metrics) {
    // Missing or falsy counters print as 0.
    const count = (key: string): unknown => metrics[key] || 0;
    const openNote = metrics.openQuestions
      ? `, ${metrics.openQuestions} open`
      : "";
    lines.push(
      `- Evidence: ${count("fetchedOk")} fetched sources, ${count("primarySources")} primary/official, ${count("claims")} claims, ${count("cited")} citations`,
      `- Questions: ${count("closedQuestions")}/${count("totalQuestions")} closed${openNote}`,
    );
  }

  if (bundle?.dir) lines.push(`- Bundle: ${String(bundle.dir)}`);
  lines.push("");
}
128
+
129
/**
 * Format one engine's payload: an optional "manual verification required"
 * notice followed by that engine's answer and up to five sources.
 *
 * @param data - Single-engine payload; may carry `_needsHumanVerification`.
 * @param lines - Output buffer; appended to in place.
 * @returns The buffer joined with newlines and trimmed.
 */
function formatSingleEngineResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  const verification = data._needsHumanVerification as
    | Record<string, unknown>
    | undefined;

  if (verification) {
    const affected = Array.isArray(verification.engines)
      ? verification.engines.join(", ")
      : "this engine";
    const notice = String(
      verification.message ||
        "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
    );
    lines.push(
      "## Manual verification required",
      notice,
      `Engines: ${affected}`,
      "",
    );
  }

  formatEngineResult(data, lines, 5);
  return lines.join("\n").trim();
}
156
+
157
/**
 * Append one engine's answer and source links to `lines`.
 *
 * An `error` field short-circuits everything else: only "Error: …" is
 * emitted. Otherwise the answer (if any) is followed by a "Sources:" list of
 * markdown links, capped at `maxSources`; a source without a title is
 * labelled with its URL.
 *
 * @param data - Engine payload; reads `error`, `answer`, `sources`.
 * @param lines - Output buffer; appended to in place.
 * @param maxSources - Maximum number of source links to list.
 */
function formatEngineResult(
  data: Record<string, unknown>,
  lines: string[],
  maxSources: number,
): void {
  const { error, answer, sources } = data;

  if (error) {
    lines.push(`Error: ${error}`);
    return;
  }

  if (answer) lines.push(String(answer));

  if (!Array.isArray(sources) || sources.length === 0) return;

  lines.push("\nSources:");
  const shown = (sources as Array<Record<string, string>>).slice(0, maxSources);
  shown.forEach((source) => {
    const label = source.title || source.url;
    lines.push(`- [${label}](${source.url})`);
  });
}
182
+
183
+ /**
184
+ * Format deep research results with confidence metrics
185
+ */