@apmantza/greedysearch-pi 1.9.2 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +132 -2
- package/README.md +82 -47
- package/bin/cdp.mjs +1153 -1108
- package/bin/launch.mjs +9 -0
- package/bin/search.mjs +318 -81
- package/extractors/bing-copilot.mjs +48 -18
- package/extractors/chatgpt.mjs +553 -0
- package/extractors/common.mjs +213 -22
- package/extractors/consensus.mjs +655 -0
- package/extractors/consent.mjs +182 -18
- package/extractors/gemini.mjs +350 -217
- package/extractors/google-ai.mjs +129 -128
- package/extractors/logically.mjs +629 -0
- package/extractors/perplexity.mjs +547 -217
- package/extractors/selectors.mjs +3 -2
- package/extractors/semantic-scholar.mjs +219 -0
- package/package.json +8 -4
- package/skills/greedy-search/skill.md +20 -12
- package/src/fetcher.mjs +23 -1
- package/src/formatters/results.ts +185 -128
- package/src/search/browser-lifecycle.mjs +27 -5
- package/src/search/challenge-detect.mjs +205 -0
- package/src/search/chrome.mjs +653 -590
- package/src/search/constants.mjs +155 -39
- package/src/search/engines.mjs +114 -76
- package/src/search/fetch-source.mjs +566 -451
- package/src/search/pdf.mjs +68 -0
- package/src/search/progress.mjs +145 -0
- package/src/search/recovery.mjs +73 -45
- package/src/search/research.mjs +1419 -62
- package/src/search/scale-aware.mjs +93 -0
- package/src/search/simple-research.mjs +520 -0
- package/src/search/sources.mjs +52 -22
- package/src/search/synthesis-runner.mjs +105 -26
- package/src/search/synthesis.mjs +286 -246
- package/src/tools/greedy-search-handler.ts +129 -59
- package/src/tools/shared.ts +312 -186
- package/src/types.ts +110 -104
- package/test.mjs +537 -18
package/extractors/selectors.mjs
CHANGED
|
@@ -43,8 +43,9 @@ export const SELECTORS = {
|
|
|
43
43
|
gemini: {
|
|
44
44
|
input: "rich-textarea .ql-editor",
|
|
45
45
|
// Language-agnostic: use Material icon data attributes (work across locales)
|
|
46
|
-
copyButton: 'button:has(mat-icon[data-mat-icon-name="
|
|
47
|
-
sendButton:
|
|
46
|
+
copyButton: 'button:has(mat-icon[data-mat-icon-name="copy"])',
|
|
47
|
+
sendButton:
|
|
48
|
+
'button:has(mat-icon[data-mat-icon-name="arrow_upward"]), [data-test-id="send-button"], .send-button',
|
|
48
49
|
sourcesSidebarButton: "button.legacy-sources-sidebar-button",
|
|
49
50
|
sourcesExclude: ["gemini.google", "gstatic", "google.com/search"],
|
|
50
51
|
citationButtonPattern: 'button[aria-label*="citation from"]',
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// extractors/semantic-scholar.mjs
|
|
4
|
+
// Search Semantic Scholar without API keys and return paper/PDF sources for
|
|
5
|
+
// GreedySearch's source fetcher and research synthesizer.
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
buildEnvelope,
|
|
9
|
+
cdp,
|
|
10
|
+
formatAnswer,
|
|
11
|
+
getOrOpenTab,
|
|
12
|
+
handleError,
|
|
13
|
+
logStage,
|
|
14
|
+
outputJson,
|
|
15
|
+
parseArgs,
|
|
16
|
+
prepareArgs,
|
|
17
|
+
validateQuery,
|
|
18
|
+
waitForSelector,
|
|
19
|
+
} from "./common.mjs";
|
|
20
|
+
|
|
21
|
+
// Usage text passed to validateQuery() together with the prepared CLI arguments.
const USAGE = `Usage: node extractors/semantic-scholar.mjs "<query>" [--tab <prefix>]\n`;

// Selector main() waits on (via waitForSelector) before extracting papers.
const RESULT_SELECTOR = ".cl-paper-row" + "[data-paper-id]";
|
|
24
|
+
|
|
25
|
+
/**
 * Build the Semantic Scholar search URL for a query, sorted by relevance.
 *
 * Falsy queries are treated as the empty string. Every hyphen is replaced by
 * a space before the text is URL-encoded into the `q` parameter.
 *
 * @param {*} query - Search text; coerced with String().
 * @returns {string} Absolute https://www.semanticscholar.org/search URL.
 */
function semanticScholarSearchUrl(query) {
  // Semantic Scholar docs note hyphenated terms can reduce matches; use spaces.
  const dehyphenated = String(query || "")
    .split("-")
    .join(" ");
  const encoded = encodeURIComponent(dehyphenated);
  return "https://www.semanticscholar.org/search?q=" + encoded + "&sort=relevance";
}
|
|
30
|
+
|
|
31
|
+
/**
 * Best-effort attempt to get a cookie/consent banner out of the way.
 *
 * Evaluates a small script in the page (through the `cdp` "eval" command)
 * that walks a fixed, ordered selector list and clicks the first element that
 * matches. The script's return value is not used here, and a rejected `cdp`
 * call is deliberately ignored so a missing banner never aborts the search.
 *
 * @param {*} tab - Tab handle forwarded unchanged to `cdp`.
 * @returns {Promise<void>}
 */
async function dismissCookieBanner(tab) {
  const clickFirstMatchScript = String.raw`
    (() => {
      const selectors = [
        '.osano-cm-dialog__close',
        '.osano-cm-denyAll',
        '.osano-cm-accept-all',
        'button[aria-label*="Close" i]',
      ];
      for (const selector of selectors) {
        const btn = document.querySelector(selector);
        if (btn) { btn.click(); return selector; }
      }
      return null;
    })()
  `;

  try {
    await cdp(["eval", tab, clickFirstMatchScript]);
  } catch {
    // Intentionally swallowed: dismissing the banner is optional.
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Scrape paper rows from the Semantic Scholar results page loaded in `tab`.
 *
 * A script is evaluated in the page (through the `cdp` "eval" command). For
 * each of the first `limit` rows matching RESULT_SELECTOR it collects:
 * `rank` (1-based position), `paperId`, `title`, `url` (direct PDF link if
 * one is found, else the first external link, else the Semantic Scholar paper
 * page), `semanticScholarUrl`, `pdfUrl`, `externalUrl`, `externalLabel`,
 * `authors` (at most 8), `field`, `venue`, `date`, `tldr` and `citationCount`
 * (integer, or null when no number is found). The page returns the list as a
 * JSON string which is parsed here.
 *
 * @param {*} tab - Tab handle forwarded unchanged to `cdp`.
 * @param {{limit?: number}} [options] - `limit` caps the number of rows read
 *   (default 8). Values that are not a finite, non-negative number fall back
 *   to 8; fractional values are truncated.
 * @returns {Promise<Array<object>>} Parsed paper records; an empty array when
 *   the page result is not valid JSON or is not a JSON array.
 */
async function extractPapers(tab, { limit = 8 } = {}) {
  // `limit` is spliced into the page script as source text, so reduce it to a
  // plain non-negative integer instead of trusting the caller's value.
  const truncated = Math.trunc(Number(limit));
  const safeLimit = Number.isFinite(truncated) && truncated >= 0 ? truncated : 8;

  // Reuse the selector main() waits on so the two can never drift apart.
  const rowSelector = JSON.stringify(RESULT_SELECTOR);

  const raw = await cdp([
    "eval",
    tab,
    String.raw`
    ((limit) => {
      function clean(value) {
        return String(value || '').replace(/\s+/g, ' ').trim();
      }
      function absolutize(href) {
        try { return new URL(href, location.href).href; } catch { return ''; }
      }
      function isDirectPdf(url) {
        return /\.pdf(?:[?#]|$)/i.test(url) || /\/pdf\//i.test(url);
      }
      const rows = Array.from(document.querySelectorAll(${rowSelector})).slice(0, limit);
      return JSON.stringify(rows.map((row, index) => {
        const titleLink = row.querySelector('a[data-test-id="title-link"][href], a[href*="/paper/"][href]');
        const paperUrl = absolutize(titleLink?.getAttribute('href') || '');
        const title = clean(titleLink?.innerText || row.querySelector('.cl-paper-title')?.innerText || '');
        const authors = Array.from(row.querySelectorAll('[data-test-id="author-list"] a, .cl-paper-authors a'))
          .map((a) => clean(a.innerText))
          .filter(Boolean)
          .slice(0, 8);
        const field = clean(row.querySelector('.cl-paper-fos')?.innerText || '');
        const venue = clean(row.querySelector('[data-test-id="normalized-venue-link"], .cl-paper-venue')?.innerText || '');
        const date = clean(row.querySelector('.cl-paper-pubdates')?.innerText || '');
        const tldrNode = row.querySelector('.tldr-abstract-replacement');
        let tldr = clean(tldrNode?.innerText || '');
        tldr = tldr.replace(/^TLDR\s*/i, '').replace(/\s*Expand$/i, '').trim();
        const citationNode = row.querySelector('[data-test-id="total-citations-stat"]');
        const citationLabel = citationNode?.getAttribute('aria-label') || citationNode?.innerText || '';
        const citationMatch = clean(citationLabel).match(/[\d,]+/);
        const citationCount = citationMatch ? Number.parseInt(citationMatch[0].replace(/,/g, ''), 10) : null;
        const externalLinks = Array.from(row.querySelectorAll('a[data-test-id="paper-link"][href], a.cl-paper-view-paper[href]'))
          .map((a) => ({
            url: absolutize(a.getAttribute('href')),
            label: clean(a.innerText),
          }))
          .filter((link) => link.url);
        const directPdf = externalLinks.find((link) => isDirectPdf(link.url));
        const primaryExternal = directPdf || externalLinks[0] || null;
        const sourceUrl = primaryExternal?.url || paperUrl;
        return {
          rank: index + 1,
          paperId: row.getAttribute('data-paper-id') || '',
          title,
          url: sourceUrl,
          semanticScholarUrl: paperUrl,
          pdfUrl: directPdf?.url || '',
          externalUrl: primaryExternal?.url || '',
          externalLabel: primaryExternal?.label || '',
          authors,
          field,
          venue,
          date,
          tldr,
          citationCount,
        };
      }));
    })(${safeLimit})
    `,
  ]);

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return [];
  }
  // Valid JSON that is not a list (e.g. `null`) would break callers that
  // immediately call array methods on the result.
  return Array.isArray(parsed) ? parsed : [];
}
|
|
122
|
+
|
|
123
|
+
/**
 * Render extracted papers as a numbered plain-text list.
 *
 * Each entry is "<rank>. <title>", optionally followed by " — " and the
 * available metadata (authors, venue, date, citation count) joined with
 * " · ", and optionally a second "TLDR:" line. Entries are separated by a
 * blank line.
 *
 * @param {Array<object>} papers - Records as produced by extractPapers().
 * @returns {string} The summary, or a fixed message when `papers` is empty.
 */
function formatPaperSummary(papers) {
  if (!papers.length) {
    return "Semantic Scholar returned no paper results.";
  }

  function describe(paper) {
    const details = [];
    if (paper.authors?.length) {
      details.push(paper.authors.join(", "));
    }
    for (const key of ["venue", "date"]) {
      if (paper[key]) details.push(paper[key]);
    }
    if (Number.isFinite(paper.citationCount)) {
      details.push(`${paper.citationCount.toLocaleString()} citations`);
    }

    let entry = `${paper.rank}. ${paper.title}`;
    if (details.length) entry += ` — ${details.join(" · ")}`;
    if (paper.tldr) entry += `\n TLDR: ${paper.tldr}`;
    return entry;
  }

  return papers.map((paper) => describe(paper)).join("\n\n");
}
|
|
140
|
+
|
|
141
|
+
/**
 * CLI entry point for the Semantic Scholar extractor.
 *
 * Flow: prepare and validate arguments, open (or reuse) a tab, navigate to
 * the search URL, try to dismiss the cookie banner, wait for result rows,
 * extract the papers and emit one JSON payload through outputJson().
 *
 * When no result row appears within the wait, the page text is checked for
 * verification keywords; a match marks the envelope as blocked by
 * verification before throwing. Every error thrown inside the try block is
 * passed to handleError() together with an envelope carrying the elapsed time.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = await prepareArgs(process.argv.slice(2));
  validateQuery(args, USAGE);
  const { query, tabPrefix, short } = parseArgs(args);
  const startTime = Date.now();
  const env = {
    engine: "semantic-scholar",
    mode: process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless",
    blockedBy: null,
    verificationResult: null,
    inputReady: null,
  };

  try {
    if (!tabPrefix) await cdp(["list"]);
    const tab = await getOrOpenTab(tabPrefix);

    logStage(env, "nav", startTime);
    await cdp(["nav", tab, semanticScholarSearchUrl(query)], 25000);
    // Short fixed pause after navigation before touching the page.
    await new Promise((resolve) => setTimeout(resolve, 800));

    logStage(env, "consent", startTime);
    await dismissCookieBanner(tab);

    logStage(env, "results-wait", startTime);
    env.inputReady = await waitForSelector(tab, RESULT_SELECTOR, 15000, 500);
    if (!env.inputReady) {
      // No rows showed up: read the page text to tell a challenge page apart
      // from a plain miss. A failed read counts as empty text.
      const pageText = await cdp([
        "eval",
        tab,
        "document.body?.innerText || ''",
      ]).catch(() => "");
      const looksChallenged = /captcha|cloudflare|verify|robot|blocked/i.test(pageText);
      if (!looksChallenged) {
        throw new Error("Semantic Scholar results not found");
      }
      env.blockedBy = "verification";
      env.verificationResult = "needs-human";
      throw new Error(
        "Semantic Scholar verification required — please solve it in the visible browser window",
      );
    }

    logStage(env, "extract", startTime);
    const papers = await extractPapers(tab, { limit: short ? 5 : 8 });

    // Only papers that have both a title and a URL become sources.
    const sources = [];
    for (const paper of papers) {
      if (!(paper.title && paper.url)) continue;
      sources.push({
        title: paper.pdfUrl ? `${paper.title} (PDF)` : paper.title,
        url: paper.url,
        semanticScholarUrl: paper.semanticScholarUrl,
        paperId: paper.paperId,
        citationCount: paper.citationCount,
        venue: paper.venue,
        year: paper.date,
      });
    }

    const summary = formatPaperSummary(papers);
    const durationMs = Date.now() - startTime;
    outputJson({
      answer: formatAnswer(summary, short),
      sources,
      query,
      url: semanticScholarSearchUrl(query),
      papers,
      _envelope: buildEnvelope({ ...env, durationMs }),
    });
  } catch (error) {
    const failureEnvelope = buildEnvelope({
      ...env,
      durationMs: Date.now() - startTime,
    });
    handleError(error, failureEnvelope);
  }
}

main();
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@apmantza/greedysearch-pi",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "Headless multi-engine AI search (Perplexity,
|
|
3
|
+
"version": "2.1.2",
|
|
4
|
+
"description": "Headless multi-engine AI search (Perplexity, Google AI, ChatGPT, Gemini) via browser automation. NO API KEYS needed. Grounded all-engine search fetches sources by default, with optional configurable synthesis and deep research.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"pi-package"
|
|
@@ -15,11 +15,14 @@
|
|
|
15
15
|
"license": "MIT",
|
|
16
16
|
"scripts": {
|
|
17
17
|
"test": "node test.mjs",
|
|
18
|
+
"test:unit": "node test.mjs unit",
|
|
18
19
|
"test:quick": "node test.mjs quick",
|
|
19
20
|
"test:smoke": "node test.mjs smoke",
|
|
20
21
|
"test:bash": "./test.sh",
|
|
21
22
|
"test:bash:quick": "./test.sh quick",
|
|
22
|
-
"test:bash:smoke": "./test.sh smoke"
|
|
23
|
+
"test:bash:smoke": "./test.sh smoke",
|
|
24
|
+
"lint": "node scripts/lint.mjs",
|
|
25
|
+
"check:lockfile": "node scripts/check-lockfile.mjs"
|
|
23
26
|
},
|
|
24
27
|
"engines": {
|
|
25
28
|
"node": ">=20.11.0"
|
|
@@ -46,7 +49,8 @@
|
|
|
46
49
|
"dependencies": {
|
|
47
50
|
"@mozilla/readability": "^0.6.0",
|
|
48
51
|
"@sinclair/typebox": "^0.34.48",
|
|
49
|
-
"jsdom": "^
|
|
52
|
+
"jsdom": "^29.1.1",
|
|
53
|
+
"pdf-parse": "^2.4.5",
|
|
50
54
|
"turndown": "^7.1.2"
|
|
51
55
|
},
|
|
52
56
|
"peerDependencies": {
|
|
@@ -1,12 +1,20 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: greedy-search
|
|
3
|
-
description: Web
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
`greedy_search({ query, engine: "all"|"perplexity"|"
|
|
7
|
-
|
|
8
|
-
**
|
|
9
|
-
|
|
10
|
-
**
|
|
11
|
-
|
|
12
|
-
**
|
|
1
|
+
---
|
|
2
|
+
name: greedy-search
|
|
3
|
+
description: Web/search plus opt-in research via Perplexity, Google AI, ChatGPT, Gemini, Semantic Scholar, and Logically. Grounded all-engine search fetches sources by default; optional configurable synthesis; deep research as separate workflow. Configurable via ~/.pi/greedyconfig. Bing Copilot available for signed-in users. Current docs, recent changes, dependency choices. NOT codebase search.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
`greedy_search({ query, engine: "all"|"perplexity"|"google"|"chatgpt"|"gemini"|"semantic-scholar"|"logically"|"bing", synthesize?: bool, synthesizer?: "gemini"|"chatgpt", depth?: "research", breadth: 1-5, iterations: 1-3, maxSources: 3-12, researchOutDir?: string, writeResearchBundle?: bool, visible: bool })`
|
|
7
|
+
|
|
8
|
+
**Modes:** individual engine search · grounded `engine:"all"` search with fetched sources · optional `synthesize:true` using the configured synthesizer over all-engine results · `depth:"research"` for the iterative deep-research workflow.
|
|
9
|
+
|
|
10
|
+
**Config:** `~/.pi/greedyconfig` supports `{ "engines": ["perplexity", "google", "chatgpt", "gemini"], "synthesizer": "gemini" }` by default. `semantic-scholar` and `logically` are opt-in academic/research engines — add them to `engines` only when you want academic paper discovery or research-assistant workflows in the normal all-search fan-out. Without explicit opt-in, `engine:"all"` excludes them because their results are noisy for casual web search; they shine in `depth:"research"` mode. Any configured engine can participate in `engine:"all"`; deep research child searches reuse the same configured `engines` list and stdin-safe query passing. Normal all-search synthesis remains controlled separately by `synthesizer`; research planning/final synthesis uses Gemini.
|
|
11
|
+
|
|
12
|
+
**Compatibility:** legacy `depth:"fast"|"standard"|"deep"` is still accepted. `fast` skips source fetching; `standard`/`deep` alias `synthesize:true`. Prefer `synthesize:true`, optional `synthesizer`, and `depth:"research"` going forward.
|
|
13
|
+
|
|
14
|
+
**Research output:** `depth:"research"` writes a dataroom-style bundle by default under `.pi/greedysearch-research/<timestamp>_<query>/` with `STATUS.md`, `OUTLINE.md`, `reports/SUMMARY.md`, `reports/CLAIMS.md`, `reports/GAPS.md`, `sources/`, and `data/manifest.json`. Pass `researchOutDir` to choose the directory or `writeResearchBundle:false` to disable disk output.
|
|
15
|
+
|
|
16
|
+
**Scale-aware research:** When `breadth` and `iterations` are not explicitly set, the classifier auto-detects query complexity. Simple queries ("what is X") use a fast single-pass path (~70% faster). Moderate queries get tighter breadth/iterations. Complex queries use the full loop. Explicit `breadth`/`iterations` always override the classifier.
|
|
17
|
+
|
|
18
|
+
**Auto-recovery:** Headless default. Bing/Perplexity auto-retry visible on CF block. Manual CAPTCHA → visible stays open; solve then rerun.
|
|
19
|
+
|
|
20
|
+
**CDP safety:** Use `bin/cdp-greedy.mjs` only. Never raw `bin/cdp.mjs`.
|
package/src/fetcher.mjs
CHANGED
|
@@ -63,6 +63,10 @@ const PRIVATE_URL_PATTERNS = [
|
|
|
63
63
|
* @param {string} url - URL to check
|
|
64
64
|
* @returns {{blocked: boolean, reason?: string}}
|
|
65
65
|
*/
|
|
66
|
+
/**
 * Build a fresh headers object from DEFAULT_HEADERS with caller overrides.
 *
 * Keys in `overrides` replace same-named keys from DEFAULT_HEADERS; neither
 * input object is mutated.
 *
 * @param {object} [overrides={}] - Header entries to add or replace.
 * @returns {object} New object holding the merged headers.
 */
export function defaultFetchHeaders(overrides = {}) {
  const headers = {
    ...DEFAULT_HEADERS,
    ...overrides,
  };
  return headers;
}
|
|
69
|
+
|
|
66
70
|
export function isPrivateUrl(url) {
|
|
67
71
|
try {
|
|
68
72
|
const parsed = new URL(url);
|
|
@@ -191,6 +195,21 @@ export async function fetchSourceHttp(url, options = {}) {
|
|
|
191
195
|
const finalUrl = response.url;
|
|
192
196
|
const lastModified = response.headers.get("last-modified") || "";
|
|
193
197
|
|
|
198
|
+
// SSRF defense: re-validate the post-redirect finalUrl. A malicious
|
|
199
|
+
// server could redirect our fetch to a private IP, bypassing the
|
|
200
|
+
// initial isPrivateUrl() check on the original URL.
|
|
201
|
+
const finalPrivateCheck = isPrivateUrl(finalUrl);
|
|
202
|
+
if (finalPrivateCheck.blocked) {
|
|
203
|
+
return {
|
|
204
|
+
ok: false,
|
|
205
|
+
url,
|
|
206
|
+
finalUrl,
|
|
207
|
+
status: response.status,
|
|
208
|
+
error: `Blocked: ${finalPrivateCheck.reason}`,
|
|
209
|
+
needsBrowser: false,
|
|
210
|
+
};
|
|
211
|
+
}
|
|
212
|
+
|
|
194
213
|
// Handle raw text/plain from GitHub (raw file content)
|
|
195
214
|
let isRawGitHub = false;
|
|
196
215
|
try {
|
|
@@ -594,7 +613,10 @@ export function checkContentQuality(extracted) {
|
|
|
594
613
|
desc: "access denied in content",
|
|
595
614
|
},
|
|
596
615
|
{
|
|
597
|
-
check: () =>
|
|
616
|
+
check: () =>
|
|
617
|
+
/^\s{0,10}sign\s{1,5}in\s{0,10}$|^\s{0,10}log\s{1,5}in\s{0,10}$/im.test(
|
|
618
|
+
markdown,
|
|
619
|
+
),
|
|
598
620
|
desc: "login form only",
|
|
599
621
|
},
|
|
600
622
|
];
|
|
@@ -1,128 +1,185 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Search results formatters
|
|
3
|
-
* Extracted from index.ts
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { formatEngineName } from "../utils/helpers.js";
|
|
7
|
-
import { renderSynthesis } from "./synthesis.js";
|
|
8
|
-
|
|
9
|
-
/**
|
|
10
|
-
*
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if (
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
function
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Search results formatters
|
|
3
|
+
* Extracted from index.ts
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { formatEngineName } from "../utils/helpers.js";
|
|
7
|
+
import { renderSynthesis } from "./synthesis.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Maximum line length for any text passed to the TUI. Lines longer than
 * this are truncated with an ellipsis. The TUI's Text.render wraps at the
 * terminal width, but it crashes with
 * "Rendered line N exceeds terminal width (W > W-4)"
 * when a single line is wider than its own internal render width. Long
 * lines (e.g. a markdown table row inside a chatgpt synthesis answer) that
 * don't have a \n break would otherwise produce this crash. The safety
 * net below trims individual lines before they reach the TUI.
 */
const MAX_LINE_WIDTH = 800;

/**
 * Truncate every "\n"-separated line of `text` that is longer than
 * `maxWidth` UTF-16 code units, ending the shortened line with "…".
 *
 * A truncated line is at most `maxWidth` code units long including the
 * ellipsis. The cut never lands between the two halves of a surrogate pair:
 * if it would, the dangling high surrogate is dropped as well, so the result
 * never gains a lone surrogate from truncation.
 *
 * @param text - Text to sanitise; lines within the limit are left untouched.
 * @param maxWidth - Per-line limit in UTF-16 code units (default MAX_LINE_WIDTH).
 * @returns The text with over-long lines shortened.
 */
function _truncateLongLines(text: string, maxWidth = MAX_LINE_WIDTH): string {
  // Leave room for the one-character ellipsis.
  const keep = Math.max(0, maxWidth - 1);

  return text
    .split("\n")
    .map((line) => {
      if (line.length <= maxWidth) return line;

      let head = line.slice(0, keep);
      // slice() counts code units; if the last kept unit is a high surrogate
      // its low half was cut off, so drop it instead of emitting half a character.
      const lastUnit = head.charCodeAt(head.length - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        head = head.slice(0, -1);
      }
      return head + "…";
    })
    .join("\n");
}
|
|
30
|
+
|
|
31
|
+
/**
 * Render a search payload as text for display.
 *
 * `engine === "all"` selects the multi-engine formatter; any other value
 * selects the single-engine formatter. The rendered text is then passed
 * through `_truncateLongLines` before being returned.
 *
 * @param engine - Engine identifier the search was run with.
 * @param data - Raw result payload for that search.
 * @returns The formatted, line-length-limited text.
 */
export function formatResults(
  engine: string,
  data: Record<string, unknown>,
): string {
  const lines: string[] = [];
  const rendered =
    engine === "all"
      ? formatAllEnginesResult(data, lines)
      : formatSingleEngineResult(data, lines);
  return _truncateLongLines(rendered);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Format multi-engine results with synthesis.
 *
 * Output order:
 *  1. a "Manual verification required" notice when
 *     `data._needsHumanVerification` is set;
 *  2. if `data._synthesis.answer` is present: an optional research header
 *     (only when `data._research.mode === "iterative"`), the synthesis
 *     rendered by `renderSynthesis`, and an attribution line — then return;
 *  3. otherwise one "## <engine>" section per key of `data` that does not
 *     start with "_".
 *
 * @param data - Payload keyed by engine name plus underscore-prefixed metadata.
 * @param lines - Output buffer; rendered lines are appended to it.
 * @returns `lines` joined with "\n" and trimmed.
 */
function formatAllEnginesResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  const synthesis = data._synthesis as Record<string, unknown> | undefined;
  const dedupedSources = data._sources as
    | Array<Record<string, unknown>>
    | undefined;
  const needsHuman = data._needsHumanVerification as
    | Record<string, unknown>
    | undefined;
  const research = data._research as Record<string, unknown> | undefined;

  if (needsHuman) {
    const engines = Array.isArray(needsHuman.engines)
      ? needsHuman.engines.join(", ")
      : "one or more engines";
    lines.push("## Manual verification required");
    lines.push(
      String(
        needsHuman.message ||
          "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
      ),
    );
    lines.push(`Engines: ${engines}`);
    lines.push("");
  }

  // If we have a synthesis answer, render it
  if (synthesis?.answer) {
    if (research?.mode === "iterative") renderResearchHeader(lines, research);
    renderSynthesis(lines, synthesis, dedupedSources || [], 6);
    const synthesizedBy = String(
      synthesis.synthesizedBy || "configured synthesizer",
    );
    lines.push(
      research?.mode === "iterative"
        ? "*Research mode: iterative planning, source fetching, citation audit, and bundle output*\n"
        : `*Synthesized by ${synthesizedBy} from multi-engine results and fetched sources*\n`,
    );
    return lines.join("\n").trim();
  }

  // Fallback: render individual engine results
  for (const [eng, result] of Object.entries(data)) {
    if (eng.startsWith("_")) continue;
    lines.push(`\n## ${formatEngineName(eng)}`);
    // An engine entry can be null/undefined or a primitive; formatEngineResult
    // dereferences its argument, so handle that here instead of throwing and
    // losing every other engine's output.
    if (result === null || typeof result !== "object") {
      lines.push("No result returned.");
      continue;
    }
    formatEngineResult(result as Record<string, unknown>, lines, 3);
  }

  return lines.join("\n").trim();
}
|
|
102
|
+
|
|
103
|
+
/**
 * Append a "## Research Run" summary block to `lines`.
 *
 * Always emits the heading, a status line driven by `research.floor.floorMet`
 * and a trailing blank line. The stop reason, the evidence/question counters
 * and the bundle directory are emitted only when `research.manifest`,
 * `research.floor.metrics` and `research.bundle.dir` respectively are present.
 * Missing or falsy counters are shown as 0.
 *
 * @param lines - Output buffer; mutated in place.
 * @param research - Research metadata object from the result payload.
 */
function renderResearchHeader(
  lines: string[],
  research: Record<string, unknown>,
): void {
  type Section = Record<string, unknown> | undefined;
  const floor = research.floor as Section;
  const metrics = floor?.metrics as Section;
  const bundle = research.bundle as Section;
  const manifest = research.manifest as Section;

  const statusText = floor?.floorMet ? "floor met" : "partial / floor unmet";
  const header: string[] = ["## Research Run", `- Status: ${statusText}`];

  if (manifest?.terminationReason) {
    header.push(`- Stop reason: ${String(manifest.terminationReason)}`);
  }

  if (metrics) {
    const openSuffix = metrics.openQuestions
      ? `, ${metrics.openQuestions} open`
      : "";
    header.push(
      `- Evidence: ${metrics.fetchedOk || 0} fetched sources, ${metrics.primarySources || 0} primary/official, ${metrics.claims || 0} claims, ${metrics.cited || 0} citations`,
      `- Questions: ${metrics.closedQuestions || 0}/${metrics.totalQuestions || 0} closed${openSuffix}`,
    );
  }

  if (bundle?.dir) {
    header.push(`- Bundle: ${String(bundle.dir)}`);
  }

  header.push("");
  lines.push(...header);
}
|
|
128
|
+
|
|
129
|
+
/**
 * Render the payload of a single-engine search.
 *
 * When `data._needsHumanVerification` is set, a "Manual verification
 * required" notice (message, affected engines, blank line) is written first.
 * The engine's answer and up to five sources are then appended via
 * `formatEngineResult`.
 *
 * @param data - Result payload for one engine.
 * @param lines - Output buffer; rendered lines are appended to it.
 * @returns `lines` joined with "\n" and trimmed.
 */
function formatSingleEngineResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  const verification = data._needsHumanVerification as
    | Record<string, unknown>
    | undefined;

  if (verification) {
    const affected = Array.isArray(verification.engines)
      ? verification.engines.join(", ")
      : "this engine";
    const notice = String(
      verification.message ||
        "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
    );
    lines.push(
      "## Manual verification required",
      notice,
      `Engines: ${affected}`,
      "",
    );
  }

  formatEngineResult(data, lines, 5);
  const joined = lines.join("\n");
  return joined.trim();
}
|
|
156
|
+
|
|
157
|
+
/**
 * Turn an engine's `error` value into readable text.
 *
 * Strings are used as-is; Error instances and objects with a string
 * `message` yield that message; other objects are JSON-encoded when possible
 * so they do not degrade to "[object Object]".
 */
function _describeEngineError(error: unknown): string {
  if (typeof error === "string") return error;
  if (error instanceof Error) return error.message;
  if (error !== null && typeof error === "object") {
    const message = (error as Record<string, unknown>).message;
    if (typeof message === "string" && message) return message;
    try {
      return JSON.stringify(error) ?? String(error);
    } catch {
      // Not serialisable (e.g. circular): fall through to String().
    }
  }
  return String(error);
}

/**
 * Format a single engine's result (answer + sources).
 *
 * - A truthy `data.error` produces one "Error: …" line and nothing else.
 * - A truthy `data.answer` is appended as text.
 * - Up to `maxSources` leading entries of `data.sources` are listed under a
 *   "Sources:" heading as markdown links. An entry without a URL is listed by
 *   title only; an entry that is not an object, or has neither title nor URL,
 *   is skipped. The heading is omitted when no entry is renderable.
 *
 * @param data - One engine's result object.
 * @param lines - Output buffer; rendered lines are appended to it.
 * @param maxSources - Maximum number of source entries to consider.
 */
function formatEngineResult(
  data: Record<string, unknown>,
  lines: string[],
  maxSources: number,
): void {
  if (data.error) {
    lines.push(`Error: ${_describeEngineError(data.error)}`);
    return;
  }

  if (data.answer) {
    lines.push(String(data.answer));
  }

  const sources = data.sources;
  if (!Array.isArray(sources) || sources.length === 0) return;

  const toText = (value: unknown): string =>
    value === null || value === undefined ? "" : String(value);

  const rendered: string[] = [];
  for (const entry of sources.slice(0, maxSources)) {
    if (entry === null || typeof entry !== "object") continue;
    const source = entry as Record<string, unknown>;
    const title = toText(source.title);
    const url = toText(source.url);
    if (url) {
      rendered.push(`- [${title || url}](${url})`);
    } else if (title) {
      // No link target: show the title alone rather than "(undefined)".
      rendered.push(`- ${title}`);
    }
  }

  if (rendered.length > 0) {
    lines.push("\nSources:", ...rendered);
  }
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Format deep research results with confidence metrics
|
|
185
|
+
*/
|