@apmantza/greedysearch-pi 1.9.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -1
- package/README.md +82 -47
- package/bin/cdp.mjs +1153 -1108
- package/bin/launch.mjs +9 -0
- package/bin/search.mjs +197 -68
- package/extractors/bing-copilot.mjs +42 -4
- package/extractors/chatgpt.mjs +436 -0
- package/extractors/common.mjs +155 -21
- package/extractors/consensus.mjs +655 -0
- package/extractors/gemini.mjs +335 -217
- package/extractors/logically.mjs +567 -0
- package/extractors/selectors.mjs +3 -2
- package/extractors/semantic-scholar.mjs +219 -0
- package/package.json +7 -3
- package/skills/greedy-search/skill.md +9 -3
- package/src/fetcher.mjs +8 -1
- package/src/formatters/results.ts +163 -128
- package/src/search/browser-lifecycle.mjs +27 -5
- package/src/search/chrome.mjs +653 -590
- package/src/search/constants.mjs +150 -39
- package/src/search/engines.mjs +114 -76
- package/src/search/fetch-source.mjs +566 -451
- package/src/search/pdf.mjs +68 -0
- package/src/search/recovery.mjs +51 -45
- package/src/search/research.mjs +1059 -61
- package/src/search/sources.mjs +52 -22
- package/src/search/synthesis-runner.mjs +105 -26
- package/src/search/synthesis.mjs +286 -246
- package/src/tools/greedy-search-handler.ts +124 -52
- package/src/tools/shared.ts +187 -186
- package/src/types.ts +110 -104
- package/test.mjs +377 -6
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// extractors/semantic-scholar.mjs
|
|
4
|
+
// Search Semantic Scholar without API keys and return paper/PDF sources for
|
|
5
|
+
// GreedySearch's source fetcher and research synthesizer.
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
buildEnvelope,
|
|
9
|
+
cdp,
|
|
10
|
+
formatAnswer,
|
|
11
|
+
getOrOpenTab,
|
|
12
|
+
handleError,
|
|
13
|
+
logStage,
|
|
14
|
+
outputJson,
|
|
15
|
+
parseArgs,
|
|
16
|
+
prepareArgs,
|
|
17
|
+
validateQuery,
|
|
18
|
+
waitForSelector,
|
|
19
|
+
} from "./common.mjs";
|
|
20
|
+
|
|
21
|
+
// CLI usage text; main() hands it to validateQuery() together with the parsed args.
const USAGE =
  'Usage: node extractors/semantic-scholar.mjs "<query>" [--tab <prefix>]\n';
// CSS selector for one search-result row; main() waits for it before extracting.
const RESULT_SELECTOR = ".cl-paper-row[data-paper-id]";
|
|
24
|
+
|
|
25
|
+
/**
 * Build the Semantic Scholar search-results URL for a query.
 *
 * @param {string} query - Raw query text; falsy values are treated as "".
 * @returns {string} Search URL with the encoded query, sorted by relevance.
 */
function semanticScholarSearchUrl(query) {
  const text = String(query || "");
  // Hyphens become spaces: the Semantic Scholar docs note that hyphenated
  // terms can reduce matches.
  const dehyphenated = text.split("-").join(" ");
  const encoded = encodeURIComponent(dehyphenated);
  return `https://www.semanticscholar.org/search?q=${encoded}&sort=relevance`;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Best-effort dismissal of the cookie/consent dialog on the current page.
 *
 * Evaluates a small script in the tab that clicks the first element matching
 * one of a fixed list of selectors. Any rejection from `cdp` is swallowed, so
 * a missing banner or a failed eval never interrupts the search.
 *
 * @param {string} tab - Tab identifier forwarded to `cdp`.
 * @returns {Promise<void>}
 */
async function dismissCookieBanner(tab) {
  // Page-side script: returns the selector that was clicked, or null.
  const clickFirstMatch = String.raw`
    (() => {
      const selectors = [
        '.osano-cm-dialog__close',
        '.osano-cm-denyAll',
        '.osano-cm-accept-all',
        'button[aria-label*="Close" i]',
      ];
      for (const selector of selectors) {
        const btn = document.querySelector(selector);
        if (btn) { btn.click(); return selector; }
      }
      return null;
    })()
  `;
  const pending = cdp(["eval", tab, clickFirstMatch]);
  await pending.catch(() => null);
}
|
|
52
|
+
|
|
53
|
+
/**
 * Scrape paper rows from the Semantic Scholar results page loaded in `tab`.
 *
 * A scraping function is evaluated in the page through the `cdp` "eval"
 * command; it returns a JSON string that is parsed here.
 *
 * Each record carries: rank, paperId, title, url, semanticScholarUrl, pdfUrl,
 * externalUrl, externalLabel, authors, field, venue, date, tldr, citationCount.
 * `url` prefers a direct PDF link, then the first external link, then the
 * Semantic Scholar paper page.
 *
 * @param {string} tab - Tab identifier forwarded to `cdp`.
 * @param {{limit?: number}} [options] - `limit` caps how many rows are read (default 8).
 * @returns {Promise<object[]>} Paper records; an empty array when the page
 *   output cannot be parsed as a JSON array.
 */
async function extractPapers(tab, { limit = 8 } = {}) {
  // `limit` is spliced into the page script as source text, so only a plain
  // non-negative integer is allowed through.
  const numericLimit = Number(limit);
  const rowLimit = Number.isFinite(numericLimit)
    ? Math.max(0, Math.trunc(numericLimit))
    : 8;

  const raw = await cdp([
    "eval",
    tab,
    String.raw`
      ((limit) => {
        function clean(value) {
          return String(value || '').replace(/\s+/g, ' ').trim();
        }
        function absolutize(href) {
          // An empty href would resolve to the current (search) page URL,
          // which is not a paper link, so report it as missing instead.
          const target = String(href || '').trim();
          if (!target) return '';
          try { return new URL(target, location.href).href; } catch { return ''; }
        }
        function isDirectPdf(url) {
          return /\.pdf(?:[?#]|$)/i.test(url) || /\/pdf\//i.test(url);
        }
        const rows = Array.from(document.querySelectorAll('.cl-paper-row[data-paper-id]')).slice(0, limit);
        return JSON.stringify(rows.map((row, index) => {
          const titleLink = row.querySelector('a[data-test-id="title-link"][href], a[href*="/paper/"][href]');
          const paperUrl = absolutize(titleLink?.getAttribute('href') || '');
          const title = clean(titleLink?.innerText || row.querySelector('.cl-paper-title')?.innerText || '');
          const authors = Array.from(row.querySelectorAll('[data-test-id="author-list"] a, .cl-paper-authors a'))
            .map((a) => clean(a.innerText))
            .filter(Boolean)
            .slice(0, 8);
          const field = clean(row.querySelector('.cl-paper-fos')?.innerText || '');
          const venue = clean(row.querySelector('[data-test-id="normalized-venue-link"], .cl-paper-venue')?.innerText || '');
          const date = clean(row.querySelector('.cl-paper-pubdates')?.innerText || '');
          const tldrNode = row.querySelector('.tldr-abstract-replacement');
          let tldr = clean(tldrNode?.innerText || '');
          // Strip the leading "TLDR" label and trailing "Expand" control text.
          tldr = tldr.replace(/^TLDR\s*/i, '').replace(/\s*Expand$/i, '').trim();
          const citationNode = row.querySelector('[data-test-id="total-citations-stat"]');
          const citationLabel = citationNode?.getAttribute('aria-label') || citationNode?.innerText || '';
          const citationMatch = clean(citationLabel).match(/[\d,]+/);
          const citationCount = citationMatch ? Number.parseInt(citationMatch[0].replace(/,/g, ''), 10) : null;
          const externalLinks = Array.from(row.querySelectorAll('a[data-test-id="paper-link"][href], a.cl-paper-view-paper[href]'))
            .map((a) => ({
              url: absolutize(a.getAttribute('href')),
              label: clean(a.innerText),
            }))
            .filter((link) => link.url);
          const directPdf = externalLinks.find((link) => isDirectPdf(link.url));
          const primaryExternal = directPdf || externalLinks[0] || null;
          const sourceUrl = primaryExternal?.url || paperUrl;
          return {
            rank: index + 1,
            paperId: row.getAttribute('data-paper-id') || '',
            title,
            url: sourceUrl,
            semanticScholarUrl: paperUrl,
            pdfUrl: directPdf?.url || '',
            externalUrl: primaryExternal?.url || '',
            externalLabel: primaryExternal?.label || '',
            authors,
            field,
            venue,
            date,
            tldr,
            citationCount,
          };
        }));
      })(${rowLimit})
    `,
  ]);

  try {
    const parsed = JSON.parse(raw);
    // Callers use array methods on the result, so anything else
    // (e.g. a JSON `null`) is reported as "no papers".
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Render extracted papers as a plain-text numbered list.
 *
 * Each entry is "<rank>. <title>", optionally followed by " — " and the
 * available metadata (authors, venue, date, citation count) joined with " · ",
 * and optionally by a TLDR line. Entries are separated by a blank line.
 *
 * @param {object[]} papers - Paper records as produced by extractPapers().
 * @returns {string} The summary, or a fixed message when there are no papers.
 */
function formatPaperSummary(papers) {
  if (!papers.length) return "Semantic Scholar returned no paper results.";

  const describe = (paper) => {
    const details = [];
    if (paper.authors?.length) {
      details.push(paper.authors.join(", "));
    }
    if (paper.venue) {
      details.push(paper.venue);
    }
    if (paper.date) {
      details.push(paper.date);
    }
    if (Number.isFinite(paper.citationCount)) {
      details.push(`${paper.citationCount.toLocaleString()} citations`);
    }

    let entry = `${paper.rank}. ${paper.title}`;
    if (details.length) {
      entry += ` — ${details.join(" · ")}`;
    }
    if (paper.tldr) {
      entry += `\n TLDR: ${paper.tldr}`;
    }
    return entry;
  };

  return papers.map((paper) => describe(paper)).join("\n\n");
}
|
|
140
|
+
|
|
141
|
+
/**
 * CLI entry point: run one Semantic Scholar search and emit the result as JSON.
 *
 * Stages (each logged through logStage): navigate to the search URL, dismiss
 * the consent banner, wait for result rows, extract papers. Any failure inside
 * the staged section is passed to handleError together with an envelope that
 * records the engine, mode, block status and elapsed time.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cliArgs = await prepareArgs(process.argv.slice(2));
  validateQuery(cliArgs, USAGE);
  const { query, tabPrefix, short } = parseArgs(cliArgs);
  const startTime = Date.now();
  const visible = process.env.GREEDY_SEARCH_VISIBLE === "1";
  const env = {
    engine: "semantic-scholar",
    mode: visible ? "visible" : "headless",
    blockedBy: null,
    verificationResult: null,
    inputReady: null,
  };
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  try {
    if (!tabPrefix) {
      await cdp(["list"]);
    }
    const tab = await getOrOpenTab(tabPrefix);

    logStage(env, "nav", startTime);
    await cdp(["nav", tab, semanticScholarSearchUrl(query)], 25000);
    await pause(800);

    logStage(env, "consent", startTime);
    await dismissCookieBanner(tab);

    logStage(env, "results-wait", startTime);
    const resultsReady = await waitForSelector(tab, RESULT_SELECTOR, 15000, 500);
    env.inputReady = resultsReady;
    if (!resultsReady) {
      // No result rows appeared: read the page text to tell a verification
      // wall apart from a plain "no results" failure.
      const pageText = await cdp([
        "eval",
        tab,
        "document.body?.innerText || ''",
      ]).catch(() => "");
      const looksBlocked = /captcha|cloudflare|verify|robot|blocked/i.test(
        pageText,
      );
      if (!looksBlocked) {
        throw new Error("Semantic Scholar results not found");
      }
      env.blockedBy = "verification";
      env.verificationResult = "needs-human";
      throw new Error(
        "Semantic Scholar verification required — please solve it in the visible browser window",
      );
    }

    logStage(env, "extract", startTime);
    const maxRows = short ? 5 : 8;
    const papers = await extractPapers(tab, { limit: maxRows });

    // Only papers that have both a title and a URL become sources.
    const usable = papers.filter((paper) => paper.title && paper.url);
    const sources = usable.map((paper) => {
      const label = paper.pdfUrl ? `${paper.title} (PDF)` : paper.title;
      return {
        title: label,
        url: paper.url,
        semanticScholarUrl: paper.semanticScholarUrl,
        paperId: paper.paperId,
        citationCount: paper.citationCount,
        venue: paper.venue,
        year: paper.date,
      };
    });

    const answer = formatPaperSummary(papers);
    const durationMs = Date.now() - startTime;
    outputJson({
      answer: formatAnswer(answer, short),
      sources,
      query,
      url: semanticScholarSearchUrl(query),
      papers,
      _envelope: buildEnvelope({ ...env, durationMs }),
    });
  } catch (error) {
    const failureEnvelope = buildEnvelope({
      ...env,
      durationMs: Date.now() - startTime,
    });
    handleError(error, failureEnvelope);
  }
}
|
|
218
|
+
|
|
219
|
+
// Start the extractor as soon as the module is loaded.
main();
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@apmantza/greedysearch-pi",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "Headless multi-engine AI search (Perplexity,
|
|
3
|
+
"version": "2.0.0",
|
|
4
|
+
"description": "Headless multi-engine AI search (Perplexity, Google AI, ChatGPT, Gemini) via browser automation. NO API KEYS needed. Grounded all-engine search fetches sources by default, with optional configurable synthesis and deep research.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"pi-package"
|
|
@@ -15,11 +15,14 @@
|
|
|
15
15
|
"license": "MIT",
|
|
16
16
|
"scripts": {
|
|
17
17
|
"test": "node test.mjs",
|
|
18
|
+
"test:unit": "node test.mjs unit",
|
|
18
19
|
"test:quick": "node test.mjs quick",
|
|
19
20
|
"test:smoke": "node test.mjs smoke",
|
|
20
21
|
"test:bash": "./test.sh",
|
|
21
22
|
"test:bash:quick": "./test.sh quick",
|
|
22
|
-
"test:bash:smoke": "./test.sh smoke"
|
|
23
|
+
"test:bash:smoke": "./test.sh smoke",
|
|
24
|
+
"lint": "node scripts/lint.mjs",
|
|
25
|
+
"check:lockfile": "node scripts/check-lockfile.mjs"
|
|
23
26
|
},
|
|
24
27
|
"engines": {
|
|
25
28
|
"node": ">=20.11.0"
|
|
@@ -47,6 +50,7 @@
|
|
|
47
50
|
"@mozilla/readability": "^0.6.0",
|
|
48
51
|
"@sinclair/typebox": "^0.34.48",
|
|
49
52
|
"jsdom": "^24.0.0",
|
|
53
|
+
"pdf-parse": "^2.4.5",
|
|
50
54
|
"turndown": "^7.1.2"
|
|
51
55
|
},
|
|
52
56
|
"peerDependencies": {
|
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: greedy-search
|
|
3
|
-
description: Web
|
|
3
|
+
description: Web/search plus opt-in research via Perplexity, Google AI, ChatGPT, Gemini, Semantic Scholar, and Logically. Grounded all-engine search fetches sources by default; optional configurable synthesis; deep research as separate workflow. Configurable via ~/.pi/greedyconfig. Bing Copilot available for signed-in users. Current docs, recent changes, dependency choices. NOT codebase search.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
`greedy_search({ query, engine: "all"|"perplexity"|"
|
|
6
|
+
`greedy_search({ query, engine: "all"|"perplexity"|"google"|"chatgpt"|"gemini"|"semantic-scholar"|"logically"|"bing", synthesize?: bool, synthesizer?: "gemini"|"chatgpt", depth?: "research", breadth: 1-5, iterations: 1-3, maxSources: 3-12, researchOutDir?: string, writeResearchBundle?: bool, visible: bool })`
|
|
7
7
|
|
|
8
|
-
**
|
|
8
|
+
**Modes:** individual engine search · grounded `engine:"all"` search with fetched sources · optional `synthesize:true` using the configured synthesizer over all-engine results · `depth:"research"` for the iterative deep-research workflow.
|
|
9
|
+
|
|
10
|
+
**Config:** `~/.pi/greedyconfig` supports `{ "engines": ["perplexity", "google", "chatgpt", "gemini", "semantic-scholar", "logically"], "synthesizer": "gemini" }`. Gemini is a normal search engine; Semantic Scholar and Logically are opt-in research engines. Any configured engine can participate in `engine:"all"`; deep research child searches reuse the same configured `engines` list and stdin-safe query passing. Normal all-search synthesis remains controlled separately by `synthesizer`; research planning/final synthesis uses Gemini.
|
|
11
|
+
|
|
12
|
+
**Compatibility:** legacy `depth:"fast"|"standard"|"deep"` is still accepted. `fast` skips source fetching; `standard`/`deep` alias `synthesize:true`. Prefer `synthesize:true`, optional `synthesizer`, and `depth:"research"` going forward.
|
|
13
|
+
|
|
14
|
+
**Research output:** `depth:"research"` writes a dataroom-style bundle by default under `.pi/greedysearch-research/<timestamp>_<query>/` with `STATUS.md`, `OUTLINE.md`, `reports/SUMMARY.md`, `reports/CLAIMS.md`, `reports/GAPS.md`, `sources/`, and `data/manifest.json`. Pass `researchOutDir` to choose the directory or `writeResearchBundle:false` to disable disk output.
|
|
9
15
|
|
|
10
16
|
**Auto-recovery:** Headless default. Bing/Perplexity auto-retry visible on CF block. Manual CAPTCHA → visible stays open; solve then rerun.
|
|
11
17
|
|
package/src/fetcher.mjs
CHANGED
|
@@ -63,6 +63,10 @@ const PRIVATE_URL_PATTERNS = [
|
|
|
63
63
|
* @param {string} url - URL to check
|
|
64
64
|
* @returns {{blocked: boolean, reason?: string}}
|
|
65
65
|
*/
|
|
66
|
+
/**
 * Return a fresh headers object: DEFAULT_HEADERS with `overrides` applied on
 * top (override keys win).
 *
 * NOTE(review): the JSDoc block sitting directly above this function
 * (`@param {string} url`, `@returns {{blocked: boolean, reason?: string}}`)
 * appears to describe isPrivateUrl() below, not this function — confirm and
 * move it.
 *
 * @param {object} [overrides={}] - Header names/values to add or replace.
 * @returns {object} New merged headers object.
 */
export function defaultFetchHeaders(overrides = {}) {
  const merged = { ...DEFAULT_HEADERS, ...overrides };
  return merged;
}
|
|
69
|
+
|
|
66
70
|
export function isPrivateUrl(url) {
|
|
67
71
|
try {
|
|
68
72
|
const parsed = new URL(url);
|
|
@@ -594,7 +598,10 @@ export function checkContentQuality(extracted) {
|
|
|
594
598
|
desc: "access denied in content",
|
|
595
599
|
},
|
|
596
600
|
{
|
|
597
|
-
check: () =>
|
|
601
|
+
check: () =>
|
|
602
|
+
/^\s{0,10}sign\s{1,5}in\s{0,10}$|^\s{0,10}log\s{1,5}in\s{0,10}$/im.test(
|
|
603
|
+
markdown,
|
|
604
|
+
),
|
|
598
605
|
desc: "login form only",
|
|
599
606
|
},
|
|
600
607
|
];
|
|
@@ -1,128 +1,163 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Search results formatters
|
|
3
|
-
* Extracted from index.ts
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { formatEngineName } from "../utils/helpers.js";
|
|
7
|
-
import { renderSynthesis } from "./synthesis.js";
|
|
8
|
-
|
|
9
|
-
/**
|
|
10
|
-
* Format search results based on engine type
|
|
11
|
-
*/
|
|
12
|
-
export function formatResults(
|
|
13
|
-
engine: string,
|
|
14
|
-
data: Record<string, unknown>,
|
|
15
|
-
): string {
|
|
16
|
-
const lines: string[] = [];
|
|
17
|
-
|
|
18
|
-
if (engine === "all") {
|
|
19
|
-
return formatAllEnginesResult(data, lines);
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
return formatSingleEngineResult(data, lines);
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Format multi-engine results with synthesis
|
|
27
|
-
*/
|
|
28
|
-
function formatAllEnginesResult(
|
|
29
|
-
data: Record<string, unknown>,
|
|
30
|
-
lines: string[],
|
|
31
|
-
): string {
|
|
32
|
-
const synthesis = data._synthesis as Record<string, unknown> | undefined;
|
|
33
|
-
const dedupedSources = data._sources as
|
|
34
|
-
| Array<Record<string, unknown>>
|
|
35
|
-
| undefined;
|
|
36
|
-
const needsHuman = data._needsHumanVerification as
|
|
37
|
-
| Record<string, unknown>
|
|
38
|
-
| undefined;
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
lines.push(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
lines.push(
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
lines.push(
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Search results formatters
|
|
3
|
+
* Extracted from index.ts
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { formatEngineName } from "../utils/helpers.js";
|
|
7
|
+
import { renderSynthesis } from "./synthesis.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Turn a search result payload into display text.
 *
 * `engine === "all"` is rendered by the multi-engine formatter; every other
 * engine value goes through the single-engine formatter.
 */
export function formatResults(
  engine: string,
  data: Record<string, unknown>,
): string {
  const lines: string[] = [];
  return engine === "all"
    ? formatAllEnginesResult(data, lines)
    : formatSingleEngineResult(data, lines);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Render the combined output of an all-engine search.
 *
 * Order of output: an optional "manual verification required" banner, then
 * either the synthesis (preceded by a research header when the research mode
 * is "iterative") or, when no synthesis answer exists, one section per engine.
 * Keys of `data` starting with "_" are metadata and are not rendered as engines.
 */
function formatAllEnginesResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  const synthesis = data._synthesis as Record<string, unknown> | undefined;
  const sourceList = data._sources as
    | Array<Record<string, unknown>>
    | undefined;
  const verification = data._needsHumanVerification as
    | Record<string, unknown>
    | undefined;
  const research = data._research as Record<string, unknown> | undefined;

  if (verification) {
    const engineList = Array.isArray(verification.engines)
      ? verification.engines.join(", ")
      : "one or more engines";
    const message = String(
      verification.message ||
        "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
    );
    lines.push(
      "## Manual verification required",
      message,
      `Engines: ${engineList}`,
      "",
    );
  }

  // No synthesis answer: fall back to one section per engine.
  if (!synthesis?.answer) {
    for (const [key, value] of Object.entries(data)) {
      if (key.startsWith("_")) continue;
      lines.push(`\n## ${formatEngineName(key)}`);
      formatEngineResult(value as Record<string, unknown>, lines, 3);
    }
    return lines.join("\n").trim();
  }

  const iterative = research?.mode === "iterative";
  if (research && iterative) {
    renderResearchHeader(lines, research);
  }
  renderSynthesis(lines, synthesis, sourceList || [], 6);

  const synthesizedBy = String(
    synthesis.synthesizedBy || "configured synthesizer",
  );
  let footer: string;
  if (iterative) {
    footer =
      "*Research mode: iterative planning, source fetching, citation audit, and bundle output*\n";
  } else {
    footer = `*Synthesized by ${synthesizedBy} from multi-engine results and fetched sources*\n`;
  }
  lines.push(footer);
  return lines.join("\n").trim();
}
|
|
80
|
+
|
|
81
|
+
/**
 * Append a "## Research Run" summary block to `lines`.
 *
 * Reads `research.floor` (and its `metrics`), `research.manifest` and
 * `research.bundle`; the stop reason, evidence/question counts and bundle
 * directory are emitted only when present. Missing counts are shown as 0.
 * The block always ends with an empty line.
 */
function renderResearchHeader(
  lines: string[],
  research: Record<string, unknown>,
): void {
  const floor = research.floor as Record<string, unknown> | undefined;
  const manifest = research.manifest as Record<string, unknown> | undefined;
  const bundle = research.bundle as Record<string, unknown> | undefined;
  const metrics = floor?.metrics as Record<string, unknown> | undefined;

  const status = floor?.floorMet ? "floor met" : "partial / floor unmet";
  const header: string[] = ["## Research Run", `- Status: ${status}`];

  if (manifest?.terminationReason) {
    header.push(`- Stop reason: ${String(manifest.terminationReason)}`);
  }

  if (metrics) {
    const openSuffix = metrics.openQuestions
      ? `, ${metrics.openQuestions} open`
      : "";
    header.push(
      `- Evidence: ${metrics.fetchedOk || 0} fetched sources, ${metrics.primarySources || 0} primary/official, ${metrics.claims || 0} claims, ${metrics.cited || 0} citations`,
      `- Questions: ${metrics.closedQuestions || 0}/${metrics.totalQuestions || 0} closed${openSuffix}`,
    );
  }

  if (bundle?.dir) {
    header.push(`- Bundle: ${String(bundle.dir)}`);
  }

  header.push("");
  lines.push(...header);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Render the output of a single-engine search: an optional "manual
 * verification required" banner followed by the engine's answer and up to
 * five sources.
 */
function formatSingleEngineResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  const verification = data._needsHumanVerification as
    | Record<string, unknown>
    | undefined;

  if (verification) {
    const engineList = Array.isArray(verification.engines)
      ? verification.engines.join(", ")
      : "this engine";
    const message = String(
      verification.message ||
        "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
    );
    lines.push(
      "## Manual verification required",
      message,
      `Engines: ${engineList}`,
      "",
    );
  }

  formatEngineResult(data, lines, 5);
  const rendered = lines.join("\n");
  return rendered.trim();
}
|
|
134
|
+
|
|
135
|
+
/**
 * Append one engine's result to `lines`.
 *
 * An error short-circuits everything else and is rendered as "Error: …".
 * Otherwise the answer (if any) is appended, followed by a "Sources:" list of
 * at most `maxSources` markdown links; a source without a title is labelled
 * with its URL.
 */
function formatEngineResult(
  data: Record<string, unknown>,
  lines: string[],
  maxSources: number,
): void {
  if (data.error) {
    lines.push(`Error: ${data.error}`);
    return;
  }

  if (data.answer) lines.push(String(data.answer));

  const sources = data.sources as Array<Record<string, string>> | undefined;
  if (!Array.isArray(sources) || sources.length === 0) return;

  lines.push("\nSources:");
  const shown = sources.slice(0, maxSources);
  shown.forEach((source) => {
    const label = source.title || source.url;
    lines.push(`- [${label}](${source.url})`);
  });
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Format deep research results with confidence metrics
|
|
163
|
+
*/
|
|
@@ -116,13 +116,35 @@ function getProcessCommandLine(pid) {
|
|
|
116
116
|
* @param {number} debugPort - expected debug port
|
|
117
117
|
* @returns {boolean}
|
|
118
118
|
*/
|
|
119
|
-
export function
|
|
120
|
-
|
|
119
|
+
/**
 * Decide whether a process command line looks like the GreedySearch Chrome
 * main process.
 *
 * A command line matches when it mentions `tempDir`, carries
 * `--remote-debugging-port=<debugPort>` for exactly that port, and has no
 * `--type=` switch (presumably present only on Chrome helper processes such
 * as renderers — confirm against Chromium's switch list).
 *
 * @param {string} cmdLine - Full command line of the process; falsy never matches.
 * @param {string} tempDir - Profile/temp directory expected in the command line.
 * @param {number} [debugPort=GREEDY_PORT] - Expected remote debugging port.
 * @returns {boolean} True when all three conditions hold.
 */
export function commandLineMatchesGreedyChrome(
  cmdLine,
  tempDir,
  debugPort = GREEDY_PORT,
) {
  if (!cmdLine) return false;
  // Windows may report Chrome command lines with backslashes while the shared
  // GREEDY_PROFILE_DIR constant is normalized to forward slashes. Compare a
  // normalized form so child processes do not misclassify a live GreedySearch
  // Chrome as a ghost and kill it during cleanupStaleSessions().
  const normalize = (value) =>
    String(value || "")
      .replaceAll("\\", "/")
      .toLowerCase();
  const normalizedCmdLine = normalize(cmdLine);
  const normalizedTempDir = normalize(tempDir);

  // Match the port exactly: a plain substring test would let port 922 match a
  // command line that actually uses 9222.
  const escapedPort = String(debugPort).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const portSwitch = new RegExp(
    `--remote-debugging-port=${escapedPort}(?!\\d)`,
  );

  return (
    normalizedCmdLine.includes(normalizedTempDir) &&
    portSwitch.test(normalizedCmdLine) &&
    !normalizedCmdLine.includes("--type=")
  );
}
|
|
142
|
+
|
|
143
|
+
/**
 * Check whether the process with the given PID is the GreedySearch Chrome.
 *
 * Looks up the process command line and delegates the decision to
 * commandLineMatchesGreedyChrome().
 *
 * @param {number} pid - Process ID to inspect.
 * @param {string} tempDir - Profile/temp directory expected in the command line.
 * @param {number} [debugPort=GREEDY_PORT] - Expected remote debugging port.
 * @returns {boolean}
 */
export function verifyBrowserProcess(pid, tempDir, debugPort = GREEDY_PORT) {
  const cmdLine = getProcessCommandLine(pid);
  return commandLineMatchesGreedyChrome(cmdLine, tempDir, debugPort);
}
|
|
128
150
|
|