@apmantza/greedysearch-pi 1.9.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/semantic-scholar.mjs
4
+ // Search Semantic Scholar without API keys and return paper/PDF sources for
5
+ // GreedySearch's source fetcher and research synthesizer.
6
+
7
+ import {
8
+ buildEnvelope,
9
+ cdp,
10
+ formatAnswer,
11
+ getOrOpenTab,
12
+ handleError,
13
+ logStage,
14
+ outputJson,
15
+ parseArgs,
16
+ prepareArgs,
17
+ validateQuery,
18
+ waitForSelector,
19
+ } from "./common.mjs";
20
+
21
+ const USAGE =
22
+ 'Usage: node extractors/semantic-scholar.mjs "<query>" [--tab <prefix>]\n';
23
+ const RESULT_SELECTOR = ".cl-paper-row[data-paper-id]";
24
+
25
/**
 * Build the Semantic Scholar search-results URL for a free-text query.
 *
 * @param {string} query - Raw user query; falsy values are treated as "".
 * @returns {string} Absolute search URL sorted by relevance.
 */
function semanticScholarSearchUrl(query) {
  // Semantic Scholar docs note hyphenated terms can reduce matches; use spaces.
  // Collapse the whitespace runs that hyphen replacement can leave behind
  // ("a - b" -> "a   b") and trim the ends so the encoded query stays clean.
  const normalized = String(query || "")
    .replaceAll("-", " ")
    .replace(/\s+/g, " ")
    .trim();
  return `https://www.semanticscholar.org/search?q=${encodeURIComponent(normalized)}&sort=relevance`;
}
30
+
31
/**
 * Best-effort dismissal of a cookie/consent dialog in the given tab.
 *
 * Evaluates a small script in the page that clicks the first element matching
 * one of a fixed list of selectors. Any failure of the `cdp` call is ignored.
 *
 * @param {string} tab - Tab identifier passed through to `cdp`.
 * @returns {Promise<void>}
 */
async function dismissCookieBanner(tab) {
  // Runs in the page: the selectors are tried in order and the first hit is clicked.
  const clickFirstMatch = String.raw`
    (() => {
      const selectors = [
        '.osano-cm-dialog__close',
        '.osano-cm-denyAll',
        '.osano-cm-accept-all',
        'button[aria-label*="Close" i]',
      ];
      for (const selector of selectors) {
        const btn = document.querySelector(selector);
        if (btn) { btn.click(); return selector; }
      }
      return null;
    })()
  `;

  try {
    await cdp(["eval", tab, clickFirstMatch]);
  } catch {
    // Intentionally ignored: a missing or unclickable banner must not fail the search.
  }
}
52
+
53
/**
 * Scrape paper rows from the currently loaded Semantic Scholar results page.
 *
 * A script is evaluated in the page via `cdp`; it reads up to `limit` result
 * rows and returns them as a JSON string, which is parsed here.
 *
 * @param {string} tab - Tab identifier passed through to `cdp`.
 * @param {{limit?: number}} [options] - `limit` caps the number of rows read
 *   (default 8). Values that are not positive integers fall back to 8.
 * @returns {Promise<Array<object>>} Paper records with `rank`, `paperId`,
 *   `title`, `url`, `semanticScholarUrl`, `pdfUrl`, `externalUrl`,
 *   `externalLabel`, `authors`, `field`, `venue`, `date`, `tldr` and
 *   `citationCount`; an empty array when the page result is not a JSON array.
 */
async function extractPapers(tab, { limit = 8 } = {}) {
  // `limit` is spliced into source code that is evaluated in the browser, so
  // force it to a plain positive integer before interpolating.
  const parsedLimit = Number.parseInt(limit, 10);
  const safeLimit =
    Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 8;

  const raw = await cdp([
    "eval",
    tab,
    String.raw`
      ((limit) => {
        function clean(value) {
          return String(value || '').replace(/\s+/g, ' ').trim();
        }
        function absolutize(href) {
          try { return new URL(href, location.href).href; } catch { return ''; }
        }
        function isDirectPdf(url) {
          return /\.pdf(?:[?#]|$)/i.test(url) || /\/pdf\//i.test(url);
        }
        const rows = Array.from(document.querySelectorAll('.cl-paper-row[data-paper-id]')).slice(0, limit);
        return JSON.stringify(rows.map((row, index) => {
          const titleLink = row.querySelector('a[data-test-id="title-link"][href], a[href*="/paper/"][href]');
          const paperUrl = absolutize(titleLink?.getAttribute('href') || '');
          const title = clean(titleLink?.innerText || row.querySelector('.cl-paper-title')?.innerText || '');
          const authors = Array.from(row.querySelectorAll('[data-test-id="author-list"] a, .cl-paper-authors a'))
            .map((a) => clean(a.innerText))
            .filter(Boolean)
            .slice(0, 8);
          const field = clean(row.querySelector('.cl-paper-fos')?.innerText || '');
          const venue = clean(row.querySelector('[data-test-id="normalized-venue-link"], .cl-paper-venue')?.innerText || '');
          const date = clean(row.querySelector('.cl-paper-pubdates')?.innerText || '');
          const tldrNode = row.querySelector('.tldr-abstract-replacement');
          let tldr = clean(tldrNode?.innerText || '');
          tldr = tldr.replace(/^TLDR\s*/i, '').replace(/\s*Expand$/i, '').trim();
          const citationNode = row.querySelector('[data-test-id="total-citations-stat"]');
          const citationLabel = citationNode?.getAttribute('aria-label') || citationNode?.innerText || '';
          const citationMatch = clean(citationLabel).match(/[\d,]+/);
          const citationCount = citationMatch ? Number.parseInt(citationMatch[0].replace(/,/g, ''), 10) : null;
          const externalLinks = Array.from(row.querySelectorAll('a[data-test-id="paper-link"][href], a.cl-paper-view-paper[href]'))
            .map((a) => ({
              url: absolutize(a.getAttribute('href')),
              label: clean(a.innerText),
            }))
            .filter((link) => link.url);
          const directPdf = externalLinks.find((link) => isDirectPdf(link.url));
          const primaryExternal = directPdf || externalLinks[0] || null;
          const sourceUrl = primaryExternal?.url || paperUrl;
          return {
            rank: index + 1,
            paperId: row.getAttribute('data-paper-id') || '',
            title,
            url: sourceUrl,
            semanticScholarUrl: paperUrl,
            pdfUrl: directPdf?.url || '',
            externalUrl: primaryExternal?.url || '',
            externalLabel: primaryExternal?.label || '',
            authors,
            field,
            venue,
            date,
            tldr,
            citationCount,
          };
        }));
      })(${safeLimit})
    `,
  ]);

  try {
    // Valid JSON that is not an array (e.g. "null") would break callers that
    // immediately call array methods on the result.
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
}
122
+
123
/**
 * Render extracted papers as a numbered plain-text summary.
 *
 * Each entry is `<rank>. <title>` optionally followed by ` — ` and the
 * authors, venue, date and citation count joined with ` · `, plus a TLDR line
 * when one is present. Entries are separated by a blank line.
 *
 * @param {Array<object>} papers - Paper records as produced by extractPapers.
 * @returns {string} The summary, or a fixed "no results" sentence when
 *   `papers` is empty or not an array.
 */
function formatPaperSummary(papers) {
  if (!Array.isArray(papers) || papers.length === 0) {
    return "Semantic Scholar returned no paper results.";
  }
  return papers
    .map((paper, index) => {
      const entry = paper ?? {};
      const parts = [];
      if (Array.isArray(entry.authors) && entry.authors.length) {
        parts.push(entry.authors.join(", "));
      }
      if (entry.venue) parts.push(entry.venue);
      if (entry.date) parts.push(entry.date);
      if (Number.isFinite(entry.citationCount)) {
        // Pin the locale so the text does not vary with the host machine.
        parts.push(`${entry.citationCount.toLocaleString("en-US")} citations`);
      }
      const meta = parts.length ? ` — ${parts.join(" · ")}` : "";
      const tldr = entry.tldr ? `\n TLDR: ${entry.tldr}` : "";
      // Fall back to list position / a placeholder rather than printing "undefined".
      const rank = entry.rank ?? index + 1;
      const title = entry.title || "(untitled)";
      return `${rank}. ${title}${meta}${tldr}`;
    })
    .join("\n\n");
}
140
+
141
/**
 * CLI entry point: run one Semantic Scholar search and emit the result as JSON.
 *
 * Stages, each announced via `logStage`: navigate to the search URL, dismiss
 * the consent banner, wait for result rows, extract papers. On success the
 * payload goes to `outputJson`; any error thrown inside the `try` is passed
 * to `handleError` together with an envelope built from the run state.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = await prepareArgs(process.argv.slice(2));
  validateQuery(args, USAGE);
  const { query, tabPrefix, short } = parseArgs(args);
  const startTime = Date.now();
  const visible = process.env.GREEDY_SEARCH_VISIBLE === "1";
  // Run state reported in the envelope on both the success and failure paths.
  const env = {
    engine: "semantic-scholar",
    mode: visible ? "visible" : "headless",
    blockedBy: null,
    verificationResult: null,
    inputReady: null,
  };

  try {
    if (!tabPrefix) await cdp(["list"]);
    const tab = await getOrOpenTab(tabPrefix);

    logStage(env, "nav", startTime);
    await cdp(["nav", tab, semanticScholarSearchUrl(query)], 25000);
    await new Promise((resolve) => {
      setTimeout(resolve, 800);
    });

    logStage(env, "consent", startTime);
    await dismissCookieBanner(tab);

    logStage(env, "results-wait", startTime);
    env.inputReady = await waitForSelector(tab, RESULT_SELECTOR, 15000, 500);
    if (!env.inputReady) {
      // No result rows appeared: inspect the page text to tell a verification
      // wall apart from a plain "nothing found".
      const pageText = await cdp([
        "eval",
        tab,
        "document.body?.innerText || ''",
      ]).catch(() => "");
      const looksLikeChallenge =
        /captcha|cloudflare|verify|robot|blocked/i.test(pageText);
      if (!looksLikeChallenge) {
        throw new Error("Semantic Scholar results not found");
      }
      env.blockedBy = "verification";
      env.verificationResult = "needs-human";
      throw new Error(
        "Semantic Scholar verification required — please solve it in the visible browser window",
      );
    }

    logStage(env, "extract", startTime);
    const maxPapers = short ? 5 : 8;
    const papers = await extractPapers(tab, { limit: maxPapers });

    // Only papers with both a title and a URL become sources.
    const sources = [];
    for (const paper of papers) {
      if (!(paper.title && paper.url)) continue;
      sources.push({
        title: paper.pdfUrl ? `${paper.title} (PDF)` : paper.title,
        url: paper.url,
        semanticScholarUrl: paper.semanticScholarUrl,
        paperId: paper.paperId,
        citationCount: paper.citationCount,
        venue: paper.venue,
        year: paper.date,
      });
    }

    const answer = formatPaperSummary(papers);
    const durationMs = Date.now() - startTime;
    outputJson({
      answer: formatAnswer(answer, short),
      sources,
      query,
      url: semanticScholarSearchUrl(query),
      papers,
      _envelope: buildEnvelope({ ...env, durationMs }),
    });
  } catch (error) {
    const envelope = buildEnvelope({
      ...env,
      durationMs: Date.now() - startTime,
    });
    handleError(error, envelope);
  }
}
218
+
219
+ main();
package/index.ts CHANGED
@@ -13,9 +13,10 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
13
13
  import { homedir } from "node:os";
14
14
  import { dirname, join } from "node:path";
15
15
  import { fileURLToPath } from "node:url";
16
- import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
17
16
 
18
17
  import { registerGreedySearchTool } from "./src/tools/greedy-search-handler.js";
18
+
19
+ type ExtensionAPI = any;
19
20
  import { cdpAvailable } from "./src/tools/shared.js";
20
21
 
21
22
  const __dir = dirname(fileURLToPath(import.meta.url));
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@apmantza/greedysearch-pi",
3
- "version": "1.9.1",
4
- "description": "Headless multi-engine AI search (Perplexity, Bing Copilot, Google AI) via browser automation -- NO API KEYS needed. Extracts answers with sources, optional synthesis. Grounded AI answers from real browser interactions.",
3
+ "version": "2.0.0",
4
+ "description": "Headless multi-engine AI search (Perplexity, Google AI, ChatGPT, Gemini) via browser automation. NO API KEYS needed. Grounded all-engine search fetches sources by default, with optional configurable synthesis and deep research.",
5
5
  "type": "module",
6
6
  "keywords": [
7
7
  "pi-package"
@@ -15,11 +15,14 @@
15
15
  "license": "MIT",
16
16
  "scripts": {
17
17
  "test": "node test.mjs",
18
+ "test:unit": "node test.mjs unit",
18
19
  "test:quick": "node test.mjs quick",
19
20
  "test:smoke": "node test.mjs smoke",
20
21
  "test:bash": "./test.sh",
21
22
  "test:bash:quick": "./test.sh quick",
22
- "test:bash:smoke": "./test.sh smoke"
23
+ "test:bash:smoke": "./test.sh smoke",
24
+ "lint": "node scripts/lint.mjs",
25
+ "check:lockfile": "node scripts/check-lockfile.mjs"
23
26
  },
24
27
  "engines": {
25
28
  "node": ">=20.11.0"
@@ -45,12 +48,17 @@
45
48
  },
46
49
  "dependencies": {
47
50
  "@mozilla/readability": "^0.6.0",
51
+ "@sinclair/typebox": "^0.34.48",
48
52
  "jsdom": "^24.0.0",
53
+ "pdf-parse": "^2.4.5",
49
54
  "turndown": "^7.1.2"
50
55
  },
51
56
  "peerDependencies": {
52
- "@earendil-works/pi-coding-agent": "*",
53
- "@earendil-works/pi-tui": "*",
54
- "@sinclair/typebox": "*"
57
+ "@earendil-works/pi-coding-agent": "*"
58
+ },
59
+ "peerDependenciesMeta": {
60
+ "@earendil-works/pi-coding-agent": {
61
+ "optional": true
62
+ }
55
63
  }
56
64
  }
@@ -1,21 +1,18 @@
1
1
  ---
2
2
  name: greedy-search
3
- description: Live web search via Perplexity, Bing, Google AI, and Gemini. Use for current docs, recent errors/framework changes, dependency choices, or stale-knowledge questions. NOT for codebase search.
3
+ description: Web/search plus opt-in research via Perplexity, Google AI, ChatGPT, Gemini, Semantic Scholar, and Logically. Grounded all-engine search fetches sources by default; optional configurable synthesis; deep research as separate workflow. Configurable via ~/.pi/greedyconfig. Bing Copilot available for signed-in users. Current docs, recent changes, dependency choices. NOT codebase search.
4
4
  ---
5
5
 
6
- Use `greedy_search` for live web answers.
6
+ `greedy_search({ query, engine: "all"|"perplexity"|"google"|"chatgpt"|"gemini"|"semantic-scholar"|"logically"|"bing", synthesize?: bool, synthesizer?: "gemini"|"chatgpt", depth?: "research", breadth: 1-5, iterations: 1-3, maxSources: 3-12, researchOutDir?: string, writeResearchBundle?: bool, visible: bool })`
7
7
 
8
- ```js
9
- greedy_search({ query: "React 19 changes", depth: "standard" });
10
- ```
8
+ **Modes:** individual engine search · grounded `engine:"all"` search with fetched sources · optional `synthesize:true` using the configured synthesizer over all-engine results · `depth:"research"` for the iterative deep-research workflow.
11
9
 
12
- **Params:** `query` (required), `engine`: `all`|`perplexity`|`bing`|`google`|`gemini`, `depth`: `fast`|`standard`|`deep`
10
+ **Config:** `~/.pi/greedyconfig` supports `{ "engines": ["perplexity", "google", "chatgpt", "gemini", "semantic-scholar", "logically"], "synthesizer": "gemini" }`. Gemini is a normal search engine; Semantic Scholar and Logically are opt-in research engines. Any configured engine can participate in `engine:"all"`; deep research child searches reuse the same configured `engines` list and stdin-safe query passing. Normal all-search synthesis remains controlled separately by `synthesizer`; research planning/final synthesis uses Gemini.
13
11
 
14
- **Depths:**
15
- - `fast`: ~15-30s, single engine, no synthesis
16
- - `standard`: ~30-90s, all engines + Gemini synthesis + sources
17
- - `deep`: ~60-180s, stronger grounding + confidence metadata
12
+ **Compatibility:** legacy `depth:"fast"|"standard"|"deep"` is still accepted. `fast` skips source fetching; `standard`/`deep` alias `synthesize:true`. Prefer `synthesize:true`, optional `synthesizer`, and `depth:"research"` going forward.
18
13
 
19
- **Blocks:** Headless by default; auto-retries in visible mode. If human verification is needed, visible Chrome stays open tell the user to solve it and rerun.
14
+ **Research output:** `depth:"research"` writes a dataroom-style bundle by default under `.pi/greedysearch-research/<timestamp>_<query>/` with `STATUS.md`, `OUTLINE.md`, `reports/SUMMARY.md`, `reports/CLAIMS.md`, `reports/GAPS.md`, `sources/`, and `data/manifest.json`. Pass `researchOutDir` to choose the directory or `writeResearchBundle:false` to disable disk output.
20
15
 
21
- **CDP safety:** Never call raw `bin/cdp.mjs`. Use `bin/cdp-greedy.mjs`, `bin/cdp-visible.mjs`, or `bin/cdp-headless.mjs`.
16
+ **Auto-recovery:** Headless by default. Bing and Perplexity automatically retry in visible mode when blocked by Cloudflare. If a manual CAPTCHA is required, the visible Chrome window stays open; solve it, then rerun the same search.
17
+
18
+ **CDP safety:** Use only `bin/cdp-greedy.mjs`. Never call raw `bin/cdp.mjs` directly.
package/src/fetcher.mjs CHANGED
@@ -63,6 +63,10 @@ const PRIVATE_URL_PATTERNS = [
63
63
  * @param {string} url - URL to check
64
64
  * @returns {{blocked: boolean, reason?: string}}
65
65
  */
66
/**
 * Build a header object from DEFAULT_HEADERS with caller overrides applied.
 *
 * @param {Record<string, string>} [overrides] - Entries that replace or extend
 *   the defaults; later keys win.
 * @returns {Record<string, string>} A new object; DEFAULT_HEADERS is not mutated.
 */
export function defaultFetchHeaders(overrides = {}) {
  return { ...DEFAULT_HEADERS, ...overrides };
}

// The tags below belong to isPrivateUrl. They are restated here so they sit
// directly above that function instead of above defaultFetchHeaders.
/**
 * @param {string} url - URL to check
 * @returns {{blocked: boolean, reason?: string}}
 */
69
+
66
70
  export function isPrivateUrl(url) {
67
71
  try {
68
72
  const parsed = new URL(url);
@@ -594,7 +598,10 @@ export function checkContentQuality(extracted) {
594
598
  desc: "access denied in content",
595
599
  },
596
600
  {
597
- check: () => /^\s{0,10}sign\s{1,5}in\s{0,10}$|^\s{0,10}log\s{1,5}in\s{0,10}$/im.test(markdown),
601
+ check: () =>
602
+ /^\s{0,10}sign\s{1,5}in\s{0,10}$|^\s{0,10}log\s{1,5}in\s{0,10}$/im.test(
603
+ markdown,
604
+ ),
598
605
  desc: "login form only",
599
606
  },
600
607
  ];
@@ -1,128 +1,163 @@
1
- /**
2
- * Search results formatters
3
- * Extracted from index.ts
4
- */
5
-
6
- import { formatEngineName } from "../utils/helpers.js";
7
- import { renderSynthesis } from "./synthesis.js";
8
-
9
- /**
10
- * Format search results based on engine type
11
- */
12
- export function formatResults(
13
- engine: string,
14
- data: Record<string, unknown>,
15
- ): string {
16
- const lines: string[] = [];
17
-
18
- if (engine === "all") {
19
- return formatAllEnginesResult(data, lines);
20
- }
21
-
22
- return formatSingleEngineResult(data, lines);
23
- }
24
-
25
- /**
26
- * Format multi-engine results with synthesis
27
- */
28
- function formatAllEnginesResult(
29
- data: Record<string, unknown>,
30
- lines: string[],
31
- ): string {
32
- const synthesis = data._synthesis as Record<string, unknown> | undefined;
33
- const dedupedSources = data._sources as
34
- | Array<Record<string, unknown>>
35
- | undefined;
36
- const needsHuman = data._needsHumanVerification as
37
- | Record<string, unknown>
38
- | undefined;
39
-
40
- if (needsHuman) {
41
- const engines = Array.isArray(needsHuman.engines)
42
- ? needsHuman.engines.join(", ")
43
- : "one or more engines";
44
- lines.push("## Manual verification required");
45
- lines.push(
46
- String(
47
- needsHuman.message ||
48
- "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
49
- ),
50
- );
51
- lines.push(`Engines: ${engines}`);
52
- lines.push("");
53
- }
54
-
55
- // If we have a synthesis answer, render it
56
- if (synthesis?.answer) {
57
- renderSynthesis(lines, synthesis, dedupedSources || [], 6);
58
- lines.push("*Synthesized from Perplexity, Bing Copilot, and Google AI*\n");
59
- return lines.join("\n").trim();
60
- }
61
-
62
- // Fallback: render individual engine results
63
- for (const [eng, result] of Object.entries(data)) {
64
- if (eng.startsWith("_")) continue;
65
- lines.push(`\n## ${formatEngineName(eng)}`);
66
- formatEngineResult(result as Record<string, unknown>, lines, 3);
67
- }
68
-
69
- return lines.join("\n").trim();
70
- }
71
-
72
- /**
73
- * Format single engine result
74
- */
75
- function formatSingleEngineResult(
76
- data: Record<string, unknown>,
77
- lines: string[],
78
- ): string {
79
- const needsHuman = data._needsHumanVerification as
80
- | Record<string, unknown>
81
- | undefined;
82
- if (needsHuman) {
83
- const engines = Array.isArray(needsHuman.engines)
84
- ? needsHuman.engines.join(", ")
85
- : "this engine";
86
- lines.push("## Manual verification required");
87
- lines.push(
88
- String(
89
- needsHuman.message ||
90
- "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
91
- ),
92
- );
93
- lines.push(`Engines: ${engines}`);
94
- lines.push("");
95
- }
96
- formatEngineResult(data, lines, 5);
97
- return lines.join("\n").trim();
98
- }
99
-
100
- /**
101
- * Format a single engine's result (answer + sources)
102
- */
103
- function formatEngineResult(
104
- data: Record<string, unknown>,
105
- lines: string[],
106
- maxSources: number,
107
- ): void {
108
- if (data.error) {
109
- lines.push(`Error: ${data.error}`);
110
- return;
111
- }
112
-
113
- if (data.answer) {
114
- lines.push(String(data.answer));
115
- }
116
-
117
- const sources = data.sources as Array<Record<string, string>> | undefined;
118
- if (Array.isArray(sources) && sources.length > 0) {
119
- lines.push("\nSources:");
120
- for (const s of sources.slice(0, maxSources)) {
121
- lines.push(`- [${s.title || s.url}](${s.url})`);
122
- }
123
- }
124
- }
125
-
126
- /**
127
- * Format deep research results with confidence metrics
128
- */
1
+ /**
2
+ * Search results formatters
3
+ * Extracted from index.ts
4
+ */
5
+
6
+ import { formatEngineName } from "../utils/helpers.js";
7
+ import { renderSynthesis } from "./synthesis.js";
8
+
9
/**
 * Entry point for result formatting: `"all"` uses the multi-engine
 * formatter, every other engine name uses the single-engine formatter.
 */
export function formatResults(
  engine: string,
  data: Record<string, unknown>,
): string {
  const buffer: string[] = [];
  const render =
    engine === "all" ? formatAllEnginesResult : formatSingleEngineResult;
  return render(data, buffer);
}
24
+
25
/**
 * Format an `engine: "all"` result.
 *
 * Emits an optional manual-verification notice, then either the synthesized
 * answer (preceded by a research header in iterative research mode) or, when
 * there is no synthesis answer, one section per engine.
 */
function formatAllEnginesResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  const synthesis = data._synthesis as Record<string, unknown> | undefined;
  const dedupedSources = data._sources as
    | Array<Record<string, unknown>>
    | undefined;
  const needsHuman = data._needsHumanVerification as
    | Record<string, unknown>
    | undefined;
  const research = data._research as Record<string, unknown> | undefined;

  if (needsHuman) {
    const affected = Array.isArray(needsHuman.engines)
      ? needsHuman.engines.join(", ")
      : "one or more engines";
    const notice = String(
      needsHuman.message ||
        "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
    );
    lines.push(
      "## Manual verification required",
      notice,
      `Engines: ${affected}`,
      "",
    );
  }

  // No synthesis answer: fall back to one section per engine. Keys starting
  // with "_" are metadata, not engines.
  if (!synthesis?.answer) {
    for (const eng of Object.keys(data)) {
      if (eng.startsWith("_")) continue;
      lines.push(`\n## ${formatEngineName(eng)}`);
      formatEngineResult(data[eng] as Record<string, unknown>, lines, 3);
    }
    return lines.join("\n").trim();
  }

  const iterative = research?.mode === "iterative";
  if (iterative && research) renderResearchHeader(lines, research);
  renderSynthesis(lines, synthesis, dedupedSources || [], 6);

  const synthesizedBy = String(
    synthesis.synthesizedBy || "configured synthesizer",
  );
  const footer = iterative
    ? "*Research mode: iterative planning, source fetching, citation audit, and bundle output*\n"
    : `*Synthesized by ${synthesizedBy} from multi-engine results and fetched sources*\n`;
  lines.push(footer);
  return lines.join("\n").trim();
}
80
+
81
/**
 * Append a "Research Run" summary block to `lines`.
 *
 * Reads `floor`, `floor.metrics`, `manifest` and `bundle` from `research`.
 * The status line is always written; stop reason, evidence/question counts
 * and bundle directory are written only when present. Ends with a blank line.
 */
function renderResearchHeader(
  lines: string[],
  research: Record<string, unknown>,
): void {
  const floor = research.floor as Record<string, unknown> | undefined;
  const metrics = floor?.metrics as Record<string, unknown> | undefined;
  const bundle = research.bundle as Record<string, unknown> | undefined;
  const manifest = research.manifest as Record<string, unknown> | undefined;

  const status = floor?.floorMet ? "floor met" : "partial / floor unmet";
  const header: string[] = ["## Research Run", `- Status: ${status}`];

  if (manifest?.terminationReason) {
    header.push(`- Stop reason: ${String(manifest.terminationReason)}`);
  }

  if (metrics) {
    // Missing or falsy counters are shown as 0.
    const count = (key: string): unknown => metrics[key] || 0;
    const openSuffix = metrics.openQuestions
      ? `, ${metrics.openQuestions} open`
      : "";
    header.push(
      `- Evidence: ${count("fetchedOk")} fetched sources, ${count("primarySources")} primary/official, ${count("claims")} claims, ${count("cited")} citations`,
      `- Questions: ${count("closedQuestions")}/${count("totalQuestions")} closed${openSuffix}`,
    );
  }

  if (bundle?.dir) {
    header.push(`- Bundle: ${String(bundle.dir)}`);
  }

  header.push("");
  lines.push(...header);
}
106
+
107
/**
 * Format the result of a single engine: an optional manual-verification
 * notice followed by the engine's answer and up to five sources.
 */
function formatSingleEngineResult(
  data: Record<string, unknown>,
  lines: string[],
): string {
  const verification = data._needsHumanVerification as
    | Record<string, unknown>
    | undefined;

  if (verification) {
    const affected = Array.isArray(verification.engines)
      ? verification.engines.join(", ")
      : "this engine";
    const notice = String(
      verification.message ||
        "Visible Chrome is open. Solve the verification challenge, then rerun the same search.",
    );
    lines.push(
      "## Manual verification required",
      notice,
      `Engines: ${affected}`,
      "",
    );
  }

  formatEngineResult(data, lines, 5);
  return lines.join("\n").trim();
}
134
+
135
/**
 * Append one engine's result (answer + sources) to `lines`.
 *
 * When `data.error` is set only an `Error: …` line is written. Otherwise the
 * answer (if any) is written, followed by a "Sources:" list of markdown links.
 *
 * @param data - Engine result; reads `error`, `answer` and `sources`.
 * @param lines - Output buffer, mutated in place.
 * @param maxSources - Maximum number of source links to emit.
 */
function formatEngineResult(
  data: Record<string, unknown>,
  lines: string[],
  maxSources: number,
): void {
  if (data.error) {
    lines.push(`Error: ${data.error}`);
    return;
  }

  if (data.answer) {
    lines.push(String(data.answer));
  }

  // Keep only entries that can produce a working link: a source without a URL
  // would render as "[undefined](undefined)", and a null entry would throw.
  const rawSources: unknown[] = Array.isArray(data.sources)
    ? (data.sources as unknown[])
    : [];
  const linkable = rawSources.filter(
    (s): s is Record<string, string> =>
      typeof s === "object" &&
      s !== null &&
      typeof (s as Record<string, unknown>).url === "string" &&
      (s as Record<string, string>).url !== "",
  );

  if (linkable.length > 0) {
    lines.push("\nSources:");
    for (const s of linkable.slice(0, maxSources)) {
      lines.push(`- [${s.title || s.url}](${s.url})`);
    }
  }
}
160
+
161
+ /**
162
+ * Format deep research results with confidence metrics
163
+ */
@@ -116,13 +116,35 @@ function getProcessCommandLine(pid) {
116
116
  * @param {number} debugPort - expected debug port
117
117
  * @returns {boolean}
118
118
  */
119
- export function verifyBrowserProcess(pid, tempDir, debugPort = GREEDY_PORT) {
120
- const cmdLine = getProcessCommandLine(pid);
119
/**
 * Decide whether a process command line belongs to the main GreedySearch
 * Chrome process.
 *
 * A match requires that the command line contains `tempDir`, carries
 * `--remote-debugging-port=<debugPort>` for exactly that port, and has no
 * `--type=` flag.
 *
 * @param {string} cmdLine - Full command line of the process; falsy → false.
 * @param {string} tempDir - Profile directory expected in the command line.
 * @param {number} debugPort - Expected remote-debugging port.
 * @returns {boolean}
 */
export function commandLineMatchesGreedyChrome(
  cmdLine,
  tempDir,
  debugPort = GREEDY_PORT,
) {
  if (!cmdLine) return false;
  // Windows may report Chrome command lines with backslashes while the shared
  // GREEDY_PROFILE_DIR constant is normalized to forward slashes. Compare a
  // normalized form so child processes do not misclassify a live GreedySearch
  // Chrome as a ghost and kill it during cleanupStaleSessions().
  const normalize = (value) =>
    String(value || "")
      .replaceAll("\\", "/")
      .toLowerCase();
  const normalizedCmdLine = normalize(cmdLine);
  const normalizedTempDir = normalize(tempDir);

  // Require a non-digit (or end of string) after the port so that port 922
  // does not match a process started with --remote-debugging-port=9222.
  const escapedPort = String(debugPort).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const portFlag = new RegExp(
    `--remote-debugging-port=${escapedPort}(?!\\d)`,
  );

  return (
    normalizedCmdLine.includes(normalizedTempDir) &&
    portFlag.test(normalizedCmdLine) &&
    !normalizedCmdLine.includes("--type=")
  );
}
142
+
143
/**
 * Look up the command line of `pid` and check it against the expected
 * profile directory and debug port via commandLineMatchesGreedyChrome.
 *
 * @param {number} pid - Process id to inspect.
 * @param {string} tempDir - Expected profile directory.
 * @param {number} debugPort - Expected debug port.
 * @returns {boolean}
 */
export function verifyBrowserProcess(pid, tempDir, debugPort = GREEDY_PORT) {
  const cmdLine = getProcessCommandLine(pid);
  return commandLineMatchesGreedyChrome(cmdLine, tempDir, debugPort);
}
128
150