npm - @apmantza/greedysearch-pi - Versions diffs - 1.3.0 → 1.4.0 - Mend

@apmantza/greedysearch-pi 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -4,12 +4,13 @@ Pi extension that adds a `greedy_search` tool — fans out queries to Perplexity
 Forked from [GreedySearch-claude](https://github.com/apmantza/GreedySearch-claude).
-## What's New (v1.2.0)
+## What's New (v1.4.0)
-- **Fixed parallel search race condition** — multiple `greedy_search` calls can now run concurrently without tab conflicts
-- **Improved Bing Copilot verification** — better auto-handling of Turnstile challenges and modal dialogs
-- **Added test suite** — run `./test.sh` to verify all modes work correctly
-- **Atomic port file writes** — prevents corruption when multiple processes connect to Chrome
+- **Grounded synthesis** — Gemini now receives a normalized source registry with stable source IDs and returns agreement summaries, caveats, and cited claims
+- **Real deep research** — top sources are fetched before synthesis so deep research answers are grounded in fetched evidence, not just engine summaries
+- **Richer source metadata** — source output now includes canonical URLs, domains, source types, per-engine attribution, and confidence metadata
+- **Cleaner tab lifecycle** — temporary Perplexity, Bing, and Google tabs are closed after each fan-out search, and synthesis finishes on the Gemini tab
+- **Isolated Chrome targeting** — GreedySearch now refuses to fall back to your normal Chrome session, preventing stray remote-debugging prompts
 ## Install
@@ -69,7 +70,15 @@ For complex research questions, use `synthesize: true` with `engine: "all"`:
 greedy_search({ query: "best auth patterns for SaaS in 2026", engine: "all", synthesize: true })
 ```
-This deduplicates sources across engines and feeds them to Gemini for one clean, synthesized answer. Adds ~30s but produces the highest quality output with deduped sources showing consensus scores (`[2/3]`, `[3/3]`).
+This deduplicates sources across engines, builds a normalized source registry, and feeds that context to Gemini for one clean synthesized answer. Adds ~30s but now returns agreement summaries, caveats, key claims, and better-labeled top sources.
+For the most grounded mode, use deep research from the CLI:
+```bash
+node search.mjs all "best auth patterns for SaaS in 2026" --deep-research
+```
+Deep research fetches top source pages before synthesis and reports source confidence metadata such as agreement level, fetched-source success rate, and source mix.
 **Use synthesis when:**
 - You need one definitive answer, not multiple perspectives
@@ -112,7 +121,7 @@ greedy_search({ query: "Error: Cannot find module 'react-dom/client' Next.js 15"
 ## Requirements
-- **Chrome** — must be installed. The extension auto-launches a dedicated Chrome instance on port 9222 (separate from your main browser session).
+- **Chrome** — must be installed. The extension auto-launches a dedicated Chrome instance on port 9222 with its own isolated profile and DevTools port file, separate from your main browser session.
 - **Node.js 22+** — for built-in `fetch` and WebSocket support.
 ## Setup (first time)

package/cdp.mjs CHANGED Viewed

@@ -37,21 +37,22 @@ function getDevToolsActivePortPath() {
   return join(homedir(), '.config', 'google-chrome', 'DevToolsActivePort');
 }
-function getWsUrl() {
-  // If CDP_PROFILE_DIR is set (by search.mjs), prefer that profile's port file
-  // so GreedySearch targets its own Chrome, not the user's main session.
-  const profileDir = process.env.CDP_PROFILE_DIR;
-  if (profileDir) {
-    const p = profileDir.replace(/\\/g, '/') + '/DevToolsActivePort';
-    if (existsSync(p)) {
-      const lines = readFileSync(p, 'utf8').trim().split('\n');
-      return `ws://127.0.0.1:${lines[0]}${lines[1]}`;
-    }
-  }
-  const portFile = getDevToolsActivePortPath();
-  const lines = readFileSync(portFile, 'utf8').trim().split('\n');
-  return `ws://127.0.0.1:${lines[0]}${lines[1]}`;
-}
+function getWsUrl() {
+  // If CDP_PROFILE_DIR is set (by search.mjs), prefer that profile's port file
+  // so GreedySearch targets its own Chrome, not the user's main session.
+  const profileDir = process.env.CDP_PROFILE_DIR;
+  if (profileDir) {
+    const p = profileDir.replace(/\\/g, '/') + '/DevToolsActivePort';
+    if (existsSync(p)) {
+      const lines = readFileSync(p, 'utf8').trim().split('\n');
+      return `ws://127.0.0.1:${lines[0]}${lines[1]}`;
+    }
+    throw new Error(`GreedySearch DevToolsActivePort not found at ${p}. Refusing to fall back to the main Chrome session.`);
+  }
+  const portFile = getDevToolsActivePortPath();
+  const lines = readFileSync(portFile, 'utf8').trim().split('\n');
+  return `ws://127.0.0.1:${lines[0]}${lines[1]}`;
+}
 const sleep = (ms) => new Promise(r => setTimeout(r, ms));

package/index.ts CHANGED Viewed

@@ -68,31 +68,164 @@ function runSearch(
 	});
 }
+function formatEngineName(engine: string): string {
+	if (engine === "bing") return "Bing Copilot";
+	if (engine === "google") return "Google AI";
+	return engine.charAt(0).toUpperCase() + engine.slice(1);
+}
+function humanizeSourceType(sourceType: string): string {
+	if (!sourceType) return "";
+	if (sourceType === "official-docs") return "official docs";
+	return sourceType.replace(/-/g, " ");
+}
+function sourceUrl(source: Record<string, unknown>): string {
+	return String(source.displayUrl || source.canonicalUrl || source.url || "");
+}
+function sourceLabel(source: Record<string, unknown>): string {
+	return String(source.title || source.domain || sourceUrl(source) || "Untitled source");
+}
+function sourceConsensus(source: Record<string, unknown>): number {
+	if (typeof source.engineCount === "number") return source.engineCount;
+	const engines = Array.isArray(source.engines) ? (source.engines as string[]) : [];
+	return engines.length;
+}
+function formatAgreementLevel(level: string): string {
+	if (!level) return "Mixed";
+	return level.charAt(0).toUpperCase() + level.slice(1);
+}
+function getSourceMap(sources: Array<Record<string, unknown>>): Map<string, Record<string, unknown>> {
+	return new Map(
+		sources
+			.map((source) => [String(source.id || ""), source] as const)
+			.filter(([id]) => id),
+	);
+}
+function formatSourceLine(source: Record<string, unknown>): string {
+	const id = String(source.id || "?");
+	const url = sourceUrl(source);
+	const title = sourceLabel(source);
+	const domain = String(source.domain || "");
+	const engines = Array.isArray(source.engines) ? (source.engines as string[]) : [];
+	const consensus = sourceConsensus(source);
+	const typeLabel = humanizeSourceType(String(source.sourceType || ""));
+	const fetch = source.fetch as Record<string, unknown> | undefined;
+	const fetchStatus = fetch?.ok ? `fetched ${fetch.status || 200}` : fetch?.attempted ? "fetch failed" : "";
+	const pieces = [
+		`${id} - [${title}](${url})`,
+		domain,
+		typeLabel,
+		engines.length ? `cited by ${engines.map(formatEngineName).join(", ")} (${consensus}/3)` : `${consensus}/3`,
+		fetchStatus,
+	].filter(Boolean);
+	return `- ${pieces.join(" - ")}`;
+}
+function renderSourceEvidence(lines: string[], source: Record<string, unknown>): void {
+	const fetch = source.fetch as Record<string, unknown> | undefined;
+	if (!fetch?.attempted) return;
+	const snippet = String(fetch.snippet || "").trim();
+	const lastModified = String(fetch.lastModified || "").trim();
+	if (snippet) lines.push(`  Evidence: ${snippet}`);
+	if (lastModified) lines.push(`  Last-Modified: ${lastModified}`);
+	if (fetch.error) lines.push(`  Fetch error: ${String(fetch.error)}`);
+}
+function pickSources(
+	sources: Array<Record<string, unknown>>,
+	recommendedIds: string[] = [],
+	max = 6,
+): Array<Record<string, unknown>> {
+	if (!sources.length) return [];
+	const sourceMap = getSourceMap(sources);
+	const recommended = recommendedIds
+		.map((id) => sourceMap.get(id))
+		.filter((source): source is Record<string, unknown> => Boolean(source));
+	if (recommended.length > 0) return recommended.slice(0, max);
+	return sources.slice(0, max);
+}
+function renderSynthesis(
+	lines: string[],
+	synthesis: Record<string, unknown>,
+	sources: Array<Record<string, unknown>>,
+	maxSources = 6,
+): void {
+	if (synthesis.answer) {
+		lines.push("## Answer");
+		lines.push(String(synthesis.answer));
+		lines.push("");
+	}
+	const agreement = synthesis.agreement as Record<string, unknown> | undefined;
+	const agreementSummary = String(agreement?.summary || "").trim();
+	const agreementLevel = String(agreement?.level || "").trim();
+	if (agreementSummary || agreementLevel) {
+		lines.push("## Consensus");
+		lines.push(`- ${formatAgreementLevel(agreementLevel)}${agreementSummary ? ` - ${agreementSummary}` : ""}`);
+		lines.push("");
+	}
+	const differences = Array.isArray(synthesis.differences) ? (synthesis.differences as string[]) : [];
+	if (differences.length > 0) {
+		lines.push("## Where Engines Differ");
+		for (const difference of differences) lines.push(`- ${difference}`);
+		lines.push("");
+	}
+	const caveats = Array.isArray(synthesis.caveats) ? (synthesis.caveats as string[]) : [];
+	if (caveats.length > 0) {
+		lines.push("## Caveats");
+		for (const caveat of caveats) lines.push(`- ${caveat}`);
+		lines.push("");
+	}
+	const claims = Array.isArray(synthesis.claims)
+		? (synthesis.claims as Array<Record<string, unknown>>)
+		: [];
+	if (claims.length > 0) {
+		lines.push("## Key Claims");
+		for (const claim of claims) {
+			const sourceIds = Array.isArray(claim.sourceIds) ? (claim.sourceIds as string[]) : [];
+			const support = String(claim.support || "moderate");
+			lines.push(`- ${String(claim.claim || "")} [${support}${sourceIds.length ? `; ${sourceIds.join(", ")}` : ""}]`);
+		}
+		lines.push("");
+	}
+	const recommendedIds = Array.isArray(synthesis.recommendedSources)
+		? (synthesis.recommendedSources as string[])
+		: [];
+	const topSources = pickSources(sources, recommendedIds, maxSources);
+	if (topSources.length > 0) {
+		lines.push("## Top Sources");
+		for (const source of topSources) lines.push(formatSourceLine(source));
+		lines.push("");
+	}
+}
 function formatResults(engine: string, data: Record<string, unknown>): string {
 	const lines: string[] = [];
 	if (engine === "all") {
-		// Synthesized output: prefer _synthesis + _sources
 		const synthesis = data._synthesis as Record<string, unknown> | undefined;
 		const dedupedSources = data._sources as Array<Record<string, unknown>> | undefined;
 		if (synthesis?.answer) {
-			lines.push("## Synthesis");
-			lines.push(String(synthesis.answer));
-			if (dedupedSources?.length) {
-				lines.push("\n**Top sources by consensus:**");
-				for (const s of dedupedSources.slice(0, 6)) {
-					const engines = (s.engines as string[]) || [];
-					lines.push(`- [${s.title || s.url}](${s.url}) [${engines.length}/3]`);
-				}
-			}
-			lines.push("\n---\n*Synthesized from Perplexity, Bing Copilot, and Google AI*");
+			renderSynthesis(lines, synthesis, dedupedSources || [], 6);
+			lines.push("*Synthesized from Perplexity, Bing Copilot, and Google AI*\n");
 			return lines.join("\n").trim();
 		}
-		// Standard output: per-engine answers
 		for (const [eng, result] of Object.entries(data)) {
 			if (eng.startsWith("_")) continue;
-			lines.push(`\n## ${eng.charAt(0).toUpperCase() + eng.slice(1)}`);
+			lines.push(`\n## ${formatEngineName(eng)}`);
 			const r = result as Record<string, unknown>;
 			if (r.error) {
 				lines.push(`Error: ${r.error}`);
@@ -128,33 +261,42 @@ function formatResults(engine: string, data: Record<string, unknown>): string {
 function formatDeepResearch(data: Record<string, unknown>): string {
 	const lines: string[] = [];
 	const confidence = data._confidence as Record<string, unknown> | undefined;
-	const fetchedSources = data._fetchedSources as Array<Record<string, unknown>> | undefined;
 	const dedupedSources = data._sources as Array<Record<string, unknown>> | undefined;
+	const synthesis = data._synthesis as Record<string, unknown> | undefined;
 	lines.push("# Deep Research Report\n");
-	// Confidence summary
 	if (confidence) {
 		const enginesResponded = (confidence.enginesResponded as string[]) || [];
 		const enginesFailed = (confidence.enginesFailed as string[]) || [];
-		const consensusScore = confidence.consensusScore || 0;
+		const agreementLevel = String(confidence.agreementLevel || "mixed");
+		const firstPartySourceCount = Number(confidence.firstPartySourceCount || 0);
+		const sourceTypeBreakdown = confidence.sourceTypeBreakdown as Record<string, number> | undefined;
 		lines.push("## Confidence\n");
-		lines.push(`- **Engines responded:** ${enginesResponded.join(", ") || "none"}`);
+		lines.push(`- Agreement: ${formatAgreementLevel(agreementLevel)}`);
+		lines.push(`- Engines responded: ${enginesResponded.map(formatEngineName).join(", ") || "none"}`);
 		if (enginesFailed.length > 0) {
-			lines.push(`- **Engines failed:** ${enginesFailed.join(", ")}`);
+			lines.push(`- Engines failed: ${enginesFailed.map(formatEngineName).join(", ")}`);
+		}
+		lines.push(`- Top source consensus: ${confidence.topSourceConsensus || 0}/3 engines`);
+		lines.push(`- Total unique sources: ${confidence.sourcesCount || 0}`);
+		lines.push(`- Official sources: ${confidence.officialSourceCount || 0}`);
+		lines.push(`- First-party sources: ${firstPartySourceCount}`);
+		lines.push(`- Fetch success rate: ${confidence.fetchedSourceSuccessRate || 0}`);
+		if (sourceTypeBreakdown && Object.keys(sourceTypeBreakdown).length > 0) {
+			lines.push(`- Source mix: ${Object.entries(sourceTypeBreakdown).map(([type, count]) => `${humanizeSourceType(type)} ${count}`).join(", ")}`);
 		}
-		lines.push(`- **Top source consensus:** ${consensusScore}/3 engines`);
-		lines.push(`- **Total unique sources:** ${confidence.sourcesCount || 0}`);
 		lines.push("");
 	}
-	// Per-engine answers
-	lines.push("## Findings\n");
+	if (synthesis?.answer) renderSynthesis(lines, synthesis, dedupedSources || [], 8);
+	lines.push("## Engine Perspectives\n");
 	for (const engine of ["perplexity", "bing", "google"]) {
 		const r = data[engine] as Record<string, unknown> | undefined;
 		if (!r) continue;
-		lines.push(`### ${engine.charAt(0).toUpperCase() + engine.slice(1)}`);
+		lines.push(`### ${formatEngineName(engine)}`);
 		if (r.error) {
 			lines.push(`⚠️ Error: ${r.error}`);
 		} else if (r.answer) {
@@ -163,41 +305,15 @@ function formatDeepResearch(data: Record<string, unknown>): string {
 		lines.push("");
 	}
-	// Synthesis
-	const synthesis = data._synthesis as Record<string, unknown> | undefined;
-	if (synthesis?.answer) {
-		lines.push("## Synthesized Answer\n");
-		lines.push(String(synthesis.answer));
-		lines.push("");
-	}
-	// Deduplicated sources by consensus
 	if (dedupedSources && dedupedSources.length > 0) {
-		lines.push("## Sources (Ranked by Consensus)\n");
-		for (const s of dedupedSources) {
-			const engines = (s.engines as string[]) || [];
-			const consensus = engines.length;
-			lines.push(`- **[${consensus}/3]** [${s.title || "Untitled"}](${s.url})`);
+		lines.push("## Source Registry\n");
+		for (const source of dedupedSources) {
+			lines.push(formatSourceLine(source));
+			renderSourceEvidence(lines, source);
 		}
 		lines.push("");
 	}
-	// Fetched source content
-	if (fetchedSources && fetchedSources.length > 0) {
-		lines.push("## Source Content (Top Matches)\n");
-		for (const fs of fetchedSources) {
-			lines.push(`### ${fs.title || fs.url}`);
-			lines.push(`*Source: ${fs.url}*`);
-			lines.push("");
-			if (fs.content) {
-				lines.push(String(fs.content).slice(0, 3000));
-			} else if (fs.error) {
-				lines.push(`⚠️ Could not fetch: ${fs.error}`);
-			}
-			lines.push("\n---\n");
-		}
-	}
 	return lines.join("\n").trim();
 }

package/launch.mjs CHANGED Viewed

@@ -5,9 +5,8 @@
 // the "Allow remote debugging?" dialog entirely. It runs on port 9222 so it doesn't
 // conflict with your main Chrome session (which may use port 9223).
 //
-// On launch, it overwrites the DevToolsActivePort file that cdp.mjs reads so all
-// extractors automatically target the GreedySearch Chrome, with no code changes.
-// The original file is restored on --kill.
+// search.mjs passes CDP_PROFILE_DIR so cdp.mjs targets this dedicated Chrome
+// without ever touching the user's main Chrome DevToolsActivePort file.
 //
 // Usage:
 //   node launch.mjs          — launch (or report if already running)
@@ -15,8 +14,8 @@
 //   node launch.mjs --status — check if running
 import { spawn } from 'child_process';
-import { existsSync, writeFileSync, readFileSync, copyFileSync, mkdirSync, unlinkSync } from 'fs';
-import { tmpdir, homedir, platform } from 'os';
+import { existsSync, writeFileSync, readFileSync, mkdirSync, unlinkSync } from 'fs';
+import { tmpdir, platform } from 'os';
 import { join } from 'path';
 import http from 'http';
@@ -43,18 +42,8 @@ function findChrome() {
   return candidates.find(existsSync) || null;
 }
-function systemPortPath() {
-  const os = platform();
-  if (os === 'win32') return join(homedir(), 'AppData', 'Local', 'Google', 'Chrome', 'User Data', 'DevToolsActivePort');
-  if (os === 'darwin') return join(homedir(), 'Library', 'Application Support', 'Google', 'Chrome', 'DevToolsActivePort');
-  return join(homedir(), '.config', 'google-chrome', 'DevToolsActivePort');
-}
-const SYSTEM_PORT   = systemPortPath();
-const SYSTEM_BACKUP = SYSTEM_PORT + '.bak';
-const CHROME_FLAGS = [
-  `--remote-debugging-port=${PORT}`,
+const CHROME_FLAGS = [
+  `--remote-debugging-port=${PORT}`,
   '--disable-features=DevToolsPrivacyUI',      // suppresses "Allow remote debugging?" dialog
   '--no-first-run',
   '--no-default-browser-check',
@@ -108,52 +97,21 @@ async function writePortFile(timeoutMs = 15000) {
   return false;
 }
-function redirectCdpToGreedySearch() {
-  // Back up system DevToolsActivePort (user's main Chrome)
-  if (existsSync(SYSTEM_PORT) && !existsSync(SYSTEM_BACKUP)) {
-    copyFileSync(SYSTEM_PORT, SYSTEM_BACKUP);
-  }
-  // Point cdp.mjs to our dedicated Chrome's port
-  // On Windows, main Chrome may hold a lock on SYSTEM_PORT (EBUSY).
-  // Fall back to writeFileSync which uses CreateFile/WriteFile instead of CopyFile.
-  try {
-    copyFileSync(ACTIVE_PORT, SYSTEM_PORT);
-  } catch (e) {
-    if (e.code !== 'EBUSY') throw e;
-    try {
-      writeFileSync(SYSTEM_PORT, readFileSync(ACTIVE_PORT, 'utf8'), 'utf8');
-    } catch {
-      console.warn('Warning: could not redirect DevToolsActivePort (file busy) — cdp.mjs will use existing port.');
-    }
-  }
-}
-function restoreCdpToMainChrome() {
-  if (existsSync(SYSTEM_BACKUP)) {
-    copyFileSync(SYSTEM_BACKUP, SYSTEM_PORT);
-    console.log('Restored DevToolsActivePort to main Chrome.');
-  } else if (existsSync(SYSTEM_PORT)) {
-    // No backup means main Chrome wasn't using CDP — remove our file
-    try { unlinkSync(SYSTEM_PORT); } catch {}
-  }
-}
-// ---------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
 async function main() {
   const arg = process.argv[2];
-  if (arg === '--kill') {
-    const pid = isRunning();
-    if (pid) {
-      try { process.kill(pid, 'SIGTERM'); console.log(`Stopped Chrome (pid ${pid}).`); }
-      catch (e) { console.error(`Failed: ${e.message}`); }
-    } else {
-      console.log('GreedySearch Chrome is not running.');
-    }
-    restoreCdpToMainChrome();
-    return;
-  }
+  if (arg === '--kill') {
+    const pid = isRunning();
+    if (pid) {
+      try { process.kill(pid, 'SIGTERM'); console.log(`Stopped Chrome (pid ${pid}).`); }
+      catch (e) { console.error(`Failed: ${e.message}`); }
+    } else {
+      console.log('GreedySearch Chrome is not running.');
+    }
+    return;
+  }
   if (arg === '--status') {
     const pid = isRunning();
@@ -165,13 +123,12 @@ async function main() {
   // Already running?
   const existing = isRunning();
   if (existing) {
-    const ready = await writePortFile(5000);
-    if (ready) {
-      console.log(`GreedySearch Chrome already running (pid ${existing}, port ${PORT}).`);
-      redirectCdpToGreedySearch();
-      console.log('DevToolsActivePort redirected.');
-      return;
-    }
+    const ready = await writePortFile(5000);
+    if (ready) {
+      console.log(`GreedySearch Chrome already running (pid ${existing}, port ${PORT}).`);
+      console.log('Dedicated GreedySearch DevToolsActivePort is ready.');
+      return;
+    }
     // Stale PID — process alive but not Chrome on port 9222. Fall through to fresh launch.
     console.log(`Stale PID ${existing} detected (not Chrome on port ${PORT}) — launching fresh.`);
     try { unlinkSync(PID_FILE); } catch {}
@@ -195,16 +152,15 @@ async function main() {
   proc.unref();
   writeFileSync(PID_FILE, String(proc.pid));
-  // Wait for Chrome HTTP endpoint, build DevToolsActivePort file, redirect cdp.mjs
-  const portFileReady = await writePortFile();
-  if (!portFileReady) {
-    console.error('Chrome did not become ready within 15s.');
-    process.exit(1);
-  }
-  redirectCdpToGreedySearch();
-  console.log(`Ready. No more "Allow remote debugging?" dialogs.`);
-  console.log(`Run "node launch.mjs --kill" when done to restore your main Chrome's CDP.`);
-}
+  // Wait for Chrome HTTP endpoint and build the dedicated DevToolsActivePort file
+  const portFileReady = await writePortFile();
+  if (!portFileReady) {
+    console.error('Chrome did not become ready within 15s.');
+    process.exit(1);
+  }
+  console.log(`Ready. No more "Allow remote debugging?" dialogs.`);
+  console.log('GreedySearch now uses its own isolated DevToolsActivePort file.');
+}
 main();

package/newfeaturesideas.md ADDED Viewed

@@ -0,0 +1,105 @@
+# New Feature Ideas
+Ideas for future features — thinking from the perspective of an AI assistant using these tools.
+---
+## 1. Source Verification
+**Problem:** I get sources but can't verify if they're live, updated, or actually support the claimed content.
+```
+verify_sources({ urls: ["https://...", "https://..."] })
+→ [{ url, status: 200, title, snippet, lastModified, claim: "supports X" }]
+```
+**Use cases:**
+- Before citing a source, verify it's not 404
+- Check if a page actually contains the claimed information
+- Get last-modified dates to assess freshness
+---
+## 2. Incremental / Continuation Research
+**Problem:** After deep_research on "RAG vs fine-tuning", going deeper on just RAG means re-running everything with a new query and losing original context.
+```
+deep_research({ query: "RAG vs fine-tuning", ... })  // initial
+continue_research({ previousId: "...", query: "production RAG architectures" })  // goes deeper on RAG
+```
+**Use cases:**
+- Drill into a specific aspect after initial broad research
+- Build on previous results without re-fetching everything
+- Progressive disclosure of complex topics
+---
+## 3. Multi-Query Synthesis
+**Problem:** One query isn't enough for complex research. I chain multiple greedy_search calls manually.
+```
+multi_research({
+  queries: ["auth best practices", "NextAuth vs Clerk vs Lucia", "Next.js auth security"],
+  synthesize: true
+})
+```
+**Use cases:**
+- "Best auth for Next.js" needs multiple angles
+- Research with different facets (comparison, security, performance)
+- Casting a wider net when single query returns narrow results
+---
+## 4. Structured Extraction
+**Problem:** When researching "which libraries are maintained", I want tables (name, stars, last commit, license), not prose.
+```
+extract_structured({
+  query: "Python HTTP client libraries 2026",
+  schema: { name: "string", stars: "number", lastUpdated: "date", async: "boolean" }
+})
+```
+**Use cases:**
+- Library comparisons as structured data
+- Dependency audits
+- Feature matrices for tools/frameworks
+---
+## 5. Confidence Scoring on Specific Claims
+**Problem:** I say "high confidence" but it's hand-wavy. What if I could ask: "how confident are we that library X is actively maintained?"
+```
+verify_claim({
+  claim: "Prisma is actively maintained",
+  evidence: ["last commit: 2 weeks ago", "open issues: 45", "npm downloads: 2M/week"]
+})
+→ { confidence: 0.95, reasoning: "..." }
+```
+---
+## 6. Research Cache / History
+**Problem:** I do expensive deep_research, then the user asks a follow-up. I have to re-run everything.
+```
+get_research({ id: "..." })  // retrieve previous results
+list_research({ query: "RAG" })  // find related previous research
+```
+---
+## Priority
+1. **Source verification** — high value, relatively simple, fixes trust gap
+2. **Multi-query synthesis** — high value, complex but powerful
+3. **Incremental research** — medium value, nice UX improvement
+4. **Structured extraction** — medium value, specialized use cases

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@apmantza/greedysearch-pi",
-  "version": "1.3.0",
+  "version": "1.4.0",
   "description": "Pi extension: browser-automation tool that searches Perplexity, Bing Copilot, and Google AI in parallel, extracts answers and sources via CDP, with optional Gemini synthesis — grounded AI answers from real browser interactions.",
   "type": "module",
   "keywords": [

package/search.mjs CHANGED Viewed

@@ -54,12 +54,400 @@ const ENGINE_DOMAINS = {
   gemini:     'gemini.google.com',
 };
-function getTabFromCache(engine) {
+const TRACKING_PARAMS = [
+  'fbclid',
+  'gclid',
+  'ref',
+  'ref_src',
+  'ref_url',
+  'source',
+  'utm_campaign',
+  'utm_content',
+  'utm_medium',
+  'utm_source',
+  'utm_term',
+];
+const COMMUNITY_HOSTS = [
+  'dev.to',
+  'hashnode.com',
+  'medium.com',
+  'reddit.com',
+  'stackoverflow.com',
+  'stackexchange.com',
+  'substack.com',
+];
+const NEWS_HOSTS = [
+  'arstechnica.com',
+  'techcrunch.com',
+  'theverge.com',
+  'venturebeat.com',
+  'wired.com',
+  'zdnet.com',
+];
+function trimText(text = '', maxChars = 240) {
+  const clean = String(text).replace(/\s+/g, ' ').trim();
+  if (clean.length <= maxChars) return clean;
+  return clean.slice(0, maxChars).replace(/\s+\S*$/, '') + '...';
+}
+function normalizeSourceTitle(title = '') {
+  const clean = trimText(title, 180);
+  if (!clean) return '';
+  if (/^https?:\/\//i.test(clean)) return '';
+  const wordCount = clean.split(/\s+/).filter(Boolean).length;
+  const hasUppercase = /[A-Z]/.test(clean);
+  const hasDigit = /\d/.test(clean);
+  const looksLikeFragment = clean === clean.toLowerCase() && wordCount <= 4 && !hasUppercase && !hasDigit;
+  return looksLikeFragment ? '' : clean;
+}
+function pickPreferredTitle(currentTitle = '', nextTitle = '') {
+  const current = normalizeSourceTitle(currentTitle);
+  const next = normalizeSourceTitle(nextTitle);
+  if (!next) return current;
+  if (!current) return next;
+  const currentLooksLikeUrl = /^https?:\/\//i.test(current);
+  const nextLooksLikeUrl = /^https?:\/\//i.test(next);
+  if (currentLooksLikeUrl && !nextLooksLikeUrl) return next;
+  if (!currentLooksLikeUrl && nextLooksLikeUrl) return current;
+  return next.length > current.length ? next : current;
+}
+function normalizeUrl(rawUrl) {
+  if (!rawUrl) return null;
+  try {
+    const url = new URL(rawUrl);
+    if (!['http:', 'https:'].includes(url.protocol)) return null;
+    url.hash = '';
+    url.hostname = url.hostname.toLowerCase();
+    if ((url.protocol === 'https:' && url.port === '443') || (url.protocol === 'http:' && url.port === '80')) {
+      url.port = '';
+    }
+    for (const key of [...url.searchParams.keys()]) {
+      const lower = key.toLowerCase();
+      if (TRACKING_PARAMS.includes(lower) || lower.startsWith('utm_')) {
+        url.searchParams.delete(key);
+      }
+    }
+    url.searchParams.sort();
+    const normalizedPath = url.pathname.replace(/\/+$/, '') || '/';
+    url.pathname = normalizedPath;
+    const normalized = url.toString();
+    return normalizedPath === '/' ? normalized.replace(/\/$/, '') : normalized;
+  } catch {
+    return null;
+  }
+}
+function getDomain(rawUrl) {
+  try {
+    const domain = new URL(rawUrl).hostname.toLowerCase();
+    return domain.replace(/^www\./, '');
+  } catch {
+    return '';
+  }
+}
+function matchesDomain(domain, hosts) {
+  return hosts.some(host => domain === host || domain.endsWith(`.${host}`));
+}
+function classifySourceType(domain, title = '', rawUrl = '') {
+  const lowerTitle = title.toLowerCase();
+  const lowerUrl = rawUrl.toLowerCase();
+  if (domain === 'github.com' || domain === 'gitlab.com') return 'repo';
+  if (matchesDomain(domain, COMMUNITY_HOSTS)) return 'community';
+  if (matchesDomain(domain, NEWS_HOSTS)) return 'news';
+  if (
+    domain.startsWith('docs.') ||
+    domain.startsWith('developer.') ||
+    domain.startsWith('developers.') ||
+    domain.startsWith('api.') ||
+    lowerTitle.includes('documentation') ||
+    lowerTitle.includes('docs') ||
+    lowerTitle.includes('reference') ||
+    lowerUrl.includes('/docs/') ||
+    lowerUrl.includes('/reference/') ||
+    lowerUrl.includes('/api/')
+  ) {
+    return 'official-docs';
+  }
+  if (domain.startsWith('blog.') || lowerUrl.includes('/blog/')) return 'maintainer-blog';
+  return 'website';
+}
+function sourceTypePriority(sourceType) {
+  switch (sourceType) {
+    case 'official-docs': return 5;
+    case 'repo': return 4;
+    case 'maintainer-blog': return 3;
+    case 'website': return 2;
+    case 'community': return 1;
+    case 'news': return 0;
+    default: return 0;
+  }
+}
+function bestRank(source) {
+  const ranks = Object.values(source.perEngine || {}).map(v => v?.rank || 99);
+  return ranks.length ? Math.min(...ranks) : 99;
+}
+function buildSourceRegistry(out) {
+  const seen = new Map();
+  const engineOrder = ['perplexity', 'bing', 'google'];
+  for (const engine of engineOrder) {
+    const result = out[engine];
+    if (!result?.sources) continue;
+    for (let i = 0; i < result.sources.length; i++) {
+      const source = result.sources[i];
+      const canonicalUrl = normalizeUrl(source.url);
+      if (!canonicalUrl || canonicalUrl.length < 10) continue;
+      const title = normalizeSourceTitle(source.title || '');
+      const domain = getDomain(canonicalUrl);
+      const sourceType = classifySourceType(domain, title, canonicalUrl);
+      const existing = seen.get(canonicalUrl) || {
+        id: '',
+        canonicalUrl,
+        displayUrl: source.url || canonicalUrl,
+        domain,
+        title: '',
+        engines: [],
+        engineCount: 0,
+        perEngine: {},
+        sourceType,
+        isOfficial: sourceType === 'official-docs',
+      };
+      existing.title = pickPreferredTitle(existing.title, title);
+      existing.displayUrl = existing.displayUrl || source.url || canonicalUrl;
+      existing.sourceType = existing.sourceType || sourceType;
+      existing.isOfficial = existing.isOfficial || sourceType === 'official-docs';
+      if (!existing.engines.includes(engine)) {
+        existing.engines.push(engine);
+      }
+      existing.perEngine[engine] = {
+        rank: i + 1,
+        title: pickPreferredTitle(existing.perEngine[engine]?.title || '', title),
+      };
+      seen.set(canonicalUrl, existing);
+    }
+  }
+  const sources = Array.from(seen.values())
+    .map(source => ({
+      ...source,
+      engineCount: source.engines.length,
+    }))
+    .sort((a, b) => {
+      if (b.engineCount !== a.engineCount) return b.engineCount - a.engineCount;
+      if (sourceTypePriority(b.sourceType) !== sourceTypePriority(a.sourceType)) {
+        return sourceTypePriority(b.sourceType) - sourceTypePriority(a.sourceType);
+      }
+      if (bestRank(a) !== bestRank(b)) return bestRank(a) - bestRank(b);
+      return a.domain.localeCompare(b.domain);
+    })
+    .slice(0, 12)
+    .map((source, index) => ({
+      ...source,
+      id: `S${index + 1}`,
+      title: source.title || source.domain || source.canonicalUrl,
+    }));
+  return sources;
+}
+function mergeFetchDataIntoSources(sources, fetchedSources) {
+  const byId = new Map(fetchedSources.map(source => [source.id, source]));
+  return sources.map(source => {
+    const fetched = byId.get(source.id);
+    if (!fetched) return source;
+    const title = pickPreferredTitle(source.title, fetched.title || '');
+    return {
+      ...source,
+      title: title || source.title,
+      fetch: {
+        attempted: true,
+        ok: !fetched.error,
+        status: fetched.status || null,
+        finalUrl: fetched.finalUrl || fetched.url || source.canonicalUrl,
+        contentType: fetched.contentType || '',
+        lastModified: fetched.lastModified || '',
+        title: fetched.title || '',
+        snippet: fetched.snippet || '',
+        contentChars: fetched.contentChars || 0,
+        error: fetched.error || '',
+      },
+    };
+  });
+}
+function parseStructuredJson(text) {
+  if (!text) return null;
+  const trimmed = String(text).trim();
+  const candidates = [
+    trimmed,
+    trimmed.replace(/^```json\s*/i, '').replace(/^```\s*/i, '').replace(/```$/i, '').trim(),
+  ];
+  const objectMatch = trimmed.match(/\{[\s\S]*\}/);
+  if (objectMatch) candidates.push(objectMatch[0]);
+  for (const candidate of candidates) {
+    try {
+      return JSON.parse(candidate);
+    } catch {
+      // try next candidate
+    }
+  }
+  return null;
+}
+function normalizeSynthesisPayload(payload, sources, fallbackAnswer = '') {
+  const sourceIds = new Set(sources.map(source => source.id));
+  const agreementLevel = ['high', 'medium', 'low', 'mixed', 'conflicting'].includes(payload?.agreement?.level)
+    ? payload.agreement.level
+    : 'mixed';
+  const claims = Array.isArray(payload?.claims)
+    ? payload.claims.map(claim => ({
+        claim: trimText(claim?.claim || '', 260),
+        support: ['strong', 'moderate', 'weak', 'conflicting'].includes(claim?.support) ? claim.support : 'moderate',
+        sourceIds: Array.isArray(claim?.sourceIds) ? claim.sourceIds.filter(id => sourceIds.has(id)) : [],
+      })).filter(claim => claim.claim)
+    : [];
+  const recommendedSources = Array.isArray(payload?.recommendedSources)
+    ? payload.recommendedSources.filter(id => sourceIds.has(id)).slice(0, 6)
+    : [];
+  return {
+    answer: trimText(payload?.answer || fallbackAnswer, 4000),
+    agreement: {
+      level: agreementLevel,
+      summary: trimText(payload?.agreement?.summary || '', 280),
+    },
+    differences: Array.isArray(payload?.differences)
+      ? payload.differences.map(item => trimText(item, 220)).filter(Boolean).slice(0, 5)
+      : [],
+    caveats: Array.isArray(payload?.caveats)
+      ? payload.caveats.map(item => trimText(item, 220)).filter(Boolean).slice(0, 5)
+      : [],
+    claims,
+    recommendedSources,
+  };
+}
+function buildSynthesisPrompt(query, results, sources, { grounded = false } = {}) {
+  const engineSummaries = {};
+  for (const engine of ['perplexity', 'bing', 'google']) {
+    const result = results[engine];
+    if (!result) continue;
+    if (result.error) {
+      engineSummaries[engine] = { status: 'error', error: String(result.error) };
+      continue;
+    }
+    engineSummaries[engine] = {
+      status: 'ok',
+      answer: trimText(result.answer || '', grounded ? 4500 : 2200),
+      sourceIds: sources
+        .filter(source => source.engines.includes(engine))
+        .sort((a, b) => (a.perEngine[engine]?.rank || 99) - (b.perEngine[engine]?.rank || 99))
+        .map(source => source.id)
+        .slice(0, 6),
+    };
+  }
+  const sourceRegistry = sources.slice(0, grounded ? 10 : 8).map(source => ({
+    id: source.id,
+    title: source.title,
+    domain: source.domain,
+    canonicalUrl: source.canonicalUrl,
+    sourceType: source.sourceType,
+    isOfficial: source.isOfficial,
+    engines: source.engines,
+    engineCount: source.engineCount,
+    perEngine: source.perEngine,
+    fetch: grounded && source.fetch?.attempted ? {
+      ok: source.fetch.ok,
+      status: source.fetch.status,
+      lastModified: source.fetch.lastModified,
+      snippet: trimText(source.fetch.snippet || '', 700),
+    } : undefined,
+  }));
+  return [
+    'You are synthesizing results from Perplexity, Bing Copilot, and Google AI.',
+    grounded
+      ? 'Use the fetched source snippets as the strongest evidence. Use engine answers for perspective and conflict detection.'
+      : 'Use the engine answers for perspective. Use the source registry for provenance and citations.',
+    'Prefer official docs, release notes, repositories, and maintainer-authored sources when available.',
+    'If the engines disagree, say so explicitly.',
+    'Do not invent sources. Only reference source IDs from the source registry.',
+    'Return valid JSON only. No markdown fences, no prose outside the JSON object.',
+    '',
+    'JSON schema:',
+    '{',
+    '  "answer": "short direct answer",',
+    '  "agreement": { "level": "high|medium|low|mixed|conflicting", "summary": "..." },',
+    '  "differences": ["..."],',
+    '  "caveats": ["..."],',
+    '  "claims": [',
+    '    { "claim": "...", "support": "strong|moderate|weak|conflicting", "sourceIds": ["S1"] }',
+    '  ],',
+    '  "recommendedSources": ["S1", "S2"]',
+    '}',
+    '',
+    `User query: ${query}`,
+    '',
+    `Engine results:\n${JSON.stringify(engineSummaries, null, 2)}`,
+    '',
+    `Source registry:\n${JSON.stringify(sourceRegistry, null, 2)}`,
+  ].join('\n');
+}
+function buildConfidence(out) {
+  const sources = Array.isArray(out._sources) ? out._sources : [];
+  const topConsensus = sources.length > 0 ? sources[0]?.engineCount || 0 : 0;
+  const officialSourceCount = sources.filter(source => source.isOfficial).length;
+  const firstPartySourceCount = sources.filter(source => source.isOfficial || source.sourceType === 'maintainer-blog').length;
+  const fetchedAttempted = sources.filter(source => source.fetch?.attempted).length;
+  const fetchedSucceeded = sources.filter(source => source.fetch?.ok).length;
+  const sourceTypeBreakdown = sources.reduce((acc, source) => {
+    acc[source.sourceType] = (acc[source.sourceType] || 0) + 1;
+    return acc;
+  }, {});
+  const synthesisLevel = out._synthesis?.agreement?.level;
+  return {
+    sourcesCount: sources.length,
+    topSourceConsensus: topConsensus,
+    agreementLevel: synthesisLevel || (topConsensus >= 3 ? 'high' : topConsensus >= 2 ? 'medium' : 'low'),
+    enginesResponded: ALL_ENGINES.filter(engine => out[engine]?.answer && !out[engine]?.error),
+    enginesFailed: ALL_ENGINES.filter(engine => out[engine]?.error),
+    officialSourceCount,
+    firstPartySourceCount,
+    fetchedSourceSuccessRate: fetchedAttempted > 0 ? Number((fetchedSucceeded / fetchedAttempted).toFixed(2)) : 0,
+    sourceTypeBreakdown,
+  };
+}
+function getFullTabFromCache(engine) {
   try {
     if (!existsSync(PAGES_CACHE)) return null;
     const pages = JSON.parse(readFileSync(PAGES_CACHE, 'utf8'));
     const found = pages.find(p => p.url.includes(ENGINE_DOMAINS[engine]));
-    return found ? found.targetId.slice(0, 8) : null;
+    return found ? found.targetId : null;
   } catch { return null; }
 }
@@ -108,6 +496,31 @@ async function openNewTab() {
   return targetId;
 }
+async function getOrOpenEngineTab(engine) {
+  await cdp(['list']);
+  return getFullTabFromCache(engine) || openNewTab();
+}
+async function activateTab(targetId) {
+  try {
+    const anchor = await getAnyTab();
+    await cdp(['evalraw', anchor, 'Target.activateTarget', JSON.stringify({ targetId })]);
+  } catch {
+    // best-effort
+  }
+}
+async function closeTabs(targetIds = []) {
+  for (const targetId of targetIds) {
+    if (!targetId) continue;
+    await closeTab(targetId);
+  }
+  if (targetIds.length > 0) {
+    await new Promise(r => setTimeout(r, 300));
+    await cdp(['list']).catch(() => null);
+  }
+}
 async function closeTab(targetId) {
   try {
     const anchor = await getAnyTab();
@@ -200,10 +613,22 @@ async function fetchSourceContent(url, maxChars = 5000) {
     // Extract title
     const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
     const title = titleMatch ? titleMatch[1].trim() : '';
+    const finalUrl = res.url || url;
+    const snippet = trimText(content, 320);
-    return { url, title, content };
+    return {
+      url,
+      finalUrl,
+      status: res.status,
+      contentType: res.headers.get('content-type') || '',
+      lastModified: res.headers.get('last-modified') || '',
+      title,
+      snippet,
+      content,
+      contentChars: content.length,
+    };
   } catch (e) {
-    return { url, title: '', content: null, error: e.message };
+    return { url, title: '', content: null, snippet: '', contentChars: 0, error: e.message };
   }
 }
@@ -216,16 +641,17 @@ async function fetchMultipleSources(sources, maxSources = 5, maxChars = 5000) {
   for (let i = 0; i < toFetch.length; i++) {
     const s = toFetch[i];
-    process.stderr.write(`[greedysearch] Fetching ${i + 1}/${toFetch.length}: ${s.url.slice(0, 60)}...\n`);
+    process.stderr.write(`[greedysearch] Fetching ${i + 1}/${toFetch.length}: ${(s.canonicalUrl || s.url).slice(0, 60)}...\n`);
     try {
-      const result = await fetchSourceContent(s.url, maxChars);
+      const result = await fetchSourceContent(s.canonicalUrl || s.url, maxChars);
+      fetched.push({ id: s.id, ...result });
       if (result.content && result.content.length > 100) {
-        fetched.push(result);
         process.stderr.write(`[greedysearch] ✓ Got ${result.content.length} chars\n`);
       } else {
         process.stderr.write(`[greedysearch] ✗ Empty or too short\n`);
       }
     } catch (e) {
+      fetched.push({ id: s.id, url: s.canonicalUrl || s.url, error: e.message });
       process.stderr.write(`[greedysearch] ✗ Failed: ${e.message.slice(0, 80)}\n`);
     }
     process.stderr.write(`PROGRESS:fetch:${i + 1}/${toFetch.length}\n`);
@@ -235,6 +661,7 @@ async function fetchMultipleSources(sources, maxSources = 5, maxChars = 5000) {
 }
 function pickTopSource(out) {
+  if (Array.isArray(out._sources) && out._sources.length > 0) return out._sources[0];
   for (const engine of ['perplexity', 'google', 'bing']) {
     const r = out[engine];
     if (r?.sources?.length > 0) return r.sources[0];
@@ -242,59 +669,13 @@ function pickTopSource(out) {
   return null;
 }
-function deduplicateSources(out) {
-  const seen = new Map(); // url -> { title, engines }
-  const engineOrder = ['perplexity', 'bing', 'google'];
-  for (const engine of engineOrder) {
-    const r = out[engine];
-    if (!r?.sources) continue;
-    for (const s of r.sources) {
-      const url = s.url?.split('#')[0]?.replace(/\/$/, '');
-      if (!url || url.length < 10) continue;
-      if (!seen.has(url)) {
-        seen.set(url, { url: s.url, title: s.title || '', engines: [engine] });
-      } else {
-        const existing = seen.get(url);
-        if (!existing.engines.includes(engine)) {
-          existing.engines.push(engine);
-        }
-        if (!existing.title && s.title) existing.title = s.title;
-      }
-    }
-  }
-  // Sort by consensus (most engines = highest confidence)
-  return Array.from(seen.values())
-    .sort((a, b) => b.engines.length - a.engines.length)
-    .slice(0, 10);
-}
+async function synthesizeWithGemini(query, results, { grounded = false, tabPrefix = null } = {}) {
+  const sources = Array.isArray(results._sources) ? results._sources : buildSourceRegistry(results);
+  const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
-async function synthesizeWithGemini(query, results) {
-  // Build a prompt that includes all engine results
-  const sources = deduplicateSources(results);
-  let prompt = `Based on the following search results from multiple AI engines, provide a single, synthesized answer to the user's question. Combine the information, resolve any conflicts, and present the most accurate and complete answer.\n\n`;
-  prompt += `User's question: "${query}"\n\n`;
-  for (const engine of ['perplexity', 'bing', 'google']) {
-    const r = results[engine];
-    if (r?.error) {
-      prompt += `## ${engine} (failed)\nError: ${r.error}\n\n`;
-    } else if (r?.answer) {
-      prompt += `## ${engine}\n${r.answer}\n\n`;
-    }
-  }
-  prompt += `Provide a synthesized answer that:\n`;
-  prompt += `1. Combines the best information from all sources\n`;
-  prompt += `2. Notes where sources agree or disagree\n`;
-  prompt += `3. Is clear and well-structured\n`;
-  prompt += `4. Includes key sources at the end\n`;
-  // Run the query through Gemini extractor
   return new Promise((resolve, reject) => {
-    const proc = spawn('node', [join(__dir, 'extractors', 'gemini.mjs'), prompt, '--short'], {
+    const extraArgs = tabPrefix ? ['--tab', String(tabPrefix)] : [];
+    const proc = spawn('node', [join(__dir, 'extractors', 'gemini.mjs'), prompt, ...extraArgs], {
       stdio: ['ignore', 'pipe', 'pipe'],
     });
     let out = '';
@@ -309,8 +690,18 @@ async function synthesizeWithGemini(query, results) {
       clearTimeout(t);
       if (code !== 0) reject(new Error(err.trim() || 'gemini extractor failed'));
       else {
-        try { resolve(JSON.parse(out.trim())); }
-        catch { reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`)); }
+        try {
+          const raw = JSON.parse(out.trim());
+          const structured = parseStructuredJson(raw.answer || '');
+          resolve({
+            ...normalizeSynthesisPayload(structured, sources, raw.answer || ''),
+            rawAnswer: raw.answer || '',
+            geminiSources: raw.sources || [],
+          });
+        }
+        catch {
+          reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
+        }
       }
     });
   });
@@ -509,83 +900,79 @@ async function main() {
     }
     // All tabs assigned — run extractors in parallel
-    const results = await Promise.allSettled(
-      ALL_ENGINES.map((e, i) =>
-        runExtractor(ENGINES[e], query, tabs[i], short)
-          .then(r => {
-            process.stderr.write(`PROGRESS:${e}:done\n`);
-            return { engine: e, ...r };
-          })
-          .catch(err => {
-            process.stderr.write(`PROGRESS:${e}:error\n`);
-            throw err;
-          })
-      )
-    );
-    const out = {};
-    for (let i = 0; i < results.length; i++) {
-      const r = results[i];
-      if (r.status === 'fulfilled') {
-        out[r.value.engine] = r.value;
-      } else {
-        out[ALL_ENGINES[i]] = { error: r.reason?.message || 'unknown error' };
+    try {
+      const results = await Promise.allSettled(
+        ALL_ENGINES.map((e, i) =>
+          runExtractor(ENGINES[e], query, tabs[i], short)
+            .then(r => {
+              process.stderr.write(`PROGRESS:${e}:done\n`);
+              return { engine: e, ...r };
+            })
+            .catch(err => {
+              process.stderr.write(`PROGRESS:${e}:error\n`);
+              throw err;
+            })
+        )
+      );
+      const out = {};
+      for (let i = 0; i < results.length; i++) {
+        const r = results[i];
+        if (r.status === 'fulfilled') {
+          out[r.value.engine] = r.value;
+        } else {
+          out[ALL_ENGINES[i]] = { error: r.reason?.message || 'unknown error' };
+        }
       }
-    }
-    // Deduplicate sources across all engines
-    out._sources = deduplicateSources(out);
+      await closeTabs(tabs);
-    // Synthesize with Gemini if requested
-    if (synthesize) {
-      process.stderr.write('PROGRESS:synthesis:start\n');
-      process.stderr.write('[greedysearch] Synthesizing results with Gemini...\n');
-      try {
-        const synthesis = await synthesizeWithGemini(query, out);
-        out._synthesis = {
-          answer: synthesis.answer || '',
-          sources: synthesis.sources || [],
-          synthesized: true,
-        };
-        process.stderr.write('PROGRESS:synthesis:done\n');
-      } catch (e) {
-        process.stderr.write(`[greedysearch] Synthesis failed: ${e.message}\n`);
-        out._synthesis = { error: e.message, synthesized: false };
+      // Build a canonical source registry across all engines
+      out._sources = buildSourceRegistry(out);
+      if (deepResearch) {
+        process.stderr.write('PROGRESS:deep-research:start\n');
+        const fetchedSources = out._sources.length > 0
+          ? await fetchMultipleSources(out._sources, 5, 8000)
+          : [];
+        out._sources = mergeFetchDataIntoSources(out._sources, fetchedSources);
+        out._fetchedSources = fetchedSources;
+        process.stderr.write(out._sources.length > 0 ? 'PROGRESS:deep-research:done\n' : 'PROGRESS:deep-research:no-sources\n');
       }
-    }
-    if (fetchSource) {
-      const top = pickTopSource(out);
-      if (top) out._topSource = await fetchTopSource(top.url);
-    }
+      // Synthesize with Gemini if requested
+      if (synthesize) {
+        process.stderr.write('PROGRESS:synthesis:start\n');
+        process.stderr.write('[greedysearch] Synthesizing results with Gemini...\n');
+        try {
+          const geminiTab = await getOrOpenEngineTab('gemini');
+          await activateTab(geminiTab);
+          const synthesis = await synthesizeWithGemini(query, out, { grounded: deepResearch, tabPrefix: geminiTab });
+          await activateTab(geminiTab);
+          out._synthesis = {
+            ...synthesis,
+            synthesized: true,
+          };
+          process.stderr.write('PROGRESS:synthesis:done\n');
+        } catch (e) {
+          process.stderr.write(`[greedysearch] Synthesis failed: ${e.message}\n`);
+          out._synthesis = { error: e.message, synthesized: false };
+        }
+      }
-    // Deep research mode: fetch top sources and return structured document
-    if (deepResearch) {
-      process.stderr.write('PROGRESS:deep-research:start\n');
-      // Get top sources by consensus
-      const topSources = out._sources || [];
-      if (topSources.length > 0) {
-        // Fetch content from top sources
-        out._fetchedSources = await fetchMultipleSources(topSources, 5, 8000);
-        process.stderr.write('PROGRESS:deep-research:done\n');
-      } else {
-        out._fetchedSources = [];
-        process.stderr.write('PROGRESS:deep-research:no-sources\n');
+      if (fetchSource) {
+        const top = pickTopSource(out);
+        if (top) out._topSource = await fetchTopSource(top.canonicalUrl || top.url);
       }
-      // Build confidence scores
-      out._confidence = {
-        sourcesCount: topSources.length,
-        consensusScore: topSources.length > 0 ? topSources[0]?.engines?.length || 0 : 0,
-        enginesResponded: ALL_ENGINES.filter(e => out[e]?.answer && !out[e]?.error),
-        enginesFailed: ALL_ENGINES.filter(e => out[e]?.error),
-      };
-    }
-    writeOutput(out, outFile, { inline, synthesize, query });
-    return;
+      if (deepResearch) out._confidence = buildConfidence(out);
+      writeOutput(out, outFile, { inline, synthesize, query });
+      return;
+    } finally {
+      await closeTabs(tabs);
+    }
   }
   const script = ENGINES[engine];