npm - @apmantza/greedysearch-pi - Versions diffs - 1.9.2 → 2.1.2 - Mend

@apmantza/greedysearch-pi 1.9.2 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/CHANGELOG.md +132 -2
package/README.md +82 -47
package/bin/cdp.mjs +1153 -1108
package/bin/launch.mjs +9 -0
package/bin/search.mjs +318 -81
package/extractors/bing-copilot.mjs +48 -18
package/extractors/chatgpt.mjs +553 -0
package/extractors/common.mjs +213 -22
package/extractors/consensus.mjs +655 -0
package/extractors/consent.mjs +182 -18
package/extractors/gemini.mjs +350 -217
package/extractors/google-ai.mjs +129 -128
package/extractors/logically.mjs +629 -0
package/extractors/perplexity.mjs +547 -217
package/extractors/selectors.mjs +3 -2
package/extractors/semantic-scholar.mjs +219 -0
package/package.json +8 -4
package/skills/greedy-search/skill.md +20 -12
package/src/fetcher.mjs +23 -1
package/src/formatters/results.ts +185 -128
package/src/search/browser-lifecycle.mjs +27 -5
package/src/search/challenge-detect.mjs +205 -0
package/src/search/chrome.mjs +653 -590
package/src/search/constants.mjs +155 -39
package/src/search/engines.mjs +114 -76
package/src/search/fetch-source.mjs +566 -451
package/src/search/pdf.mjs +68 -0
package/src/search/progress.mjs +145 -0
package/src/search/recovery.mjs +73 -45
package/src/search/research.mjs +1419 -62
package/src/search/scale-aware.mjs +93 -0
package/src/search/simple-research.mjs +520 -0
package/src/search/sources.mjs +52 -22
package/src/search/synthesis-runner.mjs +105 -26
package/src/search/synthesis.mjs +286 -246
package/src/tools/greedy-search-handler.ts +129 -59
package/src/tools/shared.ts +312 -186
package/src/types.ts +110 -104
package/test.mjs +537 -18

package/extractors/bing-copilot.mjs CHANGED Viewed

@@ -41,21 +41,31 @@ const GLOBAL_VAR = "__bingClipboard";
 // Bing Copilot-specific helpers
 // ============================================================================
+async function detectSignInWall(tab) {
+	// Language-agnostic: if the chat input is absent but the page hosts
+	// known OAuth provider endpoints, we're on the Copilot login wall.
+	const code = `(() => {
+		if (document.querySelector('#userInput')) return false;
+		const links = Array.from(document.querySelectorAll('a[href], button'));
+		const hasOAuth = links.some(el => {
+			const h = (el.href || el.getAttribute('formaction') || '').toLowerCase();
+			return h.includes('login.microsoftonline.com')
+				|| h.includes('appleid.apple.com')
+				|| h.includes('accounts.google.com');
+		});
+		return hasOAuth;
+	})()`;
+	const result = await cdp(["eval", tab, code]).catch(() => "false");
+	return result === "true";
+}
 async function extractAnswer(tab, env, query = "") {
-	// In headless mode: snap the accessibility tree before spending ~18s on
-	// clipboard polls. Copilot loads its input fine in headless but renders
-	// responses behind a Cloudflare-protected iframe — detecting that here
-	// fast-fails to the visible retry instead of burning all the poll time.
-	if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
-		const verification = await detectVerificationChallenge(tab, cdp);
-		if (verification) {
-			console.error(
-				"[bing] Verification challenge detected — fast-failing to visible retry",
-			);
-			env.blockedBy = "verification";
-			throw new Error("Verification challenge detected — headless blocked");
-		}
-	}
+	// Note: removed the prior headless fast-fail on Cloudflare detection.
+	// The new CDP-pierce + browser-level-click path in handleVerification
+	// can auto-clear the Turnstile checkbox from a fresh headless session,
+	// so we let the main flow run handleVerification and either click
+	// through or surface needs-human. We keep the env.blockedBy / signal
+	// surface so callers still see why an answer came back empty.
 	// Wait for the assistant copy button to exist. On fresh Copilot
 	// sessions the answer text can render before the button handler is
@@ -181,10 +191,15 @@ async function extractFromAccessibilityTree(tab, query = "") {
 		const snap = await cdp(["snap", tab]).catch(() => "");
 		if (!snap || (await detectVerificationChallenge(tab, cdp))) return "";
-		const articleLines = snap
-			.split("\n")
-			.map((line) => line.match(/^\s*\[article\]\s+(.+)$/i)?.[1])
-			.filter(Boolean);
+		// Linear article extraction — no regex. Avoids the ReDoS-prone
+		// /^\s*\[article\]\s+(.+)$/i pattern (SonarCloud hotspot js:S5852).
+		const articleLines = [];
+		for (const line of snap.split("\n")) {
+			const trimmed = line.trimStart();
+			if (!trimmed.toLowerCase().startsWith("[article]")) continue;
+			const after = trimmed.slice("[article]".length).trimStart();
+			if (after) articleLines.push(after);
+		}
 		if (articleLines.length === 0) return "";
 		const answer = pickAnswerArticle(articleLines, query);
@@ -419,12 +434,27 @@ async function main() {
 			}
 		}
+		// Detect sign-in wall before burning time waiting for an input that
+		// will never appear. Copilot now gates the chat behind Microsoft/Apple/Google
+		// login on fresh sessions.
+		if (await detectSignInWall(tab)) {
+			throw new Error(
+				"Copilot requires sign-in — please sign in with Microsoft, Apple, or Google in the visible browser window. Once signed in, cookies persist for future runs.",
+			);
+		}
 		// Wait for React app to mount input (up to 15s, longer after verification)
 		const inputReady = await waitForSelector(tab, S.input, 15000, 500);
 		env.inputReady = inputReady;
 		await new Promise((r) => setTimeout(r, jitter(300)));
 		if (!inputReady) {
+			// If input still missing, double-check we didn't land on the login wall
+			if (await detectSignInWall(tab)) {
+				throw new Error(
+					"Copilot requires sign-in — please sign in with Microsoft, Apple, or Google in the visible browser window. Once signed in, cookies persist for future runs.",
+				);
+			}
 			throw new Error(
 				"Copilot input not found — verification may have failed or page is in unexpected state",
 			);

package/extractors/chatgpt.mjs ADDED Viewed

@@ -0,0 +1,553 @@
+#!/usr/bin/env node
+// extractors/chatgpt.mjs
+// Navigate chatgpt.com, submit query, wait for answer, extract answer + sources.
+//
+// Usage:
+//   node extractors/chatgpt.mjs "<query>" [--tab <prefix>]
+//
+// Output (stdout): JSON { answer, sources, query, url }
+// Errors go to stderr only — stdout is always clean JSON for piping.
+import {
+	buildEnvelope,
+	cdp,
+	formatAnswer,
+	getOrOpenTab,
+	handleError,
+	injectClipboardInterceptor,
+	jitter,
+	logStage,
+	outputJson,
+	parseArgs,
+	parseSourcesFromMarkdown,
+	parseSourcesFromMarkdownRefStyle,
+	prepareArgs,
+	validateQuery,
+	waitForSelector,
+	waitForStreamComplete,
+} from "./common.mjs";
+import { dismissConsent, handleVerification } from "./consent.mjs";
+// Name of the `window` property that extractAnswer reads back after clicking
+// a copy button; the same name is handed to injectClipboardInterceptor.
+const GLOBAL_VAR = "__chatgptClipboard";
+// CSS selector for the ProseMirror prompt editor.
+const PROSE_SELECTOR = "div.ProseMirror";
+// CSS selector for the send button next to the editor.
+const SEND_SELECTOR = 'button[data-testid="send-button"]';
+// CSS selector for the per-message copy button.
+const COPY_SELECTOR = 'button[data-testid="copy-turn-action-button"]';
+// ============================================================================
+// ChatGPT-specific helpers
+// ============================================================================
+async function typeAndSubmit(tab, query) {
+	// Focus the ProseMirror editor
+	await cdp(["click", tab, PROSE_SELECTOR]);
+	await new Promise((r) => setTimeout(r, jitter(200)));
+	// Type via execCommand — this is the only reliable way to insert text into
+	// a ProseMirror editor (ChatGPT's input). CDP's Input.insertText targets
+	// input/textarea elements and doesn't dispatch the synthetic events that
+	// ProseMirror's editor view listens for, causing the send button to stay
+	// disabled in all-mode under CDP contention.
+	const typeResult = await cdp(
+		[
+			"eval",
+			tab,
+			`(() => {
+				const editor = document.querySelector('${PROSE_SELECTOR}');
+				if (!editor) return 'no-editor';
+				editor.focus();
+				const ok = document.execCommand('insertText', false, ${JSON.stringify(query)});
+				return ok ? 'ok' : 'exec-failed';
+			})()`,
+		],
+		5000,
+	);
+	if (typeResult !== "ok") {
+		throw new Error(`ChatGPT type failed: ${typeResult}`);
+	}
+	await new Promise((r) => setTimeout(r, jitter(300)));
+	// Click send button
+	const sendCode = `
+		(() => {
+			const btn = document.querySelector('${SEND_SELECTOR}');
+			if (!btn) return 'no-send';
+			if (btn.disabled) return 'send-disabled';
+			btn.click();
+			return 'ok';
+		})()
+	`;
+	const sendResult = await cdp(["eval", tab, sendCode]);
+	if (sendResult === "no-send")
+		throw new Error("ChatGPT send button not found");
+	if (sendResult === "send-disabled")
+		throw new Error("ChatGPT send button disabled — query was not registered");
+	await new Promise((r) => setTimeout(r, jitter(300)));
+}
+/**
+ * Browser-side expression used as the `selector` option of
+ * waitForStreamComplete and reused by pollForResponseNodeSide. It evaluates
+ * to an element or null:
+ *   - null when the page has no `[data-message-author-role="user"]` element;
+ *   - otherwise, among the assistant elements that come AFTER the last user
+ *     message, the one with the longest non-empty innerText (null when
+ *     every candidate is empty).
+ * Requiring a preceding user message is what skips chatgpt.com's static
+ * pre-rendered greeting card (which lives on the homepage before any
+ * conversation), so short answers like "Hello! 👋" don't get confused with
+ * the 32-char placeholder. The `data-turn-start-message` attribute itself
+ * is not inspected by this expression.
+ */
+const CHATGPT_RESPONSE_SELECTOR = String.raw`(() => {
+	const all = document.querySelectorAll('[data-message-author-role]');
+	let lastUserIdx = -1;
+	for (let i = 0; i < all.length; i++) {
+		if (all[i].getAttribute('data-message-author-role') === 'user') lastUserIdx = i;
+	}
+	if (lastUserIdx < 0) return null;
+	let bestEl = null;
+	let bestLen = 0;
+	for (let i = lastUserIdx + 1; i < all.length; i++) {
+		if (all[i].getAttribute('data-message-author-role') === 'assistant') {
+			const len = (all[i].innerText || '').length;
+			if (len > bestLen) { bestLen = len; bestEl = all[i]; }
+		}
+	}
+	return bestEl;
+})()`;
+/**
+ * Wait for ChatGPT's response to finish streaming. Delegates to the shared
+ * waitForStreamComplete in common.mjs with a custom selector that skips
+ * the static homepage greeting card.
+ *
+ * Tuning (fixes premature-stability race for complex answers):
+ *   minLength: 1    — kept low so short factual answers (e.g. "2 + 2 = 4.")
+ *                      stabilize correctly. The previous run reported a 10-char
+ *                      answer after 35s of waiting because minLength: 50 was
+ *                      too high for short replies.
+ *   stableRounds: 6  — require 6 rounds (~3.6s) of stable text. Complex
+ *                      answers stream a header/title block ("Next.jsReactNext.js",
+ *                      citation strips, etc.) that often stays at 19-40 chars
+ *                      for ~1.5-2s before the body arrives. The previous
+ *                      stableRounds: 3 (~1.8s) wasn't enough headroom; 6 rounds
+ *                      forces the body content to land before the wait resolves.
+ *                      Short answers like "2+2=4" stay stable at low length
+ *                      and resolve quickly because the entire response
+ *                      actually has finished.
+ */
+async function waitForResponse(tab, timeoutMs = 20000) {
+	return waitForStreamComplete(tab, {
+		timeout: timeoutMs,
+		interval: 600,
+		stableRounds: 6,
+		minLength: 1,
+		selector: CHATGPT_RESPONSE_SELECTOR,
+	});
+}
+/**
+ * Node-side fallback for chatgpt stream completion. Used when the in-browser
+ * poll times out (typically because Chrome throttles background tabs to 1Hz
+ * when 3+ extractors run in parallel in `all` mode). Polls the same
+ * greeting-card-skipping selector via short independent Runtime.evaluate
+ * calls so the WebSocket is free between polls.
+ */
+async function pollForResponseNodeSide(tab, maxMs = 15000) {
+	const deadline = Date.now() + maxMs;
+	let lastLen = 0;
+	let stableRounds = 0;
+	while (Date.now() < deadline) {
+		const result = await cdp(
+			["eval", tab, `${CHATGPT_RESPONSE_SELECTOR}?.innerText?.length ?? 0`],
+			4000,
+		).catch(() => "0");
+		const len = parseInt(result, 10) || 0;
+		if (len >= 1 && len === lastLen) {
+			stableRounds++;
+			if (stableRounds >= 3) return len;
+		} else {
+			lastLen = len;
+			stableRounds = 0;
+		}
+		await new Promise((r) => setTimeout(r, 1200));
+	}
+	return lastLen;
+}
+async function extractAnswerFromDom(tab) {
+	const raw = await cdp([
+		"eval",
+		tab,
+		String.raw`
+		(() => {
+			// Find the assistant message that comes AFTER the last user message,
+			// not the absolute last assistant element. The chatgpt.com homepage
+			// has a static pre-rendered greeting card that renders as a
+			// [data-message-author-role="assistant"] element with
+			// data-turn-start-message="true" — it must be skipped or the
+			// static "Hello! How can I help you today?" placeholder gets
+			// returned as the answer to a query the assistant never answered.
+			const all = Array.from(document.querySelectorAll('[data-message-author-role]'));
+			let lastUserIdx = -1;
+			for (let i = 0; i < all.length; i++) {
+				if (all[i].getAttribute('data-message-author-role') === 'user') {
+					lastUserIdx = i;
+				}
+			}
+			if (lastUserIdx < 0) {
+				// No user message at all — page is still on the homepage.
+				return JSON.stringify({
+					answer: '',
+					sources: [],
+					skipped: 'no-user-message',
+				});
+			}
+			let assistant = null;
+			for (let i = lastUserIdx + 1; i < all.length; i++) {
+				if (all[i].getAttribute('data-message-author-role') === 'assistant') {
+					assistant = all[i];
+				}
+			}
+			if (!assistant) {
+				return JSON.stringify({
+					answer: '',
+					sources: [],
+					skipped: 'no-assistant-response',
+				});
+			}
+			const answer = (assistant.innerText || assistant.textContent || '').trim();
+			const seen = new Set();
+			const sources = [];
+			for (const link of assistant.querySelectorAll('a[href]')) {
+				const url = link.href;
+				if (!url || seen.has(url)) continue;
+				seen.add(url);
+				const title = (link.innerText || link.textContent || '').replace(/\s+/g, ' ').trim();
+				sources.push({ title, url });
+				if (sources.length >= 10) break;
+			}
+			return JSON.stringify({ answer, sources });
+		})()
+	`,
+	]);
+	try {
+		return JSON.parse(raw);
+	} catch {
+		return { answer: "", sources: [], skipped: "parse-error" };
+	}
+}
+async function extractAnswer(tab, env) {
+	// Click the copy button on the assistant's response (after the last
+	// user message). The old `buttons[buttons.length - 1]` picked the
+	// absolute last copy button on the page — which is the USER message's
+	// copy button when the assistant response is still empty (0 chars) and
+	// has no copy button of its own. That copied the user's query into
+	// the clipboard interceptor and returned it as the "answer".
+	//
+	// If the assistant message has no copy button yet (still streaming, or
+	// the React tree hasn't rendered the button after streaming completed),
+	// we deliberately click NOTHING rather than falling back to the last
+	// copy button on the page. An empty clipboard routes us to the DOM
+	// fallback, which correctly targets the assistant message after the
+	// last user message and returns its innerText.
+	await cdp([
+		"eval",
+		tab,
+		`(() => {
+			const all = document.querySelectorAll('[data-message-author-role]');
+			let lastUserIdx = -1;
+			for (let i = 0; i < all.length; i++) {
+				if (all[i].getAttribute('data-message-author-role') === 'user') lastUserIdx = i;
+			}
+			if (lastUserIdx < 0) return 'no-user';
+			let assistantCopy = null;
+			for (let i = lastUserIdx + 1; i < all.length; i++) {
+				if (all[i].getAttribute('data-message-author-role') === 'assistant') {
+					const btn = all[i].querySelector('${COPY_SELECTOR}');
+					if (btn) assistantCopy = btn;
+				}
+			}
+			if (assistantCopy) { assistantCopy.click(); return 'clicked'; }
+			return 'no-assistant-copy';
+		})()`,
+	]);
+	await new Promise((r) => setTimeout(r, 600));
+	let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
+	env.clipboardEmpty = !answer;
+	// Retry once if clipboard is empty — the assistant message may have
+	// finished streaming and the copy button may have rendered in the
+	// meantime.
+	if (!answer) {
+		console.error("[chatgpt] Clipboard empty, retrying in 2s...");
+		await cdp([
+			"eval",
+			tab,
+			`(() => {
+				const all = document.querySelectorAll('[data-message-author-role]');
+				let lastUserIdx = -1;
+				for (let i = 0; i < all.length; i++) {
+					if (all[i].getAttribute('data-message-author-role') === 'user') lastUserIdx = i;
+				}
+				if (lastUserIdx < 0) return 'no-user';
+				let assistantCopy = null;
+				for (let i = lastUserIdx + 1; i < all.length; i++) {
+					if (all[i].getAttribute('data-message-author-role') === 'assistant') {
+						const btn = all[i].querySelector('${COPY_SELECTOR}');
+						if (btn) assistantCopy = btn;
+					}
+				}
+				if (assistantCopy) { assistantCopy.click(); return 'clicked'; }
+				return 'no-assistant-copy';
+			})()`,
+		]);
+		await new Promise((r) => setTimeout(r, 2000));
+		answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
+		env.clipboardEmpty = !answer;
+	}
+	let domFallback = null;
+	if (!answer) {
+		domFallback = await extractAnswerFromDom(tab);
+		answer = domFallback.answer;
+		env.fallbackUsed = answer ? "dom" : null;
+	}
+	// Reject suspicious DOM-fallback answers: header-only text (e.g. the
+	// "Next.jsReactNext.js" title block ChatGPT renders before the body
+	// streams in) and query-echoed text. These were the failure modes the
+	// earlier stream-wait race was producing — minLength: 1 + stableRounds: 3
+	// resolved too early on the header. The tightened stream-wait covers
+	// the common case; this guard catches the tail where the wait still
+	// resolved prematurely under CDP contention with parallel extractors.
+	//
+	// Heuristic: a real answer is either long (> 50 chars) or matches the
+	// shape of a short factual answer (10-50 chars and contains at least
+	// one punctuation/space-delimited word). The 5-char absolute floor
+	// catches the "Gemini said"/"Next.jsReactNext.js" header stubs that
+	// the old path let through.
+	//
+	// Return an empty result (NOT throw) so the caller's retry loop can
+	// re-wait and try again. The retry path itself is the right place
+	// for backoff, not here.
+	if (answer) {
+		const trimmed = answer.trim();
+		const looksLikeShortAnswer =
+			trimmed.length >= 5 &&
+			trimmed.length <= 50 &&
+			/\s|[.,!?;:]/.test(trimmed);
+		const looksLikeLongAnswer = trimmed.length > 50;
+		if (!looksLikeShortAnswer && !looksLikeLongAnswer) {
+			console.error(
+				`[chatgpt] DOM fallback answer suspiciously short (${trimmed.length} chars: ${JSON.stringify(trimmed.slice(0, 80))}) — returning empty for caller to retry`,
+			);
+			env.fallbackUsed = null;
+			return {
+				answer: "",
+				sources: [],
+				skipped: "header-stub",
+			};
+		}
+	}
+	if (!answer) {
+		return { answer: "", sources: [], skipped: "no-answer" };
+	}
+	// Parse sources from both inline/reference-style markdown links and DOM links
+	// (DOM fallback preserves sources even when native clipboard copy fails).
+	const sourcesInline = parseSourcesFromMarkdown(answer);
+	const sourcesRef = parseSourcesFromMarkdownRefStyle(answer);
+	const sourceMap = new Map();
+	for (const s of [
+		...(domFallback?.sources || []),
+		...sourcesRef,
+		...sourcesInline,
+	]) {
+		if (s?.url && !sourceMap.has(s.url)) sourceMap.set(s.url, s);
+	}
+	const sources = Array.from(sourceMap.values()).slice(0, 10);
+	return { answer: answer.trim(), sources };
+}
+// ============================================================================
+// Main
+// ============================================================================
+const USAGE = 'Usage: node extractors/chatgpt.mjs "<query>" [--tab <prefix>]\n';
+async function main() {
+	const args = await prepareArgs(process.argv.slice(2));
+	validateQuery(args, USAGE);
+	const { query, tabPrefix, short } = parseArgs(args);
+	const startTime = Date.now();
+	const mode =
+		process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
+	const env = {
+		engine: "chatgpt",
+		mode,
+		clipboardEmpty: null,
+		fallbackUsed: null,
+		blockedBy: null,
+		verificationResult: null,
+		inputReady: null,
+	};
+	try {
+		if (!tabPrefix) await cdp(["list"]);
+		const tab = await getOrOpenTab(tabPrefix);
+		const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
+			() => "",
+		);
+		let onChatGPT = false;
+		try {
+			onChatGPT = new URL(currentUrl).hostname.toLowerCase() === "chatgpt.com";
+		} catch {}
+		if (!onChatGPT) {
+			logStage(env, "nav", startTime);
+			await cdp(["nav", tab, "https://chatgpt.com"], 20000);
+			await new Promise((r) => setTimeout(r, 600));
+		}
+		logStage(env, "consent", startTime);
+		await dismissConsent(tab, cdp);
+		logStage(env, "verification", startTime);
+		const verificationResult = await handleVerification(tab, cdp, 10000);
+		env.verificationResult = verificationResult;
+		if (verificationResult === "needs-human") {
+			env.blockedBy = "cloudflare-closed-shadow-dom";
+			throw new Error(
+				"ChatGPT is showing a Cloudflare Turnstile challenge that auto-clicking could not clear — please solve it in the visible browser window",
+			);
+		}
+		// Verification was auto-cleared (button clicked via CDP pierce).
+		// Wait for the chat UI to render before continuing.
+		if (verificationResult === "clicked") {
+			await new Promise((r) => setTimeout(r, 2500));
+		}
+		logStage(env, "input-wait", startTime);
+		const inputReady = await waitForSelector(tab, PROSE_SELECTOR, 8000, 400);
+		env.inputReady = inputReady;
+		if (!inputReady) {
+			const bodyText = await cdp([
+				"eval",
+				tab,
+				`document.body?.innerText || ''`,
+			]).catch(() => "");
+			if (
+				/sign in|log in|sign up|\u03a3\u03cd\u03bd\u03b4\u03b5\u03c3\u03b7|login/i.test(
+					bodyText,
+				)
+			) {
+				throw new Error(
+					"ChatGPT requires sign-in — please sign in in the visible browser window",
+				);
+			}
+			throw new Error(
+				"ChatGPT input not found — page may be blocked or in unexpected state",
+			);
+		}
+		logStage(env, "clipboard-inject", startTime);
+		await injectClipboardInterceptor(tab, GLOBAL_VAR);
+		logStage(env, "type-and-submit", startTime);
+		await typeAndSubmit(tab, query);
+		logStage(env, "stream-wait", startTime);
+		// waitForStreamComplete handles the in-browser poll in a single
+		// Runtime.evaluate call. If the response is still streaming past
+		// 20s (slow under tab throttling in `all` mode), fall back to
+		// node-side polls that release the WebSocket between each call.
+		// Together they stay well within the engine's 80s outer budget.
+		let asstLen = 0;
+		try {
+			asstLen = await waitForResponse(tab, 20000);
+		} catch (e) {
+			logStage(env, "stream-poll-fallback", startTime);
+			asstLen = await pollForResponseNodeSide(tab, 15000);
+		}
+		env.assistantTextLen = asstLen;
+		if (asstLen < 1) {
+			console.error(
+				"[chatgpt] Warning: assistant response may not have completed",
+			);
+		}
+		logStage(env, "extract", startTime);
+		// Retry extract up to 3 times with 2s delays. After stream-wait
+		// times out in all-mode under CDP contention, the assistant message
+		// may still be rendering. A short retry loop catches the response
+		// once it lands without burning the full 60s engine budget.
+		//
+		// Each retry first re-runs waitForResponse (which the tightened
+		// minLength=50 + stableRounds=5 makes more accurate), so we don't
+		// just blindly re-click the copy button on a still-streaming
+		// assistant message.
+		let extractResult;
+		for (let attempt = 0; attempt < 3; attempt++) {
+			// Re-wait on retries (attempt 0 already waited; attempts 1-2
+			// didn't because we already passed waitForResponse once). Skip
+			// the wait on attempt 0 to avoid a redundant 20s budget burn.
+			if (attempt > 0) {
+				try {
+					await waitForResponse(tab, 10000);
+				} catch {
+					// Best-effort: fall through to extract which itself
+					// returns empty on a still-streaming page.
+				}
+			}
+			extractResult = await extractAnswer(tab, env);
+			if (extractResult.answer) break;
+			if (attempt < 2) {
+				console.error(
+					`[chatgpt] Extract attempt ${attempt + 1} returned empty, retrying in 2s...`,
+				);
+				await new Promise((r) => setTimeout(r, 2000));
+			}
+		}
+		const { answer, sources, skipped } = extractResult;
+		// If the DOM fallback skipped the response (no real assistant
+		// message after the user's query), surface a clear error so the
+		// caller doesn't silently consume the static homepage greeting
+		// card as a real answer. The static card lives on chatgpt.com
+		// before any conversation; without this guard the extractor used
+		// to return "Hello! How can I help you today?" as a successful
+		// response to every query.
+		if (!answer) {
+			env.blockedBy = "no-response";
+			env.skipped = skipped || null;
+			throw new Error(
+				skipped === "no-user-message"
+					? "ChatGPT still on homepage — query was not submitted"
+					: skipped === "no-assistant-response"
+						? "ChatGPT did not return an assistant response after submit"
+						: skipped === "header-stub"
+							? "ChatGPT response appeared to be a header stub after 3 retries — assistant never rendered the body"
+							: "ChatGPT returned no answer — assistant never responded",
+			);
+		}
+		logStage(env, "done", startTime);
+		const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
+			() => "https://chatgpt.com",
+		);
+		env.durationMs = Date.now() - startTime;
+		outputJson({
+			query,
+			url: finalUrl,
+			answer: formatAnswer(answer, short),
+			sources,
+			_envelope: buildEnvelope(env),
+		});
+	} catch (e) {
+		env.durationMs = Date.now() - startTime;
+		console.error(
+			`[chatgpt] error during stage '${env.lastStage || "unknown"}': ${e.message}`,
+		);
+		handleError(e, buildEnvelope(env));
+	}
+}
+main();