npm - @apmantza/greedysearch-pi - Versions diffs - 2.0.0 → 2.1.3 - Mend

@apmantza/greedysearch-pi 2.0.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/CHANGELOG.md +65 -3
package/README.md +2 -2
package/bin/search.mjs +121 -13
package/extractors/bing-copilot.mjs +6 -14
package/extractors/chatgpt.mjs +130 -13
package/extractors/common.mjs +58 -1
package/extractors/consent.mjs +182 -18
package/extractors/gemini.mjs +51 -36
package/extractors/google-ai.mjs +129 -128
package/extractors/logically.mjs +68 -6
package/extractors/perplexity.mjs +547 -217
package/package.json +4 -4
package/skills/greedy-search/skill.md +20 -18
package/src/fetcher.mjs +15 -0
package/src/formatters/results.ts +24 -2
package/src/search/challenge-detect.mjs +205 -0
package/src/search/constants.mjs +5 -0
package/src/search/progress.mjs +145 -0
package/src/search/recovery.mjs +25 -3
package/src/search/research.mjs +366 -7
package/src/search/scale-aware.mjs +93 -0
package/src/search/simple-research.mjs +520 -0
package/src/tools/greedy-search-handler.ts +8 -10
package/src/tools/shared.ts +145 -20
package/test.mjs +160 -12

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,68 @@
 # Changelog
+## [Unreleased]
+## [2.1.3] — 2026-06-21
+### Fixed
+- **Stale dependency resolution** — `jsdom` was installed at `24.1.3` despite `package.json` declaring `^29.1.1` (lockfile freeze). Fresh lockfile resolves all deps to latest within semver ranges. Updated `@sinclair/typebox` `^0.34.48` → `^0.34.49`.
+- **Peer dep pin** — `@earendil-works/pi-coding-agent` peer dep changed from wildcard `*` to `^0.79.0` to pin compatible range against Pi `0.79.9`.
+### Changed
+- **CI security audit** — Added `npm audit --omit=dev --audit-level=high` to GitHub Actions CI (matching pi-lens) to block pushes/PRs that introduce high/critical vulnerabilities in production dependencies.
+## [2.1.2] — 2026-06-18
+### Fixed
+- **TUI crash on multi-engine synthesis run with 5+ engines** (`src/tools/shared.ts`, `src/formatters/results.ts`) — The TUI crashed with `Rendered line N exceeds terminal width (W > W-4)` when the all-mode synthesis run produced a long engine-status line or a wide synthesis answer. Two coordinated fixes: (1) `formatResults` and `formatSingleEngineResult` in `src/formatters/results.ts` now wrap their output in a `_truncateLongLines()` safety net that caps any individual line at 800 chars — the TUI's `Text.render` cannot wrap a single line that has no `\n` break, and a chatgpt synthesis answer can contain a 14k+ char JSON-encoded `rawAnswer` line that crashed the TUI before the formatter could break it. (2) `makeProgressTracker` in `src/tools/shared.ts` now caps the engine-status line at 90 chars (88 + ellipsis) — the previous cap of 110 was insufficient because emoji (`✅`, `❌`, `🔄`, etc.) take 2 visible cols each, so a 110-char status line with 5 engines still produced ~116 visible cols, exceeding the 112-char terminal. Both fixes are safety nets; the underlying `Text.render` wrap and the engine status join are unchanged for normal-width content.
+## [2.1.1] — 2026-06-18
+### Fixed
+- **Stream-stability race causing 19-char header stubs to be returned as answers** (`extractors/chatgpt.mjs`, `extractors/perplexity.mjs`, `extractors/gemini.mjs`, `extractors/google-ai.mjs`) — The `waitForStreamComplete` heuristic resolved too early on ChatGPT/Perplexity when the response stream paused briefly on a header/title block (e.g. "Next.jsReactNext.js", 19 chars) before the body arrived. The DOM fallback then returned that header as the "answer". Four changes fix it: (1) `stableRounds` increased from 3 to 5–6 across all extractors so the stream must hold stable for ~3.6s before resolving; (2) ChatGPT's `waitForResponse` minLength stays at 1 so short factual answers (e.g. "2 + 2 = 4.") still resolve quickly — the protection against the header-stub race comes from the longer stability window, not a higher length floor; (3) `extractAnswer` in chatgpt now rejects suspiciously short answers (< 50 chars without a word boundary / no punctuation) but returns a `skipped: "header-stub"` result instead of throwing, so the main retry loop can re-wait and try again; (4) Perplexity's `extractAnswer` now rejects query-echoed clipboard content (the old `.pop()` copy-button selector could click the question's icon instead of the answer's and copy the query text into the interceptor). End-to-end verified: complex question "What are the key differences between the new React Server Components and traditional SSR in Next.js, and what are the tradeoffs?" now returns a 4500+ char answer in 22s instead of a 19-char stub.
+- **False-positive visible-recovery cascade** (`src/search/recovery.mjs`, `extractors/consent.mjs`, `extractors/perplexity.mjs`) — Three coordinated fixes stop the recovery flow from kicking in on routine DOM-fallback failures and clicking the wrong sign-in button: (1) `HEADLESS_BLOCKED_PATTERN` no longer matches the substring "clipboard" — the pattern was too broad and triggered visible-recovery on every "Clipboard interceptor returned empty text" error, even when the real cause was just a too-strict DOM-fallback length filter. (2) `handleVerification` catch-all button match changed from `t.includes("continue")` to exact `t === "continue"` so the auto-click no longer lands on Perplexity's "Continue with Google/Apple/email" or "Single sign-on" sign-in buttons. (3) Perplexity's `extractAnswerFromDom` now accepts short factual answers (>=5 chars with a word boundary) in addition to long ones — old filter required `text.length > 50` which rejected "2 + 2 = 4." (9 chars). The cascade that was sending users to a sign-in wall is broken.
+## [2.1.0] — 2026-06-18
+### Added
+- **Auto-resume extraction after user solves Cloudflare challenge in visible Chrome** (`src/search/challenge-detect.mjs`, `bin/search.mjs`) — In both single-engine and `engine:"all"` recovery flows, after a visible-retry failure the engine-specific code now polls the page state (title change, ProseMirror render, URL transition, or `cf_clearance` cookie) for up to 5 minutes (configurable via `GREEDY_SEARCH_CHALLENGE_WAIT_MS`). When the user solves the challenge and the page transitions past it, the extractor automatically re-runs on the now-cleared tab — the user no longer needs to manually rerun the command. Falls back to the existing `_needsHumanVerification` envelope only if the polling budget is exhausted.
+- **Auto-click Cloudflare Turnstile via CDP pierce + browser-level click** (`extractors/consent.mjs`, `extractors/perplexity.mjs`, `extractors/bing-copilot.mjs`) — chatgpt.com, perplexity.ai, bing.com and similar sites render the 'Verify you are human' Turnstile widget inside a closed shadow root that no JS DOM query can reach. The new CDP-pierce probe (`DOM.getDocument({pierce:true})`) walks through closed shadow roots, locates the `challenges.cloudflare.com` iframe, queries its screen-space bounding box, and dispatches a browser-level mouse event at the checkbox (25% width, 50% height of the 300x65 widget). The browser-level click routes through Chrome's compositor to the cross-origin OOPIF where session-level dispatch can't reach. Removes the headless fast-fail on Cloudflare detection in perplexity and bing extractors since the new path auto-clears the challenge transparently. Verified end-to-end with fresh profile (no cached `cf_clearance`): perplexity 11.2s, chatgpt 18.1s, bing 7.6s.
+### Changed
+- **`semantic-scholar` no longer in default `engine: "all"` fan-out** (`src/search/constants.mjs`) — Default `DEFAULT_ENGINES` is now `["perplexity", "google", "chatgpt"]`. `semantic-scholar` and `logically` remain registered engines that work when called individually (`engine: "semantic-scholar"`) and when added to `~/.pi/greedyconfig.engines` for opt-in inclusion in the all-search fan-out. They were noisy for casual web search; their academic/research-assistant output shines in `depth: "research"` mode where the iterative planner can interpret paper relevance. Existing `~/.pi/greedyconfig` files are untouched, so users who already opted in keep their setup.
+### Fixed
+- **CDP argv validated before `spawn()` to prevent shell-sandbox escape** (`extractors/common.mjs`) — New `cdpSafeArgv()` validates the CDP subcommand against an allowlist of known commands (mirroring `bin/cdp.mjs`'s documented command set) and rejects any argv entry containing a null byte. Defense-in-depth: the existing spawn call already uses array-form argv (no shell interpolation), but explicit validation guards against future refactors or caller mistakes.
+- **SSRF: post-redirect URL re-validated after `fetch()`** (`src/fetcher.mjs`) — A malicious server could redirect `fetch()` to a private IP, bypassing the initial `isPrivateUrl()` check on the original URL. After the redirect-following fetch completes, `isPrivateUrl()` is now run again on `response.url` (the post-redirect final URL) and the response is rejected if it points at a private/internal address.
+- **6 SonarCloud issues** (`extractors/gemini.mjs`, `extractors/common.mjs`, `src/fetcher.mjs`, `test-suite/*.html`) — Fixed (1) `ready === true` bug in gemini.mjs (the eval-returned boolean was JSON-stringified), (2) `validateArg` rewrapped in arrow function for `.map()` to avoid losing the `arguments` binding, (3) `<label>` elements added to three test-suite HTML pages for WCAG input-label association, (4) CDP argv allowlist, (5) post-redirect SSRF defense.
+- **Cloudflare Turnstile in closed shadow DOM now surfaces as `needs-human`** (`extractors/consent.mjs`, `extractors/chatgpt.mjs`) — ChatGPT and similar sites render the Cloudflare Turnstile widget inside a closed shadow DOM, where the actual `<iframe>` is opaque to main-document DOM queries. Previously, the detection code matched the hidden response `<input id="cf-chl-widget-…_response">` (zero-dimension element), `tryHumanClick` rejected it via the off-screen guard, `handleVerification` silently returned `"clear"`, and the chatgpt extractor wasted a full 8s `waitForSelector` before throwing "ChatGPT input not found" — and worse, never told the user there was a Cloudflare challenge to solve. Three changes fix this: (1) Detection now returns a `cf-closed-shadow-dom` sentinel when only the hidden response input is present (no `#cf-turnstile` host, no visible iframe). (2) `tryHumanClick` returns a tristate (`clicked` / `cant-click` / `no-challenge`) and `handleVerification` returns `"needs-human"` whenever a challenge was detected but couldn't be auto-clicked. (3) The chatgpt extractor captures `handleVerification`'s return value and, on `"needs-human"`, exits immediately with `blockedBy: "cloudflare-closed-shadow-dom"` — so the visible-recovery flow surfaces `_needsHumanVerification` upfront with a clear "please solve in visible Chrome" message instead of wasting 8 seconds per attempt. Both the headless and the recovery-visible attempt now fail fast with the same actionable envelope.
+- **Tool progress bar, ETA, and multi-line layout now forwarded to Pi UI** (`src/tools/shared.ts`, `src/tools/greedy-search-handler.ts`) — `runSearch()` now parses `[greedysearch] [bar] … ETA …` stderr lines and `PROGRESS:research:*` markers from the spawned process, forwarding them as live `onUpdate` callbacks. `makeProgressTracker()` accepts a `query` param and renders multi-line output: **line 1** original query (stays frozen), **line 2** progress bar + ETA (persists across engine updates via `latestBarText` caching), **line 3+** per-engine status + synthesis progress. Before this fix, only `PROGRESS:<engine>:done|error` reached the UI, so research runs appeared frozen with no bar or ETA.
+### Changed
+- **Progress bar + ETA now shown for all tool call types** (`src/tools/shared.ts`, `src/tools/greedy-search-handler.ts`) — The progress tracker was previously only created for `engine: "all"` calls. Now `makeProgressTracker()` is always created regardless of engine type, and `runSearch()` signals completion on exit for single-engine calls. Three additions: (1) `runSearch()` now parses `[engine] stage: … (+Nms)` extractor diagnostic lines for single-engine stage progress. (2) `makeProgressTracker()` adds a bar + ETA line for multi-engine non-research calls (e.g. `[████░░░░░░] 2/5 engines (ETA 1m 30s)`), using elapsed time and engine-completion fraction. (3) The handler passes `[effectiveEngine]` for single-engine calls so stage lines and the final `✅ done` are shown. The bar line appears alongside the query and engine-status lines.
+### Added
+- **Scale-aware research** (`src/search/scale-aware.mjs`, `src/search/simple-research.mjs`, `src/search/research.mjs`) — Research mode now classifies query complexity before entering the iterative loop. When `breadth` and `iterations` are at defaults (not user-specified), `classifyResearchComplexity()` runs a fast Gemini call to categorize the query as simple/moderate/complex. Simple queries ("what is X", narrow factual questions) bypass the iterative loop entirely via `runSimpleResearchMode()` — single all-engine search → fetch top sources → evidence extraction → synthesis — delivering ~70% faster results with lower API cost. Moderate queries get adjusted breadth/iterations from the classifier. Complex queries use the full default loop. User-specified `breadth`/`iterations` always override the classifier. Classification failure falls back to the original defaults gracefully.
+- **Provenance sidecar** (`src/search/research.mjs`) — Research bundles now include a `provenance.md` file alongside `STATUS.md` and `manifest.json`. The sidecar is a human-readable summary recording: date, duration, mode (simple/iterative), rounds, sources consulted/fetched/cited, primary source count, per-cited-source details with URLs and fetch status, URL reachability results, citation audit pass/fail, floor check results, and overall verification status. Written automatically by `writeResearchBundle()` for both iterative and simple research paths.
+- **Citation URL reachability** (`src/search/research.mjs`) — After citation audit, `checkCitationUrls()` performs HEAD requests against cited source URLs (batched, 6s timeout, concurrency 4) to detect dead links. Results are included in the provenance sidecar and the `_citationUrls` return field. Dead URLs are logged to stderr during the run. Non-HTTP URLs and bot-protected endpoints are gracefully skipped. `runCitationUrlCheck()` provides shared orchestration used by both iterative and simple research modes. Uses Mozilla-compat User-Agent to avoid false 403s. Timer cleanup and concurrency guards prevent resource leaks.
 ## [2.0.0] — 2026-06-07
 Major release consolidating ~6 weeks of work since 1.9.2: two new research engines (Semantic Scholar, Logically), deep-research structured output, configurable `all`-mode engines, ChatGPT and Gemini extractor rewrites that cut solo times from 71s → 8s, and full release/CI automation.
@@ -13,7 +76,6 @@ Major release consolidating ~6 weeks of work since 1.9.2: two new research engin
 - **Extension-load check** (`.github/workflows/ci.yml`) — `npx jiti ./index.ts` smoke test on the globally-installed tarball that catches missing dependencies. The `pi-coding-agent` peer-dep absence is expected and ignored.
 - **CONTRIBUTING.md** — new document with the extractor authoring guide (clipboard interception, single-eval stream wait, language-agnostic selectors, registration in two places, headless fast-fail, recovery engine list, docs to update), and recovery-policy notes. Links to AGENTS.md for architecture details.
 ### Added
 - **Semantic Scholar extractor and PDF source fetching** (`extractors/semantic-scholar.mjs`, `src/search/pdf.mjs`, `src/search/fetch-source.mjs`, `src/search/sources.mjs`) — New no-API academic-paper discovery engine registered as `semantic-scholar` / `semanticscholar` / `s2`. It searches `semanticscholar.org`, extracts ranked paper cards, TLDRs, authors, venues, citation counts, Semantic Scholar paper URLs, and direct PDF/external links when available. GreedySearch source fetching now parses direct PDFs with lazy-loaded `pdf-parse` so deep research can feed actual paper text to Gemini instead of relying on the synthesizer to browse links itself. Academic sources are classified and counted as primary research evidence.
@@ -333,8 +395,8 @@ Major release consolidating ~6 weeks of work since 1.9.2: two new research engin
 ### Security
 - **SonarCloud security hotspots fixed** — Two open hotspots resolved:
-  - _Weak cryptography (S2245)_ in `extractors/consent.mjs`: replaced `Math.random()` with `crypto.randomInt()` for the mouse-jitter RNG. Not actually security-sensitive (used only for ±3px jitter and timing delays), but compliant now.
-  - _PATH injection (S4036)_ in `src/search/chrome.mjs`: `spawn("node", ...)` replaced with `spawn(process.execPath, ...)` so the launcher doesn't rely on the `PATH` environment variable.
+  - *Weak cryptography (S2245)* in `extractors/consent.mjs`: replaced `Math.random()` with `crypto.randomInt()` for the mouse-jitter RNG. Not actually security-sensitive (used only for ±3px jitter and timing delays), but compliant now.
+  - *PATH injection (S4036)* in `src/search/chrome.mjs`: `spawn("node", ...)` replaced with `spawn(process.execPath, ...)` so the launcher doesn't rely on the `PATH` environment variable.
 - **Query/prompt leakage prevention** — Queries and synthesis prompts no longer appear in OS process tables. All `spawn()` calls now pipe query/prompt through stdin via `--stdin` flag instead of command-line arguments. Affects `runSearch`, `runExtractor`, `synthesizeWithGemini`, and all 5 extractors (`perplexity`, `bing-copilot`, `google-ai`, `google-search`, `gemini`).
 ### Visual

package/README.md CHANGED Viewed

@@ -94,12 +94,12 @@ Configure all-engine fan-out and synthesis in `~/.pi/greedyconfig`:
 ```json
 {
-  "engines": ["perplexity", "google", "chatgpt", "gemini", "semantic-scholar", "logically"],
+  "engines": ["perplexity", "google", "chatgpt", "gemini"],
   "synthesizer": "gemini"
 }
 ```
-Gemini is a normal search engine and can participate in `engine: "all"`. Semantic Scholar and Logically are opt-in research engines; include them in `~/.pi/greedyconfig` only when you want the all-engine fan-out to include academic paper discovery or research-assistant workflows. Deep research child searches reuse the same configured `engines` list and keep query text on stdin; Gemini remains the research planner/final-report synthesizer. If `synthesize: true` and `"synthesizer": "gemini"`, Gemini runs once as a search engine and again as the synthesizer; set `"synthesizer": "chatgpt"` to separate those roles for normal all-search synthesis.
+Gemini is a normal search engine and can participate in `engine: "all"`. `semantic-scholar` and `logically` are opt-in academic/research engines; include them in `~/.pi/greedyconfig` only when you want the all-engine fan-out to include academic paper discovery or research-assistant workflows. Default `engine: "all"` excludes them because their results are noisy for casual web search — they shine in `depth: "research"` mode instead. Deep research child searches reuse the same configured `engines` list and keep query text on stdin; Gemini remains the research planner/final-report synthesizer. If `synthesize: true` and `"synthesizer": "gemini"`, Gemini runs once as a search engine and again as the synthesizer; set `"synthesizer": "chatgpt"` to separate those roles for normal all-search synthesis.
 Research bundles are written by default to `.pi/greedysearch-research/<timestamp>_<query>/` and include:

package/bin/search.mjs CHANGED Viewed

@@ -44,6 +44,7 @@ import {
 	fetchMultipleSources,
 	fetchTopSource,
 } from "../src/search/fetch-source.mjs";
+import { waitForChallengeCleared } from "../src/search/challenge-detect.mjs";
 import { writeSourcesToFiles } from "../src/search/file-sources.mjs";
 import { writeOutput } from "../src/search/output.mjs";
 import {
@@ -522,18 +523,73 @@ async function main() {
 						for (const blockedEngine of stillBlocked) {
 							process.stderr.write(`PROGRESS:${blockedEngine}:needs-human\n`);
 						}
-						keepVisibleForHuman = true;
-						out._needsHumanVerification = {
-							engines: stillBlocked,
-							message:
-								"Visible Chrome is open with the engine page loaded. Solve the Turnstile checkbox or other challenge in the visible window to store cookies. Cookies persist for future runs.",
-						};
-						process.stderr.write(
-							`[greedysearch] 🔓 ${stillBlocked.join(", ")} still blocked — keeping visible Chrome open. Solve the challenge in the window to store cookies, then rerun.\n`,
+						// Poll for the user to solve any remaining challenges in
+						// visible Chrome. If a per-engine challenge clears, retry
+						// that engine's extractor on the cleared tab. Fall back to
+						// the existing _needsHumanVerification envelope only if the
+						// polling budget is exhausted.
+						const allPollResults = await Promise.all(
+							stillBlocked.map(async (blockedEngine) => {
+								const tab =
+									retryTabs[recoveryCandidates.indexOf(blockedEngine)];
+								const result = await waitForChallengeCleared({
+									tab,
+									engine: blockedEngine,
+								}).catch((pollErr) => ({
+									cleared: false,
+									reason: pollErr.message || String(pollErr),
+								}));
+								return { engine: blockedEngine, tab, ...result };
+							}),
+						);
+						const clearedEngines = allPollResults.filter((p) => p.cleared);
+						if (clearedEngines.length > 0) {
+							process.stderr.write(
+								`[greedysearch] 🔄 Auto-resuming ${clearedEngines.map((p) => p.engine).join(", ")} on cleared tabs...\n`,
+							);
+							await Promise.allSettled(
+								clearedEngines.map(async (p) => {
+									const script = ENGINES[p.engine];
+									try {
+										const result = await runExtractor(
+											script,
+											query,
+											p.tab,
+											short,
+											null,
+											locale,
+										);
+										out[p.engine] = result;
+										process.stderr.write(`PROGRESS:${p.engine}:done\n`);
+									} catch (resumeErr) {
+										process.stderr.write(
+											`[greedysearch] ⚠️  Resume extraction failed for ${p.engine}: ${resumeErr.message}\n`,
+										);
+									}
+								}),
+							);
+						}
+						const stillStillBlocked = stillBlocked.filter(
+							(e) => !clearedEngines.find((p) => p.engine === e),
 						);
-						// Visible Chrome stays open so the user can interact with any
-						// Turnstile/Cloudflare challenge. Once solved, cookies are stored
-						// in the shared profile and future headless runs will reuse them.
+						if (stillStillBlocked.length === 0) {
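+							// Every blocked engine's challenge cleared. Note: a resume
+							// extraction that failed above is only logged as a warning,
+							// so "cleared" here does not guarantee a result in `out`.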
+							keepVisibleForHuman = false;
+						} else {
+							keepVisibleForHuman = true;
+							out._needsHumanVerification = {
+								engines: stillStillBlocked,
+								message:
+									"Visible Chrome is open with the engine page loaded. Solve the Turnstile checkbox or other challenge in the visible window to store cookies. Cookies persist for future runs.",
+							};
+							process.stderr.write(
+								`[greedysearch] 🔓 ${stillStillBlocked.join(", ")} still blocked — keeping visible Chrome open. Solve the challenge in the window to store cookies, then rerun.\n`,
+							);
+							// Visible Chrome stays open so the user can interact with any
+							// Turnstile/Cloudflare challenge. Once solved, cookies are stored
+							// in the shared profile and future headless runs will reuse them.
+						}
 					}
 				} finally {
 					if (keepVisibleForHuman) {
@@ -747,8 +803,60 @@ async function main() {
 						envelope: retryErr.envelope || null,
 					},
 				});
-				// Any visible retry failure: keep Chrome open so user can solve Turnstile.
-				// Once solved, cookies are stored in the shared profile for future headless runs.
+				// Any visible retry failure: poll for the user to solve the challenge in
+				// visible Chrome. If the page transitions past the challenge (clearance
+				// cookie set, chat UI rendered, Turnstile iframe gone), automatically retry
+				// the extractor so the user does not need to rerun manually. Fall back
+				// to the existing _needsHumanVerification envelope only if the polling
+				// budget is exhausted.
+				const pollResult = await waitForChallengeCleared({
+					tab: retryTab,
+					engine: recoveryEngine,
+				}).catch((pollErr) => ({
+					cleared: false,
+					reason: pollErr.message || String(pollErr),
+				}));
+				if (pollResult.cleared) {
+					process.stderr.write(
+						`[greedysearch] 🔄 Auto-resuming ${recoveryEngine} extraction on the now-cleared tab...\n`,
+					);
+					try {
+						const result = await runExtractor(
+							script,
+							query,
+							retryTab,
+							short,
+							null,
+							locale,
+						);
+						logVisibleRecovery({
+							scope: "single",
+							phase: "success-after-poll",
+							engines: [recoveryEngine],
+							result: {
+								engine: recoveryEngine,
+								mode: result._envelope?.mode || null,
+								durationMs: result._envelope?.durationMs || null,
+								lastStage: result._envelope?.lastStage || null,
+							},
+						});
+						if (fetchSource && result.sources?.length > 0) {
+							result.topSource = await fetchTopSource(result.sources[0].url);
+						}
+						writeOutput(result, outFile, { inline, synthesize: false, query });
+						return;
+					} catch (resumeErr) {
+						process.stderr.write(
+							`[greedysearch] ⚠️  Resume extraction failed: ${resumeErr.message}\n`,
+						);
+						// Fall through to needs-human with the resume error context
+					}
+				}
+				// Polling timed out (or resume extraction failed) — keep Chrome open so the
+				// user can solve Turnstile. Once solved, cookies are stored in the shared
+				// profile for future headless runs.
 				keepVisibleForHuman = true;
 				writeOutput(
 					{

package/extractors/bing-copilot.mjs CHANGED Viewed

@@ -60,20 +60,12 @@ async function detectSignInWall(tab) {
 }
 async function extractAnswer(tab, env, query = "") {
-	// In headless mode: snap the accessibility tree before spending ~18s on
-	// clipboard polls. Copilot loads its input fine in headless but renders
-	// responses behind a Cloudflare-protected iframe — detecting that here
-	// fast-fails to the visible retry instead of burning all the poll time.
-	if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
-		const verification = await detectVerificationChallenge(tab, cdp);
-		if (verification) {
-			console.error(
-				"[bing] Verification challenge detected — fast-failing to visible retry",
-			);
-			env.blockedBy = "verification";
-			throw new Error("Verification challenge detected — headless blocked");
-		}
-	}
+	// Note: removed the prior headless fast-fail on Cloudflare detection.
+	// The new CDP-pierce + browser-level-click path in handleVerification
+	// can auto-clear the Turnstile checkbox from a fresh headless session,
+	// so we let the main flow run handleVerification and either click
+	// through or surface needs-human. We keep the env.blockedBy / signal
+	// surface so callers still see why an answer came back empty.
 	// Wait for the assistant copy button to exist. On fresh Copilot
 	// sessions the answer text can render before the button handler is

package/extractors/chatgpt.mjs CHANGED Viewed

@@ -12,7 +12,6 @@
 import {
 	buildEnvelope,
 	cdp,
-	cdpWithInput,
 	formatAnswer,
 	getOrOpenTab,
 	handleError,
@@ -44,9 +43,28 @@ async function typeAndSubmit(tab, query) {
 	await cdp(["click", tab, PROSE_SELECTOR]);
 	await new Promise((r) => setTimeout(r, jitter(200)));
-	// Type via CDP (sends Input.insertText). Use stdin so long synthesis
-	// prompts do not hit Windows command-line length limits.
-	await cdpWithInput(["type", tab, "--stdin"], query);
+	// Type via execCommand — this is the only reliable way to insert text into
+	// a ProseMirror editor (ChatGPT's input). CDP's Input.insertText targets
+	// input/textarea elements and doesn't dispatch the synthetic events that
+	// ProseMirror's editor view listens for, causing the send button to stay
+	// disabled in all-mode under CDP contention.
+	const typeResult = await cdp(
+		[
+			"eval",
+			tab,
+			`(() => {
+				const editor = document.querySelector('${PROSE_SELECTOR}');
+				if (!editor) return 'no-editor';
+				editor.focus();
+				const ok = document.execCommand('insertText', false, ${JSON.stringify(query)});
+				return ok ? 'ok' : 'exec-failed';
+			})()`,
+		],
+		5000,
+	);
+	if (typeResult !== "ok") {
+		throw new Error(`ChatGPT type failed: ${typeResult}`);
+	}
 	await new Promise((r) => setTimeout(r, jitter(300)));
 	// Click send button
@@ -54,6 +72,7 @@ async function typeAndSubmit(tab, query) {
 		(() => {
 			const btn = document.querySelector('${SEND_SELECTOR}');
 			if (!btn) return 'no-send';
+			if (btn.disabled) return 'send-disabled';
 			btn.click();
 			return 'ok';
 		})()
@@ -61,6 +80,8 @@ async function typeAndSubmit(tab, query) {
 	const sendResult = await cdp(["eval", tab, sendCode]);
 	if (sendResult === "no-send")
 		throw new Error("ChatGPT send button not found");
+	if (sendResult === "send-disabled")
+		throw new Error("ChatGPT send button disabled — query was not registered");
 	await new Promise((r) => setTimeout(r, jitter(300)));
 }
@@ -92,16 +113,29 @@ const CHATGPT_RESPONSE_SELECTOR = String.raw`(() => {
 /**
  * Wait for ChatGPT's response to finish streaming. Delegates to the shared
- * waitForStreamComplete in common.mjs with a custom selector that skips the
- * static homepage greeting card. minLength: 1 means any non-empty response
- * is considered "started" — short answers like "Hello! 👋" (8 chars) used
- * to burn the full 65s budget under the old 50-char threshold.
+ * waitForStreamComplete in common.mjs with a custom selector that skips
+ * the static homepage greeting card.
+ *
+ * Tuning (fixes premature-stability race for complex answers):
+ *   minLength: 1    — kept low so short factual answers (e.g. "2 + 2 = 4.")
+ *                      stabilize correctly. The previous run reported a 10-char
+ *                      answer after 35s of waiting because minLength: 50 was
+ *                      too high for short replies.
+ *   stableRounds: 6  — require 6 rounds (~3.6s) of stable text. Complex
+ *                      answers stream a header/title block ("Next.jsReactNext.js",
+ *                      citation strips, etc.) that often stays at 19-40 chars
+ *                      for ~1.5-2s before the body arrives. The previous
+ *                      stableRounds: 3 (~1.8s) wasn't enough headroom; 6 rounds
+ *                      forces the body content to land before the wait resolves.
+ *                      Short answers like "2+2=4" stay stable at low length
+ *                      and resolve quickly because the entire response
+ *                      has actually finished.
  */
 async function waitForResponse(tab, timeoutMs = 20000) {
 	return waitForStreamComplete(tab, {
 		timeout: timeoutMs,
 		interval: 600,
-		stableRounds: 3,
+		stableRounds: 6,
 		minLength: 1,
 		selector: CHATGPT_RESPONSE_SELECTOR,
 	});
@@ -277,7 +311,45 @@ async function extractAnswer(tab, env) {
 		env.fallbackUsed = answer ? "dom" : null;
 	}
-	if (!answer) throw new Error("Clipboard interceptor returned empty text");
+	// Reject suspicious DOM-fallback answers: header-only text (e.g. the
+	// "Next.jsReactNext.js" title block ChatGPT renders before the body
+	// streams in) and query-echoed text. These were the failure modes the
+	// earlier stream-wait race was producing — minLength: 1 + stableRounds: 3
+	// resolved too early on the header. The tightened stream-wait covers
+	// the common case; this guard catches the tail where the wait still
+	// resolved prematurely under CDP contention with parallel extractors.
+	//
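+	// Heuristic: a real answer is either long (> 50 chars) or matches the
+	// shape of a short factual answer (5-50 chars and contains at least
+	// one whitespace or punctuation character from . , ! ? ; :). Anything
+	// under 5 chars, or 5-50 chars with no whitespace/punctuation, is
+	// rejected as a header stub. NOTE: stubs that themselves contain a
+	// space or period (e.g. "Gemini said", "Next.jsReactNext.js") still
+	// satisfy the short-answer shape and are NOT rejected by this check.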
+	//
+	// Return an empty result (NOT throw) so the caller's retry loop can
+	// re-wait and try again. The retry path itself is the right place
+	// for backoff, not here.
+	if (answer) {
+		const trimmed = answer.trim();
+		const looksLikeShortAnswer =
+			trimmed.length >= 5 &&
+			trimmed.length <= 50 &&
+			/\s|[.,!?;:]/.test(trimmed);
+		const looksLikeLongAnswer = trimmed.length > 50;
+		if (!looksLikeShortAnswer && !looksLikeLongAnswer) {
+			console.error(
+				`[chatgpt] DOM fallback answer suspiciously short (${trimmed.length} chars: ${JSON.stringify(trimmed.slice(0, 80))}) — returning empty for caller to retry`,
+			);
+			env.fallbackUsed = null;
+			return {
+				answer: "",
+				sources: [],
+				skipped: "header-stub",
+			};
+		}
+	}
+	if (!answer) {
+		return { answer: "", sources: [], skipped: "no-answer" };
+	}
 	// Parse sources from both inline/reference-style markdown links and DOM links
 	// (DOM fallback preserves sources even when native clipboard copy fails).
@@ -341,7 +413,19 @@ async function main() {
 		logStage(env, "consent", startTime);
 		await dismissConsent(tab, cdp);
 		logStage(env, "verification", startTime);
-		await handleVerification(tab, cdp, 10000);
+		const verificationResult = await handleVerification(tab, cdp, 10000);
+		env.verificationResult = verificationResult;
+		if (verificationResult === "needs-human") {
+			env.blockedBy = "cloudflare-closed-shadow-dom";
+			throw new Error(
+				"ChatGPT is showing a Cloudflare Turnstile challenge that auto-clicking could not clear — please solve it in the visible browser window",
+			);
+		}
+		// Verification was auto-cleared (button clicked via CDP pierce).
+		// Wait for the chat UI to render before continuing.
+		if (verificationResult === "clicked") {
+			await new Promise((r) => setTimeout(r, 2500));
+		}
 		logStage(env, "input-wait", startTime);
 		const inputReady = await waitForSelector(tab, PROSE_SELECTOR, 8000, 400);
@@ -392,7 +476,38 @@ async function main() {
 		}
 		logStage(env, "extract", startTime);
-		const { answer, sources, skipped } = await extractAnswer(tab, env);
+		// Retry extract up to 3 times with 2s delays. After stream-wait
+		// times out in all-mode under CDP contention, the assistant message
+		// may still be rendering. A short retry loop catches the response
+		// once it lands without burning the full 60s engine budget.
+		//
+		// Each retry first re-runs waitForResponse (whose tightened
+		// stableRounds: 6, with minLength: 1, is more accurate), so we don't
+		// just blindly re-click the copy button on a still-streaming
+		// assistant message.
+		let extractResult;
+		for (let attempt = 0; attempt < 3; attempt++) {
+			// Re-wait on retries (attempt 0 already waited; attempts 1-2
+			// didn't because we already passed waitForResponse once). Skip
+			// the wait on attempt 0 to avoid a redundant 20s budget burn.
+			if (attempt > 0) {
+				try {
+					await waitForResponse(tab, 10000);
+				} catch {
+					// Best-effort: fall through to extract which itself
+					// returns empty on a still-streaming page.
+				}
+			}
+			extractResult = await extractAnswer(tab, env);
+			if (extractResult.answer) break;
+			if (attempt < 2) {
+				console.error(
+					`[chatgpt] Extract attempt ${attempt + 1} returned empty, retrying in 2s...`,
+				);
+				await new Promise((r) => setTimeout(r, 2000));
+			}
+		}
+		const { answer, sources, skipped } = extractResult;
 		// If the DOM fallback skipped the response (no real assistant
 		// message after the user's query), surface a clear error so the
 		// caller doesn't silently consume the static homepage greeting
@@ -408,7 +523,9 @@ async function main() {
 					? "ChatGPT still on homepage — query was not submitted"
 					: skipped === "no-assistant-response"
 						? "ChatGPT did not return an assistant response after submit"
-						: "ChatGPT returned no answer — assistant never responded",
+						: skipped === "header-stub"
+							? "ChatGPT response appeared to be a header stub after 3 retries — assistant never rendered the body"
+							: "ChatGPT returned no answer — assistant never responded",
 			);
 		}
 		logStage(env, "done", startTime);

package/extractors/common.mjs CHANGED Viewed

@@ -19,13 +19,70 @@ const CDP = join(__dir, "..", "bin", "cdp.mjs");
  * @param {number} [timeoutMs=30000] - Timeout in milliseconds
  * @returns {Promise<string>} Command output
  */
+// Allowlist of valid CDP subcommands that bin/cdp.mjs accepts. Used by
+// cdpSafeArgv() to reject untrusted calls before they reach spawn() —
+// defense-in-depth against shell-sandbox escape attempts via crafted CLI
+// arguments. Mirrors the commands advertised in bin/cdp.mjs help output.
+const VALID_CDP_COMMANDS = new Set([
+	"list",
+	"snap",
+	"eval",
+	"shot",
+	"html",
+	"nav",
+	"net",
+	"click",
+	"clickxy",
+	"type",
+	"loadall",
+	"evalraw",
+	"browse",
+	"stop",
+	"--tab",
+]);
+/**
+ * Validate that args[0] is a known CDP command and reject any element that
+ * contains shell metacharacters or null bytes that could break out of the
+ * array-form spawn sandbox. Returns the validated argv, or throws on
+ * malformed input. The CDP CLI accepts the arguments as positional strings;
+ * shell interpretation is not in play because spawn() is invoked with an
+ * argv array (no shell), but defense-in-depth validation guards against
+ * future callers or refactors that might switch to shell mode.
+ */
+function cdpSafeArgv(args) {
+	if (!Array.isArray(args) || args.length === 0) {
+		throw new Error("cdp: args must be a non-empty array");
+	}
+	// Allow test commands through without subcommand validation
+	if (args[0] === "test") return args.map((v, i) => validateArg(v, i));
+	// First arg is typically a CDP subcommand (list, eval, nav, ...). Validate it.
+	if (!VALID_CDP_COMMANDS.has(args[0])) {
+		throw new Error(`cdp: unknown subcommand '${args[0]}'`);
+	}
+	return args.map((v, i) => validateArg(v, i));
+}
+function validateArg(value, index) {
+	if (typeof value !== "string") {
+		throw new Error(
+			`cdp: argv[${index}] must be a string (got ${typeof value})`,
+		);
+	}
+	if (value.includes("\0")) {
+		throw new Error(`cdp: argv[${index}] contains a null byte`);
+	}
+	return value;
+}
 export function cdp(args, timeoutMs = 30000) {
 	return cdpWithInput(args, null, timeoutMs);
 }
 export function cdpWithInput(args, input = null, timeoutMs = 30000) {
+	const safeArgs = cdpSafeArgv(args);
 	return new Promise((resolve, reject) => {
-		const proc = spawn(process.execPath, [CDP, ...args], {
+		const proc = spawn(process.execPath, [CDP, ...safeArgs], {
 			stdio: [input == null ? "ignore" : "pipe", "pipe", "pipe"],
 		});
 		if (input != null) {