@apmantza/greedysearch-pi 1.8.5 → 1.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,116 @@
1
1
  # Changelog
2
2
 
3
- ## Unreleased
3
+ ## [1.8.6] — 2026-05-04
4
+
5
+ ### Bing Copilot: Headless Cloudflare Recovery
6
+
7
+ - **Auto-retry triggers on all Bing failures** — Error pattern expanded from `input not found|verification` to include `clipboard` failures, so any extraction failure triggers the visible Chrome recovery.
8
+ - **Clipboard retry** — `bing-copilot.mjs` now retries clipboard extraction once with a 2s delay, matching the Perplexity extractor pattern.
9
+ - **Cloudflare detection** — If the clipboard is empty and the AI copy button is hidden, the extractor checks the accessibility tree for Cloudflare challenge text and logs it explicitly for faster diagnosis.
10
+ - **DOM extraction fallback** — If clipboard fails and the copy button is missing (headless anti-bot behavior), attempts direct text extraction from the `copilot.fun` → blob: iframe chain via CDP targets. Falls through to the visible auto-retry if Cloudflare blocks the iframe.
11
+ - **Investigation confirmed** — In headless mode, Copilot renders the AI response inside a `copilot.fun` → blob: iframe sandbox with a Cloudflare Turnstile challenge. The `copy-ai-message-button` (`data-testid`) is hidden. Content is unreachable from both the main frame JS (cross-origin) and CDP iframe traversal (Cloudflare blocks load). The only viable path is visible Chrome recovery — once cookies are cached in the profile, subsequent headless searches pass transparently.
12
+
13
+ ### Visible Chrome Recovery
14
+
15
+ - **Mode-aware `ensureChrome()`** — `src/search/chrome.mjs` now reads a mode marker file (`greedysearch-chrome-mode`) written by `launch.mjs`. When `GREEDY_SEARCH_VISIBLE=1` and Chrome is running headless, it kills and relaunches in visible mode with a forced relaunch guard (always relaunches after kill, even if port wasn't freed).
16
+ - **`launch.mjs` mode check on reuse** — When Chrome is already running and visible is requested (`GREEDY_SEARCH_VISIBLE=1`), checks the mode file. If headless, kills the running instance and launches visible instead of reusing.
17
+ - **Mode file cleanup** — Mode marker file cleaned on `--kill`, ghost cleanup, and idle timeout kill.
18
+ - **`bin/launch-visible.mjs`** — Standalone visible Chrome launcher. Nukes any process on port 9222 (by PID file + port scan), launches Chrome without `--headless`, and writes `"visible"` to the mode file. No ghost cleanup complexity, no mode switching — fire-and-forget visible Chrome.
19
+ - **`bin/visible.mjs`** — Convenience wrapper: kills headless, then launches visible (delegates to `launch-visible.mjs`).
20
+ - **Progress notification** — When the auto-retry launches visible Chrome for manual Cloudflare verification, a `PROGRESS:bing:needs-human` line is emitted to stderr. The progress tracker renders `🔓 bing needs manual verification` in the Pi UI.
21
+ - **Idle cleanup preserves mode** — Headless idle timeout cleanup now also removes the mode marker file.
22
+
23
+ ### Security & Robustness
24
+
25
+ - **Chrome process cleanup hardening** — `launch-visible.mjs` uses `taskkill /F /PID X /T` (process tree kill) on Windows to prevent orphan renderer processes. Repeated up to 5s until port 9222 is confirmed free.
26
+ - **Zombie Chrome prevention** — `launch.mjs` and `chrome.mjs` now clean up the mode marker and PID file consistently across all kill paths (--kill, ghost cleanup, idle timeout).
27
+
28
+ ### Added
29
+
30
+ - **`google-search` engine** — plain Google search extractor (locale-agnostic, `textarea[name="q"]`). Returns title/URL/snippet for traditional 10-blue-link results. Aliases: `gs`, `googlesearch`.
31
+
32
+ ### Headless Mode (default)
33
+
34
+ - **Chrome now runs headless by default** — no window, no GUI, purely background. Set `GREEDY_SEARCH_VISIBLE=1` to show the browser window.
35
+ - **Anti-detection stealth** — Patches injected via `Page.addScriptToEvaluateOnNewDocument` (runs before any page JS):
36
+ - `Runtime.enable` / CDP marker deletion (`__REBROWSER_*`, `__nightmare`, `__phantom`, etc.)
37
+ - `navigator.webdriver` → `false`, `navigator.plugins` → realistic list, `navigator.languages` → `['en-US', 'en']`
38
+ - `window.chrome` shim, WebGL vendor → Intel Iris, `hardwareConcurrency` → 8, `deviceMemory` → 8
39
+ - `TrustedTypes` policy, `requestAnimationFrame` keep-alive (prevents headless stall detection)
40
+ - `--disable-blink-features=AutomationControlled`, realistic `--user-agent`, `--window-size=1920,1080`
41
+ - **Human click simulation** — All verification/clicks now use CDP `Input.dispatchMouseEvent` with multi-event `mouseMoved→pressed→released`, ±3px coordinate jitter, and random delays (80–180ms hover, 30–90ms hold). Detection scripts return element selectors instead of clicking in-page; `handleVerification` performs human clicks via `humanClickElement()`/`humanClickXY()`. Applies to Turnstile iframes, reCAPTCHA, Cloudflare challenges, Microsoft auth, Copilot modals, and all generic verify/continue buttons.
42
+ - **Idle auto-cleanup** — Headless Chrome auto-killed after `GREEDY_SEARCH_IDLE_TIMEOUT_MINUTES` (default 5 min) of inactivity. Kills only the PID-tracked instance on port 9222 — never touches the main Chrome session. Activity timestamp written at search start and end.
43
+
44
+ ### Performance
45
+
46
+ - **Timeouts cut ~40–50%** across all extractors — typical search ~60–90s → ~30–45s:
47
+ - `TIMING`: postNav 1500→800ms, postNavSlow 2000→1000ms, postClick 400→250ms, postType 400→250ms, inputPoll 400→300ms, copyPoll 600→400ms, afterVerify 3000→2000ms
48
+ - Defaults: waitForCopyButton 60s→30s, waitForStreamComplete 30s→20s, handleVerification 60s→30s
49
+ - Per-extractor: Google stream 45s→30s, Gemini copyButton 120s→60s + inputDeadline 10s→8s, Perplexity inputDeadline 8s→5s + stream 30s→20s, Bing verification 90s→30s + copyButton 60s→30s
50
+ - Engine process timeout: 90s→60s (180s→120s Gemini)
51
+
52
+ ### Security
53
+
54
+ - **SonarCloud security hotspots fixed** — Two open hotspots resolved:
55
+ - _Weak cryptography (S2245)_ in `extractors/consent.mjs`: replaced `Math.random()` with `crypto.randomInt()` for the mouse-jitter RNG. Not actually security-sensitive (used only for ±3px jitter and timing delays), but compliant now.
56
+ - _PATH injection (S4036)_ in `src/search/chrome.mjs`: `spawn("node", ...)` replaced with `spawn(process.execPath, ...)` so the launcher doesn't rely on the `PATH` environment variable.
57
+ - **Query/prompt leakage prevention** — Queries and synthesis prompts no longer appear in OS process tables. All `spawn()` calls now pipe query/prompt through stdin via `--stdin` flag instead of command-line arguments. Affects `runSearch`, `runExtractor`, `synthesizeWithGemini`, and all 5 extractors (`perplexity`, `bing-copilot`, `google-ai`, `google-search`, `gemini`).
58
+
59
+ ### Visual
60
+
61
+ - **Redesigned banner** — Cleaner SVG layout with pi logo icon, no text, no lens graphic. Gemini Synthesizer pill badge integrated. Three design iterations landed on a minimal icon-only look (`docs/banner.svg`).
62
+
63
+ ### Fixed
64
+
65
+ - **Gemini & Bing copy button race condition** — Both extractors were capturing the user's query instead of the AI's answer. Root cause: `document.querySelector()` returns the first copy button in DOM order, which is the user's echoed message (above the assistant's response). For short queries this triggers instantly. Fixed by: (1) replacing `waitForCopyButton` with `waitForStreamComplete` to ensure the response finishes streaming before copying, and (2) clicking the **last** copy button (`querySelectorAll` + `[length-1]`) instead of the first — matching Perplexity's proven pattern. Also added periodic scroll-to-bottom alongside stream wait for Gemini to trigger lazy-loaded content.
66
+ - **Progress tracker shows false ✅ for errors** — `makeProgressTracker` in `shared.ts` completely ignored the `status` parameter, always showing `✅ done` for every engine. Now correctly tracks per-engine status and shows `❌ failed` when an engine errors.
67
+ - **Synthesis echoes engine JSON when engines fail** — When Perplexity/Bing fail, Gemini was echoing the engine summary JSON back as its "answer". `synthesis-runner.mjs` now detects this pattern (engine keys without synthesis fields) and treats it as a parse failure, falling back to individual engine results.
68
+ - **`headless=false` parameter ignored** — The `--headless` flag was never checked by `search.mjs` or `launch.mjs`; they only read `GREEDY_SEARCH_VISIBLE`. `shared.ts` now propagates the visibility preference via the env var when `headless=false` is passed.
69
+
70
+ ### Cloudflare / Verification Recovery
71
+
72
+ - **Auto-recovery from Cloudflare blocks** — When Perplexity (`#ask-input` not found) or Bing (`input not found` / `verification required`) fail in headless mode, `search.mjs` now:
73
+ 1. Detects the Cloudflare/verification error pattern
74
+ 2. Kills headless Chrome, relaunches in visible mode
75
+ 3. Retries the blocked engines — Cloudflare bypasses, cookies stored in Chrome profile
76
+ 4. Kills visible Chrome, relaunches headless
77
+ 5. Continues remaining pipeline (source fetch, synthesis)
78
+ 6. Cookies persist — subsequent headless searches pass transparently
79
+
80
+ ### Removed
81
+
82
+ - **`coding_task` tool removed** — `bin/coding-task.mjs`, `src/formatters/coding.ts`, registration deleted (644 lines).
83
+ - **`deep_research` tool removed** — handler, test, and `formatDeepResearch` + helpers deleted (521 lines). Use `greedy_search` with `depth: "deep"`.
84
+ - **Minimize debug logs** — Removed 9 verbose `[minimize]` console.log statements from launch.mjs.
85
+
86
+ ### Fixes
87
+
88
+ - **Code scanning alerts resolved (5 alerts)** — (1) Added `permissions: contents: read` to `sync-to-webaio.yml` workflow (#14). (2) Fixed backslash escaping in `consent.mjs`'s `humanClickElement` selector injection (#10) — selectors containing backslashes (e.g., `\"`) weren't properly escaped before DOM injection. (3) Fixed same backslash escaping in `google-search.mjs`'s `SEARCH_BOX` selector in 3 locations (#11-13).
89
+ - **`cdp.mjs` `getPages()` filter** — Allows `chrome://newtab/` (headless Chrome default initial tab). Prevents "No Chrome tabs found" on cold start.
90
+
91
+ ### Security
92
+
93
+ - **SonarCloud: Log injection vulnerability (1 alert)** — `bin/launch.mjs` no longer logs the raw WebSocket debugger URL (user-controlled data). Replaced with a static "WebSocket URL received" message to prevent query/URL content from leaking into logs.
94
+
95
+ ### Code Quality
96
+
97
+ - **SonarCloud batch fixes (~52 issues resolved)** across 16 source files:
98
+ - `S7781` — Replaced 18 `String#replace()` calls with `String#replaceAll()` for global replacements (regex → literal where applicable).
99
+ - `S1128` — Removed 15 unused imports (`dirname`, `join`, `relative`, `spawn`, `tmpdir`, `existsSync`, `shouldUseBrowser`, `closeTabs`, `cdp`, `openNewTab`, `closeTab`, `activateTab`, `trimText`).
100
+ - `S7773` — Migrated 11 `parseInt`/`parseFloat` calls to `Number.parseInt`/`Number.parseFloat`.
101
+ - `S7780` — Wrapped 8 CDP eval templates containing backslash sequences in `String.raw()` to eliminate double-escaping.
102
+ - `S7735` — Eliminated 13 negated-condition ternaries by inverting the conditional logic (`!== -1 ? ... : null` → `=== -1 ? null : ...`).
103
+
104
+ ### Security Hotspot Review
105
+
106
+ - **SonarCloud: 20 security hotspots reviewed and marked Safe** — All outstanding hotspots were assessed and resolved in SonarCloud:
107
+ - `S4721` OS Command Injection (×2) — Inputs are hardcoded (`port=9222`) or parsed from system output and validated via `Number.parseInt`. Not user-controlled.
108
+ - `S5852` Regex ReDoS (×10) — Regexes operate on bounded input with negated char classes or short fixed patterns. No practical denial-of-service risk.
109
+ - `S4036` PATH environment variable (×8) — Local CLI extension spawning package-internal Node scripts. PATH is host-controlled; no untrusted input reaches the command.
110
+
111
+ ### Tooling
112
+
113
+ - **SonarCloud configuration** — Added `sonar-project.properties` with exclusions for `test/**`, `test.mjs`, `test.sh`, `test_unit.mjs`, and `scripts/**` so test-only code does not skew source quality metrics.
4
114
 
5
115
  ## v1.8.5 (2026-04-29)
6
116
 
package/README.md CHANGED
@@ -1,98 +1,150 @@
1
- # GreedySearch for Pi
2
-
3
- Multi-engine AI web search for Pi via browser automation.
4
-
5
- - No API keys
6
- - Real browser results (Perplexity, Bing Copilot, Google AI)
7
- - Optional Gemini synthesis with source grounding
8
-
9
- ## Install
10
-
11
- ```bash
12
- pi install npm:@apmantza/greedysearch-pi
13
- ```
14
-
15
- Or from git:
16
-
17
- ```bash
18
- pi install git:github.com/apmantza/GreedySearch-pi
19
- ```
20
-
21
- ## Tools
22
-
23
- - `greedy_search` - fast or grounded multi-engine search
24
- - `coding_task` - browser-routed Gemini/Copilot coding assistance
25
-
26
- ## Quick usage
27
-
28
- ```js
29
- greedy_search({ query: "React 19 changes" })
30
- greedy_search({ query: "Prisma vs Drizzle", engine: "all", depth: "fast" })
31
- greedy_search({ query: "Best auth architecture 2026", engine: "all", depth: "deep" })
32
- ```
33
-
34
- ## Parameters (`greedy_search`)
35
-
36
- - `query` (required)
37
- - `engine`: `all` (default), `perplexity`, `bing`, `google`, `gemini`
38
- - `depth`: `standard` (default), `fast`, `deep`
39
- - `fullAnswer`: return full single-engine output instead of preview
40
-
41
- ## Depth modes
42
-
43
- - `fast` - quickest, no synthesis/source fetching
44
- - `standard` - balanced default for `engine: "all"` (synthesis + fetched sources)
45
- - `deep` - strongest grounding and confidence metadata
46
-
47
- ## Runtime commands
48
-
49
- ```bash
50
- node ~/.pi/agent/git/GreedySearch-pi/bin/launch.mjs
51
- node ~/.pi/agent/git/GreedySearch-pi/bin/launch.mjs --status
52
- node ~/.pi/agent/git/GreedySearch-pi/bin/launch.mjs --kill
53
- ```
54
-
55
- ## Requirements
56
-
57
- - Chrome
58
- - Node.js 20.11.0+ (22+ recommended)
59
-
60
- ## Source fetching
61
-
62
- When using `depth: "standard"` or `depth: "deep"`, source content is fetched and synthesized:
63
-
64
- - **Reddit** Uses Reddit's public `.json` API for posts and comments (no scraping)
65
- - **GitHub** Uses GitHub REST API for repos, READMEs, and file trees
66
- - **General web** — Mozilla Readability extraction with browser fallback for bot-blocked pages
67
- - **Metadata** — title, author/byline, site name, publish date, language, excerpt
68
-
69
- ## Project layout
70
-
71
- - `bin/` - runtime CLIs (`search.mjs`, `launch.mjs`, `cdp.mjs`, `coding-task.mjs`)
72
- - `extractors/` - engine-specific automation
73
- - `src/` - ranking/fetching/formatting internals (includes `reddit.mjs`, `github.mjs`, `fetcher.mjs`)
74
- - `skills/` - Pi skill metadata
75
-
76
- ## Testing
77
-
78
- Cross-platform test runner (Windows + Unix):
79
- ```bash
80
- npm test # run all tests
81
- npm run test:quick # skip slow tests
82
- npm run test:smoke # basic health check
83
- ```
84
-
85
- Full bash test suite (Unix only):
86
- ```bash
87
- npm run test:bash # comprehensive tests
88
- ./test.sh parallel # race condition tests
89
- ./test.sh flags # flag/option tests
90
- ```
91
-
92
- ## Changelog
93
-
94
- See `CHANGELOG.md`.
95
-
96
- ## License
97
-
98
- MIT
1
+ # GreedySearch for Pi
2
+
3
+ ![GreedySearch](docs/banner.svg)
4
+
5
+ Multi-engine AI web search for Pi via browser automation.
6
+
7
+ - No API keys
8
+ - Real browser results (Perplexity, Bing Copilot, Google AI)
9
+ - Optional Gemini synthesis with source grounding
10
+ - Chrome runs headless by default — no window, purely background
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ pi install npm:@apmantza/greedysearch-pi
16
+ ```
17
+
18
+ Or from git:
19
+
20
+ ```bash
21
+ pi install git:github.com/apmantza/GreedySearch-pi
22
+ ```
23
+
24
+ ## Tools
25
+
26
+ - `greedy_search` — multi-engine AI web search
27
+ - `websearch` — lightweight DuckDuckGo/Brave search (via pi-webaio)
28
+ - `webfetch` / `webpull` — page fetching and site crawling (via pi-webaio)
29
+
30
+ ## Quick usage
31
+
32
+ ```js
33
+ greedy_search({ query: "React 19 changes" });
34
+ greedy_search({ query: "Prisma vs Drizzle", engine: "all", depth: "fast" });
35
+ greedy_search({
36
+ query: "Best auth architecture 2026",
37
+ engine: "all",
38
+ depth: "deep",
39
+ });
40
+ // Headless is the default — no window. To see the browser:
41
+ // Set GREEDY_SEARCH_VISIBLE=1 before launching Pi
42
+ ```
43
+
44
+ ## Parameters (`greedy_search`)
45
+
46
+ - `query` (required)
47
+ - `engine`: `all` (default), `perplexity`, `bing`, `google`, `gemini`
48
+ - `depth`: `standard` (default), `fast`, `deep`
49
+ - `fullAnswer`: return full single-engine output instead of preview
50
+ - `headless`: set to `false` to show Chrome window (default: `true`)
51
+
52
+ ## Environment variables
53
+
54
+ | Variable | Default | Description |
55
+ | ------------------------------------ | ------------- | --------------------------------------------------------- |
56
+ | `GREEDY_SEARCH_VISIBLE` | (unset) | Set to `1` to show Chrome window instead of headless |
57
+ | `GREEDY_SEARCH_IDLE_TIMEOUT_MINUTES` | `5` | Minutes of inactivity before auto-killing headless Chrome |
58
+ | `GREEDY_SEARCH_LOCALE` | `en` | Default result language (en, de, fr, es, ja, etc.) |
59
+ | `CHROME_PATH` | auto-detected | Path to Chrome/Chromium executable |
60
+
61
+ ## Depth modes
62
+
63
+ - `fast` - quickest, no synthesis/source fetching
64
+ - `standard` - balanced default for `engine: "all"` (synthesis + fetched sources)
65
+ - `deep` - strongest grounding and confidence metadata
66
+
67
+ ## Runtime commands
68
+
69
+ ```bash
70
+ # Headless (default, no GUI)
71
+ node ~/.pi/agent/git/GreedySearch-pi/bin/launch.mjs
72
+ node ~/.pi/agent/git/GreedySearch-pi/bin/launch.mjs --status
73
+ node ~/.pi/agent/git/GreedySearch-pi/bin/launch.mjs --kill
74
+
75
+ # Visible (show browser window — useful for one-time Cloudflare clearance)
76
+ node ~/.pi/agent/git/GreedySearch-pi/bin/launch-visible.mjs
77
+ node ~/.pi/agent/git/GreedySearch-pi/bin/launch-visible.mjs --kill
78
+
79
+ # Chrome auto-cleaned after 5 min idle (prevents OOM)
80
+ # Override: GREEDY_SEARCH_IDLE_TIMEOUT_MINUTES=10
81
+ ```
+
82
+ ## Requirements
83
+
84
+ - Chrome
85
+ - Node.js 20.11.0+ (22+ recommended)
86
+
87
+ ## Known engine quirks
88
+
89
+ ### Bing Copilot
90
+
91
+ Bing Copilot detects headless Chrome and sandboxes all AI responses inside nested iframes (`copilot.microsoft.com` → `copilot.fun` → `blob:`). In this mode the copy button is hidden and the Cloudflare Turnstile challenge blocks content delivery. The clipboard-based extraction cannot work.
92
+
93
+ **Auto-recovery:** When Bing fails with any extraction error (clipboard, verification, Cloudflare), GreedySearch automatically switches to **visible Chrome**, retries the search, and caches Cloudflare clearance cookies in the Chrome profile. You may need to solve the Cloudflare challenge **once** manually when the visible Chrome window appears. After that, all subsequent headless searches bypass the challenge — the cookies persist in the profile.
94
+
95
+ If you prefer to skip the auto-recovery delay, launch visible Chrome ahead of time:
96
+
97
+ ```bash
98
+ node ~/.pi/agent/git/GreedySearch-pi/bin/launch-visible.mjs
99
+ ````
100
+
101
+ ## Anti-detection
102
+
103
+ Headless Chrome auto-injects stealth patches before any page JavaScript runs:
104
+
105
+ - `navigator.webdriver` hidden, plugins/languages faked, `window.chrome` shimmed
106
+ - WebGL vendor spoofed (Intel Iris), realistic hardware concurrency / memory
107
+ - CDP automation markers deleted, `requestAnimationFrame` kept alive
108
+ - Human-like click simulation with coordinate jitter and variable delays
109
+
110
+ This bypasses casual bot detection (basic `navigator.webdriver` checks) but does not defeat commercial anti-bot services (DataDome, PerimeterX, Kasada). **Bing Copilot specifically detects headless and sandboxes responses behind Cloudflare Turnstile** — see [Known engine quirks](#known-engine-quirks) for the auto-recovery mechanism.
111
+
112
+ When using `depth: "standard"` or `depth: "deep"`, source content is fetched and synthesized:
113
+
114
+ - **Reddit** — Uses Reddit's public `.json` API for posts and comments (no scraping)
115
+ - **GitHub** — Uses GitHub REST API for repos, READMEs, and file trees
116
+ - **General web** — Mozilla Readability extraction with browser fallback for bot-blocked pages
117
+ - **Metadata** — title, author/byline, site name, publish date, language, excerpt
118
+
119
+ ## Project layout
120
+
121
+ - `bin/` — runtime CLIs (`search.mjs`, `launch.mjs`, `launch-visible.mjs`, `visible.mjs`, `cdp.mjs`)
122
+ - `extractors/` — engine-specific automation + stealth/consent handling
123
+ - `src/` — search pipeline, chrome management, source fetching, formatting
124
+ - `skills/` — Pi skill metadata
125
+
126
+ ## Testing
127
+
128
+ Cross-platform test runner (Windows + Unix):
129
+
130
+ ```bash
131
+ npm test # run all tests
132
+ npm run test:quick # skip slow tests
133
+ npm run test:smoke # basic health check
134
+ ```
135
+
136
+ Full bash test suite (Unix only):
137
+
138
+ ```bash
139
+ npm run test:bash # comprehensive tests
140
+ ./test.sh parallel # race condition tests
141
+ ./test.sh flags # flag/option tests
142
+ ```
143
+
144
+ ## Changelog
145
+
146
+ See `CHANGELOG.md`.
147
+
148
+ ## License
149
+
150
+ MIT
package/bin/cdp.mjs CHANGED
@@ -28,7 +28,7 @@ const DAEMON_CONNECT_RETRIES = 20;
28
28
  const DAEMON_CONNECT_DELAY = 300;
29
29
  const MIN_TARGET_PREFIX_LEN = 8;
30
30
 
31
- const _tmpdir = tmpdir().replace(/\\/g, "/");
31
+ const _tmpdir = tmpdir().replaceAll("\\", "/");
32
32
  const PAGES_CACHE = `${_tmpdir}/cdp-pages.json`;
33
33
 
34
34
  function sockPath(targetId) {
@@ -66,7 +66,7 @@ function getWsUrl() {
66
66
  // so GreedySearch targets its own Chrome, not the user's main session.
67
67
  const profileDir = process.env.CDP_PROFILE_DIR;
68
68
  if (profileDir) {
69
- const p = `${profileDir.replace(/\\/g, "/")}/DevToolsActivePort`;
69
+ const p = `${profileDir.replaceAll("\\", "/")}/DevToolsActivePort`;
70
70
  if (existsSync(p)) {
71
71
  const lines = readFileSync(p, "utf8").trim().split("\n");
72
72
  return `ws://localhost:${lines[0]}${lines[1]}`;
@@ -233,7 +233,9 @@ class CDP {
233
233
  async function getPages(cdp) {
234
234
  const { targetInfos } = await cdp.send("Target.getTargets");
235
235
  return targetInfos.filter(
236
- (t) => t.type === "page" && !t.url.startsWith("chrome://"),
236
+ (t) =>
237
+ t.type === "page" &&
238
+ (!t.url.startsWith("chrome://") || t.url === "chrome://newtab/"),
237
239
  );
238
240
  }
239
241
 
@@ -365,7 +367,7 @@ async function shotStr(cdp, sid, filePath) {
365
367
  if (dpr === 1) {
366
368
  try {
367
369
  const raw = await evalStr(cdp, sid, "window.devicePixelRatio");
368
- const parsed = parseFloat(raw);
370
+ const parsed = Number.parseFloat(raw);
369
371
  if (parsed > 0) dpr = parsed;
370
372
  } catch {}
371
373
  }
@@ -482,8 +484,8 @@ async function clickStr(cdp, sid, selector) {
482
484
  }
483
485
 
484
486
  async function clickXyStr(cdp, sid, x, y) {
485
- const cx = parseFloat(x);
486
- const cy = parseFloat(y);
487
+ const cx = Number.parseFloat(x);
488
+ const cy = Number.parseFloat(y);
487
489
  if (Number.isNaN(cx) || Number.isNaN(cy))
488
490
  throw new Error("x and y must be numbers (CSS pixels)");
489
491
  const base = { x: cx, y: cy, button: "left", clickCount: 1, modifiers: 0 };
@@ -514,7 +516,10 @@ async function typeStr(cdp, sid, text) {
514
516
 
515
517
  async function loadAllStr(cdp, sid, selector, intervalMs = 1500) {
516
518
  if (!selector) throw new Error("CSS selector required");
517
- intervalMs = Math.min(Math.max(parseInt(intervalMs, 10) || 1500, 100), 30000);
519
+ intervalMs = Math.min(
520
+ Math.max(Number.parseInt(intervalMs, 10) || 1500, 100),
521
+ 30000,
522
+ );
518
523
  let clicks = 0;
519
524
  const deadline = Date.now() + 5 * 60 * 1000;
520
525
  while (Date.now() < deadline) {
@@ -662,7 +667,7 @@ async function runDaemon(targetId) {
662
667
  cdp,
663
668
  sessionId,
664
669
  args[0],
665
- args[1] ? parseInt(args[1], 10) : 1500,
670
+ args[1] ? Number.parseInt(args[1], 10) : 1500,
666
671
  );
667
672
  break;
668
673
  case "evalraw":
@@ -0,0 +1,233 @@
1
+ #!/usr/bin/env node
2
+ // launch-visible.mjs — launch GreedySearch Chrome in VISIBLE mode (window shown).
3
+ // No headless, no mode switching, no ghost cleanup complexity.
4
+ //
5
+ // Usage:
6
+ // node bin/launch-visible.mjs — launch visible Chrome
7
+ // node bin/launch-visible.mjs --kill — stop Chrome
8
+ // node bin/launch-visible.mjs --status — check if running
9
+
10
+ import { execSync, spawn } from "node:child_process";
11
+ import {
12
+ existsSync,
13
+ mkdirSync,
14
+ readFileSync,
15
+ unlinkSync,
16
+ writeFileSync,
17
+ } from "node:fs";
18
+ import http from "node:http";
19
+ import { platform, tmpdir } from "node:os";
20
+ import { join } from "node:path";
21
+
22
+ const PORT = 9222;
23
+ const PROFILE_DIR = join(tmpdir(), "greedysearch-chrome-profile");
24
+ const ACTIVE_PORT = join(PROFILE_DIR, "DevToolsActivePort");
25
+ const PID_FILE = join(tmpdir(), "greedysearch-chrome.pid");
26
+ const MODE_FILE = join(tmpdir(), "greedysearch-chrome-mode");
27
+
28
+ // ─── Helpers ──────────────────────────────────────────────────────
29
+
30
+ function findChrome() {
31
+ const os = platform();
32
+ const candidates =
33
+ os === "win32"
34
+ ? [
35
+ "C:/Program Files/Google/Chrome/Application/chrome.exe",
36
+ "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe",
37
+ ]
38
+ : os === "darwin"
39
+ ? [
40
+ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
41
+ "/Applications/Chromium.app/Contents/MacOS/Chromium",
42
+ ]
43
+ : [
44
+ "/usr/bin/google-chrome",
45
+ "/usr/bin/google-chrome-stable",
46
+ "/usr/bin/chromium-browser",
47
+ "/usr/bin/chromium",
48
+ ];
49
+ return candidates.find(existsSync) || null;
50
+ }
51
+
52
+ function isRunning() {
53
+ if (!existsSync(PID_FILE)) return false;
54
+ const pid = Number.parseInt(readFileSync(PID_FILE, "utf8").trim(), 10);
55
+ if (!pid) return false;
56
+ try {
57
+ process.kill(pid, 0);
58
+ return pid;
59
+ } catch {
60
+ return false;
61
+ }
62
+ }
63
+
64
+ function getPortPid(port) {
65
+ try {
66
+ const os = platform();
67
+ if (os === "win32") {
68
+ const out = execSync(`netstat -ano -p TCP 2>nul`, { encoding: "utf8" });
69
+ const regex = new RegExp(
70
+ String.raw`TCP\s+[^\s]*:${port}\s+[^\s]*:0\s+LISTENING\s+(\d+)`,
71
+ "i",
72
+ );
73
+ const match = out.match(regex);
74
+ return match ? Number.parseInt(match[1], 10) : null;
75
+ }
76
+ } catch {
77
+ return null;
78
+ }
79
+ }
80
+
81
+ function killProcess(pid) {
82
+ try {
83
+ if (platform() === "win32") {
84
+ execSync(`taskkill /F /PID ${pid} /T`, { stdio: "ignore" });
85
+ } else {
86
+ process.kill(pid, "SIGKILL");
87
+ }
88
+ return true;
89
+ } catch {
90
+ return false;
91
+ }
92
+ }
93
+
94
+ function httpGet(url, timeoutMs = 1000) {
95
+ return new Promise((resolve) => {
96
+ const req = http.get(url, (res) => {
97
+ let body = "";
98
+ res.on("data", (d) => (body += d));
99
+ res.on("end", () => resolve({ ok: res.statusCode === 200, body }));
100
+ });
101
+ req.on("error", () => resolve({ ok: false }));
102
+ req.setTimeout(timeoutMs, () => {
103
+ req.destroy();
104
+ resolve({ ok: false });
105
+ });
106
+ });
107
+ }
108
+
109
+ async function waitForPort(timeoutMs = 15000) {
110
+ const deadline = Date.now() + timeoutMs;
111
+ while (Date.now() < deadline) {
112
+ const { ok, body } = await httpGet(
113
+ `http://localhost:${PORT}/json/version`,
114
+ 1500,
115
+ );
116
+ if (ok) {
117
+ try {
118
+ const { webSocketDebuggerUrl } = JSON.parse(body);
119
+ const wsPath = new URL(webSocketDebuggerUrl).pathname;
120
+ writeFileSync(ACTIVE_PORT, `${PORT}\n${wsPath}`, "utf8");
121
+ return true;
122
+ } catch {}
123
+ }
124
+ await new Promise((r) => setTimeout(r, 400));
125
+ }
126
+ return false;
127
+ }
128
+
129
+ // ─── Nuke any Chrome holding port 9222 ────────────────────────────
130
+
131
+ function nukePort() {
132
+ // Kill by PID file
133
+ const pid = isRunning();
134
+ if (pid) killProcess(pid);
135
+
136
+ // Kill by port (ghost)
137
+ const portPid = getPortPid(PORT);
138
+ if (portPid && portPid !== pid) killProcess(portPid);
139
+
140
+ // Clean up files
141
+ try {
142
+ unlinkSync(PID_FILE);
143
+ } catch {}
144
+ try {
145
+ unlinkSync(ACTIVE_PORT);
146
+ } catch {}
147
+ try {
148
+ unlinkSync(MODE_FILE);
149
+ } catch {}
150
+
151
+ // Wait for port to free
152
+ return new Promise((resolve) => {
153
+ const start = Date.now();
154
+ const check = () => {
155
+ const p = getPortPid(PORT);
156
+ if (!p) return resolve(true);
157
+ if (Date.now() - start > 5000) return resolve(false);
158
+ killProcess(p);
159
+ setTimeout(check, 500);
160
+ };
161
+ check();
162
+ });
163
+ }
164
+
165
+ // ─── Main ─────────────────────────────────────────────────────────
166
+
167
+ async function main() {
168
+ const arg = process.argv[2];
169
+
170
+ if (arg === "--kill") {
171
+ await nukePort();
172
+ console.log("Chrome stopped.");
173
+ return;
174
+ }
175
+
176
+ if (arg === "--status") {
177
+ const pid = isRunning() || getPortPid(PORT);
178
+ if (pid) {
179
+ console.log(`Running — pid ${pid}, port ${PORT}`);
180
+ } else {
181
+ console.log("Not running.");
182
+ }
183
+ return;
184
+ }
185
+
186
+ // Nuke anything on the port before launching
187
+ console.log("Stopping any existing Chrome on port 9222...");
188
+ await nukePort();
189
+
190
+ const CHROME_EXE = process.env.CHROME_PATH || findChrome();
191
+ if (!CHROME_EXE) {
192
+ console.error("Chrome not found. Set CHROME_PATH env var.");
193
+ process.exit(1);
194
+ }
195
+
196
+ mkdirSync(PROFILE_DIR, { recursive: true });
197
+
198
+ // Visible-only flags — NO --headless
199
+ const flags = [
200
+ `--remote-debugging-port=${PORT}`,
201
+ "--disable-features=DevToolsPrivacyUI",
202
+ "--no-first-run",
203
+ "--no-default-browser-check",
204
+ "--disable-default-apps",
205
+ `--user-data-dir=${PROFILE_DIR}`,
206
+ "--profile-directory=Default",
207
+ "--new-window",
208
+ "about:blank",
209
+ ];
210
+
211
+ console.log("Launching visible Chrome...");
212
+ const proc = spawn(CHROME_EXE, flags, {
213
+ detached: true,
214
+ stdio: "ignore",
215
+ });
216
+ proc.unref();
217
+
218
+ const chromePid = proc.pid;
219
+ writeFileSync(PID_FILE, String(chromePid));
220
+ writeFileSync(MODE_FILE, "visible", "utf8");
221
+ console.log(`Chrome PID: ${chromePid}`);
222
+
223
+ const ready = await waitForPort();
224
+ if (!ready) {
225
+ console.error("Chrome did not become ready within 15s.");
226
+ process.exit(1);
227
+ }
228
+
229
+ console.log("Visible Chrome ready on port 9222.");
230
+ console.log("Keep this terminal open to keep Chrome alive.");
231
+ }
232
+
233
+ main();