npm - @apmantza/greedysearch-pi - Versions diffs - 1.1.7 → 1.2.0 - Mend

@apmantza/greedysearch-pi 1.1.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -4,6 +4,13 @@ Pi extension that adds a `greedy_search` tool — fans out queries to Perplexity
 Forked from [GreedySearch-claude](https://github.com/apmantza/GreedySearch-claude).
+## What's New (v1.2.0)
+- **Fixed parallel search race condition** — multiple `greedy_search` calls can now run concurrently without tab conflicts
+- **Improved Bing Copilot verification** — better auto-handling of Turnstile challenges and modal dialogs
+- **Added test suite** — run `./test.sh` to verify all modes work correctly
+- **Atomic port file writes** — prevents corruption when multiple processes connect to Chrome
 ## Install
 ```bash
@@ -128,6 +135,22 @@ Check status:
 node ~/.pi/agent/git/GreedySearch-pi/launch.mjs --status
 ```
+## Testing
+Run the test suite to verify everything works:
+```bash
+./test.sh           # full suite (~3-4 min)
+./test.sh quick     # skip parallel and synthesis tests (~1 min)
+./test.sh parallel  # parallel race condition tests only
+```
+Tests verify:
+- Single engine mode (perplexity, bing, google)
+- Sequential "all" mode searches
+- Parallel "all" mode (5 concurrent searches) — detects tab race conditions
+- Synthesis mode with Gemini
 ## Troubleshooting
 ### "Chrome not found"
@@ -144,7 +167,10 @@ node ~/.pi/agent/git/GreedySearch-pi/launch.mjs
 ```
 ### Google / Bing "verify you're human"
-The extension auto-clicks simple verification buttons. For CAPTCHAs, solve manually in the Chrome window that opens.
+The extension auto-clicks verification buttons and Cloudflare Turnstile challenges. For hard CAPTCHAs (image puzzles), solve manually in the Chrome window that opens.
+### Parallel searches failing
+Earlier versions shared Chrome tabs between concurrent searches, causing `ERR_ABORTED` errors. Version 1.2.0+ creates fresh tabs for each search, allowing safe parallel execution.
 ### Search hangs
 Chrome may be unresponsive. Restart it with `launch.mjs --kill` then `launch.mjs`.

package/extractors/consent.mjs CHANGED Viewed

@@ -36,12 +36,42 @@ const VERIFY_DETECT_JS = `
     if (msVerify) { msVerify.click(); return 'clicked-ms-verify:' + (msVerify.innerText?.trim() || msVerify.value); }
   }
+  // --- Bing Copilot / Microsoft "Verify you're human" interstitial ---
+  // Copilot sometimes shows a modal with "Continue" or "Verify" before allowing queries
+  if (url.includes('copilot.microsoft.com') || url.includes('bing.com/chat')) {
+    // Look for verification modal/dialog
+    var modal = document.querySelector('[role="dialog"], .b_modal, .bnp_hfly, [class*="verify"], [class*="challenge"]');
+    if (modal) {
+      // Find any actionable button in the modal
+      var modalBtns = Array.from(modal.querySelectorAll('button, a[role="button"], input[type="submit"]'));
+      var actionBtn = modalBtns.find(b => /^(continue|verify|submit|next|i agree|accept|got it)$/i.test(b.innerText?.trim() || b.value || ''));
+      if (actionBtn) { actionBtn.click(); return 'clicked-copilot-modal:' + actionBtn.innerText.trim(); }
+    }
+    // Check for Turnstile iframe (Copilot uses Cloudflare Turnstile)
+    var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
+    if (turnstileIframe) {
+      // Try clicking the iframe container or nearby checkbox
+      var container = turnstileIframe.closest('[class*="turnstile"], [class*="challenge"], [id*="turnstile"]') || turnstileIframe.parentElement;
+      if (container) {
+        var checkbox = container.querySelector('input[type="checkbox"]');
+        if (checkbox && !checkbox.checked) {
+          checkbox.click();
+          return 'clicked-turnstile-in-iframe';
+        }
+        // Click the container itself (Turnstile often captures clicks on parent)
+        container.click();
+        return 'clicked-turnstile-container-near-iframe';
+      }
+    }
+  }
   // --- Cloudflare Turnstile (used by Copilot and many sites) ---
   // Turnstile widget in iframe
   var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]');
   if (turnstileIframe) {
     // Try to find and click the checkbox inside the iframe's container
-    var turnstileCheckbox = document.querySelector('#cf-turnstile-response, [data-turnstile-callback] input, .cf-turnstile input[type=checkbox]');
+    var turnstileCheckbox = document.querySelector('#cf-turnstile-response, [data-turnstile-callback] input, .cf-turnstile input[type="checkbox"]');
     if (turnstileCheckbox && !turnstileCheckbox.checked) {
       turnstileCheckbox.click();
       return 'clicked-turnstile-checkbox';
@@ -95,19 +125,26 @@ const VERIFY_RETRY_JS = `
   var isVerifyPage = url.includes('/sorry/') ||
                      url.includes('challenges.cloudflare.com') ||
                      url.includes('login.microsoftonline.com') ||
-                     document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile');
+                     document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile, [role="dialog"]');
   if (!isVerifyPage) return 'cleared';
   // Try clicking any verify/continue button again
   var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
-  var btn = btns.find(b => /^(verify|continue|next|i am human|not a robot)$/i.test(b.innerText?.trim() || b.value || ''));
+  var btn = btns.find(b => /^(verify|continue|next|i am human|not a robot|submit)$/i.test(b.innerText?.trim() || b.value || ''));
   if (btn) { btn.click(); return 'clicked:' + (btn.innerText?.trim() || btn.value); }
   // Try Turnstile checkbox
   var cf = document.querySelector('#cf-stage input[type="checkbox"], .cf-turnstile input');
   if (cf && !cf.checked) { cf.click(); return 'clicked-turnstile'; }
+  // Check for modal dialog with continue button (Copilot interstitial)
+  var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"]');
+  if (modal) {
+    var modalBtn = modal.querySelector('button, a[role="button"]');
+    if (modalBtn) { modalBtn.click(); return 'clicked-modal-btn:' + modalBtn.innerText.trim(); }
+  }
   return 'still-verifying';
 })()
 `;
@@ -119,6 +156,17 @@ export async function dismissConsent(tab, cdp) {
   }
 }
+// Get iframe bounding box for coordinate-based clicking (for cross-origin Turnstile)
+const GET_IFRAME_CENTER_JS = `
+(function() {
+  var iframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
+  if (!iframe) return null;
+  var rect = iframe.getBoundingClientRect();
+  // Click near the center-left where the checkbox usually is
+  return JSON.stringify({ x: rect.left + 30, y: rect.top + rect.height / 2 });
+})()
+`;
 // Returns 'clear' | 'clicked' | 'needs-human'
 export async function handleVerification(tab, cdp, waitMs = 60000) {
   const result = await cdp(['eval', tab, VERIFY_DETECT_JS]).catch(() => null);
@@ -158,6 +206,17 @@ export async function handleVerification(tab, cdp, waitMs = 60000) {
         await new Promise(r => setTimeout(r, 2000));
       }
+      // If verification is stuck, try clicking the Turnstile iframe by coordinates
+      const iframeCenter = await cdp(['eval', tab, GET_IFRAME_CENTER_JS]).catch(() => null);
+      if (iframeCenter && iframeCenter !== 'null') {
+        try {
+          const { x, y } = JSON.parse(iframeCenter);
+          process.stderr.write(`[greedysearch] Trying coordinate click on Turnstile iframe at (${x}, ${y})...\n`);
+          await cdp(['clickxy', tab, String(x), String(y)]);
+          await new Promise(r => setTimeout(r, 3000));
+        } catch {}
+      }
       await new Promise(r => setTimeout(r, 1500));
     }
@@ -166,5 +225,24 @@ export async function handleVerification(tab, cdp, waitMs = 60000) {
     return 'needs-human';
   }
+  // Detection didn't find anything initially, but check for Turnstile iframe with coordinates
+  if (result === 'null' || !result) {
+    const iframeCenter = await cdp(['eval', tab, GET_IFRAME_CENTER_JS]).catch(() => null);
+    if (iframeCenter && iframeCenter !== 'null') {
+      process.stderr.write(`[greedysearch] Found Turnstile iframe, attempting coordinate click...\n`);
+      try {
+        const { x, y } = JSON.parse(iframeCenter);
+        await cdp(['clickxy', tab, String(x), String(y)]);
+        await new Promise(r => setTimeout(r, 3000));
+        // Check if it worked
+        const cleared = await cdp(['eval', tab, VERIFY_RETRY_JS]).catch(() => null);
+        if (cleared === 'cleared' || cleared === 'null') {
+          return 'clicked';
+        }
+      } catch {}
+    }
+  }
   return 'clear';
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@apmantza/greedysearch-pi",
-  "version": "1.1.7",
+  "version": "1.2.0",
   "description": "Pi extension: browser-automation tool that searches Perplexity, Bing Copilot, and Google AI in parallel, extracts answers and sources via CDP, with optional Gemini synthesis — grounded AI answers from real browser interactions.",
   "type": "module",
   "keywords": [

package/search.mjs CHANGED Viewed

@@ -22,7 +22,7 @@
 import { spawn } from 'child_process';
 import { fileURLToPath } from 'url';
 import { join, dirname } from 'path';
-import { readFileSync, existsSync, writeFileSync, mkdirSync } from 'fs';
+import { readFileSync, existsSync, writeFileSync, mkdirSync, renameSync, unlinkSync } from 'fs';
 import { tmpdir, homedir } from 'os';
 import http from 'http';
@@ -305,7 +305,39 @@ function probeGreedyChrome(timeoutMs = 3000) {
 // Write (or refresh) the DevToolsActivePort file for the GreedySearch Chrome so
 // cdp.mjs always connects to the right port rather than the user's main Chrome.
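+// Writes to a temp file and renames it into place, guarded by a lock file, to avoid corruption from parallel processes.
+// NOTE: the old port file is unlinked just before the rename, so the swap is not strictly atomic — a reader can briefly find no file.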
 async function refreshPortFile() {
+  const LOCK_FILE = ACTIVE_PORT_FILE + '.lock';
+  const TEMP_FILE = ACTIVE_PORT_FILE + '.tmp';
+  // Simple file-based lock with timeout (prevents parallel writes from corrupting the port file)
+  const lockAcquired = await new Promise((resolve) => {
+    const start = Date.now();
+    const tryLock = () => {
+      try {
+        writeFileSync(LOCK_FILE, `${process.pid}`, 'utf8');
+        resolve(true);
+      } catch {
+        // Lock file exists - check if stale (older than 5 seconds)
+        try {
+          const lockTime = parseInt(readFileSync(LOCK_FILE, 'utf8'));
+          if (Date.now() - lockTime > 5000) {
+            // Stale lock - overwrite
+            writeFileSync(LOCK_FILE, `${process.pid}`, 'utf8');
+            resolve(true);
+          } else if (Date.now() - start < 1000) {
+            setTimeout(tryLock, 50);
+          } else {
+            resolve(false); // Give up after 1s
+          }
+        } catch {
+          setTimeout(tryLock, 50);
+        }
+      }
+    };
+    tryLock();
+  });
   try {
     const body = await new Promise((res, rej) => {
       const req = http.get(`http://localhost:${GREEDY_PORT}/json/version`, r => {
@@ -318,8 +350,19 @@ async function refreshPortFile() {
     });
     const { webSocketDebuggerUrl } = JSON.parse(body);
     const wsPath = new URL(webSocketDebuggerUrl).pathname;
-    writeFileSync(ACTIVE_PORT_FILE, `${GREEDY_PORT}\n${wsPath}`, 'utf8');
+    // Atomic write: write to temp file, then rename
+    if (lockAcquired) {
+      writeFileSync(TEMP_FILE, `${GREEDY_PORT}\n${wsPath}`, 'utf8');
+      try { unlinkSync(ACTIVE_PORT_FILE); } catch {}
+      renameSync(TEMP_FILE, ACTIVE_PORT_FILE);
+    }
   } catch { /* best-effort — launch.mjs already wrote the file on first start */ }
+  finally {
+    if (lockAcquired) {
+      try { unlinkSync(LOCK_FILE); } catch {}
+    }
+  }
 }
 async function ensureChrome() {
@@ -377,25 +420,14 @@ async function main() {
   if (engine === 'all') {
     await cdp(['list']); // refresh pages cache
-    // Assign tabs: reuse existing engine tabs from cache, open new ones where needed.
-    // Engine tabs are never closed — keeping them alive preserves session cookies and
-    // reduces the chance of verification challenges on subsequent searches.
+    // PARALLEL-SAFE: Always create fresh tabs for each engine to avoid race conditions
+    // when multiple "all" searches run concurrently. Previously, reusing cached tabs
+    // caused ERR_ABORTED and Uncaught errors as multiple processes fought over the same tab.
     const tabs = [];
-    let blankReused = false;
-    for (const e of ALL_ENGINES) {
-      const existing = getTabFromCache(e);
-      if (existing) {
-        tabs.push(existing);
-      } else if (!blankReused) {
-        const tab = await getOrReuseBlankTab();
-        tabs.push(tab);
-        blankReused = true;
-      } else {
-        await new Promise(r => setTimeout(r, 500));
-        const tab = await openNewTab();
-        tabs.push(tab);
-      }
+    for (let i = 0; i < ALL_ENGINES.length; i++) {
+      if (i > 0) await new Promise(r => setTimeout(r, 300)); // small delay between tab opens
+      const tab = await openNewTab();
+      tabs.push(tab);
     }
     // All tabs assigned — run extractors in parallel

package/test.sh ADDED Viewed

@@ -0,0 +1,221 @@
+#!/usr/bin/env bash
+# test.sh — GreedySearch test suite
+#
+# Usage:
+#   ./test.sh           # run all tests
+#   ./test.sh parallel  # run only parallel test
+#   ./test.sh quick       # skip slow tests (parallel + synthesis)
+#   ./test.sh sequential  # run everything except the parallel test
+#
+# Tests verify:
+#   - No crashes/errors from extractors
+#   - All engines complete in "all" mode
+#   - Correct queries in results (not mixed up)
+#   - Parallel searches don't race on shared tabs
+set -e
+cd "$(dirname "$0")"
+RESULTS_DIR="results/test_$(date +%Y%m%d_%H%M%S)"
+mkdir -p "$RESULTS_DIR"
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+PASS=0
+FAIL=0
+pass() { PASS=$((PASS+1)); echo -e "  ${GREEN}✓${NC} $1"; }
+fail() { FAIL=$((FAIL+1)); echo -e "  ${RED}✗${NC} $1"; }
+check_no_errors() {
+  # Prints a "; "-joined list of per-engine error messages found in the JSON
+  # result file $1, or nothing when no engine reported an error.
+  #
+  # The path is handed to node through the environment instead of being spliced
+  # into the script text, so quotes or backslashes in it cannot break the JS.
+  # A missing or malformed file is reported as an error: empty output is what
+  # callers treat as "no errors", so a silent node failure must not produce it.
+  local file="$1"
+  local errors=$(GS_RESULT_FILE="$file" node -e "
+    let d = null;
+    try {
+      d = JSON.parse(require('fs').readFileSync(process.env.GS_RESULT_FILE, 'utf8'));
+    } catch (e) {
+      console.log('unreadable result file: ' + e.message);
+      process.exit(0);
+    }
+    const errs = [];
+    if (d?.perplexity?.error) errs.push('perplexity: ' + d.perplexity.error);
+    if (d?.bing?.error) errs.push('bing: ' + d.bing.error);
+    if (d?.google?.error) errs.push('google: ' + d.google.error);
+    console.log(errs.join('; ') || '');
+  " 2>/dev/null || echo "could not inspect result file")
+  echo "$errors"
+}
+check_correct_queries() {
+  # Prints "ok" when every engine query recorded in the JSON result file $1
+  # equals the expected query $2; otherwise prints the queries that were found.
+  #
+  # Both values reach node through the environment rather than being pasted into
+  # the script source, so a query containing a quote (e.g. "what's new") can no
+  # longer produce a JS syntax error. If node cannot read or parse the file, a
+  # descriptive message is printed instead of an empty string.
+  local file="$1"
+  local expected="$2"
+  local result=$(GS_RESULT_FILE="$file" GS_EXPECTED_QUERY="$expected" node -e "
+    const d = JSON.parse(require('fs').readFileSync(process.env.GS_RESULT_FILE, 'utf8'));
+    const expected = process.env.GS_EXPECTED_QUERY;
+    const queries = [d.perplexity?.query, d.bing?.query, d.google?.query].filter(Boolean);
+    const allMatch = queries.every(q => q === expected);
+    console.log(allMatch ? 'ok' : 'queries: ' + queries.join(', '));
+  " 2>/dev/null || echo "could not read queries from result file")
+  echo "$result"
+}
+check_all_engines_completed() {
+  # Prints "ok" when perplexity, bing and google each have an answer longer than
+  # 10 characters in the JSON result file $1; otherwise prints which are missing.
+  #
+  # The path is passed via the environment so special characters in it cannot
+  # corrupt the inline script, and an unreadable file yields an explicit message
+  # rather than an empty one.
+  local file="$1"
+  local result=$(GS_RESULT_FILE="$file" node -e "
+    const d = JSON.parse(require('fs').readFileSync(process.env.GS_RESULT_FILE, 'utf8'));
+    const hasAnswer = (e) => d[e]?.answer && d[e].answer.length > 10;
+    const engines = ['perplexity', 'bing', 'google'];
+    const ok = engines.every(hasAnswer);
+    console.log(ok ? 'ok' : 'missing: ' + engines.filter(e => !hasAnswer(e)).join(', '));
+  " 2>/dev/null || echo "could not read answers from result file")
+  echo "$result"
+}
+# ─────────────────────────────────────────────────────────
+echo -e "\n${YELLOW}═══ GreedySearch Test Suite ═══${NC}\n"
+# ── Test 1: Single engine mode ──────────────────────────
+if [[ "$1" != "parallel" ]]; then
+  echo "Test 1: Single engine mode"
+  for engine in perplexity bing google; do
+    outfile="$RESULTS_DIR/single_${engine}.json"
+    # The script runs under `set -e`, so the search is executed as the `if`
+    # condition itself: run as a bare command, a non-zero exit would abort the
+    # whole suite before the failure could be counted or the summary printed.
+    if node search.mjs "$engine" "explain $engine attention mechanism" --out "$outfile" 2>/dev/null && [[ -f "$outfile" ]]; then
+      errors=$(check_no_errors "$outfile")
+      if [[ -z "$errors" ]]; then
+        pass "$engine completed without errors"
+      else
+        fail "$engine errors: $errors"
+      fi
+    else
+      fail "$engine failed to run"
+    fi
+  done
+fi
+# ── Test 2: Sequential "all" mode ───────────────────────
+if [[ "$1" != "parallel" ]]; then
+  echo -e "\nTest 2: Sequential 'all' mode (3 runs)"
+  for i in 1 2 3; do
+    outfile="$RESULTS_DIR/seq_${i}.json"
+    query="LLM inference optimization techniques $i"
+    # Run the search as the `if` condition so that, under `set -e`, a failing
+    # run is recorded as a test failure instead of terminating the script.
+    if node search.mjs all "$query" --out "$outfile" 2>/dev/null && [[ -f "$outfile" ]]; then
+      errors=$(check_no_errors "$outfile")
+      if [[ -z "$errors" ]]; then
+        pass "Run $i: no errors"
+      else
+        fail "Run $i errors: $errors"
+      fi
+      correct=$(check_correct_queries "$outfile" "$query")
+      if [[ "$correct" == "ok" ]]; then
+        pass "Run $i: correct queries"
+      else
+        fail "Run $i: $correct"
+      fi
+    else
+      fail "Run $i: failed to run"
+    fi
+  done
+fi
+# ── Test 3: Parallel "all" mode (race condition test) ───
+if [[ "$1" != "quick" && "$1" != "sequential" ]]; then
+  echo -e "\nTest 3: Parallel 'all' mode (5 concurrent searches)"
+  PARALLEL_QUERIES=(
+    "what are transformer architectures in LLMs"
+    "explain RLHF fine-tuning process"
+    "difference between GPT and BERT models"
+    "how does chain of thought prompting work"
+    "what is retrieval augmented generation"
+  )
+  # Launch every search in the background and remember its PID so each exit
+  # status can be collected individually below.
+  PIDS=()
+  for i in "${!PARALLEL_QUERIES[@]}"; do
+    outfile="$RESULTS_DIR/parallel_${i}.json"
+    query="${PARALLEL_QUERIES[$i]}"
+    node search.mjs all "$query" --out "$outfile" 2>/dev/null &
+    PIDS+=($!)
+  done
+  # Wait for all to complete
+  FAILED=0
+  for i in "${!PIDS[@]}"; do
+    if ! wait "${PIDS[$i]}"; then
+      fail "Parallel $i: process exited with error"
+      # Not ((FAILED++)): that expression evaluates to 0 on the first increment,
+      # which gives exit status 1 and would abort the script under `set -e`.
+      FAILED=$((FAILED+1))
+    fi
+  done
+  # Result files are only inspected when every process exited cleanly.
+  if [[ $FAILED -eq 0 ]]; then
+    # Check results
+    for i in "${!PARALLEL_QUERIES[@]}"; do
+      outfile="$RESULTS_DIR/parallel_${i}.json"
+      query="${PARALLEL_QUERIES[$i]}"
+      if [[ -f "$outfile" ]]; then
+        errors=$(check_no_errors "$outfile")
+        if [[ -z "$errors" ]]; then
+          pass "Parallel $i: no errors"
+        else
+          fail "Parallel $i: $errors"
+        fi
+        correct=$(check_correct_queries "$outfile" "$query")
+        if [[ "$correct" == "ok" ]]; then
+          pass "Parallel $i: correct query"
+        else
+          fail "Parallel $i: $correct (TAB RACE DETECTED)"
+        fi
+        all_done=$(check_all_engines_completed "$outfile")
+        if [[ "$all_done" == "ok" ]]; then
+          pass "Parallel $i: all engines answered"
+        else
+          fail "Parallel $i: $all_done"
+        fi
+      else
+        fail "Parallel $i: no result file"
+      fi
+    done
+  fi
+fi
+# ── Test 4: Synthesis mode ──────────────────────────────
+if [[ "$1" != "parallel" && "$1" != "quick" ]]; then
+  echo -e "\nTest 4: Synthesis mode"
+  outfile="$RESULTS_DIR/synthesis.json"
+  # Run the search as the `if` condition so a non-zero exit is reported as a
+  # failed test rather than aborting the script under `set -e`.
+  if node search.mjs all "what is Mixture of Experts in neural networks" --synthesize --out "$outfile" 2>/dev/null && [[ -f "$outfile" ]]; then
+    # A plain `var=$(cmd)` assignment takes cmd's exit status, so a node failure
+    # here (e.g. malformed JSON) would also trip `set -e`; fall back to "missing".
+    has_synthesis=$(node -e "
+      const d = JSON.parse(require('fs').readFileSync('$outfile','utf8'));
+      console.log(d._synthesis?.answer ? 'ok' : 'missing');
+    " 2>/dev/null) || has_synthesis="missing"
+    if [[ "$has_synthesis" == "ok" ]]; then
+      pass "Synthesis completed"
+    else
+      fail "Synthesis missing"
+    fi
+    errors=$(check_no_errors "$outfile")
+    if [[ -z "$errors" ]]; then
+      pass "Synthesis: no engine errors"
+    else
+      fail "Synthesis: $errors"
+    fi
+  else
+    fail "Synthesis failed to run"
+  fi
+fi
+# ─────────────────────────────────────────────────────────
+echo -e "\n${YELLOW}═══ Results ═══${NC}"
+echo -e "  ${GREEN}Passed: $PASS${NC}"
+[[ $FAIL -gt 0 ]] && echo -e "  ${RED}Failed: $FAIL${NC}" || echo "  Failed: 0"
+echo "  Results in: $RESULTS_DIR"
+echo ""
+[[ $FAIL -eq 0 ]] && exit 0 || exit 1