npm - @apmantza/greedysearch-pi - Versions diffs - 1.4.1 → 1.4.2 - Mend

@apmantza/greedysearch-pi 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +219 -208
package/cdp.mjs +16 -16
package/extractors/bing-copilot.mjs +12 -21
package/extractors/consent.mjs +10 -3
package/extractors/gemini.mjs +12 -53
package/extractors/google-ai.mjs +7 -10
package/extractors/perplexity.mjs +28 -31
package/extractors/selectors.mjs +52 -52
package/index.ts +623 -623
package/launch.mjs +33 -33
package/newfeaturesideas.md +105 -0
package/package.json +1 -1
package/skills/greedy-search/SKILL.md +145 -145
package/test.sh +298 -298

package/launch.mjs CHANGED Viewed

@@ -5,8 +5,8 @@
 // the "Allow remote debugging?" dialog entirely. It runs on port 9222 so it doesn't
 // conflict with your main Chrome session (which may use port 9223).
 //
-// search.mjs passes CDP_PROFILE_DIR so cdp.mjs targets this dedicated Chrome
-// without ever touching the user's main Chrome DevToolsActivePort file.
+// search.mjs passes CDP_PROFILE_DIR so cdp.mjs targets this dedicated Chrome
+// without ever touching the user's main Chrome DevToolsActivePort file.
 //
 // Usage:
 //   node launch.mjs          — launch (or report if already running)
@@ -14,8 +14,8 @@
 //   node launch.mjs --status — check if running
 import { spawn } from 'child_process';
-import { existsSync, writeFileSync, readFileSync, mkdirSync, unlinkSync } from 'fs';
-import { tmpdir, platform } from 'os';
+import { existsSync, writeFileSync, readFileSync, mkdirSync, unlinkSync } from 'fs';
+import { tmpdir, platform } from 'os';
 import { join } from 'path';
 import http from 'http';
@@ -42,8 +42,8 @@ function findChrome() {
   return candidates.find(existsSync) || null;
 }
-const CHROME_FLAGS = [
-  `--remote-debugging-port=${PORT}`,
+const CHROME_FLAGS = [
+  `--remote-debugging-port=${PORT}`,
   '--disable-features=DevToolsPrivacyUI',      // suppresses "Allow remote debugging?" dialog
   '--no-first-run',
   '--no-default-browser-check',
@@ -97,21 +97,21 @@ async function writePortFile(timeoutMs = 15000) {
   return false;
 }
-// ---------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
 async function main() {
   const arg = process.argv[2];
-  if (arg === '--kill') {
-    const pid = isRunning();
-    if (pid) {
-      try { process.kill(pid, 'SIGTERM'); console.log(`Stopped Chrome (pid ${pid}).`); }
-      catch (e) { console.error(`Failed: ${e.message}`); }
-    } else {
-      console.log('GreedySearch Chrome is not running.');
-    }
-    return;
-  }
+  if (arg === '--kill') {
+    const pid = isRunning();
+    if (pid) {
+      try { process.kill(pid, 'SIGTERM'); console.log(`Stopped Chrome (pid ${pid}).`); }
+      catch (e) { console.error(`Failed: ${e.message}`); }
+    } else {
+      console.log('GreedySearch Chrome is not running.');
+    }
+    return;
+  }
   if (arg === '--status') {
     const pid = isRunning();
@@ -123,12 +123,12 @@ async function main() {
   // Already running?
   const existing = isRunning();
   if (existing) {
-    const ready = await writePortFile(5000);
-    if (ready) {
-      console.log(`GreedySearch Chrome already running (pid ${existing}, port ${PORT}).`);
-      console.log('Dedicated GreedySearch DevToolsActivePort is ready.');
-      return;
-    }
+    const ready = await writePortFile(5000);
+    if (ready) {
+      console.log(`GreedySearch Chrome already running (pid ${existing}, port ${PORT}).`);
+      console.log('Dedicated GreedySearch DevToolsActivePort is ready.');
+      return;
+    }
     // Stale PID — process alive but not Chrome on our debugging port (PORT). Fall through to fresh launch.
     console.log(`Stale PID ${existing} detected (not Chrome on port ${PORT}) — launching fresh.`);
     try { unlinkSync(PID_FILE); } catch {}
@@ -152,15 +152,15 @@ async function main() {
   proc.unref();
   writeFileSync(PID_FILE, String(proc.pid));
-  // Wait for Chrome HTTP endpoint and build the dedicated DevToolsActivePort file
-  const portFileReady = await writePortFile();
-  if (!portFileReady) {
-    console.error('Chrome did not become ready within 15s.');
-    process.exit(1);
-  }
-  console.log(`Ready. No more "Allow remote debugging?" dialogs.`);
-  console.log('GreedySearch now uses its own isolated DevToolsActivePort file.');
-}
+  // Wait for Chrome HTTP endpoint and build the dedicated DevToolsActivePort file
+  const portFileReady = await writePortFile();
+  if (!portFileReady) {
+    console.error('Chrome did not become ready within 15s.');
+    process.exit(1);
+  }
+  console.log(`Ready. No more "Allow remote debugging?" dialogs.`);
+  console.log('GreedySearch now uses its own isolated DevToolsActivePort file.');
+}
 main();

package/newfeaturesideas.md ADDED Viewed

@@ -0,0 +1,105 @@
+# New Feature Ideas
+Ideas for future features — thinking from the perspective of an AI assistant using these tools.
+---
+## 1. Source Verification
+**Problem:** I get sources but can't verify if they're live, updated, or actually support the claimed content.
+```
+verify_sources({ urls: ["https://...", "https://..."] })
+→ [{ url, status: 200, title, snippet, lastModified, claim: "supports X" }]
+```
+**Use cases:**
+- Before citing a source, verify it's not 404
+- Check if a page actually contains the claimed information
+- Get last-modified dates to assess freshness
+---
+## 2. Incremental / Continuation Research
+**Problem:** After deep_research on "RAG vs fine-tuning", going deeper on just RAG means re-running everything with a new query and losing original context.
+```
+deep_research({ query: "RAG vs fine-tuning", ... })  // initial
+continue_research({ previousId: "...", query: "production RAG architectures" })  // goes deeper on RAG
+```
+**Use cases:**
+- Drill into a specific aspect after initial broad research
+- Build on previous results without re-fetching everything
+- Progressive disclosure of complex topics
+---
+## 3. Multi-Query Synthesis
+**Problem:** One query isn't enough for complex research. I chain multiple greedy_search calls manually.
+```
+multi_research({
+  queries: ["auth best practices", "NextAuth vs Clerk vs Lucia", "Next.js auth security"],
+  synthesize: true
+})
+```
+**Use cases:**
+- "Best auth for Next.js" needs multiple angles
+- Research with different facets (comparison, security, performance)
+- Casting a wider net when single query returns narrow results
+---
+## 4. Structured Extraction
+**Problem:** When researching "which libraries are maintained", I want tables (name, stars, last commit, license), not prose.
+```
+extract_structured({
+  query: "Python HTTP client libraries 2026",
+  schema: { name: "string", stars: "number", lastUpdated: "date", async: "boolean" }
+})
+```
+**Use cases:**
+- Library comparisons as structured data
+- Dependency audits
+- Feature matrices for tools/frameworks
+---
+## 5. Confidence Scoring on Specific Claims
+**Problem:** I say "high confidence" but it's hand-wavy. What if I could ask: "how confident are we that library X is actively maintained?"
+```
+verify_claim({
+  claim: "Prisma is actively maintained",
+  evidence: ["last commit: 2 weeks ago", "open issues: 45", "npm downloads: 2M/week"]
+})
+→ { confidence: 0.95, reasoning: "..." }
+```
+---
+## 6. Research Cache / History
+**Problem:** I do expensive deep_research, then the user asks a follow-up. I have to re-run everything.
+```
+get_research({ id: "..." })  // retrieve previous results
+list_research({ query: "RAG" })  // find related previous research
+```
+---
+## Priority
+1. **Source verification** — high value, relatively simple, fixes trust gap
+2. **Multi-query synthesis** — high value, complex but powerful
+3. **Incremental research** — medium value, nice UX improvement
+4. **Structured extraction** — medium value, specialized use cases

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@apmantza/greedysearch-pi",
-  "version": "1.4.1",
+  "version": "1.4.2",
   "description": "Pi extension: browser-automation tool that searches Perplexity, Bing Copilot, and Google AI in parallel, extracts answers and sources via CDP, with optional Gemini synthesis — grounded AI answers from real browser interactions.",
   "type": "module",
   "keywords": [

package/skills/greedy-search/SKILL.md CHANGED Viewed

@@ -1,145 +1,145 @@
----
-name: greedy-search
-description: Multi-engine AI web search — greedy_search, deep_research, and coding_task. Use for high-quality research where training data may be stale or single-engine results are insufficient.
----
-# GreedySearch Tools
-## Tool Overview
-| Tool | Speed | Use for |
-|------|-------|---------|
-| `greedy_search` | 15-90s | Quick lookups, comparisons, debugging errors |
-| `deep_research` | 60-120s | Architecture decisions, thorough research, source-backed answers |
-| `coding_task` | 60-180s | Second opinions on code, reviews, debugging tricky issues |
-## When to Use Which
-- **`greedy_search`** — Default. Fast enough for most things. Use when you need current info.
-- **`deep_research`** — When the answer *matters*. Gives you a structured document with confidence scores, deduplicated sources ranked by consensus, Gemini synthesis, AND actual content from top sources.
-- **`coding_task`** — When you need a "second opinion" on hard problems. Best for `debug` and `plan` modes on tricky issues.
----
-# greedy_search
-Multi-engine AI web search with streaming progress.
-```greedy_search({ query: "what changed in React 19", engine: "all" })```
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `query` | string | required | The search question |
-| `engine` | string | `"all"` | `all`, `perplexity`, `bing`, `google`, `gemini` |
-| `synthesize` | boolean | `false` | Synthesize via Gemini |
-| `fullAnswer` | boolean | `false` | Complete answer vs ~300 char summary |
-**When to use:** Quick lookups, error messages, comparing tools, "what's new in X".
----
-# deep_research
-Comprehensive research with source fetching and synthesis. Returns a structured document.
-```deep_research({ query: "RAG vs fine-tuning for production" })```
-Returns:
-- Full answers from all 3 engines (Perplexity, Bing, Google)
-- Gemini synthesis combining all perspectives
-- Deduplicated sources ranked by consensus (3/3 > 2/3 > 1/3)
-- Fetched content from top 5 sources (no CDP — uses native fetch)
-- Confidence metadata (which engines responded, consensus score)
-**When to use:** Architecture decisions, "which library should I use", research for a writeup, anything where you need source-backed confidence.
----
-# coding_task
-Browser-based coding assistant using Gemini and/or Copilot.
-```coding_task({ task: "debug this race condition", mode: "debug", engine: "all" })```
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `task` | string | required | The coding task/question |
-| `engine` | string | `"gemini"` | `gemini`, `copilot`, or `all` |
-| `mode` | string | `"code"` | See modes below |
-| `context` | string | — | Code snippet to include |
-**Modes:**
-| Mode | Use when |
-|------|----------|
-| `debug` | Stuck on a tricky bug. Fresh eyes catch different failure modes. |
-| `plan` | About to refactor something big. Gemini plays devil's advocate. |
-| `review` | Code review before merge. High-stakes code benefits from second opinion. |
-| `test` | Need edge cases the author missed. |
-| `code` | Just need the code written (but you can probably do this yourself faster). |
-**When to use:** Debugging tricky issues, planning major refactors, security-critical reviews. **Skip for** simple code generation — you're faster.
-## Greedy Search vs Built-in Web Search
-| | `web_search` | `greedy_search` |
-|---|---|---|
-| Speed | Instant (~2s) | 15-60s (one engine) / 30-90s (all engines) |
-| Quality | Good for simple lookups | Higher — 3 AI engines cross-verify |
-| Synthesis | Single engine answer | Optional Gemini synthesis (cleanest answer) |
-| Use for | Quick facts, simple questions | Research, decisions, complex topics |
-**Rule of thumb:** Use `web_search` for quick facts. Use `greedy_search` when the answer matters — architecture decisions, comparing libraries, understanding new releases, debugging tricky errors.
-## When to Use
-- **Version-specific changes** — "What changed in React 19?" / "Breaking changes in FastAPI 0.100"
-- **Choosing between tools** — "Prisma vs Drizzle in 2026" / "Best auth library for Next.js 15"
-- **Debugging** — User pastes an error message or stack trace
-- **Research tasks** — When you need to synthesize information from multiple sources
-- **Best practices** — "How to structure a monorepo" / "Auth patterns for SaaS"
-- **Anything where training data might be stale** — 2025+, 2026+, "latest", "current", "still maintained"
-## Engine Selection
-```greedy_search({ query: "what changed in React 19", engine: "all" })```
-| Engine | Latency | Best for |
-|---|---|---|
-| `all` (default) | 30-90s | Highest confidence — all 3 engines in parallel |
-| `perplexity` | 15-30s | Technical Q&A, code explanations, documentation |
-| `bing` | 15-30s | Recent news, Microsoft ecosystem |
-| `google` | 15-30s | Broad coverage, multiple perspectives |
-| `gemini` | 15-30s | Google's perspective, different training data |
-Use a single engine when speed matters and the question isn't contentious.
-## Synthesis Mode
-For complex research questions, use `synthesize: true` with `engine: "all"`:
-```greedy_search({ query: "best auth patterns for SaaS in 2026", engine: "all", synthesize: true })```
-This deduplicates sources across engines and feeds them to Gemini for one clean, synthesized answer. Adds ~30s but produces the highest quality output — ideal for research tasks where you'd otherwise need to parse 3 separate answers.
-Use synthesis when:
-- You need one definitive answer, not multiple perspectives
-- You're researching a topic to write about or make a decision
-- The question has a lot of noise and you want the signal
-Skip synthesis when:
-- You want to see where engines disagree (useful for controversial topics)
-- Speed matters
-## Full vs Short Answers
-Default mode returns ~300 char summaries to save tokens. Use `fullAnswer: true` when you need the complete response:
-```greedy_search({ query: "explain the React compiler", engine: "perplexity", fullAnswer: true })```
-## Interpreting Results
-- **All 3 agree** → High confidence, present as fact
-- **2 agree, 1 differs** → Likely correct but note the dissent
-- **All differ** → Present the different perspectives to the user
-- **Sources with `[3/3]` or `[2/3]`** → Cited by multiple engines, higher confidence
+---
+name: greedy-search
+description: Multi-engine AI web search — greedy_search, deep_research, and coding_task. Use for high-quality research where training data may be stale or single-engine results are insufficient.
+---
+# GreedySearch Tools
+## Tool Overview
+| Tool | Speed | Use for |
+|------|-------|---------|
+| `greedy_search` | 15-90s | Quick lookups, comparisons, debugging errors |
+| `deep_research` | 60-120s | Architecture decisions, thorough research, source-backed answers |
+| `coding_task` | 60-180s | Second opinions on code, reviews, debugging tricky issues |
+## When to Use Which
+- **`greedy_search`** — Default. Fast enough for most things. Use when you need current info.
+- **`deep_research`** — When the answer *matters*. Gives you a structured document with confidence scores, deduplicated sources ranked by consensus, Gemini synthesis, AND actual content from top sources.
+- **`coding_task`** — When you need a "second opinion" on hard problems. Best for `debug` and `plan` modes on tricky issues.
+---
+# greedy_search
+Multi-engine AI web search with streaming progress.
+```greedy_search({ query: "what changed in React 19", engine: "all" })```
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `query` | string | required | The search question |
+| `engine` | string | `"all"` | `all`, `perplexity`, `bing`, `google`, `gemini` |
+| `synthesize` | boolean | `false` | Synthesize via Gemini |
+| `fullAnswer` | boolean | `false` | Complete answer vs ~300 char summary |
+**When to use:** Quick lookups, error messages, comparing tools, "what's new in X".
+---
+# deep_research
+Comprehensive research with source fetching and synthesis. Returns a structured document.
+```deep_research({ query: "RAG vs fine-tuning for production" })```
+Returns:
+- Full answers from all 3 engines (Perplexity, Bing, Google)
+- Gemini synthesis combining all perspectives
+- Deduplicated sources ranked by consensus (3/3 > 2/3 > 1/3)
+- Fetched content from top 5 sources (no CDP — uses native fetch)
+- Confidence metadata (which engines responded, consensus score)
+**When to use:** Architecture decisions, "which library should I use", research for a writeup, anything where you need source-backed confidence.
+---
+# coding_task
+Browser-based coding assistant using Gemini and/or Copilot.
+```coding_task({ task: "debug this race condition", mode: "debug", engine: "all" })```
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `task` | string | required | The coding task/question |
+| `engine` | string | `"gemini"` | `gemini`, `copilot`, or `all` |
+| `mode` | string | `"code"` | See modes below |
+| `context` | string | — | Code snippet to include |
+**Modes:**
+| Mode | Use when |
+|------|----------|
+| `debug` | Stuck on a tricky bug. Fresh eyes catch different failure modes. |
+| `plan` | About to refactor something big. Gemini plays devil's advocate. |
+| `review` | Code review before merge. High-stakes code benefits from second opinion. |
+| `test` | Need edge cases the author missed. |
+| `code` | Just need the code written (but you can probably do this yourself faster). |
+**When to use:** Debugging tricky issues, planning major refactors, security-critical reviews. **Skip for** simple code generation — you're faster.
+## Greedy Search vs Built-in Web Search
+| | `web_search` | `greedy_search` |
+|---|---|---|
+| Speed | Instant (~2s) | 15-30s (one engine) / 30-90s (all engines) |
+| Quality | Good for simple lookups | Higher — 3 AI engines cross-verify |
+| Synthesis | Single engine answer | Optional Gemini synthesis (cleanest answer) |
+| Use for | Quick facts, simple questions | Research, decisions, complex topics |
+**Rule of thumb:** Use `web_search` for quick facts. Use `greedy_search` when the answer matters — architecture decisions, comparing libraries, understanding new releases, debugging tricky errors.
+## When to Use
+- **Version-specific changes** — "What changed in React 19?" / "Breaking changes in FastAPI 0.100"
+- **Choosing between tools** — "Prisma vs Drizzle in 2026" / "Best auth library for Next.js 15"
+- **Debugging** — User pastes an error message or stack trace
+- **Research tasks** — When you need to synthesize information from multiple sources
+- **Best practices** — "How to structure a monorepo" / "Auth patterns for SaaS"
+- **Anything where training data might be stale** — 2025+, 2026+, "latest", "current", "still maintained"
+## Engine Selection
+```greedy_search({ query: "what changed in React 19", engine: "all" })```
+| Engine | Latency | Best for |
+|---|---|---|
+| `all` (default) | 30-90s | Highest confidence — all 3 engines in parallel |
+| `perplexity` | 15-30s | Technical Q&A, code explanations, documentation |
+| `bing` | 15-30s | Recent news, Microsoft ecosystem |
+| `google` | 15-30s | Broad coverage, multiple perspectives |
+| `gemini` | 15-30s | Google's perspective, different training data |
+Use a single engine when speed matters and the question isn't contentious.
+## Synthesis Mode
+For complex research questions, use `synthesize: true` with `engine: "all"`:
+```greedy_search({ query: "best auth patterns for SaaS in 2026", engine: "all", synthesize: true })```
+This deduplicates sources across engines and feeds them to Gemini for one clean, synthesized answer. Adds ~30s but produces the highest quality output — ideal for research tasks where you'd otherwise need to parse 3 separate answers.
+Use synthesis when:
+- You need one definitive answer, not multiple perspectives
+- You're researching a topic to write about or make a decision
+- The question has a lot of noise and you want the signal
+Skip synthesis when:
+- You want to see where engines disagree (useful for controversial topics)
+- Speed matters
+## Full vs Short Answers
+Default mode returns ~300 char summaries to save tokens. Use `fullAnswer: true` when you need the complete response:
+```greedy_search({ query: "explain the React compiler", engine: "perplexity", fullAnswer: true })```
+## Interpreting Results
+- **All 3 agree** → High confidence, present as fact
+- **2 agree, 1 differs** → Likely correct but note the dissent
+- **All differ** → Present the different perspectives to the user
+- **Sources with `[3/3]` or `[2/3]`** → Cited by multiple engines, higher confidence