npm - pi-all-search - Versions diffs - 1.0.8 → 1.0.10 - Mend

pi-all-search 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/extensions/index.ts CHANGED Viewed

@@ -1,6 +1,10 @@
 import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
 import { registerWebSearchTool } from "./src/web-search.js";
+import { registerExtractTool } from "./src/extract.js";
+import { registerGetSubDomainsTool } from "./src/get-sub-domains.js";
 /**
  * Extension entry point. Registers this package's tools on the given Pi
  * extension API by calling each tool's register function in turn: web search,
  * the extract tool, and sub-domain discovery.
  */
 export default function (pi: ExtensionAPI) {
   registerWebSearchTool(pi);
+  registerExtractTool(pi);
+  registerGetSubDomainsTool(pi);
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-all-search",
-  "version": "1.0.8",
+  "version": "1.0.10",
   "description": "All-in-one web search extension for Pi — exa, tavily, anysearch, firecrawl, context7",
   "license": "MIT",
   "type": "module",

package/src/extract.ts ADDED Viewed

@@ -0,0 +1,133 @@
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
+import { Text } from "@earendil-works/pi-tui";
+import { Type } from "typebox";
+import { loadConfig } from "./config.js";
/**
 * Registers the `web_fetch` tool on the given extension API.
 *
 * The tool downloads a URL with the global `fetch` and returns the body as text:
 * - `application/json` responses are pretty-printed inside a fenced `json` block
 *   (falling back to the raw text if the body is not valid JSON);
 * - `text/html` responses are converted with `htmlToMarkdown`;
 * - anything else is returned as-is.
 * Every branch is capped at 50000 characters.
 *
 * Failures (invalid URL, non-2xx status, network error) are reported as a normal
 * tool result whose `details.error` is set, never as a thrown exception.
 *
 * NOTE(review): the tool description advertises special handling for GitHub,
 * YouTube and PDFs, and the schema accepts a `prompt`, but neither is implemented
 * in this function — confirm whether that is planned or the description should shrink.
 *
 * @param pi Extension API the tool is registered on.
 */
export function registerExtractTool(pi: ExtensionAPI): void {
  // Upper bound on the number of characters handed back to the model.
  const maxContentChars = 50000;

  pi.registerTool({
    name: "web_fetch",
    label: "Fetch URL",
    description:
      "Fetch a URL and extract readable content as markdown. Handles GitHub repos, YouTube videos, PDFs, and regular web pages. Use after web_search to read full page content.",
    promptGuidelines: [
      "Use web_fetch after web_search to read full page content when snippets are insufficient.",
      "Use web_fetch when the user provides a URL directly.",
      "Use web_fetch to verify specific claims from original sources.",
    ],
    parameters: Type.Object({
      url: Type.String({ description: "The URL to fetch. Must start with http:// or https://." }),
      prompt: Type.Optional(
        Type.String({
          description: "Question to ask about the page content (improves relevance for YouTube/videos).",
        }),
      ),
    }),
    async execute(_toolCallId, params) {
      // NOTE(review): the returned config is not used here. The call is kept in case
      // loadConfig has side effects (validation, caching) — confirm and remove if not.
      loadConfig();
      const url = params.url;

      // Enforce the contract stated in the parameter description instead of letting
      // other schemes (file:, data:, ...) reach fetch.
      if (!/^https?:\/\//i.test(url)) {
        return {
          content: [{ type: "text", text: `Invalid URL "${url}": must start with http:// or https://.` }],
          details: { error: "Invalid URL" },
        };
      }

      try {
        const resp = await fetch(url, {
          headers: {
            "User-Agent": "Mozilla/5.0 (compatible; PiAllSearch/1.0)",
            Accept: "text/html,application/json,application/markdown,text/plain,*/*",
          },
        });
        if (!resp.ok) {
          return {
            content: [{ type: "text", text: `Failed to fetch ${url}: ${resp.status} ${resp.statusText}` }],
            details: { error: `HTTP ${resp.status}` },
          };
        }

        const contentType = resp.headers.get("content-type") ?? "";
        // Media types are case-insensitive, so compare on a lowercased copy.
        const mediaType = contentType.toLowerCase();
        const body = await resp.text();

        let content: string;
        if (mediaType.includes("application/json")) {
          try {
            const pretty = JSON.stringify(JSON.parse(body), null, 2);
            // Cap the payload before fencing so the closing fence is never cut off.
            content = "```json\n" + pretty.slice(0, maxContentChars) + "\n```";
          } catch {
            // Mislabelled or malformed JSON: hand back the raw text instead of failing.
            content = body.slice(0, maxContentChars);
          }
        } else if (mediaType.includes("text/html")) {
          content = htmlToMarkdown(body);
        } else {
          content = body.slice(0, maxContentChars);
        }

        return {
          // The text part carries its payload under `text`, like every other result here.
          content: [{ type: "text", text: content }],
          details: { url, contentType, length: content.length },
        };
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text", text: `Error fetching ${url}: ${message}` }],
          details: { error: message },
        };
      }
    },
    /** One-line call summary: "Fetch <url>", with the URL shortened to 60 characters. */
    renderCall(args, theme) {
      const url = (args.url as string) ?? "";
      const display = url.length > 60 ? `${url.slice(0, 57)}...` : url;
      return new Text(theme.fg("toolTitle", theme.bold("Fetch ")) + theme.fg("accent", display), 0, 0);
    },
    /** Result summary: progress text while partial, the error if any, else the character count. */
    renderResult(result, { isPartial }, theme) {
      if (isPartial) return new Text(theme.fg("warning", "Fetching..."), 0, 0);
      const d = result.details as { error?: string; length?: number } | undefined;
      if (d?.error) return new Text(theme.fg("error", d.error), 0, 0);
      const len = d?.length ?? 0;
      return new Text(theme.fg("success", `✓ ${len} chars fetched`), 0, 0);
    },
  });
}
/**
 * Best-effort, regex-based conversion of an HTML document to markdown-ish text.
 *
 * This is not an HTML parser: it recognises a fixed set of elements (headings,
 * bold/italic, links, list items, inline code, `<pre>` blocks, paragraphs, divs,
 * line breaks), removes every other tag, decodes a handful of entities and
 * collapses runs of blank lines.
 *
 * @param html Raw HTML source.
 * @returns Converted text, trimmed and truncated to at most 50000 characters.
 */
function htmlToMarkdown(html: string): string {
  const maxChars = 50000;

  // Matches one <tag ...>inner</tag> element. `\b` stops `b` from matching <body>/<br>,
  // `i` from matching <img>, `p` from matching <param>, `li` from matching <link>, etc.
  // `[\s\S]` lets the inner text span several lines.
  const element = (tag: string): RegExp =>
    new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)</${tag}\\s*>`, "gi");

  // Headings must stay on one line, so fold any internal whitespace.
  const collapse = (s: string): string => s.replace(/\s+/g, " ").trim();

  let md = html;

  // Remove content that is never readable text; stripping only the tags would
  // leave script and stylesheet source in the output.
  md = md.replace(/<!--[\s\S]*?-->/g, "");
  md = md.replace(/<(script|style|noscript|template)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, "");

  // Fence <pre> blocks first and drop their inner <code> wrapper, so the inline-code
  // rule below does not add backticks inside the fence.
  md = md.replace(
    element("pre"),
    (_m: string, body: string) => "```\n" + body.replace(/<\/?code\b[^>]*>/gi, "") + "\n```\n\n",
  );

  for (let level = 1; level <= 6; level++) {
    const hashes = "#".repeat(level);
    md = md.replace(element(`h${level}`), (_m: string, inner: string) => `${hashes} ${collapse(inner)}\n\n`);
  }

  md = md.replace(element("strong"), "**$1**");
  md = md.replace(element("b"), "**$1**");
  md = md.replace(element("em"), "*$1*");
  md = md.replace(element("i"), "*$1*");

  // Links: accept single- or double-quoted href values.
  md = md.replace(/<a\b[^>]*?\shref\s*=\s*(["'])(.*?)\1[^>]*>([\s\S]*?)<\/a\s*>/gi, "[$3]($2)");

  md = md.replace(element("li"), "- $1\n");
  md = md.replace(/<\/?(ul|ol)\b[^>]*>/gi, "\n");
  md = md.replace(element("code"), "`$1`");
  md = md.replace(/<br\b[^>]*>/gi, "\n");
  md = md.replace(element("p"), "$1\n\n");
  md = md.replace(element("div"), "$1\n");

  // Whatever markup is left has no markdown equivalent here.
  md = md.replace(/<[^>]+>/g, "");

  // Decode entities in a single pass so that "&amp;lt;" becomes "&lt;" and is not
  // decoded a second time into "<".
  const entities: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    "#39": "'",
    nbsp: " ",
  };
  md = md.replace(/&(amp|lt|gt|quot|#39|nbsp);/g, (m: string, name: string) => entities[name] ?? m);

  md = md.replace(/\n{3,}/g, "\n\n");
  return md.trim().slice(0, maxChars);
}

package/src/get-sub-domains.ts ADDED Viewed

@@ -0,0 +1,162 @@
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
+import { Text } from "@earendil-works/pi-tui";
+import { Type } from "typebox";
/**
 * Names of the top-level vertical domains.
 *
 * Not every name has entries in `SUB_DOMAIN_DB`: "general" and "resource" are
 * listed here but have no sub-domains defined below.
 */
const DOMAINS = [
  "general",
  "resource",
  "social_media",
  "finance",
  "academic",
  "legal",
  "health",
  "business",
  "security",
  "ip",
  "code",
  "energy",
  "environment",
  "agriculture",
  "travel",
  "film",
  "gaming",
] as const;

/** One searchable sub-domain of a vertical domain. */
interface SubDomain {
  /** Identifier in `<domain>.<name>` form, e.g. "finance.crypto". */
  sub_domain: string;
  /** Short human-readable summary shown to the model. */
  description: string;
  /** Parameter names accepted by the sub-domain; every value in this table is an empty string. */
  params: Record<string, string>;
}

/** Static catalogue of sub-domains, keyed by domain name. */
const SUB_DOMAIN_DB: Record<string, SubDomain[]> = {
  finance: [
    { sub_domain: "finance.us_stock", description: "US stock market data", params: { ticker: "" } },
    { sub_domain: "finance.crypto", description: "Cryptocurrency data", params: { symbol: "" } },
    { sub_domain: "finance.forex", description: "Foreign exchange rates", params: { pair: "" } },
    { sub_domain: "finance.fund", description: "Fund/ETF data", params: { symbol: "" } },
  ],
  academic: [
    { sub_domain: "academic.search", description: "Academic papers and research", params: { doi: "" } },
    { sub_domain: "academic.patent", description: "Patent search", params: { patent_number: "" } },
  ],
  legal: [
    { sub_domain: "legal.legislation", description: "Laws and regulations", params: {} },
    { sub_domain: "legal.case", description: "Legal cases", params: {} },
  ],
  health: [
    { sub_domain: "health.policy", description: "Healthcare policy", params: {} },
    { sub_domain: "health.drug", description: "Drug information", params: { name: "" } },
    { sub_domain: "health.medical", description: "Medical information", params: { condition: "" } },
  ],
  business: [
    { sub_domain: "business.market_research", description: "Market research", params: {} },
    { sub_domain: "business.company", description: "Company information", params: { name: "" } },
  ],
  security: [
    { sub_domain: "security.cve", description: "CVE vulnerability database", params: { cve: "" } },
    { sub_domain: "security.threat", description: "Threat intelligence", params: {} },
  ],
  code: [
    { sub_domain: "code.repository", description: "Code repositories", params: { repo: "" } },
    // The summary belongs under `description` like every other entry; a `code` key
    // violates SubDomain and leaves the description undefined in the rendered list.
    { sub_domain: "code.docs", description: "Code documentation", params: { library: "" } },
  ],
  environment: [
    { sub_domain: "environment.climate", description: "Climate data", params: {} },
    { sub_domain: "environment.aqi", description: "Air quality index", params: { city: "" } },
  ],
  energy: [
    { sub_domain: "energy.market", description: "Energy market data", params: {} },
    { sub_domain: "energy.renewable", description: "Renewable energy", params: {} },
  ],
  agriculture: [
    { sub_domain: "agriculture.market", description: "Agricultural market", params: {} },
    { sub_domain: "agriculture.weather", description: "Weather data", params: { location: "" } },
  ],
  travel: [
    { sub_domain: "travel.flight", description: "Flight status", params: { iata: "" } },
    { sub_domain: "travel.hotel", description: "Hotel booking", params: {} },
  ],
  film: [
    { sub_domain: "film.movie", description: "Movie information", params: { title: "" } },
    { sub_domain: "film.tv", description: "TV show information", params: { title: "" } },
  ],
  gaming: [
    { sub_domain: "gaming.game", description: "Game information", params: { title: "" } },
    { sub_domain: "gaming.hardware", description: "Gaming hardware", params: {} },
  ],
  social_media: [
    { sub_domain: "social_media.twitter", description: "Twitter/X posts", params: { username: "" } },
    { sub_domain: "social_media.reddit", description: "Reddit posts", params: { subreddit: "" } },
  ],
  ip: [
    { sub_domain: "ip.address", description: "IP address lookup", params: { ip: "" } },
    { sub_domain: "ip.domain", description: "Domain lookup", params: { domain: "" } },
  ],
};
/**
 * Registers the `get_sub_domains` tool on the given extension API.
 *
 * The tool looks each requested domain up in `SUB_DOMAIN_DB` and returns a markdown
 * bullet list of the matching sub-domains with their parameter names. Requested
 * names are trimmed, lowercased and de-duplicated before lookup. Names with no
 * entries are reported back in the text and in `details.unknown`.
 *
 * @param pi Extension API the tool is registered on.
 */
export function registerGetSubDomainsTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "get_sub_domains",
    label: "Discover Domains",
    description:
      "Discover available vertical domains and their sub-domains for structured search. Use before web_search when the query targets a specialized vertical (finance, health, legal, etc.).",
    promptGuidelines: [
      "Use get_sub_domains when the query targets a specialized vertical domain.",
      "Use before web_search to discover available sub-domains and parameters.",
      "Pass ALL potentially relevant domains at once for broader coverage.",
    ],
    parameters: Type.Object({
      domains: Type.Array(
        Type.String({
          description: "Domain(s) to discover. Pass all potentially relevant domains.",
        }),
        { description: "List of domains to query. Max 5.", maxItems: 5 },
      ),
    }),
    async execute(_toolCallId, params) {
      // Normalise and de-duplicate so "Finance", " finance" and "finance" yield one set of rows.
      const requested = [
        ...new Set(
          ((params.domains as string[] | undefined) ?? [])
            .map((d) => d.trim().toLowerCase())
            .filter((d) => d.length > 0),
        ),
      ];

      const results: Array<{ domain: string; sub_domain: string; description: string; params: Record<string, string> }> = [];
      const unknown: string[] = [];
      for (const domain of requested) {
        // Own-property check: a bare index lookup on a plain object also resolves
        // inherited members such as "constructor" or "toString", which are not iterable
        // and would make the loop below throw.
        const subs = Object.prototype.hasOwnProperty.call(SUB_DOMAIN_DB, domain)
          ? SUB_DOMAIN_DB[domain]
          : undefined;
        if (!subs) {
          unknown.push(domain);
          continue;
        }
        for (const sub of subs) {
          results.push({ domain, ...sub });
        }
      }

      if (results.length === 0) {
        // Derive the hint from the table so it cannot drift out of sync with it.
        const known = Object.keys(SUB_DOMAIN_DB).join(", ");
        return {
          content: [{ type: "text", text: `No sub-domains found for the given domains. Try: ${known}.` }],
          details: { count: 0 },
        };
      }

      let markdown = results
        .map((r) => `- **${r.domain}/${r.sub_domain}**: ${r.description} (params: ${Object.keys(r.params).join(", ") || "none"})`)
        .join("\n");
      if (unknown.length > 0) {
        markdown += `\n\nNo sub-domains available for: ${unknown.join(", ")}`;
      }
      return {
        content: [{ type: "text", text: markdown }],
        details: { count: results.length, unknown },
      };
    },
    /** One-line call summary: "Domains a, b, c". */
    renderCall(args, theme) {
      const domains = (args.domains as string[]) ?? [];
      return new Text(
        theme.fg("toolTitle", theme.bold("Domains ")) + theme.fg("accent", domains.join(", ")),
        0,
        0,
      );
    },
    /** Result summary: progress text while partial, otherwise the number of sub-domains found. */
    renderResult(result, { isPartial }, theme) {
      if (isPartial) return new Text(theme.fg("warning", "Discovering..."), 0, 0);
      const d = result.details as { count?: number } | undefined;
      return new Text(theme.fg("success", `✓ ${d?.count ?? 0} sub-domains found`), 0, 0);
    },
  });
}

package/src/router.ts CHANGED Viewed

@@ -1,29 +1,57 @@
 import type { SearchProvider } from "./providers/types.js";
-export type SearchIntent = "finance" | "academic" | "general" | "docs";
+export type SearchIntent = "finance" | "academic" | "general" | "docs" | "technical" | "news";
+export interface RoutingConfig {
+  primary: string;
+  secondary: string[];
+  intent: SearchIntent;
+}
 /**
  * Classifies a search query into a SearchIntent by keyword matching.
  *
  * The query is lowercased and tested against whole-word (`\b`) keyword lists in a
  * fixed order: finance, academic, docs, then (in the added lines) technical and
  * news. The first list that matches decides the result, so a query containing
  * keywords from several lists gets the earliest intent in that order. Queries
  * matching no list are classified as "general".
  */
 export function classifyIntent(query: string): SearchIntent {
   const q = query.toLowerCase();
-  if (/\b(stock|price|ticker|forex|crypto|market|trade|earnings)\b/.test(q)) return "finance";
-  if (/\b(paper|research|journal|doi|arxiv|scholar|academic|study)\b/.test(q)) return "academic";
-  if (/\b(doc|docs|documentation|library|framework|api|sdk|how to|example|syntax)\b/.test(q)) return "docs";
+  if (/\b(stock|price|ticker|forex|crypto|market|trade|earnings|fund|etf|ipo)\b/.test(q)) return "finance";
+  if (/\b(paper|research|journal|doi|arxiv|scholar|academic|study|thesis)\b/.test(q)) return "academic";
+  if (/\b(doc|docs|documentation|library|framework|api|sdk|how to|example|syntax|function|method|class|component)\b/.test(q)) return "docs";
+  if (/\b(code|github|repo|repository|pull request|commit|branch|merge)\b/.test(q)) return "technical";
+  if (/\b(news|latest|today|breaking|announced|update|release|happened)\b/.test(q)) return "news";
   return "general";
 }
-const INTENT_PROVIDERS: Record<SearchIntent, { primary: string; secondary: string[] }> = {
-  finance: { primary: "anysearch", secondary: ["exa", "tavily"] },
-  academic: { primary: "exa", secondary: ["anysearch", "tavily"] },
-  general: { primary: "tavily", secondary: ["exa", "anysearch", "firecrawl"] },
-  docs: { primary: "context7", secondary: ["exa", "tavily"] },
+const INTENT_PROVIDERS: Record<SearchIntent, string[]> = {
+  finance: ["anysearch", "exa", "tavily"],
+  academic: ["exa", "anysearch", "tavily"],
+  docs: ["context7", "exa", "tavily"],
+  technical: ["firecrawl", "exa", "tavily"],
+  news: ["tavily", "anysearch", "exa"],
+  general: ["tavily", "anysearch", "exa", "firecrawl"],
 };
 /**
  * Chooses the primary provider and the ordered fallbacks for a search.
  *
  * In the added (`+`) version:
  * - if `requestedProvider` is a key of `providers`, it becomes the primary and every
  *   other registered provider becomes a secondary; a requested provider that is not
  *   registered is ignored and routing continues as if none was requested;
  * - otherwise the intent's candidate list from INTENT_PROVIDERS is filtered to the
  *   registered providers; the first survivor is the primary, the rest are secondaries;
  * - if no candidate is registered, the registered providers are used in Map iteration
  *   order, and with an empty map the primary is the literal "tavily" with no secondaries.
  *
  * The returned object also carries the `intent` it was given.
  */
 export function routeIntent(
   intent: SearchIntent,
   providers: Map<string, SearchProvider>,
   requestedProvider?: string,
-): { primary: string; secondary: string[] } {
+): RoutingConfig {
   if (requestedProvider && providers.has(requestedProvider)) {
-    return { primary: requestedProvider, secondary: [...providers.keys()].filter((k) => k !== requestedProvider) };
+    const secondary = [...providers.keys()].filter((k) => k !== requestedProvider);
+    return { primary: requestedProvider, secondary, intent };
+  }
+  const candidates = INTENT_PROVIDERS[intent];
+  const available = candidates.filter((p) => providers.has(p));
+  if (available.length === 0) {
+    const allAvailable = [...providers.keys()];
+    return {
+      primary: allAvailable[0] ?? "tavily",
+      secondary: allAvailable.slice(1),
+      intent,
+    };
   }
-  return INTENT_PROVIDERS[intent];
+  return {
+    primary: available[0],
+    secondary: available.slice(1),
+    intent,
+  };
 }

package/src/web-search.ts CHANGED Viewed

@@ -54,6 +54,7 @@ async function executeSingleSearch(
 ): Promise<{ results: SearchResult[]; provider: string; intent: SearchIntent }> {
   const intent = classifyIntent(query);
   const route = routeIntent(intent, providers, requestedProvider);
+  void intent;
   let allResults: SearchResult[] = [];
   let usedProvider = route.primary;
@@ -90,7 +91,7 @@ async function executeSingleSearch(
     throw new Error(`All providers failed for "${query}":\n${errors.join("\n")}`);
   }
-  return { results: deduplicateResults(allResults), provider: usedProvider, intent };
+  return { results: deduplicateResults(allResults), provider: usedProvider, intent: route.intent };
 }
 async function executeSingleSearchWithTimeout(
@@ -120,18 +121,19 @@ export function registerWebSearchTool(pi: ExtensionAPI): void {
     name: "web_search",
     label: "Web Search",
     description:
-      "Search the web with automatic provider selection. For stocks/finance, uses Anysearch. For academic papers, uses Exa. For general web, uses Tavily. Falls back automatically if the primary provider fails. Use include_content with web_fetch for full page reading. Use queries (plural) for parallel multi-angle research.",
+      "Search the web with 5 providers (exa, tavily, anysearch, firecrawl, context7). Choose the right provider based on query type. Falls back automatically if the primary provider fails. Use web_fetch for full page content. Use queries (plural) for parallel multi-angle research.",
     promptSnippet:
-      "Search the web with automatic or custom routing (set provider='exa' for papers, provider='anysearch' for finance, provider='tavily' for code, provider='brave' for general).",
+      "Search the web with automatic or custom routing (set provider='exa' for papers, provider='anysearch' for finance, provider='tavily' for general, provider='context7' for docs).",
     get promptGuidelines() {
       return [
         "Use web_search for information beyond your training data — current events, recent docs, live data.",
-        "Set provider='anysearch' when searching for stock prices, tickers, forex, or CVE vulnerability hashes.",
-        "Set provider='exa' when searching for academic research papers, journals, or DOIs.",
-        "Set provider='tavily' for web pages, coding guides, and fast programming research.",
-        "Set provider='firecrawl' for scraping-heavy sites or when others fail.",
-        "Set provider='context7' for library/framework/API documentation, code examples, and how-to guides.",
-        "Set provider='auto' to let the fast local intent router decide automatically (default).",
+        "Choose the right provider based on the query type:",
+        "  • context7 — library/framework/API documentation, code examples, how-to guides, syntax questions",
+        "  • exa — academic research papers, journals, DOIs, scholarly articles, theses",
+        "  • anysearch — stock prices, tickers, forex, crypto, CVE vulnerabilities, financial data",
+        "  • firecrawl — scraping-heavy sites, code repos, GitHub content, when others fail",
+        "  • tavily — general web search, news, programming guides, fast results (default)",
+        "Set provider='auto' to let the local intent router decide automatically.",
         "After answering, include a \"Sources:\" section with markdown hyperlinks: [Title](URL).",
         "Use web_fetch after web_search to read full page content — web_search returns snippets only.",
         "Use {queries:[...]} with 2-4 varied angles for broader coverage.",