pi-all-search 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
2
  import { registerWebSearchTool } from "./src/web-search.js";
3
+ import { registerExtractTool } from "./src/extract.js";
4
+ import { registerGetSubDomainsTool } from "./src/get-sub-domains.js";
3
5
 
4
6
  export default function (pi: ExtensionAPI) {
5
7
  registerWebSearchTool(pi);
8
+ registerExtractTool(pi);
9
+ registerGetSubDomainsTool(pi);
6
10
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-all-search",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "All-in-one web search extension for Pi — exa, tavily, anysearch, firecrawl, context7",
5
5
  "license": "MIT",
6
6
  "type": "module",
package/src/extract.ts ADDED
@@ -0,0 +1,133 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { Text } from "@earendil-works/pi-tui";
3
+ import { Type } from "typebox";
4
+ import { loadConfig } from "./config.js";
5
+
6
/**
 * Registers the `web_fetch` tool, which downloads a URL and returns its body
 * as text: JSON is pretty-printed inside a fenced block, HTML is converted by
 * `htmlToMarkdown`, and anything else is passed through as-is.
 *
 * Every returned body is capped at 50000 characters. Failures are reported as
 * a text result with `details.error` set (which `renderResult` displays)
 * rather than being thrown.
 *
 * @param pi - Extension API instance the tool is registered on.
 */
export function registerExtractTool(pi: ExtensionAPI): void {
  // Upper bound on characters returned, applied uniformly to every branch.
  const maxChars = 50000;
  // Bodies of these types are binary; decoding them as text yields garbage.
  const binaryContentType = /^(?:image|audio|video|font)\/|^application\/(?:pdf|zip|gzip|octet-stream)\b/i;

  pi.registerTool({
    name: "web_fetch",
    label: "Fetch URL",
    description:
      "Fetch a URL and extract readable content as markdown. Handles GitHub repos, YouTube videos, PDFs, and regular web pages. Use after web_search to read full page content.",
    promptGuidelines: [
      "Use web_fetch after web_search to read full page content when snippets are insufficient.",
      "Use web_fetch when the user provides a URL directly.",
      "Use web_fetch to verify specific claims from original sources.",
    ],
    parameters: Type.Object({
      url: Type.String({ description: "The URL to fetch. Must start with http:// or https://." }),
      prompt: Type.Optional(
        Type.String({
          description: "Question to ask about the page content (improves relevance for YouTube/videos).",
        }),
      ),
    }),

    async execute(_toolCallId, params) {
      const url = params.url;

      // Enforce the scheme promised by the parameter description; fetch()
      // would otherwise also accept schemes such as data:.
      if (!/^https?:\/\//i.test(url)) {
        return {
          content: [{ type: "text", text: `Invalid URL "${url}": must start with http:// or https://.` }],
          details: { error: "Invalid URL" },
        };
      }

      try {
        const resp = await fetch(url, {
          headers: {
            "User-Agent": "Mozilla/5.0 (compatible; PiAllSearch/1.0)",
            Accept: "text/html,application/json,application/markdown,text/plain,*/*",
          },
        });

        if (!resp.ok) {
          return {
            content: [{ type: "text", text: `Failed to fetch ${url}: ${resp.status} ${resp.statusText}` }],
            details: { error: `HTTP ${resp.status}` },
          };
        }

        const contentType = resp.headers.get("content-type") ?? "";
        // Media types are case-insensitive, so compare on a lowered copy.
        const mediaType = contentType.toLowerCase();

        if (binaryContentType.test(mediaType)) {
          // Release the connection without downloading the binary payload.
          await resp.body?.cancel();
          return {
            content: [{ type: "text", text: `Cannot extract text from ${url}: unsupported content type "${contentType}".` }],
            details: { error: `Unsupported content type: ${contentType}` },
          };
        }

        const text = await resp.text();

        let content: string;
        if (mediaType.includes("application/json")) {
          try {
            const json: unknown = JSON.parse(text);
            // Cap the payload before fencing so the closing fence survives.
            content = "```json\n" + JSON.stringify(json, null, 2).slice(0, maxChars) + "\n```";
          } catch {
            // Declared as JSON but not parseable: fall back to the raw text.
            content = text.slice(0, maxChars);
          }
        } else if (mediaType.includes("text/html")) {
          content = htmlToMarkdown(text);
        } else {
          content = text.slice(0, maxChars);
        }

        return {
          // The payload goes in `text`, the same field every error branch uses.
          content: [{ type: "text", text: content }],
          details: { url, contentType, length: content.length },
        };
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text", text: `Error fetching ${url}: ${message}` }],
          details: { error: message },
        };
      }
    },

    renderCall(args, theme) {
      const url = (args.url as string) ?? "";
      // Keep the call line short: at most 60 visible characters.
      const display = url.length > 60 ? `${url.slice(0, 57)}...` : url;
      return new Text(theme.fg("toolTitle", theme.bold("Fetch ")) + theme.fg("accent", display), 0, 0);
    },

    renderResult(result, { isPartial }, theme) {
      if (isPartial) return new Text(theme.fg("warning", "Fetching..."), 0, 0);
      const d = result.details as { error?: string; length?: number } | undefined;
      if (d?.error) return new Text(theme.fg("error", d.error), 0, 0);
      const len = d?.length ?? 0;
      return new Text(theme.fg("success", `✓ ${len} chars fetched`), 0, 0);
    },
  });
}
89
+
90
/**
 * Converts an HTML document or fragment to approximate Markdown using regular
 * expressions (best effort; this is not a real HTML parser).
 *
 * Handles headings, bold/italic, links, list items, inline code, fenced
 * `<pre>` blocks, line breaks, paragraphs and divs, then strips remaining
 * tags and decodes a small set of entities.
 *
 * @param html - Raw HTML text.
 * @returns Markdown text, trimmed and capped at 50000 characters.
 */
function htmlToMarkdown(html: string): string {
  let md = html;

  // Drop comments and elements whose bodies are never readable content;
  // otherwise their source text would survive the final tag strip.
  md = md.replace(/<!--[\s\S]*?-->/g, "");
  md = md.replace(/<(script|style|noscript|template)(?=[\s>])[^>]*>[\s\S]*?<\/\1\s*>/gi, "");

  // Fence <pre> first and strip its inner markup, so a nested <code> does not
  // later add inline backticks inside the fence.
  md = md.replace(
    /<pre(?=[\s>])[^>]*>([\s\S]*?)<\/pre\s*>/gi,
    (_match: string, body: string) => "\n```\n" + body.replace(/<[^>]+>/g, "") + "\n```\n\n",
  );

  // Every tag pattern below requires whitespace or ">" right after the tag
  // name, so "<b" cannot match "<br>"/"<body>", "<i" cannot match "<img>",
  // and so on. [\s\S] lets element content span multiple lines.
  md = md.replace(
    /<h([1-6])(?=[\s>])[^>]*>([\s\S]*?)<\/h\1\s*>/gi,
    (_match: string, level: string, inner: string) => `${"#".repeat(Number(level))} ${inner}\n\n`,
  );

  md = md.replace(/<(strong|b)(?=[\s>])[^>]*>([\s\S]*?)<\/\1\s*>/gi, "**$2**");
  md = md.replace(/<(em|i)(?=[\s>])[^>]*>([\s\S]*?)<\/\1\s*>/gi, "*$2*");

  // Accept both double- and single-quoted href values.
  md = md.replace(/<a(?=\s)[^>]*?\shref\s*=\s*(["'])(.*?)\1[^>]*>([\s\S]*?)<\/a\s*>/gi, "[$3]($2)");

  md = md.replace(/<li(?=[\s>])[^>]*>([\s\S]*?)<\/li\s*>/gi, "- $1\n");
  md = md.replace(/<\/?(?:ul|ol)(?=[\s>])[^>]*>/gi, "\n");

  md = md.replace(/<code(?=[\s>])[^>]*>([\s\S]*?)<\/code\s*>/gi, "`$1`");

  md = md.replace(/<br\s*\/?>/gi, "\n");
  // Closing tags mark block boundaries; the opening tags fall to the generic
  // strip below. This also works for multi-line and nested blocks.
  md = md.replace(/<\/p\s*>/gi, "\n\n");
  md = md.replace(/<\/div\s*>/gi, "\n");

  md = md.replace(/<[^>]+>/g, "");

  // Decode "&amp;" last so an escaped entity such as "&amp;lt;" becomes the
  // literal text "&lt;" instead of being unescaped twice into "<".
  md = md.replace(/&lt;/g, "<");
  md = md.replace(/&gt;/g, ">");
  md = md.replace(/&quot;/g, '"');
  md = md.replace(/&#39;/g, "'");
  md = md.replace(/&nbsp;/g, " ");
  md = md.replace(/&amp;/g, "&");

  md = md.replace(/\n{3,}/g, "\n\n");
  md = md.trim();

  return md.slice(0, 50000);
}
@@ -0,0 +1,162 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { Text } from "@earendil-works/pi-tui";
3
+ import { Type } from "typebox";
4
+
5
/**
 * Names of the top-level vertical domains, as a readonly literal tuple.
 * NOTE(review): not referenced by the code visible in this file, and
 * "general" and "resource" have no sub-domain entries here. Confirm whether
 * it is still needed.
 */
const DOMAINS = [
  // Catch-all and content-oriented verticals.
  "general", "resource", "social_media",
  // Professional and regulated verticals.
  "finance", "academic", "legal", "health", "business",
  // Technical verticals.
  "security", "ip", "code",
  // Industry verticals.
  "energy", "environment", "agriculture",
  // Leisure verticals.
  "travel", "film", "gaming",
] as const;
24
+
25
/** One searchable sub-domain within a vertical domain. */
interface SubDomain {
  /** Dotted identifier of the form `<domain>.<name>`, e.g. `finance.crypto`. */
  sub_domain: string;
  /** Short human-readable summary shown in the tool output. */
  description: string;
  /** Parameter names accepted by the sub-domain; only the keys are displayed, values are empty placeholders. */
  params: Record<string, string>;
}

/**
 * Static catalogue of sub-domains, keyed by top-level domain name.
 * Every entry must carry a `description`: it is interpolated directly into
 * the markdown listing returned by the `get_sub_domains` tool.
 */
const SUB_DOMAIN_DB: Record<string, SubDomain[]> = {
  finance: [
    { sub_domain: "finance.us_stock", description: "US stock market data", params: { ticker: "" } },
    { sub_domain: "finance.crypto", description: "Cryptocurrency data", params: { symbol: "" } },
    { sub_domain: "finance.forex", description: "Foreign exchange rates", params: { pair: "" } },
    { sub_domain: "finance.fund", description: "Fund/ETF data", params: { symbol: "" } },
  ],
  academic: [
    { sub_domain: "academic.search", description: "Academic papers and research", params: { doi: "" } },
    { sub_domain: "academic.patent", description: "Patent search", params: { patent_number: "" } },
  ],
  legal: [
    { sub_domain: "legal.legislation", description: "Laws and regulations", params: {} },
    { sub_domain: "legal.case", description: "Legal cases", params: {} },
  ],
  health: [
    { sub_domain: "health.policy", description: "Healthcare policy", params: {} },
    { sub_domain: "health.drug", description: "Drug information", params: { name: "" } },
    { sub_domain: "health.medical", description: "Medical information", params: { condition: "" } },
  ],
  business: [
    { sub_domain: "business.market_research", description: "Market research", params: {} },
    { sub_domain: "business.company", description: "Company information", params: { name: "" } },
  ],
  security: [
    { sub_domain: "security.cve", description: "CVE vulnerability database", params: { cve: "" } },
    { sub_domain: "security.threat", description: "Threat intelligence", params: {} },
  ],
  code: [
    { sub_domain: "code.repository", description: "Code repositories", params: { repo: "" } },
    // Was keyed `code:` by mistake, which left `description` undefined.
    { sub_domain: "code.docs", description: "Code documentation", params: { library: "" } },
  ],
  environment: [
    { sub_domain: "environment.climate", description: "Climate data", params: {} },
    { sub_domain: "environment.aqi", description: "Air quality index", params: { city: "" } },
  ],
  energy: [
    { sub_domain: "energy.market", description: "Energy market data", params: {} },
    { sub_domain: "energy.renewable", description: "Renewable energy", params: {} },
  ],
  agriculture: [
    { sub_domain: "agriculture.market", description: "Agricultural market", params: {} },
    { sub_domain: "agriculture.weather", description: "Weather data", params: { location: "" } },
  ],
  travel: [
    { sub_domain: "travel.flight", description: "Flight status", params: { iata: "" } },
    { sub_domain: "travel.hotel", description: "Hotel booking", params: {} },
  ],
  film: [
    { sub_domain: "film.movie", description: "Movie information", params: { title: "" } },
    { sub_domain: "film.tv", description: "TV show information", params: { title: "" } },
  ],
  gaming: [
    { sub_domain: "gaming.game", description: "Game information", params: { title: "" } },
    { sub_domain: "gaming.hardware", description: "Gaming hardware", params: {} },
  ],
  social_media: [
    { sub_domain: "social_media.twitter", description: "Twitter/X posts", params: { username: "" } },
    { sub_domain: "social_media.reddit", description: "Reddit posts", params: { subreddit: "" } },
  ],
  ip: [
    { sub_domain: "ip.address", description: "IP address lookup", params: { ip: "" } },
    { sub_domain: "ip.domain", description: "Domain lookup", params: { domain: "" } },
  ],
};
96
+
97
/**
 * Registers the `get_sub_domains` tool, which looks up the requested domain
 * names in `SUB_DOMAIN_DB` and returns a markdown bullet list of their
 * sub-domains together with each sub-domain's parameter names.
 *
 * Domain names are matched case-insensitively after trimming, and a domain
 * passed more than once is listed only once. Unknown domains are ignored; if
 * nothing matches, the result lists the domain names that are available.
 *
 * @param pi - Extension API instance the tool is registered on.
 */
export function registerGetSubDomainsTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "get_sub_domains",
    label: "Discover Domains",
    description:
      "Discover available vertical domains and their sub-domains for structured search. Use before web_search when the query targets a specialized vertical (finance, health, legal, etc.).",
    promptGuidelines: [
      "Use get_sub_domains when the query targets a specialized vertical domain.",
      "Use before web_search to discover available sub-domains and parameters.",
      "Pass ALL potentially relevant domains at once for broader coverage.",
    ],
    parameters: Type.Object({
      domains: Type.Array(
        Type.String({
          description: "Domain(s) to discover. Pass all potentially relevant domains.",
        }),
        { description: "List of domains to query. Max 5.", maxItems: 5 },
      ),
    }),

    async execute(_toolCallId, params) {
      const requested = Array.isArray(params.domains) ? (params.domains as string[]) : [];
      const results: Array<{ domain: string; sub_domain: string; description: string; params: Record<string, string> }> = [];
      const seen = new Set<string>();

      for (const raw of requested) {
        const domain = String(raw).trim().toLowerCase();
        if (seen.has(domain)) continue;
        seen.add(domain);

        // Own-property check: a plain index lookup would also resolve
        // inherited keys such as "constructor" and then fail to iterate.
        const subs = Object.prototype.hasOwnProperty.call(SUB_DOMAIN_DB, domain)
          ? SUB_DOMAIN_DB[domain]
          : undefined;
        if (!subs) continue;

        for (const sub of subs) {
          results.push({ domain, ...sub });
        }
      }

      if (results.length === 0) {
        // Build the hint from the catalogue so it cannot drift out of sync.
        const known = Object.keys(SUB_DOMAIN_DB).join(", ");
        return {
          content: [{ type: "text", text: `No sub-domains found for the given domains. Try: ${known}.` }],
          details: { count: 0 },
        };
      }

      const markdown = results
        .map((r) => `- **${r.domain}/${r.sub_domain}**: ${r.description} (params: ${Object.keys(r.params).join(", ") || "none"})`)
        .join("\n");

      return {
        content: [{ type: "text", text: markdown }],
        details: { count: results.length },
      };
    },

    renderCall(args, theme) {
      const domains = (args.domains as string[]) ?? [];
      return new Text(
        theme.fg("toolTitle", theme.bold("Domains ")) + theme.fg("accent", domains.join(", ")),
        0,
        0,
      );
    },

    renderResult(result, { isPartial }, theme) {
      if (isPartial) return new Text(theme.fg("warning", "Discovering..."), 0, 0);
      const d = result.details as { count?: number } | undefined;
      return new Text(theme.fg("success", `✓ ${d?.count ?? 0} sub-domains found`), 0, 0);
    },
  });
}
package/src/router.ts CHANGED
@@ -1,29 +1,57 @@
1
1
  import type { SearchProvider } from "./providers/types.js";
2
2
 
3
- export type SearchIntent = "finance" | "academic" | "general" | "docs";
3
/**
 * Category assigned to a search query by `classifyIntent`; used as the key
 * for selecting a provider order in `routeIntent`.
 */
export type SearchIntent = "finance" | "academic" | "general" | "docs" | "technical" | "news";
4
+
5
/** Routing decision returned by `routeIntent`. */
export interface RoutingConfig {
  /** Name of the provider selected as first choice. */
  primary: string;
  /** Names of the remaining providers, in the order `routeIntent` produced them. */
  secondary: string[];
  /** The intent the route was computed for (passed through unchanged). */
  intent: SearchIntent;
}
4
10
 
5
11
  export function classifyIntent(query: string): SearchIntent {
6
12
  const q = query.toLowerCase();
7
- if (/\b(stock|price|ticker|forex|crypto|market|trade|earnings)\b/.test(q)) return "finance";
8
- if (/\b(paper|research|journal|doi|arxiv|scholar|academic|study)\b/.test(q)) return "academic";
9
- if (/\b(doc|docs|documentation|library|framework|api|sdk|how to|example|syntax)\b/.test(q)) return "docs";
13
+ if (/\b(stock|price|ticker|forex|crypto|market|trade|earnings|fund|etf|ipo)\b/.test(q)) return "finance";
14
+ if (/\b(paper|research|journal|doi|arxiv|scholar|academic|study|thesis)\b/.test(q)) return "academic";
15
+ if (/\b(doc|docs|documentation|library|framework|api|sdk|how to|example|syntax|function|method|class|component)\b/.test(q)) return "docs";
16
+ if (/\b(code|github|repo|repository|pull request|commit|branch|merge)\b/.test(q)) return "technical";
17
+ if (/\b(news|latest|today|breaking|announced|update|release|happened)\b/.test(q)) return "news";
10
18
  return "general";
11
19
  }
12
20
 
13
- const INTENT_PROVIDERS: Record<SearchIntent, { primary: string; secondary: string[] }> = {
14
- finance: { primary: "anysearch", secondary: ["exa", "tavily"] },
15
- academic: { primary: "exa", secondary: ["anysearch", "tavily"] },
16
- general: { primary: "tavily", secondary: ["exa", "anysearch", "firecrawl"] },
17
- docs: { primary: "context7", secondary: ["exa", "tavily"] },
21
/**
 * Provider names to try for each intent, most preferred first. Names that
 * are not present in the providers map at call time are skipped by
 * `routeIntent`.
 */
const INTENT_PROVIDERS: Record<SearchIntent, string[]> = {
  general: ["tavily", "anysearch", "exa", "firecrawl"],
  news: ["tavily", "anysearch", "exa"],
  technical: ["firecrawl", "exa", "tavily"],
  docs: ["context7", "exa", "tavily"],
  academic: ["exa", "anysearch", "tavily"],
  finance: ["anysearch", "exa", "tavily"],
};

/**
 * Chooses the primary provider and the fallback order for a query.
 *
 * Resolution order:
 * 1. If `requestedProvider` names a provider in `providers`, it is primary
 *    and every other provider (in map iteration order) is secondary.
 * 2. Otherwise the intent's preference list is filtered down to providers
 *    present in the map; the first is primary, the rest secondary.
 * 3. If none of the preferred providers is present, all providers in the map
 *    are used in iteration order.
 *
 * NOTE(review): with an empty `providers` map the primary falls back to the
 * literal name "tavily", which is then not a key of the map. Confirm callers
 * handle that.
 *
 * @param intent - Classified intent of the query.
 * @param providers - Providers keyed by name.
 * @param requestedProvider - Optional explicit provider name; ignored when
 *   empty or not a key of `providers`.
 * @returns The routing decision, echoing `intent`.
 */
export function routeIntent(
  intent: SearchIntent,
  providers: Map<string, SearchProvider>,
  requestedProvider?: string,
): RoutingConfig {
  const registered = Array.from(providers.keys());

  // An explicit, known provider overrides intent-based routing.
  if (requestedProvider && providers.has(requestedProvider)) {
    return {
      intent,
      primary: requestedProvider,
      secondary: registered.filter((name) => name !== requestedProvider),
    };
  }

  const [preferred, ...fallbacks] = INTENT_PROVIDERS[intent].filter((name) => providers.has(name));
  if (preferred !== undefined) {
    return { intent, primary: preferred, secondary: fallbacks };
  }

  // No preferred provider is registered: use whatever exists, in map order.
  const [first = "tavily", ...rest] = registered;
  return { intent, primary: first, secondary: rest };
}
package/src/web-search.ts CHANGED
@@ -54,6 +54,7 @@ async function executeSingleSearch(
54
54
  ): Promise<{ results: SearchResult[]; provider: string; intent: SearchIntent }> {
55
55
  const intent = classifyIntent(query);
56
56
  const route = routeIntent(intent, providers, requestedProvider);
57
+ void intent;
57
58
 
58
59
  let allResults: SearchResult[] = [];
59
60
  let usedProvider = route.primary;
@@ -90,7 +91,7 @@ async function executeSingleSearch(
90
91
  throw new Error(`All providers failed for "${query}":\n${errors.join("\n")}`);
91
92
  }
92
93
 
93
- return { results: deduplicateResults(allResults), provider: usedProvider, intent };
94
+ return { results: deduplicateResults(allResults), provider: usedProvider, intent: route.intent };
94
95
  }
95
96
 
96
97
  async function executeSingleSearchWithTimeout(
@@ -120,18 +121,19 @@ export function registerWebSearchTool(pi: ExtensionAPI): void {
120
121
  name: "web_search",
121
122
  label: "Web Search",
122
123
  description:
123
- "Search the web with automatic provider selection. For stocks/finance, uses Anysearch. For academic papers, uses Exa. For general web, uses Tavily. Falls back automatically if the primary provider fails. Use include_content with web_fetch for full page reading. Use queries (plural) for parallel multi-angle research.",
124
+ "Search the web with 5 providers (exa, tavily, anysearch, firecrawl, context7). Choose the right provider based on query type. Falls back automatically if the primary provider fails. Use web_fetch for full page content. Use queries (plural) for parallel multi-angle research.",
124
125
  promptSnippet:
125
- "Search the web with automatic or custom routing (set provider='exa' for papers, provider='anysearch' for finance, provider='tavily' for code, provider='brave' for general).",
126
+ "Search the web with automatic or custom routing (set provider='exa' for papers, provider='anysearch' for finance, provider='tavily' for general, provider='context7' for docs).",
126
127
  get promptGuidelines() {
127
128
  return [
128
129
  "Use web_search for information beyond your training data — current events, recent docs, live data.",
129
- "Set provider='anysearch' when searching for stock prices, tickers, forex, or CVE vulnerability hashes.",
130
- "Set provider='exa' when searching for academic research papers, journals, or DOIs.",
131
- "Set provider='tavily' for web pages, coding guides, and fast programming research.",
132
- "Set provider='firecrawl' for scraping-heavy sites or when others fail.",
133
- "Set provider='context7' for library/framework/API documentation, code examples, and how-to guides.",
134
- "Set provider='auto' to let the fast local intent router decide automatically (default).",
130
+ "Choose the right provider based on the query type:",
131
+ " context7 library/framework/API documentation, code examples, how-to guides, syntax questions",
132
+ " exa academic research papers, journals, DOIs, scholarly articles, theses",
133
+ " anysearch stock prices, tickers, forex, crypto, CVE vulnerabilities, financial data",
134
+ " firecrawl scraping-heavy sites, code repos, GitHub content, when others fail",
135
+ " tavily general web search, news, programming guides, fast results (default)",
136
+ "Set provider='auto' to let the local intent router decide automatically.",
135
137
  "After answering, include a \"Sources:\" section with markdown hyperlinks: [Title](URL).",
136
138
  "Use web_fetch after web_search to read full page content — web_search returns snippets only.",
137
139
  "Use {queries:[...]} with 2-4 varied angles for broader coverage.",