@pi-unipi/web-api 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,100 @@
1
+ /**
2
+ * @unipi/web-api — Extension entry
3
+ *
4
+ * Web search, read, and summarize tools with provider-based backend selection.
5
+ * Provides agent tools: web-search, web-read, web-llm-summarize
6
+ */
7
+
8
import { fileURLToPath } from "node:url";
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import {
  UNIPI_EVENTS,
  MODULES,
  emitEvent,
  getPackageVersion,
} from "@pi-unipi/core";
import { registerWebTools, WEB_TOOLS } from "./tools.js";
import { registerWebCommands, WEB_COMMANDS } from "./commands.js";
import { webCache } from "./cache.js";
import { loadConfig } from "./settings.js";
import "./providers/duckduckgo.js";
import "./providers/jina-search.js";
import "./providers/jina-reader.js";
import "./providers/serpapi.js";
import "./providers/tavily.js";
import "./providers/firecrawl.js";
import "./providers/perplexity.js";
import "./providers/llm-summarize.js";
27
+
28
/**
 * Package version, looked up from the directory that contains this module.
 *
 * `fileURLToPath` is used instead of `URL#pathname` because `pathname` stays
 * percent-encoded (spaces become `%20`) and keeps a leading `/` in front of
 * Windows drive letters, neither of which is a valid filesystem path.
 */
const VERSION = getPackageVersion(fileURLToPath(new URL(".", import.meta.url)));
30
+
31
/**
 * Look up the shared info registry on the global object.
 *
 * The registry is read from `globalThis` rather than imported, which avoids
 * direct import issues with pi's extension loading.
 *
 * @returns Whatever is stored under `__unipi_info_registry` (undefined when unset).
 */
function getInfoRegistry() {
  const { __unipi_info_registry: infoRegistry } = globalThis as any;
  return infoRegistry;
}
36
+
37
/**
 * Extension entry point for the web-api module.
 *
 * Registers the bundled skills directory, the web tools and commands, and the
 * session lifecycle hooks on the host extension API.
 *
 * @param pi - Extension API handle supplied by the host.
 */
export default function (pi: ExtensionAPI) {
  // Register skills directory.
  // fileURLToPath (unlike URL#pathname) percent-decodes the path and handles
  // Windows drive letters, so the result is a usable filesystem path.
  const skillsDir = fileURLToPath(new URL("./skills", import.meta.url));
  pi.on("resources_discover", async (_event, _ctx) => {
    return {
      skillPaths: [skillsDir],
    };
  });

  // Register tools and commands
  registerWebTools(pi);
  registerWebCommands(pi);

  // Session lifecycle
  pi.on("session_start", async (_event, _ctx) => {
    // Clean expired cache entries on startup
    webCache.clearExpired();

    // Announce module (for subagent integration)
    emitEvent(pi, UNIPI_EVENTS.MODULE_READY, {
      name: MODULES.WEB_API,
      version: VERSION,
      commands: [
        `unipi:${WEB_COMMANDS.SETTINGS}`,
        `unipi:${WEB_COMMANDS.CACHE_CLEAR}`,
      ],
      tools: [
        WEB_TOOLS.SEARCH,
        WEB_TOOLS.READ,
        WEB_TOOLS.SUMMARIZE,
      ],
    });

    // Register info group (only when a registry has been published globally)
    const registry = getInfoRegistry();
    if (registry) {
      registry.registerGroup({
        id: "web-api",
        name: "Web API",
        icon: "🌐",
        priority: 50,
        // Data is computed lazily each time the group is queried.
        getData: async () => {
          const config = loadConfig();
          const stats = webCache.getStats();
          const enabledCount = Object.values(config.providers).filter(
            (p) => p.enabled
          ).length;

          return {
            "Enabled Providers": enabledCount,
            "Cache Entries": stats.totalEntries,
            "Cache Size": `${(stats.totalSizeBytes / 1024).toFixed(1)} KB`,
            "Expired Entries": stats.expiredEntries,
          };
        },
      });
    }
  });

  pi.on("session_shutdown", async (_event, _ctx) => {
    // Cleanup: clear expired cache entries
    webCache.clearExpired();
  });
}
@@ -0,0 +1,108 @@
1
+ /**
2
+ * @unipi/web-api — Provider base interface
3
+ *
4
+ * Defines the WebProvider interface and capability types for all providers.
5
+ */
6
+
7
/** The kinds of work a web provider can declare support for. */
export type WebCapability =
  | "search"
  | "read"
  | "summarize";
9
+
10
/**
 * Numeric rank of a provider for each capability, used during provider
 * selection.
 */
export interface ProviderRanking {
  /** Rank for the "search" capability. */
  search: number;
  /** Rank for the "read" capability. */
  read: number;
  /** Rank for the "summarize" capability. */
  summarize: number;
}
16
+
17
/** One entry returned by a provider's `search` operation. */
export interface SearchResult {
  /** Title of the result. */
  title: string;
  /** URL of the result. */
  url: string;
  /** Short text excerpt accompanying the result. */
  snippet: string;
}
23
+
24
/** Outcome of a provider's `read` operation. */
export interface ReadResult {
  /** URL the content belongs to. */
  url: string;
  /** Extracted content. */
  content: string;
  /** Format in which `content` is expressed. */
  contentType:
    | "markdown"
    | "text"
    | "html";
}
30
+
31
/** Outcome of a provider's `summarize` operation. */
export interface SummarizeResult {
  /** URL that was summarized. */
  url: string;
  /** The summary text. */
  summary: string;
  /** Prompt associated with the summary, when one is reported. */
  prompt?: string;
}
37
+
38
/**
 * Configuration handed to a provider.
 *
 * Beyond the two known fields, arbitrary extra keys are permitted and typed
 * as `unknown`.
 */
export interface ProviderConfig {
  /** Whether the provider is enabled. */
  enabled: boolean;
  /** API key for the provider, if one has been configured. */
  apiKey?: string;
  /** Any additional provider-specific settings. */
  [key: string]: unknown;
}
44
+
45
/**
 * Contract implemented by every web provider.
 *
 * A provider advertises which capabilities it supports and how it ranks for
 * each of them; the operation methods are optional and correspond to the
 * individual capabilities.
 */
export interface WebProvider {
  /** Unique identifier of the provider. */
  id: string;

  /** Display name of the provider. */
  name: string;

  /** Capabilities offered by this provider. */
  capabilities: Array<WebCapability>;

  /** True when the provider cannot be used without an API key. */
  requiresApiKey: boolean;

  /** Name of the environment variable that holds the API key (if requiresApiKey). */
  apiKeyEnv?: string;

  /**
   * Rank per capability, consulted for capability selection.
   * A lower number marks a simpler/cheaper provider, which is preferred for
   * auto-selection; 0 means the capability is not supported.
   */
  ranking: ProviderRanking;

  /** Provider-specific configuration values. */
  config: Record<string, unknown>;

  /**
   * Run a web search.
   * @param query - The search query string
   * @param config - Provider-specific configuration
   * @returns The list of search results
   */
  search?(query: string, config?: ProviderConfig): Promise<SearchResult[]>;

  /**
   * Fetch a URL and extract its content.
   * @param url - The URL to read
   * @param config - Provider-specific configuration
   * @returns The extracted content
   */
  read?(url: string, config?: ProviderConfig): Promise<ReadResult>;

  /**
   * Produce a summary of web content.
   * @param url - The URL to summarize
   * @param prompt - Custom summarization prompt
   * @param config - Provider-specific configuration
   * @returns The summarized content
   */
  summarize?(url: string, prompt?: string, config?: ProviderConfig): Promise<SummarizeResult>;

  /**
   * Check an API key (optional).
   * @param apiKey - The API key to validate
   * @returns true if the key is valid
   */
  validateApiKey?(apiKey: string): Promise<boolean>;
}
@@ -0,0 +1,115 @@
1
+ /**
2
+ * @unipi/web-api — DuckDuckGo provider
3
+ *
4
+ * Free search provider using DuckDuckGo.
5
+ * Uses DuckDuckGo's HTML search endpoint for results.
6
+ */
7
+
8
+ import type {
9
+ WebProvider,
10
+ SearchResult,
11
+ ProviderConfig,
12
+ } from "./base.js";
13
+ import { registry } from "./registry.js";
14
+
15
/** A single result scraped from the DuckDuckGo HTML results page. */
interface DDGResult {
  title: string;
  url: string;
  snippet: string;
}

/** Named HTML entities decoded by {@link decodeHtmlEntities}; others are left untouched. */
const DDG_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/** Decode numeric and common named HTML entities in one pass (so `&amp;lt;` becomes `&lt;`, not `<`). */
function decodeHtmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity: string, body: string) => {
    if (body.startsWith("#")) {
      const isHex = body.charAt(1).toLowerCase() === "x";
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws outside the Unicode range; keep such entities verbatim.
      return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : entity;
    }
    return DDG_NAMED_ENTITIES.get(body.toLowerCase()) ?? entity;
  });
}

/** Reduce an HTML fragment to plain text: drop tags, decode entities, collapse whitespace. */
function htmlFragmentToText(fragment: string): string {
  // Tags are removed before decoding so that an encoded "&lt;b&gt;" survives as literal text.
  return decodeHtmlEntities(fragment.replace(/<[^>]*>/g, ""))
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Turn a result href into the destination URL.
 * DuckDuckGo wraps targets as `//duckduckgo.com/l/?uddg=<encoded target>`;
 * any other href is returned as-is (after entity decoding).
 */
function resolveDDGHref(rawHref: string): string {
  const href = decodeHtmlEntities(rawHref).trim();
  try {
    const parsed = new URL(href, "https://duckduckgo.com");
    const isRedirect =
      /(^|\.)duckduckgo\.com$/.test(parsed.hostname) && parsed.pathname === "/l/";
    const target = isRedirect ? parsed.searchParams.get("uddg") : null;
    return target || href;
  } catch {
    // Not parseable as a URL: hand back what the page contained.
    return href;
  }
}

/**
 * Parse DuckDuckGo HTML search results.
 *
 * Result links are `<a class="result__a">` elements and snippets are
 * `<a class="result__snippet">` elements. Inner markup (for example `<b>`
 * highlighting) is stripped and HTML entities are decoded. Each link is paired
 * with the snippet that appears after it and before the next link, so one
 * result without a snippet cannot shift the snippets of later results; such a
 * result gets an empty snippet.
 *
 * @param html - Raw HTML of the results page
 * @returns Results in page order; an empty array when nothing matches
 */
function parseDDGResults(html: string): DDGResult[] {
  const linkRegex = /<a\b[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/g;
  const snippetRegex = /<a\b[^>]*class="result__snippet"[^>]*>([\s\S]*?)<\/a>/g;

  const links = [...html.matchAll(linkRegex)];
  const snippets = [...html.matchAll(snippetRegex)];

  const results: DDGResult[] = [];
  let cursor = 0; // index of the next snippet not yet assigned to a link

  links.forEach((link, i) => {
    const start = link.index ?? 0;
    const end = links[i + 1]?.index ?? html.length;

    // Skip snippets located before this link; they belong to no result.
    while (cursor < snippets.length && (snippets[cursor]?.index ?? 0) < start) {
      cursor++;
    }

    let snippet = "";
    const candidate = snippets[cursor];
    if (candidate !== undefined && (candidate.index ?? 0) < end) {
      snippet = htmlFragmentToText(candidate[1] ?? "");
      cursor++;
    }

    results.push({
      title: htmlFragmentToText(link[2] ?? ""),
      url: resolveDDGHref(link[1] ?? ""),
      snippet,
    });
  });

  return results;
}
63
+
64
/**
 * Run a query against DuckDuckGo's HTML search endpoint.
 * @param query - Search query
 * @returns Array of search results parsed from the response page
 * @throws Error when the HTTP response is not OK
 */
async function searchDDG(query: string): Promise<SearchResult[]> {
  const endpoint = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
  const requestHeaders = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };

  const response = await fetch(endpoint, { headers: requestHeaders });

  if (response.ok) {
    const parsed = parseDDGResults(await response.text());
    // Copy each entry into a fresh SearchResult object.
    return parsed.map(({ title, url, snippet }) => ({ title, url, snippet }));
  }

  throw new Error(`DuckDuckGo search failed: ${response.status} ${response.statusText}`);
}
93
+
94
/**
 * DuckDuckGo provider: search only, no API key needed.
 */
const duckduckgoProvider: WebProvider = {
  id: "duckduckgo",
  name: "DuckDuckGo",
  capabilities: ["search"],
  requiresApiKey: false,
  ranking: { search: 1, read: 0, summarize: 0 },
  config: {},

  // The config argument is accepted for interface compatibility but not used.
  search: async (query: string, _config?: ProviderConfig): Promise<SearchResult[]> =>
    searchDDG(query),
};

// Make the provider available through the shared registry
registry.register(duckduckgoProvider);

export { duckduckgoProvider };
@@ -0,0 +1,105 @@
1
+ /**
2
+ * @unipi/web-api — Firecrawl provider
3
+ *
4
+ * Paid content extraction provider using Firecrawl API.
5
+ * Advanced web scraping with JavaScript rendering support.
6
+ * Requires API key (FIRECRAWL_API_KEY environment variable).
7
+ */
8
+
9
+ import type {
10
+ WebProvider,
11
+ ReadResult,
12
+ ProviderConfig,
13
+ } from "./base.js";
14
+ import { registry } from "./registry.js";
15
+
16
/** Shape of the JSON body this module reads from the Firecrawl scrape endpoint. */
interface FirecrawlResponse {
  /** Success flag reported by the API. */
  success: boolean;
  /** Scrape payload. */
  data: {
    /** Page content as markdown. */
    markdown: string;
    /** Page content as HTML, when present. */
    html?: string;
    /** Page metadata, when present. */
    metadata?: {
      title?: string;
      description?: string;
    };
  };
}
28
+
29
/**
 * Read content from a URL via the Firecrawl scrape endpoint.
 *
 * The response body is external data, so it is checked before use: a payload
 * without `success` or without a markdown string is reported as an Error
 * instead of surfacing as a TypeError or as `content: undefined`.
 *
 * @param url - URL to read
 * @param apiKey - Firecrawl API key, sent as a Bearer token
 * @returns Extracted content as markdown
 * @throws Error when the HTTP response is not OK, the API reports failure,
 *         or the payload carries no markdown content
 */
async function readFirecrawl(url: string, apiKey: string): Promise<ReadResult> {
  const apiUrl = "https://api.firecrawl.dev/v0/scrape";

  const response = await fetch(apiUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      url: url,
      pageOptions: {
        onlyMainContent: true,
        waitForSelector: "body",
      },
    }),
  });

  if (!response.ok) {
    throw new Error(`Firecrawl read failed: ${response.status} ${response.statusText}`);
  }

  // The cast only describes the expected shape; every field is verified below.
  const payload = (await response.json()) as Partial<FirecrawlResponse> | null;

  if (!payload?.success) {
    throw new Error("Firecrawl extraction failed");
  }

  const markdown = payload.data?.markdown;
  if (typeof markdown !== "string") {
    throw new Error("Firecrawl returned no markdown content");
  }

  return {
    url: url,
    content: markdown,
    contentType: "markdown",
  };
}
69
+
70
/**
 * Firecrawl provider: read only, API key mandatory.
 */
const firecrawlProvider: WebProvider = {
  id: "firecrawl",
  name: "Firecrawl",
  capabilities: ["read"],
  requiresApiKey: true,
  apiKeyEnv: "FIRECRAWL_API_KEY",
  ranking: { search: 0, read: 2, summarize: 0 },
  config: {},

  // The key comes from the passed config first, then from the environment.
  read: async (url: string, config?: ProviderConfig): Promise<ReadResult> => {
    const key = config?.apiKey || process.env.FIRECRAWL_API_KEY;
    if (key) {
      return readFirecrawl(url, key);
    }
    throw new Error("Firecrawl requires an API key. Set FIRECRAWL_API_KEY environment variable or configure via /unipi:web-settings");
  },

  // A key counts as valid when a read of example.com completes without error.
  validateApiKey: async (apiKey: string): Promise<boolean> =>
    readFirecrawl("https://example.com", apiKey).then(
      () => true,
      () => false
    ),
};

// Make the provider available through the shared registry
registry.register(firecrawlProvider);

export { firecrawlProvider };
@@ -0,0 +1,89 @@
1
+ /**
2
+ * @unipi/web-api — Jina AI Reader provider
3
+ *
4
+ * Freemium content extraction provider using Jina AI Reader API.
5
+ * Extracts main content from URLs and returns markdown.
6
+ */
7
+
8
+ import type {
9
+ WebProvider,
10
+ ReadResult,
11
+ ProviderConfig,
12
+ } from "./base.js";
13
+ import { registry } from "./registry.js";
14
+
15
/** Shape of the JSON body this module reads from the Jina AI Reader API. */
interface JinaReaderResponse {
  /** Reader payload. */
  data: {
    /** URL reported by the reader. */
    url: string;
    /** Extracted page content. */
    content: string;
    /** Page title, when present. */
    title?: string;
    /** Page description, when present. */
    description?: string;
  };
}
24
+
25
/**
 * Read content from a URL via Jina AI Reader.
 *
 * The response body is external data, so it is checked before use: a payload
 * without a content string is reported as an Error instead of surfacing as a
 * TypeError or as `content: undefined`.
 *
 * @param url - URL to read
 * @param apiKey - Optional API key, sent as a Bearer token when provided
 * @returns Extracted content as markdown
 * @throws Error when the HTTP response is not OK or the payload carries no content
 */
async function readJina(url: string, apiKey?: string): Promise<ReadResult> {
  const jinaUrl = `https://r.jina.ai/${url}`;

  const headers: Record<string, string> = {
    Accept: "application/json",
  };

  if (apiKey) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const response = await fetch(jinaUrl, { headers });

  if (!response.ok) {
    throw new Error(`Jina AI Reader failed: ${response.status} ${response.statusText}`);
  }

  // The cast only describes the expected shape; the fields are verified below.
  const payload = (await response.json()) as Partial<JinaReaderResponse> | null;
  const content = payload?.data?.content;

  if (typeof content !== "string") {
    throw new Error("Jina AI Reader returned no content");
  }

  return {
    // Fall back to the requested URL when the reader reports none (or an empty one).
    url: payload?.data?.url || url,
    content,
    contentType: "markdown",
  };
}
56
+
57
/**
 * Jina AI Reader provider: read only; an API key is optional.
 */
const jinaReaderProvider: WebProvider = {
  id: "jina-reader",
  name: "Jina AI Reader",
  capabilities: ["read"],
  requiresApiKey: false,
  apiKeyEnv: "JINA_API_KEY",
  ranking: { search: 0, read: 1, summarize: 0 },
  config: {},

  // The key comes from the passed config first, then from the environment.
  read: async (url: string, config?: ProviderConfig): Promise<ReadResult> =>
    readJina(url, config?.apiKey || process.env.JINA_API_KEY),

  // A key counts as valid when a read of example.com completes without error.
  validateApiKey: async (apiKey: string): Promise<boolean> =>
    readJina("https://example.com", apiKey).then(
      () => true,
      () => false
    ),
};

// Make the provider available through the shared registry
registry.register(jinaReaderProvider);

export { jinaReaderProvider };
@@ -0,0 +1,88 @@
1
+ /**
2
+ * @unipi/web-api — Jina AI Search provider
3
+ *
4
+ * Freemium search provider using Jina AI Search API.
5
+ * Free tier available, higher rate limits with API key.
6
+ */
7
+
8
+ import type {
9
+ WebProvider,
10
+ SearchResult,
11
+ ProviderConfig,
12
+ } from "./base.js";
13
+ import { registry } from "./registry.js";
14
+
15
/**
 * Shape of the JSON body this module reads from the Jina AI Search API.
 *
 * All fields are optional because the body is external data. `description`
 * is accepted alongside `snippet` as the source of the result excerpt.
 */
interface JinaSearchResponse {
  data?: Array<{
    title?: string;
    url?: string;
    snippet?: string;
    description?: string;
  }>;
}

/**
 * Search via Jina AI.
 *
 * @param query - Search query (URL-encoded into the request path)
 * @param apiKey - Optional API key, sent as a Bearer token when provided
 * @returns Array of search results; empty when the payload has no result list
 * @throws Error when the HTTP response is not OK
 */
async function searchJina(query: string, apiKey?: string): Promise<SearchResult[]> {
  const url = `https://s.jina.ai/${encodeURIComponent(query)}`;

  const headers: Record<string, string> = {
    Accept: "application/json",
  };

  if (apiKey) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const response = await fetch(url, { headers });

  if (!response.ok) {
    throw new Error(`Jina AI search failed: ${response.status} ${response.statusText}`);
  }

  // The cast only describes the expected shape; the list is verified below.
  const payload = (await response.json()) as JinaSearchResponse | null;
  const items = Array.isArray(payload?.data) ? payload.data : [];

  return items.map((item) => ({
    title: item.title ?? "",
    url: item.url ?? "",
    // `||` on purpose: an empty snippet should fall through to the description.
    snippet: item.snippet || item.description || "",
  }));
}
55
+
56
/**
 * Jina AI Search provider: search only; an API key is optional.
 */
const jinaSearchProvider: WebProvider = {
  id: "jina-search",
  name: "Jina AI Search",
  capabilities: ["search"],
  requiresApiKey: false,
  apiKeyEnv: "JINA_API_KEY",
  ranking: { search: 2, read: 0, summarize: 0 },
  config: {},

  // The key comes from the passed config first, then from the environment.
  search: async (query: string, config?: ProviderConfig): Promise<SearchResult[]> =>
    searchJina(query, config?.apiKey || process.env.JINA_API_KEY),

  // A key counts as valid when a "test" search resolves to an array.
  validateApiKey: async (apiKey: string): Promise<boolean> =>
    searchJina("test", apiKey).then(
      (results) => Array.isArray(results),
      () => false
    ),
};

// Make the provider available through the shared registry
registry.register(jinaSearchProvider);

export { jinaSearchProvider };
@@ -0,0 +1,71 @@
1
+ /**
2
+ * @unipi/web-api — LLM Summarize provider
3
+ *
4
+ * Summarization provider using pi's existing LLM.
5
+ * No external API key required - uses the LLM already configured in pi.
6
+ */
7
+
8
+ import type {
9
+ WebProvider,
10
+ SummarizeResult,
11
+ ProviderConfig,
12
+ } from "./base.js";
13
+ import { registry } from "./registry.js";
14
+
15
/** Prompt used for summarization when the caller supplies none. */
const DEFAULT_SUMMARY_PROMPT = [
  "Summarize the following web content concisely, highlighting the key points.",
  "Focus on:",
  "1. Main topic and purpose",
  "2. Key facts and findings",
  "3. Important conclusions or recommendations",
  "",
  "Provide a clear, well-structured summary.",
].join("\n");
23
+
24
/**
 * Build the placeholder result returned by the LLM summarize provider.
 *
 * No LLM is invoked here; per the provider's design the real call happens in
 * the tool execution. The `content` argument is currently not used.
 *
 * @param url - URL the summary refers to
 * @param content - Content to summarize (unused by this placeholder)
 * @param prompt - Custom prompt; the default prompt is used when empty or omitted
 * @returns Result whose summary is a placeholder string naming the URL
 */
function createLLMSummarizeResult(
  url: string,
  content: string,
  prompt?: string
): SummarizeResult {
  const effectivePrompt = prompt || DEFAULT_SUMMARY_PROMPT;
  const summary = `[LLM Summary placeholder for ${url}]`;

  return { url, summary, prompt: effectivePrompt };
}
41
+
42
/**
 * LLM Summarize provider: summarize only, no API key needed.
 */
const llmSummarizeProvider: WebProvider = {
  id: "llm-summarize",
  name: "LLM Summarize",
  capabilities: ["summarize"],
  requiresApiKey: false,
  ranking: { search: 0, read: 0, summarize: 2 },
  config: { defaultPrompt: DEFAULT_SUMMARY_PROMPT },

  // Placeholder only: the tool is expected to
  //   1. fetch the content using a read provider,
  //   2. send it to the LLM together with the prompt,
  //   3. return the LLM's summary.
  summarize: async (
    url: string,
    prompt?: string,
    _config?: ProviderConfig
  ): Promise<SummarizeResult> => createLLMSummarizeResult(url, "", prompt),
};

// Make the provider available through the shared registry
registry.register(llmSummarizeProvider);

export { llmSummarizeProvider, DEFAULT_SUMMARY_PROMPT };