@pi-unipi/web-api 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +179 -0
- package/package.json +50 -0
- package/skills/web/SKILL.md +108 -0
- package/src/cache.ts +240 -0
- package/src/commands.ts +45 -0
- package/src/index.ts +100 -0
- package/src/providers/base.ts +108 -0
- package/src/providers/duckduckgo.ts +115 -0
- package/src/providers/firecrawl.ts +105 -0
- package/src/providers/jina-reader.ts +89 -0
- package/src/providers/jina-search.ts +88 -0
- package/src/providers/llm-summarize.ts +71 -0
- package/src/providers/perplexity.ts +191 -0
- package/src/providers/registry.ts +128 -0
- package/src/providers/serpapi.ts +86 -0
- package/src/providers/tavily.ts +95 -0
- package/src/settings.ts +263 -0
- package/src/tools.ts +329 -0
- package/src/tui/provider-selector.ts +71 -0
- package/src/tui/settings-dialog.ts +177 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — Extension entry
|
|
3
|
+
*
|
|
4
|
+
* Web search, read, and summarize tools with provider-based backend selection.
|
|
5
|
+
* Provides agent tools: web-search, web-read, web-llm-summarize
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
9
|
+
import {
|
|
10
|
+
UNIPI_EVENTS,
|
|
11
|
+
MODULES,
|
|
12
|
+
emitEvent,
|
|
13
|
+
getPackageVersion,
|
|
14
|
+
} from "@pi-unipi/core";
|
|
15
|
+
import { registerWebTools, WEB_TOOLS } from "./tools.js";
|
|
16
|
+
import { registerWebCommands, WEB_COMMANDS } from "./commands.js";
|
|
17
|
+
import { webCache } from "./cache.js";
|
|
18
|
+
import { loadConfig } from "./settings.js";
|
|
19
|
+
import "./providers/duckduckgo.js";
|
|
20
|
+
import "./providers/jina-search.js";
|
|
21
|
+
import "./providers/jina-reader.js";
|
|
22
|
+
import "./providers/serpapi.js";
|
|
23
|
+
import "./providers/tavily.js";
|
|
24
|
+
import "./providers/firecrawl.js";
|
|
25
|
+
import "./providers/perplexity.js";
|
|
26
|
+
import "./providers/llm-summarize.js";
|
|
27
|
+
|
|
28
|
+
/**
 * Package version, obtained by handing this module's directory path to
 * `getPackageVersion`. It is the value sent as `version` in the MODULE_READY event.
 */
const moduleDirPath = new URL(".", import.meta.url).pathname;
const VERSION = getPackageVersion(moduleDirPath);
|
|
30
|
+
|
|
31
|
+
/**
 * Minimal structural view of the shared info registry: only the single method
 * this module calls on it.
 */
interface InfoRegistryLike {
  registerGroup(group: Record<string, unknown>): void;
}

/**
 * Look up the info registry published on `globalThis.__unipi_info_registry`.
 *
 * The registry is read from the global object rather than imported (avoids
 * direct import issues with pi's extension loading).
 *
 * @returns The registry when the global holds an object exposing a
 *   `registerGroup` function, otherwise `undefined`.
 */
function getInfoRegistry(): InfoRegistryLike | undefined {
  const candidate: unknown = Reflect.get(globalThis, "__unipi_info_registry");

  // Check the shape at runtime so an unexpected global value is ignored
  // instead of blowing up when `registerGroup` is invoked.
  const looksLikeRegistry =
    typeof candidate === "object" &&
    candidate !== null &&
    typeof Reflect.get(candidate, "registerGroup") === "function";

  return looksLikeRegistry ? (candidate as InfoRegistryLike) : undefined;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Extension entry point.
 *
 * Registers the skills directory, the web tools and commands, and the session
 * lifecycle handlers on the given extension API.
 *
 * @param pi - Extension API handed to this module by the host.
 */
export default function (pi: ExtensionAPI) {
  // The package ships its skills in `<package>/skills`, a sibling of the `src/`
  // directory that contains this file, so the lookup has to climb one level.
  // NOTE(review): `.pathname` stays percent-encoded; confirm install paths with
  // spaces or non-ASCII characters are handled by the consumer.
  const skillsDir = new URL("../skills", import.meta.url).pathname;
  pi.on("resources_discover", async (_event, _ctx) => {
    return {
      skillPaths: [skillsDir],
    };
  });

  // Register tools and commands
  registerWebTools(pi);
  registerWebCommands(pi);

  // Session lifecycle
  pi.on("session_start", async (_event, ctx) => {
    // Clean expired cache entries on startup
    webCache.clearExpired();

    // Announce module (for subagent integration)
    emitEvent(pi, UNIPI_EVENTS.MODULE_READY, {
      name: MODULES.WEB_API,
      version: VERSION,
      commands: [
        `unipi:${WEB_COMMANDS.SETTINGS}`,
        `unipi:${WEB_COMMANDS.CACHE_CLEAR}`,
      ],
      tools: [
        WEB_TOOLS.SEARCH,
        WEB_TOOLS.READ,
        WEB_TOOLS.SUMMARIZE,
      ],
    });

    // Register info group (only when a registry has been published globally)
    const registry = getInfoRegistry();
    if (registry) {
      registry.registerGroup({
        id: "web-api",
        name: "Web API",
        icon: "🌐",
        priority: 50,
        // Computed on demand so the figures reflect the current config and cache.
        getData: async () => {
          const config = loadConfig();
          const stats = webCache.getStats();
          const enabledCount = Object.values(config.providers).filter(
            (p) => p.enabled
          ).length;

          return {
            "Enabled Providers": enabledCount,
            "Cache Entries": stats.totalEntries,
            "Cache Size": `${(stats.totalSizeBytes / 1024).toFixed(1)} KB`,
            "Expired Entries": stats.expiredEntries,
          };
        },
      });
    }
  });

  pi.on("session_shutdown", async (_event, _ctx) => {
    // Cleanup: clear expired cache entries
    webCache.clearExpired();
  });
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — Provider base interface
|
|
3
|
+
*
|
|
4
|
+
* Defines the WebProvider interface and capability types for all providers.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** The kinds of work a web provider can declare support for. */
export type WebCapability =
  | "search"
  | "read"
  | "summarize";
|
|
9
|
+
|
|
10
|
+
/** Per-capability rank numbers a provider advertises for provider selection. */
export interface ProviderRanking {
  /** Rank for the "search" capability. */
  search: number;
  /** Rank for the "read" capability. */
  read: number;
  /** Rank for the "summarize" capability. */
  summarize: number;
}
|
|
16
|
+
|
|
17
|
+
/** One entry returned by a provider's `search` operation. */
export interface SearchResult {
  /** Title of the result. */
  title: string;
  /** URL of the result. */
  url: string;
  /** Short text excerpt accompanying the result. */
  snippet: string;
}
|
|
23
|
+
|
|
24
|
+
/** Outcome of a provider's `read` operation. */
export interface ReadResult {
  /** URL the content belongs to. */
  url: string;
  /** Extracted content. */
  content: string;
  /** Format of `content`. */
  contentType: "markdown" | "text" | "html";
}
|
|
30
|
+
|
|
31
|
+
/** Outcome of a provider's `summarize` operation. */
export interface SummarizeResult {
  /** URL that was summarized. */
  url: string;
  /** Summary text. */
  summary: string;
  /** Prompt associated with the summary, when one is reported. */
  prompt?: string;
}
|
|
37
|
+
|
|
38
|
+
/** Settings passed to a provider when one of its operations is invoked. */
export interface ProviderConfig {
  /** Whether the provider is switched on. */
  enabled: boolean;
  /** API key for the provider, when one is configured. */
  apiKey?: string;
  /** Any further provider-specific settings. */
  [key: string]: unknown;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Contract every web provider implements.
 *
 * A provider lists the capabilities it supports and a ranking per capability;
 * the operation matching each supported capability is supplied as an optional
 * method.
 */
export interface WebProvider {
  /** Unique identifier of the provider. */
  id: string;

  /** Display name of the provider. */
  name: string;

  /** Capabilities the provider supports. */
  capabilities: Array<WebCapability>;

  /** True when the provider cannot be used without an API key. */
  requiresApiKey: boolean;

  /** Name of the environment variable holding the API key, if any. */
  apiKeyEnv?: string;

  /**
   * Ranking used when a provider is selected for a capability.
   * A lower number marks a simpler/cheaper provider, which is preferred for
   * auto-selection; 0 marks a capability the provider does not support.
   */
  ranking: ProviderRanking;

  /** Provider-specific configuration. */
  config: Record<string, unknown>;

  /**
   * Run a web search.
   * @param query - Search query string
   * @param config - Provider-specific configuration
   * @returns The search results
   */
  search?(query: string, config?: ProviderConfig): Promise<SearchResult[]>;

  /**
   * Fetch a URL and extract its content.
   * @param url - URL to read
   * @param config - Provider-specific configuration
   * @returns The extracted content
   */
  read?(url: string, config?: ProviderConfig): Promise<ReadResult>;

  /**
   * Summarize the content behind a URL.
   * @param url - URL to summarize
   * @param prompt - Custom summarization prompt
   * @param config - Provider-specific configuration
   * @returns The summary
   */
  summarize?(url: string, prompt?: string, config?: ProviderConfig): Promise<SummarizeResult>;

  /**
   * Check whether an API key is usable (optional).
   * @param apiKey - API key to validate
   * @returns true if valid
   */
  validateApiKey?(apiKey: string): Promise<boolean>;
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — DuckDuckGo provider
|
|
3
|
+
*
|
|
4
|
+
* Free search provider using DuckDuckGo.
|
|
5
|
+
* Uses DuckDuckGo's HTML search endpoint for results.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type {
|
|
9
|
+
WebProvider,
|
|
10
|
+
SearchResult,
|
|
11
|
+
ProviderConfig,
|
|
12
|
+
} from "./base.js";
|
|
13
|
+
import { registry } from "./registry.js";
|
|
14
|
+
|
|
15
|
+
/** One result extracted from DuckDuckGo's HTML results page. */
interface DDGResult {
  title: string;
  url: string;
  snippet: string;
}

/** Every `<a ...>...</a>` element: group 1 = attribute text, group 2 = inner HTML. */
const DDG_ANCHOR_PATTERN = /<a\s([^>]*)>([\s\S]*?)<\/a>/gi;
/** `class` attribute value (double- or single-quoted). */
const DDG_CLASS_ATTR = /(?:^|\s)class\s*=\s*(?:"([^"]*)"|'([^']*)')/i;
/** `href` attribute value (double- or single-quoted). */
const DDG_HREF_ATTR = /(?:^|\s)href\s*=\s*(?:"([^"]*)"|'([^']*)')/i;

/** Named character references decoded by `decodeDDGEntities`. */
const DDG_NAMED_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/** Decode numeric and the common named HTML character references in one pass. */
function decodeDDGEntities(text: string): string {
  return text.replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g, (whole: string, body: string) => {
    if (body[0] !== "#") {
      // Unknown names are left untouched rather than guessed at.
      return DDG_NAMED_ENTITIES.get(body) ?? whole;
    }
    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}

/** Turn an HTML fragment into trimmed plain text (tags removed, entities decoded). */
function ddgPlainText(fragment: string): string {
  return decodeDDGEntities(fragment.replace(/<[^>]*>/g, "")).trim();
}

/** Read one attribute value out of an anchor's attribute text. */
function readDDGAttribute(pattern: RegExp, attrs: string): string | undefined {
  const found = pattern.exec(attrs);
  return found ? found[1] ?? found[2] : undefined;
}

/**
 * Produce the destination URL for a result link.
 *
 * The raw attribute value is entity-decoded first. If the link is a
 * `duckduckgo.com/l/?uddg=<target>` redirect, the decoded `uddg` target is
 * returned instead of the redirect itself.
 * NOTE(review): the redirect shape is an assumption about DuckDuckGo's markup;
 * hrefs that do not match it are returned unchanged apart from entity decoding.
 */
function resolveDDGHref(rawHref: string): string {
  const href = decodeDDGEntities(rawHref);
  try {
    const parsed = new URL(href, "https://duckduckgo.com");
    const isRedirect =
      parsed.pathname === "/l/" && /(^|\.)duckduckgo\.com$/.test(parsed.hostname);
    const target = isRedirect ? parsed.searchParams.get("uddg") : null;
    if (target) {
      return target;
    }
  } catch {
    // Not parseable as a URL: fall through and hand back the decoded href.
  }
  return href;
}

/**
 * Parse DuckDuckGo HTML search results.
 *
 * Walks the anchors in document order. An anchor whose class list contains
 * `result__a` (and that has an `href`) starts a new result; the next anchor
 * whose class list contains `result__snippet` supplies that result's snippet.
 * Pairing in document order keeps a snippet attached to its own link even when
 * another result has no snippet at all (such results get an empty snippet).
 * Inline markup inside titles/snippets (e.g. `<b>`) is stripped and HTML
 * entities are decoded.
 *
 * @param html - Raw HTML of the results page
 * @returns Results in page order
 */
function parseDDGResults(html: string): DDGResult[] {
  const results: DDGResult[] = [];
  let awaitingSnippet: DDGResult | undefined;

  for (const anchor of html.matchAll(DDG_ANCHOR_PATTERN)) {
    const attrs = anchor[1] ?? "";
    const inner = anchor[2] ?? "";
    const classes = (readDDGAttribute(DDG_CLASS_ATTR, attrs) ?? "").split(/\s+/);

    if (classes.includes("result__a")) {
      const href = readDDGAttribute(DDG_HREF_ATTR, attrs);
      if (href === undefined) {
        continue; // a result link without a target is of no use to callers
      }
      awaitingSnippet = { title: ddgPlainText(inner), url: resolveDDGHref(href), snippet: "" };
      results.push(awaitingSnippet);
    } else if (classes.includes("result__snippet") && awaitingSnippet) {
      awaitingSnippet.snippet = ddgPlainText(inner);
      awaitingSnippet = undefined;
    }
  }

  return results;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Query DuckDuckGo's HTML endpoint and return the parsed results.
 * @param query - Search query
 * @returns Array of search results
 * @throws Error when the HTTP response status is not OK
 */
async function searchDDG(query: string): Promise<SearchResult[]> {
  const requestHeaders = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };
  const endpoint = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;

  const reply = await fetch(endpoint, { headers: requestHeaders });

  if (reply.ok) {
    const parsed = parseDDGResults(await reply.text());
    // Copy into plain SearchResult objects.
    return parsed.map(({ title, url, snippet }) => ({ title, url, snippet }));
  }

  throw new Error(`DuckDuckGo search failed: ${reply.status} ${reply.statusText}`);
}
|
|
93
|
+
|
|
94
|
+
/** DuckDuckGo provider: search only, no API key. */
const duckduckgoProvider: WebProvider = {
  id: "duckduckgo",
  name: "DuckDuckGo",
  capabilities: ["search"],
  requiresApiKey: false,
  ranking: { search: 1, read: 0, summarize: 0 },
  config: {},

  /** Delegates to `searchDDG`; the provider config is not used. */
  search: (query: string, _config?: ProviderConfig): Promise<SearchResult[]> =>
    searchDDG(query),
};

// Make the provider available through the shared registry at import time.
registry.register(duckduckgoProvider);

export { duckduckgoProvider };
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — Firecrawl provider
|
|
3
|
+
*
|
|
4
|
+
* Paid content extraction provider using Firecrawl API.
|
|
5
|
+
* Advanced web scraping with JavaScript rendering support.
|
|
6
|
+
* Requires API key (FIRECRAWL_API_KEY environment variable).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type {
|
|
10
|
+
WebProvider,
|
|
11
|
+
ReadResult,
|
|
12
|
+
ProviderConfig,
|
|
13
|
+
} from "./base.js";
|
|
14
|
+
import { registry } from "./registry.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Firecrawl API response format.
 * Everything below `success` is optional because the payload is unvalidated
 * JSON coming from the network.
 */
interface FirecrawlResponse {
  success: boolean;
  /** Failure detail, when the API supplies one. */
  error?: string;
  data?: {
    markdown?: string;
    html?: string;
    metadata?: {
      title?: string;
      description?: string;
    };
  };
}

/**
 * Read content from URL via Firecrawl.
 * @param url - URL to read
 * @param apiKey - Firecrawl API key
 * @returns Extracted content as markdown
 * @throws Error when the HTTP status is not OK, when the API reports
 *   `success: false`, or when the payload carries no markdown string
 */
async function readFirecrawl(url: string, apiKey: string): Promise<ReadResult> {
  // NOTE(review): this targets the v0 scrape endpoint; confirm it is still the
  // version this integration should call.
  const apiUrl = "https://api.firecrawl.dev/v0/scrape";

  const response = await fetch(apiUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      url: url,
      pageOptions: {
        onlyMainContent: true,
        waitForSelector: "body",
      },
    }),
  });

  if (!response.ok) {
    throw new Error(`Firecrawl read failed: ${response.status} ${response.statusText}`);
  }

  const payload = (await response.json()) as FirecrawlResponse | null;

  if (!payload?.success) {
    const detail = typeof payload?.error === "string" && payload.error ? `: ${payload.error}` : "";
    throw new Error(`Firecrawl extraction failed${detail}`);
  }

  // A "successful" reply without markdown would otherwise surface as
  // `content: undefined` (or a TypeError when `data` is absent).
  const markdown = payload.data?.markdown;
  if (typeof markdown !== "string") {
    throw new Error("Firecrawl response did not include markdown content");
  }

  return {
    url: url,
    content: markdown,
    contentType: "markdown",
  };
}
|
|
69
|
+
|
|
70
|
+
/** Firecrawl provider: read only, API key mandatory. */
const firecrawlProvider: WebProvider = {
  id: "firecrawl",
  name: "Firecrawl",
  capabilities: ["read"],
  requiresApiKey: true,
  apiKeyEnv: "FIRECRAWL_API_KEY",
  ranking: { search: 0, read: 2, summarize: 0 },
  config: {},

  /** Reads `url` with the configured key, falling back to FIRECRAWL_API_KEY. */
  async read(url: string, config?: ProviderConfig): Promise<ReadResult> {
    const apiKey = config?.apiKey || process.env.FIRECRAWL_API_KEY;
    if (apiKey) {
      return readFirecrawl(url, apiKey);
    }
    throw new Error("Firecrawl requires an API key. Set FIRECRAWL_API_KEY environment variable or configure via /unipi:web-settings");
  },

  /** A key counts as valid when a read of https://example.com succeeds with it. */
  validateApiKey(apiKey: string): Promise<boolean> {
    return readFirecrawl("https://example.com", apiKey).then(
      () => true,
      () => false,
    );
  },
};

// Make the provider available through the shared registry at import time.
registry.register(firecrawlProvider);

export { firecrawlProvider };
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — Jina AI Reader provider
|
|
3
|
+
*
|
|
4
|
+
* Freemium content extraction provider using Jina AI Reader API.
|
|
5
|
+
* Extracts main content from URLs and returns markdown.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type {
|
|
9
|
+
WebProvider,
|
|
10
|
+
ReadResult,
|
|
11
|
+
ProviderConfig,
|
|
12
|
+
} from "./base.js";
|
|
13
|
+
import { registry } from "./registry.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Jina AI Reader API response format.
 * Fields are optional because the payload is unvalidated JSON from the network.
 */
interface JinaReaderResponse {
  data?: {
    url?: string;
    content?: string;
    title?: string;
    description?: string;
  };
}

/**
 * Read content from URL via Jina AI Reader.
 * @param url - URL to read
 * @param apiKey - Optional API key for higher rate limits
 * @returns Extracted content as markdown; `url` is the one reported by the
 *   API when present, otherwise the requested one
 * @throws Error when the HTTP status is not OK or the payload carries no
 *   content string
 */
async function readJina(url: string, apiKey?: string): Promise<ReadResult> {
  const jinaUrl = `https://r.jina.ai/${url}`;

  const headers: Record<string, string> = {
    Accept: "application/json",
  };

  if (apiKey) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const response = await fetch(jinaUrl, { headers });

  if (!response.ok) {
    throw new Error(`Jina AI Reader failed: ${response.status} ${response.statusText}`);
  }

  const payload = (await response.json()) as JinaReaderResponse | null;

  // Without this check a reply lacking `data` raised a bare TypeError and a
  // reply lacking `content` was returned as `content: undefined`.
  const content = payload?.data?.content;
  if (typeof content !== "string") {
    throw new Error("Jina AI Reader response did not include content");
  }

  return {
    url: payload?.data?.url || url,
    content,
    contentType: "markdown",
  };
}
|
|
56
|
+
|
|
57
|
+
/** Jina AI Reader provider: read only, API key optional. */
const jinaReaderProvider: WebProvider = {
  id: "jina-reader",
  name: "Jina AI Reader",
  capabilities: ["read"],
  requiresApiKey: false,
  apiKeyEnv: "JINA_API_KEY",
  ranking: { search: 0, read: 1, summarize: 0 },
  config: {},

  /** Reads `url`, passing the configured key or JINA_API_KEY when either is set. */
  async read(url: string, config?: ProviderConfig): Promise<ReadResult> {
    return readJina(url, config?.apiKey || process.env.JINA_API_KEY);
  },

  /** A key counts as valid when a read of https://example.com succeeds with it. */
  validateApiKey(apiKey: string): Promise<boolean> {
    return readJina("https://example.com", apiKey).then(
      () => true,
      () => false,
    );
  },
};

// Make the provider available through the shared registry at import time.
registry.register(jinaReaderProvider);

export { jinaReaderProvider };
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — Jina AI Search provider
|
|
3
|
+
*
|
|
4
|
+
* Freemium search provider using Jina AI Search API.
|
|
5
|
+
* Free tier available, higher rate limits with API key.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type {
|
|
9
|
+
WebProvider,
|
|
10
|
+
SearchResult,
|
|
11
|
+
ProviderConfig,
|
|
12
|
+
} from "./base.js";
|
|
13
|
+
import { registry } from "./registry.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Jina AI Search API response format.
 * `snippet` is kept for compatibility; `description` and `content` are the
 * alternative text fields an item may carry instead.
 * NOTE(review): field names beyond title/url are taken from Jina's published
 * response examples — confirm against the current API.
 */
interface JinaSearchResponse {
  data?: Array<{
    title: string;
    url: string;
    snippet?: string;
    description?: string;
    content?: string;
  }>;
}

/**
 * Search via Jina AI.
 * @param query - Search query
 * @param apiKey - Optional API key for higher rate limits
 * @returns Array of search results (empty when the payload has no result list)
 * @throws Error when the HTTP status is not OK
 */
async function searchJina(query: string, apiKey?: string): Promise<SearchResult[]> {
  const url = `https://s.jina.ai/${encodeURIComponent(query)}`;

  const headers: Record<string, string> = {
    Accept: "application/json",
  };

  if (apiKey) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const response = await fetch(url, { headers });

  if (!response.ok) {
    throw new Error(`Jina AI search failed: ${response.status} ${response.statusText}`);
  }

  const payload = (await response.json()) as JinaSearchResponse | null;

  // Anything other than an array (missing, null, error object) means no results.
  const items = Array.isArray(payload?.data) ? payload.data : [];

  return items.map((item) => ({
    title: item.title,
    url: item.url,
    // Prefer an explicit snippet, otherwise fall back to the item's description
    // so the result never carries `snippet: undefined`.
    snippet: item.snippet || item.description || "",
  }));
}
|
|
55
|
+
|
|
56
|
+
/** Jina AI Search provider: search only, API key optional. */
const jinaSearchProvider: WebProvider = {
  id: "jina-search",
  name: "Jina AI Search",
  capabilities: ["search"],
  requiresApiKey: false,
  apiKeyEnv: "JINA_API_KEY",
  ranking: { search: 2, read: 0, summarize: 0 },
  config: {},

  /** Searches, passing the configured key or JINA_API_KEY when either is set. */
  async search(query: string, config?: ProviderConfig): Promise<SearchResult[]> {
    return searchJina(query, config?.apiKey || process.env.JINA_API_KEY);
  },

  /** A key counts as valid when a "test" search with it yields an array. */
  validateApiKey(apiKey: string): Promise<boolean> {
    return searchJina("test", apiKey).then(
      (results) => Array.isArray(results),
      () => false,
    );
  },
};

// Make the provider available through the shared registry at import time.
registry.register(jinaSearchProvider);

export { jinaSearchProvider };
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — LLM Summarize provider
|
|
3
|
+
*
|
|
4
|
+
* Summarization provider using pi's existing LLM.
|
|
5
|
+
* No external API key required - uses the LLM already configured in pi.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type {
|
|
9
|
+
WebProvider,
|
|
10
|
+
SummarizeResult,
|
|
11
|
+
ProviderConfig,
|
|
12
|
+
} from "./base.js";
|
|
13
|
+
import { registry } from "./registry.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Default summarization prompt, used when the caller supplies none.
 * Built line by line so the text contains exactly these seven lines (the
 * sixth is intentionally blank) and nothing else.
 */
const DEFAULT_SUMMARY_PROMPT = [
  "Summarize the following web content concisely, highlighting the key points.",
  "Focus on:",
  "1. Main topic and purpose",
  "2. Key facts and findings",
  "3. Important conclusions or recommendations",
  "",
  "Provide a clear, well-structured summary.",
].join("\n");
|
|
23
|
+
|
|
24
|
+
/**
 * Build the placeholder result handed back by the LLM summarize provider.
 * No LLM is invoked here; per the provider's own notes the real call is made
 * during tool execution.
 *
 * @param url - URL the summary refers to
 * @param content - Page content (not used by the placeholder)
 * @param prompt - Custom prompt; the default prompt is used when this is empty or omitted
 * @returns A result whose `summary` is a fixed placeholder string naming the URL
 */
function createLLMSummarizeResult(
  url: string,
  content: string,
  prompt?: string
): SummarizeResult {
  const effectivePrompt = prompt ? prompt : DEFAULT_SUMMARY_PROMPT;
  const summary = `[LLM Summary placeholder for ${url}]`;
  return { url, summary, prompt: effectivePrompt };
}
|
|
41
|
+
|
|
42
|
+
/** LLM Summarize provider: summarize only, no API key. */
const llmSummarizeProvider: WebProvider = {
  id: "llm-summarize",
  name: "LLM Summarize",
  capabilities: ["summarize"],
  requiresApiKey: false,
  ranking: { search: 0, read: 0, summarize: 2 },
  config: { defaultPrompt: DEFAULT_SUMMARY_PROMPT },

  /**
   * Placeholder implementation: the actual work is meant to live in the tool,
   * which is expected to
   *   1. fetch the content using a read provider,
   *   2. send it to the LLM together with the prompt,
   *   3. return the LLM's summary.
   * Here only a placeholder result (built with empty content) is produced.
   */
  async summarize(url: string, prompt?: string, _config?: ProviderConfig): Promise<SummarizeResult> {
    const placeholder = createLLMSummarizeResult(url, "", prompt);
    return placeholder;
  },
};

// Make the provider available through the shared registry at import time.
registry.register(llmSummarizeProvider);

export { llmSummarizeProvider, DEFAULT_SUMMARY_PROMPT };
|