@pi-unipi/web-api 0.1.15 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -161
- package/package.json +1 -1
- package/src/engine/dependencies.ts +4 -4
- package/src/index.ts +1 -2
package/README.md
CHANGED
|
@@ -1,48 +1,73 @@
|
|
|
1
1
|
# @pi-unipi/web-api
|
|
2
2
|
|
|
3
|
-
Web search,
|
|
3
|
+
Web search, page reading, and content summarization for the agent. The read path uses a local smart-fetch engine by default — free, no API key, browser-grade TLS fingerprinting that bypasses Cloudflare.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Paid providers (SerpAPI, Tavily, Firecrawl, Perplexity) are available as fallbacks. DuckDuckGo and Jina work out of the box for search.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
## Commands
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
-
|
|
9
|
+
| Command | Description |
|
|
10
|
+
|---------|-------------|
|
|
11
|
+
| `/unipi:web-settings` | Configure providers, API keys, and smart-fetch defaults |
|
|
12
|
+
| `/unipi:web-cache-clear` | Clear all cached web content |
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
## Special Triggers
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
Workflow skills detect web-api and inject web tools for research-type commands:
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
- **Subagent integration** — Tools automatically available to spawned subagents
|
|
18
|
+
| Skill | What Changes |
|
|
19
|
+
|-------|--------------|
|
|
20
|
+
| `research` | Full web search, read, summarize |
|
|
21
|
+
| `gather-context` | External documentation lookup |
|
|
22
|
+
| `consultant` | Industry best practices research |
|
|
23
|
+
| `subagents` (explore) | Web research in parallel |
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
The footer and info-screen don't display web-api data — it's a tool package, not a state package.
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
## Agent Tools
|
|
28
|
+
|
|
29
|
+
| Tool | Description |
|
|
30
|
+
|------|-------------|
|
|
31
|
+
| `web_search` | Search the web via provider |
|
|
32
|
+
| `multi_web_content_read` | Extract content from URLs (smart-fetch or provider) |
|
|
33
|
+
| `web_llm_summarize` | Summarize web content via LLM |
|
|
34
|
+
|
|
35
|
+
### web_search
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
# Auto-select cheapest provider
|
|
39
|
+
web_search(query: "TypeScript generics")
|
|
40
|
+
|
|
41
|
+
# Use specific provider
|
|
42
|
+
web_search(query: "latest AI research", source: 4) # Tavily
|
|
29
43
|
```
|
|
30
44
|
|
|
31
|
-
|
|
45
|
+
### multi_web_content_read
|
|
32
46
|
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
47
|
+
```
|
|
48
|
+
# Single URL (smart-fetch engine by default)
|
|
49
|
+
multi_web_content_read(url: "https://example.com/article")
|
|
50
|
+
|
|
51
|
+
# Batch URLs
|
|
52
|
+
multi_web_content_read(url: ["https://example.com/a", "https://example.com/b"])
|
|
53
|
+
|
|
54
|
+
# Provider fallback (Jina Reader)
|
|
55
|
+
multi_web_content_read(url: "https://example.com/article", source: 1)
|
|
56
|
+
|
|
57
|
+
# Custom options
|
|
58
|
+
multi_web_content_read(url: "https://example.com/article", format: "json", maxChars: 10000)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### web_llm_summarize
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
web_llm_summarize(url: "https://example.com/long-article")
|
|
65
|
+
web_llm_summarize(url: "https://example.com/research", prompt: "Extract key findings")
|
|
41
66
|
```
|
|
42
67
|
|
|
43
68
|
## Smart-Fetch Engine
|
|
44
69
|
|
|
45
|
-
|
|
70
|
+
Local content extraction pipeline — no API key required:
|
|
46
71
|
|
|
47
72
|
| Component | Purpose |
|
|
48
73
|
|-----------|---------|
|
|
@@ -50,17 +75,11 @@ The smart-fetch engine is a local content extraction pipeline:
|
|
|
50
75
|
| **defuddle** | Intelligent content extraction from HTML |
|
|
51
76
|
| **linkedom** | Server-side DOM parsing |
|
|
52
77
|
|
|
53
|
-
|
|
54
|
-
- No API key required
|
|
55
|
-
- Browser-level anti-bot bypass
|
|
56
|
-
- Clean markdown output with metadata (title, author, site, word count)
|
|
57
|
-
- Batch concurrent fetching with progress
|
|
58
|
-
- Client-side meta redirect following
|
|
59
|
-
- Multiple output formats (markdown, HTML, text, JSON)
|
|
78
|
+
Outputs clean markdown with metadata (title, author, site, word count). Supports batch concurrent fetching with progress.
|
|
60
79
|
|
|
61
80
|
## Providers
|
|
62
81
|
|
|
63
|
-
### Search
|
|
82
|
+
### Search
|
|
64
83
|
|
|
65
84
|
| Provider | Rank | Cost | API Key |
|
|
66
85
|
|----------|------|------|---------|
|
|
@@ -70,35 +89,27 @@ The smart-fetch engine is a local content extraction pipeline:
|
|
|
70
89
|
| Tavily | 4 | Paid | Required |
|
|
71
90
|
| Perplexity | 5 | Paid | Required |
|
|
72
91
|
|
|
73
|
-
### Read
|
|
92
|
+
### Read
|
|
74
93
|
|
|
75
94
|
| Provider | Rank | Cost | API Key |
|
|
76
95
|
|----------|------|------|---------|
|
|
77
|
-
|
|
|
96
|
+
| Smart-Fetch Engine | 0 | Free | No |
|
|
78
97
|
| Jina AI Reader | 1 | Freemium | Optional |
|
|
79
98
|
| Firecrawl | 2 | Paid | Required |
|
|
80
99
|
| Perplexity | 3 | Paid | Required |
|
|
81
100
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
### Summarize Providers
|
|
101
|
+
### Summarize
|
|
85
102
|
|
|
86
103
|
| Provider | Rank | Cost | API Key |
|
|
87
104
|
|----------|------|------|---------|
|
|
88
105
|
| Perplexity | 1 | Paid | Required |
|
|
89
106
|
| LLM Summarize | 2 | LLM tokens | No |
|
|
90
107
|
|
|
91
|
-
##
|
|
108
|
+
## Configurables
|
|
92
109
|
|
|
93
110
|
### API Keys
|
|
94
111
|
|
|
95
|
-
Configure
|
|
96
|
-
|
|
97
|
-
```
|
|
98
|
-
/unipi:web-settings
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
Or set environment variables:
|
|
112
|
+
Configure via `/unipi:web-settings` (interactive TUI) or environment variables:
|
|
102
113
|
|
|
103
114
|
```bash
|
|
104
115
|
export SERPAPI_KEY="your-key"
|
|
@@ -108,135 +119,30 @@ export PERPLEXITY_API_KEY="your-key"
|
|
|
108
119
|
export JINA_API_KEY="your-key"
|
|
109
120
|
```
|
|
110
121
|
|
|
111
|
-
|
|
122
|
+
Providers auto-enable when you add a valid API key.
|
|
112
123
|
|
|
113
|
-
|
|
124
|
+
### Smart-Fetch Defaults
|
|
114
125
|
|
|
115
|
-
|
|
116
|
-
/unipi:web-settings → "Smart Fetch Defaults"
|
|
117
|
-
```
|
|
126
|
+
Configure browser profile, OS, max chars, timeout via `/unipi:web-settings → "Smart Fetch Defaults"`.
|
|
118
127
|
|
|
119
128
|
### Settings Files
|
|
120
129
|
|
|
121
130
|
- **Auth:** `~/.unipi/config/web-api/auth.json` (API keys, gitignored)
|
|
122
131
|
- **Config:** `~/.unipi/config/web-api/config.json` (provider settings, smart-fetch defaults)
|
|
123
132
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
### Web Search
|
|
127
|
-
|
|
128
|
-
```
|
|
129
|
-
# Auto-select cheapest provider
|
|
130
|
-
web_search(query: "TypeScript generics")
|
|
131
|
-
|
|
132
|
-
# Use specific provider
|
|
133
|
-
web_search(query: "latest AI research", source: 4) # Tavily
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
### Multi Web Content Read
|
|
137
|
-
|
|
138
|
-
```
|
|
139
|
-
# Single URL (uses smart-fetch engine by default)
|
|
140
|
-
multi_web_content_read(url: "https://example.com/article")
|
|
141
|
-
|
|
142
|
-
# Batch URLs
|
|
143
|
-
multi_web_content_read(url: ["https://example.com/a", "https://example.com/b"])
|
|
144
|
-
|
|
145
|
-
# Use provider fallback (Jina Reader)
|
|
146
|
-
multi_web_content_read(url: "https://example.com/article", source: 1)
|
|
147
|
-
|
|
148
|
-
# Custom options
|
|
149
|
-
multi_web_content_read(url: "https://example.com/article", format: "json", maxChars: 10000)
|
|
150
|
-
|
|
151
|
-
# Advanced: custom browser profile
|
|
152
|
-
multi_web_content_read(url: "https://example.com/article", browser: "chrome_145", os: "windows")
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
### Web Summarize
|
|
156
|
-
|
|
157
|
-
```
|
|
158
|
-
# Auto-summarize
|
|
159
|
-
web_llm_summarize(url: "https://example.com/long-article")
|
|
160
|
-
|
|
161
|
-
# Custom prompt
|
|
162
|
-
web_llm_summarize(url: "https://example.com/research", prompt: "Extract key findings")
|
|
163
|
-
```
|
|
164
|
-
|
|
165
|
-
## Commands
|
|
166
|
-
|
|
167
|
-
### /unipi:web-settings
|
|
168
|
-
|
|
169
|
-
Interactive settings dialog for managing providers, API keys, and smart-fetch defaults.
|
|
170
|
-
|
|
171
|
-
- **Auto-enable on key input** — provider is automatically enabled when you add a valid API key
|
|
172
|
-
- **Smart-fetch configuration** — set default browser, OS, timeout, etc.
|
|
173
|
-
- **Cursor memory** — last configured provider moves to the top of the list
|
|
174
|
-
|
|
175
|
-
### /unipi:web-cache-clear
|
|
176
|
-
|
|
177
|
-
Clear all cached web content.
|
|
178
|
-
|
|
179
|
-
## Cache
|
|
133
|
+
### Cache
|
|
180
134
|
|
|
181
135
|
- Default TTL: 1 hour
|
|
182
136
|
- Cache location: `~/.unipi/config/web-api/cache/`
|
|
183
|
-
- Smart-fetch cache keys include URL + browser + format + maxChars
|
|
184
137
|
- Automatic for all read operations
|
|
185
138
|
|
|
186
|
-
## Dependencies
|
|
187
|
-
|
|
188
|
-
| Package | Version | Purpose |
|
|
189
|
-
|---------|---------|---------|
|
|
190
|
-
| wreq-js | ^2.3.0 | TLS fingerprinting |
|
|
191
|
-
| defuddle | ^0.18.1 | Content extraction |
|
|
192
|
-
| linkedom | ^0.18.12 | Server-side DOM |
|
|
193
|
-
| lodash | ^4.17.21 | Filename sanitization |
|
|
194
|
-
| mime-types | ^2.1.35 | MIME type mapping |
|
|
195
|
-
|
|
196
139
|
## Troubleshooting
|
|
197
140
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
If you see "No search provider available":
|
|
201
|
-
|
|
202
|
-
1. Run `/unipi:web-settings`
|
|
203
|
-
2. Add API keys for paid providers (they auto-enable on key input)
|
|
204
|
-
3. Or manually enable a free provider
|
|
205
|
-
|
|
206
|
-
### Smart-fetch fails
|
|
141
|
+
**No provider available:** Run `/unipi:web-settings` and add API keys or enable a free provider.
|
|
207
142
|
|
|
208
|
-
|
|
143
|
+
**Smart-fetch fails:** Try a different browser profile (`browser: "chrome_133"`) or a provider fallback (`source: 1`).
|
|
209
144
|
|
|
210
|
-
|
|
211
|
-
2. Try a provider fallback: `source: 1` (Jina Reader)
|
|
212
|
-
3. Check if the site requires JavaScript execution (not supported)
|
|
213
|
-
|
|
214
|
-
### API key invalid
|
|
215
|
-
|
|
216
|
-
If API key validation fails:
|
|
217
|
-
|
|
218
|
-
1. Check the key is correct
|
|
219
|
-
2. Verify the key has sufficient permissions
|
|
220
|
-
3. Check provider status at their website
|
|
221
|
-
|
|
222
|
-
### Rate limiting
|
|
223
|
-
|
|
224
|
-
If you hit rate limits:
|
|
225
|
-
|
|
226
|
-
1. Add an API key for higher limits
|
|
227
|
-
2. Use the smart-fetch engine (default, no limits)
|
|
228
|
-
3. Use a different provider
|
|
229
|
-
4. Wait and retry
|
|
230
|
-
|
|
231
|
-
## Development
|
|
232
|
-
|
|
233
|
-
```bash
|
|
234
|
-
# Type check
|
|
235
|
-
npx tsc --noEmit
|
|
236
|
-
|
|
237
|
-
# Build
|
|
238
|
-
npm run build
|
|
239
|
-
```
|
|
145
|
+
**Rate limiting:** Add an API key for higher limits, use smart-fetch (no limits), or try a different provider.
|
|
240
146
|
|
|
241
147
|
## License
|
|
242
148
|
|
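package/package.json
CHANGED
(hunk not captured in this extract; the file list above records +1 -1)
package/src/engine/dependencies.ts
CHANGED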
|
@@ -5,10 +5,10 @@
|
|
|
5
5
|
* Uses dynamic imports to handle optional native binding failures gracefully.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
let wreqModule:
|
|
9
|
-
let defuddleModule:
|
|
10
|
-
let lodashModule:
|
|
11
|
-
let mimeTypesModule:
|
|
8
|
+
let wreqModule: Record<string, unknown> | null = null;
|
|
9
|
+
let defuddleModule: Record<string, unknown> | null = null;
|
|
10
|
+
let lodashModule: Record<string, unknown> | null = null;
|
|
11
|
+
let mimeTypesModule: typeof import("mime-types") | null = null;
|
|
12
12
|
|
|
13
13
|
/**
|
|
14
14
|
* Get the wreq-js module.
|
package/src/index.ts
CHANGED
|
@@ -31,8 +31,7 @@ const VERSION = getPackageVersion(new URL(".", import.meta.url).pathname);
|
|
|
31
31
|
|
|
32
32
|
// Get info registry from global (avoids direct import issues with pi's extension loading)
|
|
33
33
|
function getInfoRegistry() {
|
|
34
|
-
|
|
35
|
-
return g.__unipi_info_registry;
|
|
34
|
+
return globalThis.__unipi_info_registry;
|
|
36
35
|
}
|
|
37
36
|
|
|
38
37
|
export default function (pi: ExtensionAPI) {
|