@pi-unipi/web-api 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -114
- package/package.json +9 -2
- package/skills/web/SKILL.md +54 -11
- package/src/engine/constants.ts +36 -0
- package/src/engine/dependencies.ts +145 -0
- package/src/engine/dom.ts +266 -0
- package/src/engine/extract.ts +642 -0
- package/src/engine/format.ts +306 -0
- package/src/engine/profiles.ts +102 -0
- package/src/engine/types.ts +169 -0
- package/src/index.ts +9 -2
- package/src/providers/base.ts +9 -1
- package/src/settings.ts +70 -4
- package/src/tools.ts +281 -24
- package/src/tui/progress.ts +168 -0
- package/src/tui/result.ts +173 -0
- package/src/tui/settings-dialog.ts +168 -0
package/README.md
CHANGED
|
@@ -1,46 +1,85 @@
|
|
|
1
1
|
# @pi-unipi/web-api
|
|
2
2
|
|
|
3
|
-
Web search,
|
|
3
|
+
Web search, page reading, and content summarization for the agent. The read path uses a local smart-fetch engine by default — free, no API key, browser-grade TLS fingerprinting that bypasses Cloudflare.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Paid providers (SerpAPI, Tavily, Firecrawl, Perplexity) are available as fallbacks. DuckDuckGo and Jina work out of the box for search.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
## Commands
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
-
|
|
9
|
+
| Command | Description |
|
|
10
|
+
|---------|-------------|
|
|
11
|
+
| `/unipi:web-settings` | Configure providers, API keys, and smart-fetch defaults |
|
|
12
|
+
| `/unipi:web-cache-clear` | Clear all cached web content |
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
## Special Triggers
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
Workflow skills detect web-api and inject web tools for research-type commands:
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
-
|
|
21
|
-
|
|
18
|
+
| Skill | What Changes |
|
|
19
|
+
|-------|--------------|
|
|
20
|
+
| `research` | Full web search, read, summarize |
|
|
21
|
+
| `gather-context` | External documentation lookup |
|
|
22
|
+
| `consultant` | Industry best practices research |
|
|
23
|
+
| `subagents` (explore) | Web research in parallel |
|
|
22
24
|
|
|
23
|
-
|
|
25
|
+
The footer and info-screen don't display web-api data — it's a tool package, not a state package.
|
|
24
26
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
## Agent Tools
|
|
28
|
+
|
|
29
|
+
| Tool | Description |
|
|
30
|
+
|------|-------------|
|
|
31
|
+
| `web_search` | Search the web via provider |
|
|
32
|
+
| `multi_web_content_read` | Extract content from URLs (smart-fetch or provider) |
|
|
33
|
+
| `web_llm_summarize` | Summarize web content via LLM |
|
|
34
|
+
|
|
35
|
+
### web_search
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
# Auto-select cheapest provider
|
|
39
|
+
web_search(query: "TypeScript generics")
|
|
40
|
+
|
|
41
|
+
# Use specific provider
|
|
42
|
+
web_search(query: "latest AI research", source: 4) # Tavily
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### multi_web_content_read
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
# Single URL (smart-fetch engine by default)
|
|
49
|
+
multi_web_content_read(url: "https://example.com/article")
|
|
50
|
+
|
|
51
|
+
# Batch URLs
|
|
52
|
+
multi_web_content_read(url: ["https://example.com/a", "https://example.com/b"])
|
|
53
|
+
|
|
54
|
+
# Provider fallback (Jina Reader)
|
|
55
|
+
multi_web_content_read(url: "https://example.com/article", source: 1)
|
|
56
|
+
|
|
57
|
+
# Custom options
|
|
58
|
+
multi_web_content_read(url: "https://example.com/article", format: "json", maxChars: 10000)
|
|
27
59
|
```
|
|
28
60
|
|
|
29
|
-
|
|
61
|
+
### web_llm_summarize
|
|
30
62
|
|
|
31
|
-
```json
|
|
32
|
-
{
|
|
33
|
-
"pi": {
|
|
34
|
-
"extensions": [
|
|
35
|
-
"node_modules/@pi-unipi/web-api/src/index.ts"
|
|
36
|
-
]
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
63
|
```
|
|
64
|
+
web_llm_summarize(url: "https://example.com/long-article")
|
|
65
|
+
web_llm_summarize(url: "https://example.com/research", prompt: "Extract key findings")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Smart-Fetch Engine
|
|
69
|
+
|
|
70
|
+
Local content extraction pipeline — no API key required:
|
|
71
|
+
|
|
72
|
+
| Component | Purpose |
|
|
73
|
+
|-----------|---------|
|
|
74
|
+
| **wreq-js** | Browser-grade TLS fingerprinting (bypasses Cloudflare) |
|
|
75
|
+
| **defuddle** | Intelligent content extraction from HTML |
|
|
76
|
+
| **linkedom** | Server-side DOM parsing |
|
|
77
|
+
|
|
78
|
+
Outputs clean markdown with metadata (title, author, site, word count). Supports concurrent batch fetching with progress reporting.
|
|
40
79
|
|
|
41
80
|
## Providers
|
|
42
81
|
|
|
43
|
-
### Search
|
|
82
|
+
### Search
|
|
44
83
|
|
|
45
84
|
| Provider | Rank | Cost | API Key |
|
|
46
85
|
|----------|------|------|---------|
|
|
@@ -50,32 +89,27 @@ Add to your pi configuration:
|
|
|
50
89
|
| Tavily | 4 | Paid | Required |
|
|
51
90
|
| Perplexity | 5 | Paid | Required |
|
|
52
91
|
|
|
53
|
-
### Read
|
|
92
|
+
### Read
|
|
54
93
|
|
|
55
94
|
| Provider | Rank | Cost | API Key |
|
|
56
95
|
|----------|------|------|---------|
|
|
96
|
+
| Smart-Fetch Engine | 0 | Free | No |
|
|
57
97
|
| Jina AI Reader | 1 | Freemium | Optional |
|
|
58
98
|
| Firecrawl | 2 | Paid | Required |
|
|
59
99
|
| Perplexity | 3 | Paid | Required |
|
|
60
100
|
|
|
61
|
-
### Summarize
|
|
101
|
+
### Summarize
|
|
62
102
|
|
|
63
103
|
| Provider | Rank | Cost | API Key |
|
|
64
104
|
|----------|------|------|---------|
|
|
65
105
|
| Perplexity | 1 | Paid | Required |
|
|
66
106
|
| LLM Summarize | 2 | LLM tokens | No |
|
|
67
107
|
|
|
68
|
-
##
|
|
108
|
+
## Configurables
|
|
69
109
|
|
|
70
110
|
### API Keys
|
|
71
111
|
|
|
72
|
-
Configure
|
|
73
|
-
|
|
74
|
-
```
|
|
75
|
-
/unipi:web-settings
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
Or set environment variables:
|
|
112
|
+
Configure via `/unipi:web-settings` (interactive TUI) or environment variables:
|
|
79
113
|
|
|
80
114
|
```bash
|
|
81
115
|
export SERPAPI_KEY="your-key"
|
|
@@ -85,97 +119,30 @@ export PERPLEXITY_API_KEY="your-key"
|
|
|
85
119
|
export JINA_API_KEY="your-key"
|
|
86
120
|
```
|
|
87
121
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
- **Auth:** `~/.unipi/config/web-api/auth.json` (API keys, gitignored)
|
|
91
|
-
- **Config:** `~/.unipi/config/web-api/config.json` (provider settings)
|
|
122
|
+
Providers auto-enable when you add a valid API key.
|
|
92
123
|
|
|
93
|
-
|
|
124
|
+
### Smart-Fetch Defaults
|
|
94
125
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
```
|
|
98
|
-
# Auto-select cheapest provider
|
|
99
|
-
web_search(query: "TypeScript generics")
|
|
126
|
+
Configure browser profile, OS, max chars, and timeout via `/unipi:web-settings` → "Smart Fetch Defaults".
|
|
100
127
|
|
|
101
|
-
|
|
102
|
-
web_search(query: "latest AI research", source: 4) # Tavily
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
### Web Read
|
|
106
|
-
|
|
107
|
-
```
|
|
108
|
-
# Auto-select provider
|
|
109
|
-
web_read(url: "https://example.com/article")
|
|
110
|
-
|
|
111
|
-
# Use specific provider
|
|
112
|
-
web_read(url: "https://example.com/spa", source: 2) # Firecrawl
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
### Web Summarize
|
|
116
|
-
|
|
117
|
-
```
|
|
118
|
-
# Auto-summarize
|
|
119
|
-
web_llm_summarize(url: "https://example.com/long-article")
|
|
120
|
-
|
|
121
|
-
# Custom prompt
|
|
122
|
-
web_llm_summarize(url: "https://example.com/research", prompt: "Extract key findings")
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
## Commands
|
|
126
|
-
|
|
127
|
-
### /unipi:web-settings
|
|
128
|
-
|
|
129
|
-
Interactive settings dialog for managing providers and API keys.
|
|
130
|
-
|
|
131
|
-
- **Auto-enable on key input** — provider is automatically enabled when you add a valid API key (no extra toggle step)
|
|
132
|
-
- **Cursor memory** — last configured provider moves to the top of the list when you return to the menu
|
|
133
|
-
|
|
134
|
-
### /unipi:web-cache-clear
|
|
128
|
+
### Settings Files
|
|
135
129
|
|
|
136
|
-
|
|
130
|
+
- **Auth:** `~/.unipi/config/web-api/auth.json` (API keys, gitignored)
|
|
131
|
+
- **Config:** `~/.unipi/config/web-api/config.json` (provider settings, smart-fetch defaults)
|
|
137
132
|
|
|
138
|
-
|
|
133
|
+
### Cache
|
|
139
134
|
|
|
140
135
|
- Default TTL: 1 hour
|
|
141
136
|
- Cache location: `~/.unipi/config/web-api/cache/`
|
|
142
|
-
- Automatic for
|
|
137
|
+
- Automatic for all read operations
|
|
143
138
|
|
|
144
139
|
## Troubleshooting
|
|
145
140
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
If you see "No search provider available":
|
|
149
|
-
|
|
150
|
-
1. Run `/unipi:web-settings`
|
|
151
|
-
2. Add API keys for paid providers (they auto-enable on key input)
|
|
152
|
-
3. Or manually enable a free provider
|
|
153
|
-
|
|
154
|
-
### API key invalid
|
|
141
|
+
**No provider available:** Run `/unipi:web-settings` and add API keys or enable a free provider.
|
|
155
142
|
|
|
156
|
-
|
|
143
|
+
**Smart-fetch fails:** Try a different browser profile (`browser: "chrome_133"`) or a provider fallback (`source: 1`).
|
|
157
144
|
|
|
158
|
-
|
|
159
|
-
2. Verify the key has sufficient permissions
|
|
160
|
-
3. Check provider status at their website
|
|
161
|
-
|
|
162
|
-
### Rate limiting
|
|
163
|
-
|
|
164
|
-
If you hit rate limits:
|
|
165
|
-
|
|
166
|
-
1. Add an API key for higher limits
|
|
167
|
-
2. Use a different provider
|
|
168
|
-
3. Wait and retry
|
|
169
|
-
|
|
170
|
-
## Development
|
|
171
|
-
|
|
172
|
-
```bash
|
|
173
|
-
# Type check
|
|
174
|
-
npm run typecheck
|
|
175
|
-
|
|
176
|
-
# Build
|
|
177
|
-
npm run build
|
|
178
|
-
```
|
|
145
|
+
**Rate limiting:** Add an API key for higher limits, use smart-fetch (no limits), or try a different provider.
|
|
179
146
|
|
|
180
147
|
## License
|
|
181
148
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pi-unipi/web-api",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.16",
|
|
4
4
|
"description": "Web search, read, and summarize tools with provider-based backend selection for Pi coding agent",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -38,13 +38,20 @@
|
|
|
38
38
|
"README.md"
|
|
39
39
|
],
|
|
40
40
|
"dependencies": {
|
|
41
|
-
"@pi-unipi/core": "*"
|
|
41
|
+
"@pi-unipi/core": "*",
|
|
42
|
+
"defuddle": "^0.18.1",
|
|
43
|
+
"linkedom": "^0.18.12",
|
|
44
|
+
"lodash": "^4.17.21",
|
|
45
|
+
"mime-types": "^2.1.35",
|
|
46
|
+
"wreq-js": "^2.3.0"
|
|
42
47
|
},
|
|
43
48
|
"peerDependencies": {
|
|
44
49
|
"@mariozechner/pi-coding-agent": "*",
|
|
50
|
+
"@mariozechner/pi-tui": "*",
|
|
45
51
|
"@sinclair/typebox": "*"
|
|
46
52
|
},
|
|
47
53
|
"devDependencies": {
|
|
54
|
+
"@types/lodash": "^4.17.24",
|
|
48
55
|
"@types/node": "^25.6.0"
|
|
49
56
|
}
|
|
50
57
|
}
|
package/skills/web/SKILL.md
CHANGED
|
@@ -5,7 +5,7 @@ description: "Web search, read, and summarize tools with provider-based backend"
|
|
|
5
5
|
|
|
6
6
|
# Web Tools
|
|
7
7
|
|
|
8
|
-
Use these tools to access web content.
|
|
8
|
+
Use these tools to access web content. The read path uses a local smart-fetch engine by default — free, fast, and no API key required.
|
|
9
9
|
|
|
10
10
|
## web_search
|
|
11
11
|
|
|
@@ -24,21 +24,43 @@ web_search(query: "TypeScript generics tutorial")
|
|
|
24
24
|
web_search(query: "latest AI research", source: 4) # Use Tavily
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
-
##
|
|
27
|
+
## multi_web_content_read
|
|
28
28
|
|
|
29
|
-
Read
|
|
29
|
+
Read and extract content from URLs. Uses the **smart-fetch engine** by default (source=0 or omitted) — free, local, no API key required. Supports single URL or batch URLs.
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
-
|
|
31
|
+
**Default behavior (source=0):**
|
|
32
|
+
- Browser-grade TLS fingerprinting via wreq-js
|
|
33
|
+
- Intelligent content extraction via defuddle
|
|
34
|
+
- Returns clean markdown with metadata (title, author, site, word count)
|
|
35
|
+
- No API key required
|
|
33
36
|
|
|
34
37
|
**Parameters:**
|
|
35
|
-
- `url` (required): URL
|
|
36
|
-
- `source` (optional): Provider selection (1
|
|
38
|
+
- `url` (required): Single URL string or array of URLs for batch
|
|
39
|
+
- `source` (optional): Provider selection (0=smart-fetch, 1=Jina Reader, 2=Firecrawl, 3=Perplexity)
|
|
40
|
+
- `browser` (optional): TLS fingerprint profile (default: chrome_145)
|
|
41
|
+
- `os` (optional): OS fingerprint (default: windows)
|
|
42
|
+
- `format` (optional): Output format — markdown, html, text, json (default: markdown)
|
|
43
|
+
- `maxChars` (optional): Maximum content characters (default: 50000)
|
|
44
|
+
- `timeoutMs` (optional): Request timeout in ms (default: 15000)
|
|
45
|
+
- `removeImages` (optional): Strip image references (default: false)
|
|
46
|
+
- `includeReplies` (optional): Include comments/replies — `true`, `false`, or `"extractors"` (default: `"extractors"`)
|
|
47
|
+
- `proxy` (optional): Proxy URL
|
|
48
|
+
- `batchConcurrency` (optional): Concurrent requests for batch (default: 8)
|
|
49
|
+
- `verbose` (optional): Include metadata header (default: true)
|
|
37
50
|
|
|
38
51
|
**Examples:**
|
|
39
52
|
```
|
|
40
|
-
|
|
41
|
-
|
|
53
|
+
# Single URL (uses smart-fetch engine by default)
|
|
54
|
+
multi_web_content_read(url: "https://example.com/article")
|
|
55
|
+
|
|
56
|
+
# Batch URLs
|
|
57
|
+
multi_web_content_read(url: ["https://example.com/a", "https://example.com/b"])
|
|
58
|
+
|
|
59
|
+
# Use provider fallback (Jina Reader)
|
|
60
|
+
multi_web_content_read(url: "https://example.com/article", source: 1)
|
|
61
|
+
|
|
62
|
+
# Custom options
|
|
63
|
+
multi_web_content_read(url: "https://example.com/article", format: "json", maxChars: 10000)
|
|
42
64
|
```
|
|
43
65
|
|
|
44
66
|
## web_llm_summarize
|
|
@@ -61,7 +83,7 @@ web_llm_summarize(url: "https://example.com/research", prompt: "Extract key find
|
|
|
61
83
|
|
|
62
84
|
## Provider Selection
|
|
63
85
|
|
|
64
|
-
- Omit `source` for auto-selection (cheapest
|
|
86
|
+
- Omit `source` for auto-selection (smart-fetch engine for read, cheapest for search)
|
|
65
87
|
- Specify `source` number for specific provider
|
|
66
88
|
- If provider unavailable, tool throws descriptive error
|
|
67
89
|
|
|
@@ -75,6 +97,7 @@ web_llm_summarize(url: "https://example.com/research", prompt: "Extract key find
|
|
|
75
97
|
5. Perplexity (paid)
|
|
76
98
|
|
|
77
99
|
**Read providers:**
|
|
100
|
+
0. **Smart-Fetch Engine** (free, local) — default
|
|
78
101
|
1. Jina AI Reader (freemium)
|
|
79
102
|
2. Firecrawl (paid)
|
|
80
103
|
3. Perplexity (paid)
|
|
@@ -83,8 +106,27 @@ web_llm_summarize(url: "https://example.com/research", prompt: "Extract key find
|
|
|
83
106
|
1. Perplexity (paid)
|
|
84
107
|
2. LLM Summarize (uses pi's LLM)
|
|
85
108
|
|
|
109
|
+
## Smart-Fetch Engine
|
|
110
|
+
|
|
111
|
+
The smart-fetch engine is a local content extraction pipeline:
|
|
112
|
+
|
|
113
|
+
- **wreq-js**: Browser-grade TLS fingerprinting (bypasses Cloudflare, etc.)
|
|
114
|
+
- **defuddle**: Intelligent content extraction from HTML
|
|
115
|
+
- **linkedom**: Server-side DOM parsing
|
|
116
|
+
|
|
117
|
+
**Features:**
|
|
118
|
+
- No API key required
|
|
119
|
+
- Browser-level anti-bot bypass
|
|
120
|
+
- Clean markdown output with metadata
|
|
121
|
+
- Concurrent batch fetching with progress reporting
|
|
122
|
+
- Client-side meta redirect following
|
|
123
|
+
- Multiple output formats
|
|
124
|
+
|
|
125
|
+
**Configure defaults** via `/unipi:web-settings` → "Smart Fetch Defaults"
|
|
126
|
+
|
|
86
127
|
## Cost Awareness
|
|
87
128
|
|
|
129
|
+
- **Smart-Fetch Engine:** Free (read only, no API key)
|
|
88
130
|
- **DuckDuckGo:** Free (search only)
|
|
89
131
|
- **Jina:** Freemium (search + read)
|
|
90
132
|
- **SerpAPI/Tavily:** Paid (search)
|
|
@@ -98,6 +140,7 @@ Configure providers via `/unipi:web-settings` command.
|
|
|
98
140
|
|
|
99
141
|
- Add/remove API keys
|
|
100
142
|
- Enable/disable providers
|
|
143
|
+
- Configure smart-fetch defaults
|
|
101
144
|
- View provider status
|
|
102
145
|
|
|
103
146
|
## Cache
|
|
@@ -105,4 +148,4 @@ Configure providers via `/unipi:web-settings` command.
|
|
|
105
148
|
Web content is cached for 1 hour by default.
|
|
106
149
|
|
|
107
150
|
- Clear cache: `/unipi:web-cache-clear`
|
|
108
|
-
- Cache
|
|
151
|
+
- Cache includes smart-fetch results (keyed by URL + browser + format + maxChars)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — Engine Constants
|
|
3
|
+
*
|
|
4
|
+
* Default values for the smart-fetch engine.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** Default browser TLS fingerprint profile */
|
|
8
|
+
export const DEFAULT_BROWSER = "chrome_145";
|
|
9
|
+
|
|
10
|
+
/** Default OS fingerprint */
|
|
11
|
+
export const DEFAULT_OS = "windows";
|
|
12
|
+
|
|
13
|
+
/** Default maximum content length in characters */
|
|
14
|
+
export const DEFAULT_MAX_CHARS = 50000;
|
|
15
|
+
|
|
16
|
+
/** Default request timeout in milliseconds */
|
|
17
|
+
export const DEFAULT_TIMEOUT_MS = 15000;
|
|
18
|
+
|
|
19
|
+
/** Default batch concurrency */
|
|
20
|
+
export const DEFAULT_BATCH_CONCURRENCY = 8;
|
|
21
|
+
|
|
22
|
+
/** Default removeImages setting */
|
|
23
|
+
export const DEFAULT_REMOVE_IMAGES = false;
|
|
24
|
+
|
|
25
|
+
/** Default includeReplies setting */
|
|
26
|
+
export const DEFAULT_INCLUDE_REPLIES: boolean | "extractors" = "extractors";
|
|
27
|
+
|
|
28
|
+
/** Default output format */
|
|
29
|
+
export const DEFAULT_FORMAT = "markdown" as const;
|
|
30
|
+
|
|
31
|
+
/** Default HTTP headers */
|
|
32
|
+
export const DEFAULT_HEADERS: Record<string, string> = {
|
|
33
|
+
Accept:
|
|
34
|
+
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
35
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
36
|
+
};
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — Runtime Dependencies
|
|
3
|
+
*
|
|
4
|
+
* Lazy-loaded dependencies for the smart-fetch engine.
|
|
5
|
+
* Uses dynamic imports to handle optional native binding failures gracefully.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
let wreqModule: any = null;
|
|
9
|
+
let defuddleModule: any = null;
|
|
10
|
+
let lodashModule: any = null;
|
|
11
|
+
let mimeTypesModule: any = null;
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Get the wreq-js module.
|
|
15
|
+
* Throws a helpful error if the module is not available.
|
|
16
|
+
*
|
|
17
|
+
* @returns wreq-js module
|
|
18
|
+
*/
|
|
19
|
+
export async function getWreq(): Promise<any> {
|
|
20
|
+
if (wreqModule) {
|
|
21
|
+
return wreqModule;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
try {
|
|
25
|
+
// Use dynamic import for ESM compatibility
|
|
26
|
+
wreqModule = await import("wreq-js");
|
|
27
|
+
return wreqModule;
|
|
28
|
+
} catch (error) {
|
|
29
|
+
throw new Error(
|
|
30
|
+
`wreq-js is not available. ` +
|
|
31
|
+
`This is required for browser-grade TLS fingerprinting. ` +
|
|
32
|
+
`Run: npm install wreq-js\n` +
|
|
33
|
+
`Error: ${error instanceof Error ? error.message : String(error)}`
|
|
34
|
+
);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Get the defuddle module.
|
|
40
|
+
* Throws a helpful error if the module is not available.
|
|
41
|
+
*
|
|
42
|
+
* @returns defuddle module
|
|
43
|
+
*/
|
|
44
|
+
export async function getDefuddle(): Promise<any> {
|
|
45
|
+
if (defuddleModule) {
|
|
46
|
+
return defuddleModule;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
try {
|
|
50
|
+
defuddleModule = await import("defuddle");
|
|
51
|
+
return defuddleModule;
|
|
52
|
+
} catch (error) {
|
|
53
|
+
throw new Error(
|
|
54
|
+
`defuddle is not available. ` +
|
|
55
|
+
`This is required for intelligent content extraction. ` +
|
|
56
|
+
`Run: npm install defuddle\n` +
|
|
57
|
+
`Error: ${error instanceof Error ? error.message : String(error)}`
|
|
58
|
+
);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Get the lodash module.
|
|
64
|
+
*
|
|
65
|
+
* @returns lodash module
|
|
66
|
+
*/
|
|
67
|
+
export async function getLodash(): Promise<any> {
|
|
68
|
+
if (lodashModule) {
|
|
69
|
+
return lodashModule;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
try {
|
|
73
|
+
lodashModule = await import("lodash");
|
|
74
|
+
return lodashModule;
|
|
75
|
+
} catch (error) {
|
|
76
|
+
throw new Error(
|
|
77
|
+
`lodash is not available. ` +
|
|
78
|
+
`Run: npm install lodash\n` +
|
|
79
|
+
`Error: ${error instanceof Error ? error.message : String(error)}`
|
|
80
|
+
);
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Get the mime-types module.
|
|
86
|
+
*
|
|
87
|
+
* @returns mime-types module
|
|
88
|
+
*/
|
|
89
|
+
export async function getMimeTypes(): Promise<any> {
|
|
90
|
+
if (mimeTypesModule) {
|
|
91
|
+
return mimeTypesModule;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
try {
|
|
95
|
+
mimeTypesModule = await import("mime-types");
|
|
96
|
+
return mimeTypesModule;
|
|
97
|
+
} catch (error) {
|
|
98
|
+
throw new Error(
|
|
99
|
+
`mime-types is not available. ` +
|
|
100
|
+
`Run: npm install mime-types\n` +
|
|
101
|
+
`Error: ${error instanceof Error ? error.message : String(error)}`
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Check if all required dependencies are available.
|
|
108
|
+
*
|
|
109
|
+
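* @returns An object whose `available` flag is true when every dependency loaded, and whose `missing` array names the packages that failed to import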
|
|
110
|
+
*/
|
|
111
|
+
export async function checkDependencies(): Promise<{
|
|
112
|
+
available: boolean;
|
|
113
|
+
missing: string[];
|
|
114
|
+
}> {
|
|
115
|
+
const missing: string[] = [];
|
|
116
|
+
|
|
117
|
+
try {
|
|
118
|
+
await getWreq();
|
|
119
|
+
} catch {
|
|
120
|
+
missing.push("wreq-js");
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
try {
|
|
124
|
+
await getDefuddle();
|
|
125
|
+
} catch {
|
|
126
|
+
missing.push("defuddle");
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
try {
|
|
130
|
+
await getLodash();
|
|
131
|
+
} catch {
|
|
132
|
+
missing.push("lodash");
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
try {
|
|
136
|
+
await getMimeTypes();
|
|
137
|
+
} catch {
|
|
138
|
+
missing.push("mime-types");
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
return {
|
|
142
|
+
available: missing.length === 0,
|
|
143
|
+
missing,
|
|
144
|
+
};
|
|
145
|
+
}
|