imperium-crawl 1.1.9 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -12
- package/dist/batch/index.d.ts +3 -0
- package/dist/batch/index.d.ts.map +1 -0
- package/dist/batch/index.js +2 -0
- package/dist/batch/index.js.map +1 -0
- package/dist/batch/job-store.d.ts +15 -0
- package/dist/batch/job-store.d.ts.map +1 -0
- package/dist/batch/job-store.js +77 -0
- package/dist/batch/job-store.js.map +1 -0
- package/dist/batch/types.d.ts +20 -0
- package/dist/batch/types.d.ts.map +1 -0
- package/dist/batch/types.js +2 -0
- package/dist/batch/types.js.map +1 -0
- package/dist/cli-config.d.ts +21 -0
- package/dist/cli-config.d.ts.map +1 -0
- package/dist/cli-config.js +51 -0
- package/dist/cli-config.js.map +1 -0
- package/dist/cli-onboarding.d.ts +10 -0
- package/dist/cli-onboarding.d.ts.map +1 -0
- package/dist/cli-onboarding.js +128 -0
- package/dist/cli-onboarding.js.map +1 -0
- package/dist/cli-tui.d.ts +12 -0
- package/dist/cli-tui.d.ts.map +1 -0
- package/dist/cli-tui.js +945 -0
- package/dist/cli-tui.js.map +1 -0
- package/dist/cli-ui.d.ts +26 -0
- package/dist/cli-ui.d.ts.map +1 -0
- package/dist/cli-ui.js +58 -0
- package/dist/cli-ui.js.map +1 -0
- package/dist/cli.d.ts +8 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +161 -35
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +6 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +20 -1
- package/dist/config.js.map +1 -1
- package/dist/constants.d.ts +3 -2
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +3 -2
- package/dist/constants.js.map +1 -1
- package/dist/formatters.d.ts +14 -0
- package/dist/formatters.d.ts.map +1 -1
- package/dist/formatters.js +10 -0
- package/dist/formatters.js.map +1 -1
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/extractor.d.ts +18 -0
- package/dist/llm/extractor.d.ts.map +1 -0
- package/dist/llm/extractor.js +104 -0
- package/dist/llm/extractor.js.map +1 -0
- package/dist/llm/index.d.ts +22 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +36 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/providers/anthropic.d.ts +8 -0
- package/dist/llm/providers/anthropic.d.ts.map +1 -0
- package/dist/llm/providers/anthropic.js +45 -0
- package/dist/llm/providers/anthropic.js.map +1 -0
- package/dist/llm/providers/minimax.d.ts +17 -0
- package/dist/llm/providers/minimax.d.ts.map +1 -0
- package/dist/llm/providers/minimax.js +20 -0
- package/dist/llm/providers/minimax.js.map +1 -0
- package/dist/llm/providers/openai.d.ts +9 -0
- package/dist/llm/providers/openai.d.ts.map +1 -0
- package/dist/llm/providers/openai.js +38 -0
- package/dist/llm/providers/openai.js.map +1 -0
- package/dist/sessions/index.d.ts +3 -0
- package/dist/sessions/index.d.ts.map +1 -0
- package/dist/sessions/index.js +2 -0
- package/dist/sessions/index.js.map +1 -0
- package/dist/sessions/manager.d.ts +15 -0
- package/dist/sessions/manager.d.ts.map +1 -0
- package/dist/sessions/manager.js +85 -0
- package/dist/sessions/manager.js.map +1 -0
- package/dist/sessions/types.d.ts +18 -0
- package/dist/sessions/types.d.ts.map +1 -0
- package/dist/sessions/types.js +2 -0
- package/dist/sessions/types.js.map +1 -0
- package/dist/tools/ai-extract.d.ts +33 -0
- package/dist/tools/ai-extract.d.ts.map +1 -0
- package/dist/tools/ai-extract.js +96 -0
- package/dist/tools/ai-extract.js.map +1 -0
- package/dist/tools/batch-scrape.d.ts +37 -0
- package/dist/tools/batch-scrape.d.ts.map +1 -0
- package/dist/tools/batch-scrape.js +140 -0
- package/dist/tools/batch-scrape.js.map +1 -0
- package/dist/tools/create-skill.d.ts +2 -2
- package/dist/tools/delete-job.d.ts +18 -0
- package/dist/tools/delete-job.d.ts.map +1 -0
- package/dist/tools/delete-job.js +31 -0
- package/dist/tools/delete-job.js.map +1 -0
- package/dist/tools/extract.d.ts +4 -1
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +49 -1
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +15 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/interact.d.ts +96 -0
- package/dist/tools/interact.d.ts.map +1 -0
- package/dist/tools/interact.js +254 -0
- package/dist/tools/interact.js.map +1 -0
- package/dist/tools/job-status.d.ts +18 -0
- package/dist/tools/job-status.d.ts.map +1 -0
- package/dist/tools/job-status.js +42 -0
- package/dist/tools/job-status.js.map +1 -0
- package/dist/tools/list-jobs.d.ts +12 -0
- package/dist/tools/list-jobs.d.ts.map +1 -0
- package/dist/tools/list-jobs.js +50 -0
- package/dist/tools/list-jobs.js.map +1 -0
- package/dist/tools/manifest.d.ts +19 -0
- package/dist/tools/manifest.d.ts.map +1 -0
- package/dist/tools/manifest.js +110 -0
- package/dist/tools/manifest.js.map +1 -0
- package/dist/tools/readability.d.ts +2 -2
- package/dist/tools/scrape.d.ts +4 -4
- package/package.json +14 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# imperium-crawl
|
|
2
2
|
|
|
3
|
-
The most powerful open-source MCP server for web scraping, crawling, and data extraction. **
|
|
3
|
+
The most powerful open-source MCP server for web scraping, crawling, and data extraction. **22 tools. Zero API keys required for scraping. One `npx` command to install.**
|
|
4
4
|
|
|
5
5
|
While others charge $19+/month for basic scraping, imperium-crawl gives you **more features for free** — including capabilities that no other MCP server offers at any price.
|
|
6
6
|
|
|
@@ -24,9 +24,12 @@ While others charge $19+/month for basic scraping, imperium-crawl gives you **mo
|
|
|
24
24
|
| Circuit breaker + jitter backoff | **Yes** | No | No | No | No |
|
|
25
25
|
| URL normalization (11 steps) | **Yes** | No | No | No | No |
|
|
26
26
|
| Adaptive learning (self-improving) | **Yes** | No | No | No | No |
|
|
27
|
+
| AI-powered data extraction | **Yes** | No | No | No | No |
|
|
28
|
+
| Browser automation + sessions | **Yes** | No | No | No | No |
|
|
29
|
+
| Batch processing with resume | **Yes** | No | No | No | No |
|
|
27
30
|
| Self-hosted | **Yes** | No | N/A | Yes | No |
|
|
28
31
|
| Requires external service | **No** | Yes | No | No | Yes |
|
|
29
|
-
| Total tools | **
|
|
32
|
+
| Total tools | **22** | 5 | 2 | 2 | 4 |
|
|
30
33
|
|
|
31
34
|
> **TLDR:** More tools, more features, zero cost, no external dependencies. Self-hosted, open-source, and it runs on your machine.
|
|
32
35
|
|
|
@@ -56,6 +59,8 @@ Add to your MCP client config (Claude Code, Cursor, VS Code, Windsurf, or any MC
|
|
|
56
59
|
"env": {
|
|
57
60
|
"BRAVE_API_KEY": "your-brave-api-key",
|
|
58
61
|
"TWOCAPTCHA_API_KEY": "your-2captcha-api-key",
|
|
62
|
+
"LLM_API_KEY": "your-api-key",
|
|
63
|
+
"LLM_PROVIDER": "anthropic",
|
|
59
64
|
"PROXY_URL": "http://user:pass@proxy:8080",
|
|
60
65
|
"PROXY_URLS": "http://proxy1:8080,socks5://proxy2:1080"
|
|
61
66
|
}
|
|
@@ -64,12 +69,14 @@ Add to your MCP client config (Claude Code, Cursor, VS Code, Windsurf, or any MC
|
|
|
64
69
|
}
|
|
65
70
|
```
|
|
66
71
|
|
|
67
|
-
> **Works out of the box with zero API keys** —
|
|
72
|
+
> **Works out of the box with zero API keys** — 16 tools are fully functional without any configuration (6 scraping + 3 skills + 3 API discovery + 4 batch). To unlock full power, add optional API keys:
|
|
68
73
|
>
|
|
69
74
|
> | Key | What it unlocks | Where to get it |
|
|
70
75
|
> |-----|----------------|-----------------|
|
|
71
76
|
> | `BRAVE_API_KEY` | 4 search tools (web, news, image, video) | [brave.com/search/api](https://brave.com/search/api/) (free tier available) |
|
|
72
77
|
> | `TWOCAPTCHA_API_KEY` | Auto CAPTCHA solving (reCAPTCHA v2/v3, hCaptcha, Turnstile) | [2captcha.com](https://2captcha.com/) |
|
|
78
|
+
> | `LLM_API_KEY` | AI-powered data extraction (`ai_extract` tool) | API key for your chosen LLM provider (Anthropic, OpenAI, or MiniMax) |
|
|
79
|
+
> | `CHROME_PROFILE_PATH` | Authenticated browser sessions (use your Chrome cookies) | Path to Chrome user data dir |
|
|
73
80
|
> | `PROXY_URL` | Route all requests through a proxy (http/https/socks4/socks5) | Any proxy provider |
|
|
74
81
|
|
|
75
82
|
### Enable full stealth (Level 3 — headless browser)
|
|
@@ -81,20 +88,21 @@ npx playwright install chromium
|
|
|
81
88
|
|
|
82
89
|
### AI Agent Guide (SKILL.md)
|
|
83
90
|
|
|
84
|
-
imperium-crawl ships with [`SKILL.md`](./SKILL.md) — a structured guide that teaches AI agents (Claude, GPT, etc.) how to use all
|
|
91
|
+
imperium-crawl ships with [`SKILL.md`](./SKILL.md) — a structured guide that teaches AI agents (Claude, GPT, etc.) how to use all 22 tools effectively. It includes 9 proven workflows, decision trees, error recovery strategies, and advanced patterns like manual skill refinement.
|
|
85
92
|
|
|
86
93
|
**Without SKILL.md**, agents can call tools but won't know which tool to try first, when to fallback, or how to chain tools together optimally.
|
|
87
94
|
|
|
88
95
|
**With SKILL.md**, agents follow battle-tested workflows — readability → scrape → extract fallback chains, auto-detect → manual refinement for skills, search → select → deep-scrape for research, and more.
|
|
89
96
|
|
|
90
|
-
**
|
|
97
|
+
**Three ways to connect SKILL.md to any agent:**
|
|
91
98
|
|
|
92
99
|
| Method | Setup | Works with |
|
|
93
100
|
|--------|-------|-----------|
|
|
94
101
|
| **MCP + SKILL.md** | Add imperium-crawl as MCP server + SKILL.md in agent context | Claude Code, Cursor, Windsurf, any MCP client |
|
|
95
102
|
| **CLI + SKILL.md** | `npm i -g imperium-crawl` + SKILL.md in agent context | **Any agent with bash access** — OpenClaw, ChatGPT, GPT agents, custom agents, anything |
|
|
103
|
+
| **TUI mode** | `imperium-crawl tui` — interactive slash-command terminal | Direct human use, demos, debugging |
|
|
96
104
|
|
|
97
|
-
The CLI approach is universal — any agent that can run shell commands can use all
|
|
105
|
+
The CLI approach is universal — any agent that can run shell commands can use all 22 tools. No MCP required.
|
|
98
106
|
|
|
99
107
|
| AI Agent | How to add SKILL.md |
|
|
100
108
|
|----------|-------------------|
|
|
@@ -107,7 +115,7 @@ The CLI approach is universal — any agent that can run shell commands can use
|
|
|
107
115
|
|
|
108
116
|
## CLI Mode
|
|
109
117
|
|
|
110
|
-
imperium-crawl works as both an **MCP server** and a **standalone CLI tool**. All
|
|
118
|
+
imperium-crawl works as both an **MCP server** and a **standalone CLI tool**. All 22 tools are available as subcommands:
|
|
111
119
|
|
|
112
120
|
```bash
|
|
113
121
|
# Scrape a website to markdown
|
|
@@ -119,6 +127,18 @@ imperium-crawl crawl --url https://blog.cloudflare.com --max-depth 2 --max-pages
|
|
|
119
127
|
# Extract structured data with CSS selectors
|
|
120
128
|
imperium-crawl extract --url https://news.ycombinator.com --selectors '{"title":".titleline a","score":".score"}' --items-selector ".athing"
|
|
121
129
|
|
|
130
|
+
# AI-powered extraction — describe what you want in plain English
|
|
131
|
+
imperium-crawl ai-extract --url https://amazon.com/dp/B0D1XD1ZV3 --schema "extract product name, price, rating, and review count"
|
|
132
|
+
|
|
133
|
+
# Browser automation — interact with pages
|
|
134
|
+
imperium-crawl interact --url https://example.com --actions '[{"type":"click","selector":"#login"},{"type":"type","selector":"#email","text":"user@example.com"}]'
|
|
135
|
+
|
|
136
|
+
# Batch scrape multiple URLs in parallel
|
|
137
|
+
imperium-crawl batch-scrape --urls '["https://site1.com","https://site2.com","https://site3.com"]' --concurrency 3
|
|
138
|
+
|
|
139
|
+
# List batch jobs
|
|
140
|
+
imperium-crawl list-jobs
|
|
141
|
+
|
|
122
142
|
# Discover hidden APIs on any website
|
|
123
143
|
imperium-crawl discover-apis --url https://weather.com
|
|
124
144
|
|
|
@@ -127,6 +147,9 @@ imperium-crawl search --query "latest AI news" --count 5
|
|
|
127
147
|
|
|
128
148
|
# Take a screenshot
|
|
129
149
|
imperium-crawl screenshot --url https://github.com --full-page
|
|
150
|
+
|
|
151
|
+
# Interactive setup wizard
|
|
152
|
+
imperium-crawl setup
|
|
130
153
|
```
|
|
131
154
|
|
|
132
155
|
### Output Formats
|
|
@@ -151,6 +174,14 @@ imperium-crawl scrape --url https://example.com --pretty
|
|
|
151
174
|
imperium-crawl scrape --url https://example.com --output result.json
|
|
152
175
|
```
|
|
153
176
|
|
|
177
|
+
### TUI Mode
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
imperium-crawl tui
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Interactive slash-command terminal with parameter prompts, table rendering, markdown display, and session state. Use `/save` to export results and `/again` to re-run the last command.
|
|
184
|
+
|
|
154
185
|
### Help
|
|
155
186
|
|
|
156
187
|
```bash
|
|
@@ -159,11 +190,11 @@ imperium-crawl scrape --help # Help for specific tool
|
|
|
159
190
|
imperium-crawl --version # Show version
|
|
160
191
|
```
|
|
161
192
|
|
|
162
|
-
> **No arguments** = starts as MCP server (stdio). **With subcommand** = runs as CLI tool.
|
|
193
|
+
> **No arguments** = starts as MCP server (stdio). **With subcommand** = runs as CLI tool. **`tui`** = interactive terminal.
|
|
163
194
|
|
|
164
195
|
---
|
|
165
196
|
|
|
166
|
-
##
|
|
197
|
+
## 22 Tools
|
|
167
198
|
|
|
168
199
|
### Scraping (no API key needed)
|
|
169
200
|
|
|
@@ -201,6 +232,27 @@ imperium-crawl --version # Show version
|
|
|
201
232
|
| **query_api** | Call any API endpoint directly with stealth headers. Bypass DOM rendering entirely for 10x faster data access. Use after `discover_apis` to hit endpoints directly. |
|
|
202
233
|
| **monitor_websocket** | Capture real-time WebSocket messages from any page — financial tickers, chat feeds, live dashboards. Returns connection details and message payloads. **No other MCP server does this.** |
|
|
203
234
|
|
|
235
|
+
### AI Extraction (requires LLM API key)
|
|
236
|
+
|
|
237
|
+
| Tool | What It Does |
|
|
238
|
+
|------|-------------|
|
|
239
|
+
| **ai_extract** | AI-powered data extraction — describe what you want in natural language or provide a JSON schema. Supports auto mode (LLM decides what to extract), 3 providers (Anthropic, OpenAI, MiniMax). The `extract` tool also supports `llm_fallback: true` for hybrid CSS→AI extraction. |
|
|
240
|
+
|
|
241
|
+
### Interaction (no API key needed, requires Playwright)
|
|
242
|
+
|
|
243
|
+
| Tool | What It Does |
|
|
244
|
+
|------|-------------|
|
|
245
|
+
| **interact** | Browser automation with 10 action types (click, type, scroll, wait, screenshot, evaluate, select, hover, press, navigate). Session persistence saves/restores cookies across calls — build login flows and multi-step workflows. |
|
|
246
|
+
|
|
247
|
+
### Batch Processing (no API key needed)
|
|
248
|
+
|
|
249
|
+
| Tool | What It Does |
|
|
250
|
+
|------|-------------|
|
|
251
|
+
| **batch_scrape** | Parallel URL scraping with configurable concurrency, soft failure (continues on errors), and resume support via job_id. Optional AI extraction per URL. |
|
|
252
|
+
| **list_jobs** | List all batch jobs with status, progress, and timestamps. |
|
|
253
|
+
| **job_status** | Get full results for a specific batch job including per-URL outcomes. |
|
|
254
|
+
| **delete_job** | Clean up completed or failed batch jobs. |
|
|
255
|
+
|
|
204
256
|
---
|
|
205
257
|
|
|
206
258
|
## Stealth Engine
|
|
@@ -340,7 +392,7 @@ Turn any website into an API. No documentation needed.
|
|
|
340
392
|
- **Per-domain circuit breaker** — 5 consecutive failures opens the circuit for 60s, then half-open probing with automatic recovery
|
|
341
393
|
- **URL normalization** — 11-step pipeline removes tracking params (utm_*, fbclid, gclid), sorts query params, normalizes encoding
|
|
342
394
|
- **Concurrency limiting** — per-domain request throttling via p-queue
|
|
343
|
-
- **Input validation** — all
|
|
395
|
+
- **Input validation** — all 22 tool schemas enforce strict bounds (URL length, query size, concurrency limits, body size)
|
|
344
396
|
- **HTTP transport hardening** — rate limiting (100 req/min), 1MB body limit, 5min request timeout
|
|
345
397
|
- **Proxy support** — single proxy (`PROXY_URL`) or rotating pool (`PROXY_URLS`) with http/https/socks4/socks5 support
|
|
346
398
|
- **Browser pool** — keyed by proxy URL, auto-eviction, configurable pool size
|
|
@@ -372,8 +424,14 @@ Every tool tested against production websites with real anti-bot defenses:
|
|
|
372
424
|
| 🔓 **discover_apis** | Airbnb Paris | **34 hidden APIs** — DataDome anti-bot, Google Maps key, internal APIs |
|
|
373
425
|
| ⚡ **query_api** | jsonplaceholder | Direct JSON API call with stealth headers |
|
|
374
426
|
| 📡 **monitor_websocket** | Binance BTC/USDT | **3 WebSocket connections, 23 live messages** — BTC price live |
|
|
427
|
+
| 🧠 **ai_extract** | Amazon product page | AI extracted name, price, rating, review count — natural language schema |
|
|
428
|
+
| 🖱️ **interact** | Login flow | Click → type email → type password → submit — session cookies persisted |
|
|
429
|
+
| 📦 **batch_scrape** | 10 news sites | Parallel scrape with concurrency 3, soft failure, 9/10 succeeded |
|
|
430
|
+
| 📋 **list_jobs** | — | Lists all batch jobs with status and progress |
|
|
431
|
+
| 📊 **job_status** | Batch job | Full per-URL results with timing and extracted data |
|
|
432
|
+
| 🗑️ **delete_job** | Completed job | Cleaned up job data from disk |
|
|
375
433
|
|
|
376
|
-
> 🏆 **
|
|
434
|
+
> 🏆 **22/22 tools working. 58 hidden APIs discovered. Live crypto feed captured. AI extraction. Browser automation. Zero API keys needed for scraping.**
|
|
377
435
|
|
|
378
436
|
---
|
|
379
437
|
|
|
@@ -389,6 +447,12 @@ Every tool tested against production websites with real anti-bot defenses:
|
|
|
389
447
|
| `PROXY_URLS` | No | Comma-separated proxy URLs for rotation |
|
|
390
448
|
| `BROWSER_POOL_SIZE` | No | Max pooled browser instances (default: 3) |
|
|
391
449
|
| `RESPECT_ROBOTS` | No | Respect robots.txt (default: `true`) |
|
|
450
|
+
| `LLM_API_KEY` | No | API key for the provider selected by `LLM_PROVIDER` — Anthropic, OpenAI, or MiniMax (enables `ai_extract` tool) |
|
|
451
|
+
| `LLM_PROVIDER` | No | `anthropic`, `openai`, or `minimax` (default: `anthropic`) |
|
|
452
|
+
| `LLM_MODEL` | No | Override default LLM model |
|
|
453
|
+
| `CHROME_PROFILE_PATH` | No | Chrome user data dir for authenticated browser sessions |
|
|
454
|
+
| `NO_COLOR` | No | Disable colored output (standard convention) |
|
|
455
|
+
| `CI` | No | Auto-detected; disables TTY features (spinners, colors) |
|
|
392
456
|
|
|
393
457
|
---
|
|
394
458
|
|
|
@@ -399,7 +463,7 @@ git clone https://github.com/ceoimperiumprojects/imperium-crawl
|
|
|
399
463
|
cd imperium-crawl
|
|
400
464
|
npm install
|
|
401
465
|
npm run build
|
|
402
|
-
npm test #
|
|
466
|
+
npm test # 332 tests
|
|
403
467
|
npm start
|
|
404
468
|
```
|
|
405
469
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/batch/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAC3D,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/batch/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { BatchJob } from "./types.js";
|
|
2
|
+
export declare class JobStore {
|
|
3
|
+
private cache;
|
|
4
|
+
private dir;
|
|
5
|
+
constructor(dir?: string);
|
|
6
|
+
private jobPath;
|
|
7
|
+
save(job: BatchJob): Promise<void>;
|
|
8
|
+
load(id: string): Promise<BatchJob | null>;
|
|
9
|
+
delete(id: string): Promise<void>;
|
|
10
|
+
list(): Promise<string[]>;
|
|
11
|
+
}
|
|
12
|
+
export declare function getJobStore(): JobStore;
|
|
13
|
+
/** Reset singleton (for testing) */
|
|
14
|
+
export declare function resetJobStore(): void;
|
|
15
|
+
//# sourceMappingURL=job-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"job-store.d.ts","sourceRoot":"","sources":["../../src/batch/job-store.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3C,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAA+B;IAC5C,OAAO,CAAC,GAAG,CAAS;gBAER,GAAG,CAAC,EAAE,MAAM;IAIxB,OAAO,CAAC,OAAO;IAMT,IAAI,CAAC,GAAG,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IAWlC,IAAI,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC;IAwB1C,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASjC,IAAI,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;CAUhC;AAMD,wBAAgB,WAAW,IAAI,QAAQ,CAKtC;AAED,oCAAoC;AACpC,wBAAgB,aAAa,IAAI,IAAI,CAEpC"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { getJobsDir } from "../config.js";
|
|
4
|
+
/**
 * File-backed store for batch jobs with an in-memory cache in front of it.
 *
 * Each job is persisted as `<dir>/<sanitized-id>.json`. A save writes to a
 * temporary file first and then renames it over the target, so a reader never
 * observes a half-written job file.
 */
export class JobStore {
    /** Jobs saved or loaded through this instance, keyed by the id exactly as the caller passed it. */
    cache = new Map();
    /** Directory that holds the job files. */
    dir;
    /** Per-process sequence number used to give every save its own temp file. */
    static #tmpSeq = 0;
    /**
     * @param {string} [dir] - Directory for job files; falls back to `getJobsDir()` when omitted.
     */
    constructor(dir) {
        this.dir = dir ?? getJobsDir();
    }
    /** True when `err` carries the `ENOENT` code, i.e. the file or directory does not exist. */
    static #isMissing(err) {
        return err?.code === "ENOENT";
    }
    /** Turn any thrown value into a printable message. */
    static #describe(err) {
        return err instanceof Error ? err.message : String(err);
    }
    /**
     * Map a job id to its file path inside the store directory.
     * @param {string} id - Job id as supplied by the caller.
     * @returns {string} Path of the job's JSON file.
     */
    jobPath(id) {
        // Sanitize id to prevent path traversal
        const safe = id.replace(/[^a-zA-Z0-9_\-]/g, "_");
        return path.join(this.dir, `${safe}.json`);
    }
    /**
     * Stamp the job with a fresh `updated_at`, cache it and persist it to disk.
     * @param {object} job - Job to persist; `job.id` selects the cache key and the file name.
     * @returns {Promise<void>}
     * @throws Re-throws any filesystem error from mkdir/write/rename.
     */
    async save(job) {
        const updated = { ...job, updated_at: new Date().toISOString() };
        this.cache.set(job.id, updated);
        await fs.mkdir(this.dir, { recursive: true });
        const filePath = this.jobPath(job.id);
        // A unique temp name per call: the store is shared process-wide, and two
        // overlapping saves of the same job must not write to (or rename away)
        // each other's temp file. Overlapping saves are still unordered — the
        // rename that lands last wins.
        JobStore.#tmpSeq += 1;
        const tmpPath = `${filePath}.${process.pid}.${JobStore.#tmpSeq}.tmp`;
        try {
            await fs.writeFile(tmpPath, JSON.stringify(updated, null, 2), "utf-8");
            await fs.rename(tmpPath, filePath);
        }
        catch (err) {
            // Best-effort cleanup so a failed save does not leave an orphaned
            // temp file behind; the original error is what the caller needs.
            await fs.unlink(tmpPath).catch(() => { });
            throw err;
        }
    }
    /**
     * Fetch a job from the cache, falling back to its file on disk.
     * @param {string} id - Job id.
     * @returns {Promise<object|null>} The job, or `null` when it is missing or unreadable.
     */
    async load(id) {
        if (this.cache.has(id))
            return this.cache.get(id);
        try {
            const data = await fs.readFile(this.jobPath(id), "utf-8");
            const job = JSON.parse(data);
            this.cache.set(id, job);
            return job;
        }
        catch (err) {
            // A missing file is the normal "no such job" case; anything else
            // (permissions, corrupt JSON, ...) is worth reporting.
            if (!JobStore.#isMissing(err)) {
                console.error("[batch] Failed to load job:", JobStore.#describe(err));
            }
            return null;
        }
    }
    /**
     * Remove a job from the cache and from disk. Never throws.
     * @param {string} id - Job id.
     * @returns {Promise<void>}
     */
    async delete(id) {
        this.cache.delete(id);
        try {
            await fs.unlink(this.jobPath(id));
        }
        catch (err) {
            // Job didn't exist on disk — fine. Other failures are reported the
            // same way load() reports them instead of vanishing silently.
            if (!JobStore.#isMissing(err)) {
                console.error("[batch] Failed to delete job:", JobStore.#describe(err));
            }
        }
    }
    /**
     * List the ids of all jobs that have a file in the store directory.
     * The ids are derived from file names, so they are the sanitized form.
     * @returns {Promise<string[]>} Job ids; empty when the directory cannot be read.
     */
    async list() {
        try {
            const files = await fs.readdir(this.dir);
            // Temp files written by save() end in ".tmp" and are excluded by the
            // ".json" suffix test; the ".tmp.json" exclusion is kept from the
            // original filter.
            return files
                .filter((f) => f.endsWith(".json") && !f.endsWith(".tmp.json"))
                .map((f) => f.replace(/\.json$/, ""));
        }
        catch (err) {
            // No directory yet simply means no jobs.
            if (!JobStore.#isMissing(err)) {
                console.error("[batch] Failed to list jobs:", JobStore.#describe(err));
            }
            return [];
        }
    }
}
|
|
65
|
+
// ── Singleton ──
|
|
66
|
+
let store = null;
|
|
67
|
+
/**
 * Return the shared JobStore, constructing it with default settings the
 * first time it is requested.
 * @returns {JobStore} The module-wide store instance.
 */
export function getJobStore() {
    // `store` is either null or a JobStore instance, so nullish assignment
    // creates it exactly once.
    store ??= new JobStore();
    return store;
}
|
|
73
|
+
/**
 * Drop the shared JobStore so the next getJobStore() call builds a new one.
 * Intended for tests.
 * @returns {void}
 */
export function resetJobStore() {
    store = null;
}
|
|
77
|
+
//# sourceMappingURL=job-store.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"job-store.js","sourceRoot":"","sources":["../../src/batch/job-store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAG1C,MAAM,OAAO,QAAQ;IACX,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAC;IACpC,GAAG,CAAS;IAEpB,YAAY,GAAY;QACtB,IAAI,CAAC,GAAG,GAAG,GAAG,IAAI,UAAU,EAAE,CAAC;IACjC,CAAC;IAEO,OAAO,CAAC,EAAU;QACxB,wCAAwC;QACxC,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,OAAO,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,GAAa;QACtB,MAAM,OAAO,GAAa,EAAE,GAAG,GAAG,EAAE,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;QAC3E,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QAEhC,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtC,MAAM,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;QAClC,MAAM,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACvE,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAU;QACnB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;QAEnD,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,OAAO,CAAC,CAAC;YAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAa,CAAC;YACzC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;YACxB,OAAO,GAAG,CAAC;QACb,CAAC;QAAC,OAAO,GAAY,EAAE,CAAC;YACtB,MAAM,QAAQ,GACZ,GAAG;gBACH,OAAO,GAAG,KAAK,QAAQ;gBACvB,MAAM,IAAI,GAAG;gBACZ,GAA6B,CAAC,IAAI,KAAK,QAAQ,CAAC;YACnD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,KAAK,CACX,6BAA6B,EAC7B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CACjD,CAAC;YACJ,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,EAAU;QACrB,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAA
C,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC;QACpC,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;QACpC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACzC,OAAO,KAAK;iBACT,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;iBAC9D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;CACF;AAED,kBAAkB;AAElB,IAAI,KAAK,GAAoB,IAAI,CAAC;AAElC,MAAM,UAAU,WAAW;IACzB,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,KAAK,GAAG,IAAI,QAAQ,EAAE,CAAC;IACzB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,oCAAoC;AACpC,MAAM,UAAU,aAAa;IAC3B,KAAK,GAAG,IAAI,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
export interface BatchJobResult {
|
|
2
|
+
url: string;
|
|
3
|
+
success: boolean;
|
|
4
|
+
content?: string;
|
|
5
|
+
data?: unknown;
|
|
6
|
+
error?: string;
|
|
7
|
+
status_code?: number;
|
|
8
|
+
duration_ms: number;
|
|
9
|
+
}
|
|
10
|
+
export interface BatchJob {
|
|
11
|
+
id: string;
|
|
12
|
+
status: "running" | "completed" | "failed";
|
|
13
|
+
urls_total: number;
|
|
14
|
+
urls_completed: number;
|
|
15
|
+
urls_failed: number;
|
|
16
|
+
results: BatchJobResult[];
|
|
17
|
+
created_at: string;
|
|
18
|
+
updated_at: string;
|
|
19
|
+
}
|
|
20
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/batch/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAC;IAC3C,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/batch/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config file management for imperium-crawl CLI.
|
|
3
|
+
*
|
|
4
|
+
* Saves API keys to ~/.imperium-crawl/config.json so users don't
|
|
5
|
+
* need to set environment variables manually.
|
|
6
|
+
*
|
|
7
|
+
* Priority: process.env (system) > config.json
|
|
8
|
+
* applyCliConfig() fills in env vars from config only if not already set.
|
|
9
|
+
*/
|
|
10
|
+
export declare function getCliConfigPath(): string;
|
|
11
|
+
export declare function loadCliConfig(): Record<string, string>;
|
|
12
|
+
export declare function saveCliConfig(config: Record<string, string>): void;
|
|
13
|
+
/**
|
|
14
|
+
* Apply config.json values to process.env.
|
|
15
|
+
* System env vars take priority — config values are only applied
|
|
16
|
+
* when the key is not already set.
|
|
17
|
+
*
|
|
18
|
+
* Call this once at startup, before any tool initialization.
|
|
19
|
+
*/
|
|
20
|
+
export declare function applyCliConfig(): void;
|
|
21
|
+
//# sourceMappingURL=cli-config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-config.d.ts","sourceRoot":"","sources":["../src/cli-config.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AASH,wBAAgB,gBAAgB,IAAI,MAAM,CAEzC;AAED,wBAAgB,aAAa,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAWtD;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAIlE;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,IAAI,IAAI,CAOrC"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config file management for imperium-crawl CLI.
|
|
3
|
+
*
|
|
4
|
+
* Saves API keys to ~/.imperium-crawl/config.json so users don't
|
|
5
|
+
* need to set environment variables manually.
|
|
6
|
+
*
|
|
7
|
+
* Priority: process.env (system) > config.json
|
|
8
|
+
* applyCliConfig() fills in env vars from config only if not already set.
|
|
9
|
+
*/
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
import os from "node:os";
|
|
12
|
+
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
13
|
+
import { SKILLS_DIR_NAME } from "./constants.js";
|
|
14
|
+
const CONFIG_FILENAME = "config.json";
|
|
15
|
+
/**
 * Resolve the location of the CLI config file inside the user's home
 * directory (the file header documents it as ~/.imperium-crawl/config.json).
 * @returns {string} Path to the config file.
 */
export function getCliConfigPath() {
    const configDir = path.join(os.homedir(), SKILLS_DIR_NAME);
    return path.join(configDir, CONFIG_FILENAME);
}
|
|
18
|
+
/**
 * Read the CLI config file and return its contents.
 *
 * Never throws: a missing file, unreadable file or invalid JSON all yield an
 * empty config, as does JSON whose top level is not a plain object.
 * @returns {object} The parsed config object, or `{}`.
 */
export function loadCliConfig() {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(getCliConfigPath(), "utf-8"));
    }
    catch {
        // File doesn't exist or invalid JSON — return empty config
        return {};
    }
    const isRecord = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
    return isRecord ? parsed : {};
}
|
|
31
|
+
/**
 * Write the CLI config to disk as pretty-printed JSON, creating the parent
 * directory when needed.
 * @param {object} config - Key/value pairs to persist.
 * @returns {void}
 * @throws Propagates any filesystem error from mkdir/write.
 */
export function saveCliConfig(config) {
    const configPath = getCliConfigPath();
    mkdirSync(path.dirname(configPath), { recursive: true });
    // The file stores API keys, so create it readable and writable by the
    // owner only. `mode` applies when the file is created; a file that
    // already exists keeps its current permissions.
    writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`, {
        encoding: "utf-8",
        mode: 0o600,
    });
}
|
|
36
|
+
/**
 * Copy values from config.json into process.env.
 *
 * The environment wins: a config value is applied only when the variable of
 * the same name is currently unset or empty. Non-string config values are
 * ignored.
 *
 * Call this once at startup, before any tool initialization.
 * @returns {void}
 */
export function applyCliConfig() {
    const fileConfig = loadCliConfig();
    Object.keys(fileConfig).forEach((name) => {
        const fromFile = fileConfig[name];
        if (typeof fromFile !== "string") {
            return;
        }
        if (process.env[name]) {
            return;
        }
        process.env[name] = fromFile;
    });
}
|
|
51
|
+
//# sourceMappingURL=cli-config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-config.js","sourceRoot":"","sources":["../src/cli-config.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAEjD,MAAM,eAAe,GAAG,aAAa,CAAC;AAEtC,MAAM,UAAU,gBAAgB;IAC9B,OAAO,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,eAAe,EAAE,eAAe,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,UAAU,aAAa;IAC3B,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,YAAY,CAAC,gBAAgB,EAAE,EAAE,OAAO,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC5C,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACnE,OAAO,MAAgC,CAAC;QAC1C,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,2DAA2D;IAC7D,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAA8B;IAC1D,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAC;IACtC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAC7E,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc;IAC5B,MAAM,MAAM,GAAG,aAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAClD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC3B,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Interactive setup wizard for imperium-crawl CLI.
|
|
3
|
+
*
|
|
4
|
+
* Usage: imperium-crawl setup
|
|
5
|
+
*
|
|
6
|
+
* Guides the user through configuring API keys and saves them to
|
|
7
|
+
* ~/.imperium-crawl/config.json for persistent use.
|
|
8
|
+
*/
|
|
9
|
+
export declare function runSetup(): Promise<void>;
|
|
10
|
+
//# sourceMappingURL=cli-onboarding.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-onboarding.d.ts","sourceRoot":"","sources":["../src/cli-onboarding.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAeH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CA2H9C"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Interactive setup wizard for imperium-crawl CLI.
|
|
3
|
+
*
|
|
4
|
+
* Usage: imperium-crawl setup
|
|
5
|
+
*
|
|
6
|
+
* Guides the user through configuring API keys and saves them to
|
|
7
|
+
* ~/.imperium-crawl/config.json for persistent use.
|
|
8
|
+
*/
|
|
9
|
+
import chalk from "chalk";
|
|
10
|
+
import { input, select, confirm } from "@inquirer/prompts";
|
|
11
|
+
import { loadCliConfig, saveCliConfig, getCliConfigPath } from "./cli-config.js";
|
|
12
|
+
// ASCII-art banner, wrapped in chalk.cyan; runSetup() prints it before anything else.
const BANNER = chalk.cyan(`
|
|
13
|
+
██╗███╗ ███╗██████╗ ███████╗██████╗ ██╗██╗ ██╗███╗ ███╗
|
|
14
|
+
██║████╗ ████║██╔══██╗██╔════╝██╔══██╗██║██║ ██║████╗ ████║
|
|
15
|
+
██║██╔████╔██║██████╔╝█████╗ ██████╔╝██║██║ ██║██╔████╔██║
|
|
16
|
+
██║██║╚██╔╝██║██╔═══╝ ██╔══╝ ██╔══██╗██║██║ ██║██║╚██╔╝██║
|
|
17
|
+
██║██║ ╚═╝ ██║██║ ███████╗██║ ██║██║╚██████╔╝██║ ╚═╝ ██║
|
|
18
|
+
╚═╝╚═╝ ╚═╝╚═╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═════╝ ╚═╝ ╚═╝
|
|
19
|
+
`);
|
|
20
|
+
/**
 * Run the interactive API-key setup wizard.
 *
 * For each optional integration (Brave Search, an LLM provider, 2Captcha)
 * the wizard looks at the environment and at the saved CLI config. A key
 * that is already present is reported as configured; a missing one is
 * prompted for. The merged config is then written with saveCliConfig() and
 * a summary of the enabled extras is printed.
 *
 * @returns {Promise<void>} Resolves after the config has been saved and the
 *   summary has been printed.
 */
export async function runSetup() {
    console.log(BANNER);
    console.log(chalk.bold(" API Key Setup\n"));
    const existing = loadCliConfig();
    // Start from the saved values so entries the wizard does not ask about
    // are written back unchanged.
    const config = { ...existing };
    // ── Brave Search ──────────────────────────────────────────────────
    const hasBrave = !!(process.env.BRAVE_API_KEY || existing.BRAVE_API_KEY);
    if (hasBrave) {
        console.log(chalk.green(" ✓ BRAVE_API_KEY") +
            chalk.dim(" — already configured (search, news_search, image_search, video_search)"));
    }
    else {
        console.log(chalk.dim(" Brave Search enables 4 search tools. Free tier: https://brave.com/search/api/\n"));
        const braveAnswer = await input({
            message: "Brave Search API key (press Enter to skip):",
        });
        const braveKey = braveAnswer.trim();
        // An empty answer means "skip": nothing is stored.
        if (braveKey) {
            config.BRAVE_API_KEY = braveKey;
        }
    }
    console.log();
    // ── LLM Provider ─────────────────────────────────────────────────
    const hasLLM = !!(process.env.LLM_API_KEY || existing.LLM_API_KEY);
    if (hasLLM) {
        const currentProvider = process.env.LLM_PROVIDER || existing.LLM_PROVIDER || "anthropic";
        console.log(chalk.green(` ✓ LLM_API_KEY (${currentProvider})`) +
            chalk.dim(" — already configured (ai_extract tool)"));
    }
    else {
        console.log(chalk.dim(" LLM key enables the ai_extract tool — natural language data extraction.\n"));
        const provider = await select({
            message: "LLM provider (for ai_extract tool):",
            choices: [
                {
                    name: "Anthropic (Claude Haiku — default)",
                    value: "anthropic",
                    description: "Fast, affordable. Get key: https://console.anthropic.com",
                },
                {
                    name: "OpenAI (GPT-4o mini — default)",
                    value: "openai",
                    description: "Widely used. Get key: https://platform.openai.com",
                },
                {
                    name: "MiniMax (M2.5 — 200K context, reasoning)",
                    value: "minimax",
                    description: "Strong model, OpenAI-compatible API.",
                },
                {
                    name: "Skip for now",
                    value: "skip",
                    description: "Configure later via env vars or run setup again.",
                },
            ],
        });
        if (provider !== "skip") {
            config.LLM_PROVIDER = provider;
            const providerLabel = provider === "anthropic" ? "Anthropic" : provider === "openai" ? "OpenAI" : "MiniMax";
            const llmAnswer = await input({
                message: `${providerLabel} API key:`,
            });
            const llmKey = llmAnswer.trim();
            if (llmKey) {
                config.LLM_API_KEY = llmKey;
            }
        }
    }
    console.log();
    // ── 2Captcha ─────────────────────────────────────────────────────
    // Two env spellings are accepted. The value is read once and reused by
    // the summary below so detection and summary can never disagree.
    const captchaEnvKey = process.env.TWOCAPTCHA_API_KEY || process.env.TWO_CAPTCHA_API_KEY;
    const hasCaptcha = !!(captchaEnvKey || existing.TWOCAPTCHA_API_KEY);
    if (hasCaptcha) {
        console.log(chalk.green(" ✓ TWOCAPTCHA_API_KEY") +
            chalk.dim(" — already configured (auto CAPTCHA solving in stealth level 3)"));
    }
    else {
        const wantCaptcha = await confirm({
            message: "Configure 2Captcha for automatic CAPTCHA solving? (optional)",
            default: false,
        });
        if (wantCaptcha) {
            const captchaAnswer = await input({
                message: "2Captcha API key (https://2captcha.com):",
            });
            const captchaKey = captchaAnswer.trim();
            if (captchaKey) {
                config.TWOCAPTCHA_API_KEY = captchaKey;
            }
        }
    }
    // ── Save + Summary ────────────────────────────────────────────────
    saveCliConfig(config);
    console.log("\n" + chalk.bold(" ─────────────────────────────────────────"));
    const enabledTools = [];
    if (config.BRAVE_API_KEY || process.env.BRAVE_API_KEY) {
        enabledTools.push("search, news_search, image_search, video_search");
    }
    if (config.LLM_API_KEY || process.env.LLM_API_KEY) {
        enabledTools.push("ai_extract");
    }
    // Uses captchaEnvKey so a key supplied only as TWO_CAPTCHA_API_KEY is
    // counted here as well as in the detection step above.
    if (config.TWOCAPTCHA_API_KEY || captchaEnvKey) {
        enabledTools.push("CAPTCHA auto-solve (stealth lvl 3)");
    }
    if (enabledTools.length > 0) {
        console.log(chalk.green(`\n 🚀 Ready! Extra tools enabled: ${enabledTools.join(", ")}`));
    }
    else {
        console.log(chalk.yellow("\n ⚠ No API keys configured.") +
            chalk.dim(" Basic scraping tools work without keys."));
    }
    console.log(chalk.dim(`\n Config saved → ${getCliConfigPath()}\n`));
}
|
|
128
|
+
//# sourceMappingURL=cli-onboarding.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-onboarding.js","sourceRoot":"","sources":["../src/cli-onboarding.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAEjF,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC;;;;;;;CAOzB,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,QAAQ;IAC5B,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACpB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC;IAE7C,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;IACjC,MAAM,MAAM,GAA2B,EAAE,GAAG,QAAQ,EAAE,CAAC;IAEvD,qEAAqE;IACrE,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,QAAQ,CAAC,aAAa,CAAC,CAAC;IACzE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,KAAK,CAAC,mBAAmB,CAAC;YAC9B,KAAK,CAAC,GAAG,CAAC,yEAAyE,CAAC,CACvF,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,mFAAmF,CAAC,CAAC,CAAC;QAC5G,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC;YAC3B,OAAO,EAAE,6CAA6C;SACvD,CAAC,CAAC;QACH,IAAI,QAAQ,CAAC,IAAI,EAAE;YAAE,MAAM,CAAC,aAAa,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC9D,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,oEAAoE;IACpE,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,QAAQ,CAAC,WAAW,CAAC,CAAC;IACnE,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,QAAQ,CAAC,YAAY,IAAI,WAAW,CAAC;QACzF,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,KAAK,CAAC,oBAAoB,eAAe,GAAG,CAAC;YACjD,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CACvD,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,6EAA6E,CAAC,CAAC,CAAC;QACtG,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC;YAC5B,OAAO,EAAE,qCAAqC;YAC9C,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,oCAAoC;oBAC1C,KAAK,EAAE,WAAW;oBAClB,WAAW,EAAE,0DAA0D;iBACxE;gBACD;oBACE,IAAI,EAAE,gCAAgC;oBACtC,KAAK,EAAE,QAAQ;oBACf,WAAW,EAAE,mDAAmD;iBACjE;gBACD;oBACE,IAAI,EAAE,0CAA0C;oBAChD,KAAK,EAAE,SAAS;oBAChB,WAAW,EAAE,sCAAsC;iBACpD;gBACD;oBACE,IAAI,EAAE,cAAc;oBACpB,KAAK,EAAE,MAAM;oBACb,WAAW,EAAE,kDAAkD;iBAChE;aACF;SACF,CAAC,CAAC;QAEH,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;YACxB,MAAM,CAAC,YAAY,GAAG,QAAQ,CAAC;Y
AC/B,MAAM,aAAa,GACjB,QAAQ,KAAK,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;YACxF,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC;gBACzB,OAAO,EAAE,GAAG,aAAa,WAAW;aACrC,CAAC,CAAC;YACH,IAAI,MAAM,CAAC,IAAI,EAAE;gBAAE,MAAM,CAAC,WAAW,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;QACxD,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,oEAAoE;IACpE,MAAM,UAAU,GAAG,CAAC,CAAC,CACnB,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,OAAO,CAAC,GAAG,CAAC,mBAAmB;QAC/B,QAAQ,CAAC,kBAAkB,CAC5B,CAAC;IACF,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,KAAK,CAAC,wBAAwB,CAAC;YACnC,KAAK,CAAC,GAAG,CAAC,iEAAiE,CAAC,CAC/E,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC;YAChC,OAAO,EAAE,8DAA8D;YACvE,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;QACH,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC;gBAC7B,OAAO,EAAE,0CAA0C;aACpD,CAAC,CAAC;YACH,IAAI,UAAU,CAAC,IAAI,EAAE;gBAAE,MAAM,CAAC,kBAAkB,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;QACvE,CAAC;IACH,CAAC;IAED,qEAAqE;IACrE,aAAa,CAAC,MAAM,CAAC,CAAC;IAEtB,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC,CAAC;IAE9E,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,MAAM,CAAC,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC;QACtD,YAAY,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,MAAM,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;QAClD,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,MAAM,CAAC,kBAAkB,IAAI,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChE,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,sCAAsC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5F,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,MAAM,CAAC,+BAA+B,CAAC;YAC3C,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CACxD,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,sBAAsB,gBAAgB,EAAE,IAAI,CAAC,CAAC,CAAC;AACvE,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Full TUI (Terminal User Interface) for imperium-crawl — v3
|
|
3
|
+
*
|
|
4
|
+
* Slash-command-driven UX (Claude Code aesthetic).
|
|
5
|
+
* readline for main prompt, @clack/prompts for param collection only.
|
|
6
|
+
*
|
|
7
|
+
* Activated when: no CLI args AND process.stdout.isTTY.
|
|
8
|
+
* Non-TTY mode (pipe/CI/agents) is unaffected — MCP server runs as before.
|
|
9
|
+
* CLI subcommands (scrape, crawl, etc.) are unaffected — they bypass this.
|
|
10
|
+
*/
|
|
11
|
+
export declare function runTui(): Promise<void>;
|
|
12
|
+
//# sourceMappingURL=cli-tui.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-tui.d.ts","sourceRoot":"","sources":["../src/cli-tui.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAgjCH,wBAAsB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAsB5C"}
|