imperium-crawl 1.1.9 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +76 -12
  2. package/dist/batch/index.d.ts +3 -0
  3. package/dist/batch/index.d.ts.map +1 -0
  4. package/dist/batch/index.js +2 -0
  5. package/dist/batch/index.js.map +1 -0
  6. package/dist/batch/job-store.d.ts +15 -0
  7. package/dist/batch/job-store.d.ts.map +1 -0
  8. package/dist/batch/job-store.js +77 -0
  9. package/dist/batch/job-store.js.map +1 -0
  10. package/dist/batch/types.d.ts +20 -0
  11. package/dist/batch/types.d.ts.map +1 -0
  12. package/dist/batch/types.js +2 -0
  13. package/dist/batch/types.js.map +1 -0
  14. package/dist/cli-config.d.ts +21 -0
  15. package/dist/cli-config.d.ts.map +1 -0
  16. package/dist/cli-config.js +51 -0
  17. package/dist/cli-config.js.map +1 -0
  18. package/dist/cli-onboarding.d.ts +10 -0
  19. package/dist/cli-onboarding.d.ts.map +1 -0
  20. package/dist/cli-onboarding.js +128 -0
  21. package/dist/cli-onboarding.js.map +1 -0
  22. package/dist/cli-tui.d.ts +12 -0
  23. package/dist/cli-tui.d.ts.map +1 -0
  24. package/dist/cli-tui.js +945 -0
  25. package/dist/cli-tui.js.map +1 -0
  26. package/dist/cli-ui.d.ts +26 -0
  27. package/dist/cli-ui.d.ts.map +1 -0
  28. package/dist/cli-ui.js +58 -0
  29. package/dist/cli-ui.js.map +1 -0
  30. package/dist/cli.d.ts +8 -1
  31. package/dist/cli.d.ts.map +1 -1
  32. package/dist/cli.js +161 -35
  33. package/dist/cli.js.map +1 -1
  34. package/dist/config.d.ts +6 -0
  35. package/dist/config.d.ts.map +1 -1
  36. package/dist/config.js +20 -1
  37. package/dist/config.js.map +1 -1
  38. package/dist/constants.d.ts +3 -2
  39. package/dist/constants.d.ts.map +1 -1
  40. package/dist/constants.js +3 -2
  41. package/dist/constants.js.map +1 -1
  42. package/dist/formatters.d.ts +14 -0
  43. package/dist/formatters.d.ts.map +1 -1
  44. package/dist/formatters.js +10 -0
  45. package/dist/formatters.js.map +1 -1
  46. package/dist/index.js +7 -0
  47. package/dist/index.js.map +1 -1
  48. package/dist/llm/extractor.d.ts +18 -0
  49. package/dist/llm/extractor.d.ts.map +1 -0
  50. package/dist/llm/extractor.js +104 -0
  51. package/dist/llm/extractor.js.map +1 -0
  52. package/dist/llm/index.d.ts +22 -0
  53. package/dist/llm/index.d.ts.map +1 -0
  54. package/dist/llm/index.js +36 -0
  55. package/dist/llm/index.js.map +1 -0
  56. package/dist/llm/providers/anthropic.d.ts +8 -0
  57. package/dist/llm/providers/anthropic.d.ts.map +1 -0
  58. package/dist/llm/providers/anthropic.js +45 -0
  59. package/dist/llm/providers/anthropic.js.map +1 -0
  60. package/dist/llm/providers/minimax.d.ts +17 -0
  61. package/dist/llm/providers/minimax.d.ts.map +1 -0
  62. package/dist/llm/providers/minimax.js +20 -0
  63. package/dist/llm/providers/minimax.js.map +1 -0
  64. package/dist/llm/providers/openai.d.ts +9 -0
  65. package/dist/llm/providers/openai.d.ts.map +1 -0
  66. package/dist/llm/providers/openai.js +38 -0
  67. package/dist/llm/providers/openai.js.map +1 -0
  68. package/dist/sessions/index.d.ts +3 -0
  69. package/dist/sessions/index.d.ts.map +1 -0
  70. package/dist/sessions/index.js +2 -0
  71. package/dist/sessions/index.js.map +1 -0
  72. package/dist/sessions/manager.d.ts +15 -0
  73. package/dist/sessions/manager.d.ts.map +1 -0
  74. package/dist/sessions/manager.js +85 -0
  75. package/dist/sessions/manager.js.map +1 -0
  76. package/dist/sessions/types.d.ts +18 -0
  77. package/dist/sessions/types.d.ts.map +1 -0
  78. package/dist/sessions/types.js +2 -0
  79. package/dist/sessions/types.js.map +1 -0
  80. package/dist/tools/ai-extract.d.ts +33 -0
  81. package/dist/tools/ai-extract.d.ts.map +1 -0
  82. package/dist/tools/ai-extract.js +96 -0
  83. package/dist/tools/ai-extract.js.map +1 -0
  84. package/dist/tools/batch-scrape.d.ts +37 -0
  85. package/dist/tools/batch-scrape.d.ts.map +1 -0
  86. package/dist/tools/batch-scrape.js +140 -0
  87. package/dist/tools/batch-scrape.js.map +1 -0
  88. package/dist/tools/create-skill.d.ts +2 -2
  89. package/dist/tools/delete-job.d.ts +18 -0
  90. package/dist/tools/delete-job.d.ts.map +1 -0
  91. package/dist/tools/delete-job.js +31 -0
  92. package/dist/tools/delete-job.js.map +1 -0
  93. package/dist/tools/extract.d.ts +4 -1
  94. package/dist/tools/extract.d.ts.map +1 -1
  95. package/dist/tools/extract.js +49 -1
  96. package/dist/tools/extract.js.map +1 -1
  97. package/dist/tools/index.d.ts.map +1 -1
  98. package/dist/tools/index.js +15 -0
  99. package/dist/tools/index.js.map +1 -1
  100. package/dist/tools/interact.d.ts +96 -0
  101. package/dist/tools/interact.d.ts.map +1 -0
  102. package/dist/tools/interact.js +254 -0
  103. package/dist/tools/interact.js.map +1 -0
  104. package/dist/tools/job-status.d.ts +18 -0
  105. package/dist/tools/job-status.d.ts.map +1 -0
  106. package/dist/tools/job-status.js +42 -0
  107. package/dist/tools/job-status.js.map +1 -0
  108. package/dist/tools/list-jobs.d.ts +12 -0
  109. package/dist/tools/list-jobs.d.ts.map +1 -0
  110. package/dist/tools/list-jobs.js +50 -0
  111. package/dist/tools/list-jobs.js.map +1 -0
  112. package/dist/tools/manifest.d.ts +19 -0
  113. package/dist/tools/manifest.d.ts.map +1 -0
  114. package/dist/tools/manifest.js +110 -0
  115. package/dist/tools/manifest.js.map +1 -0
  116. package/dist/tools/readability.d.ts +2 -2
  117. package/dist/tools/scrape.d.ts +4 -4
  118. package/package.json +14 -2
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # imperium-crawl
2
2
 
3
- The most powerful open-source MCP server for web scraping, crawling, and data extraction. **16 tools. Zero API keys required for scraping. One `npx` command to install.**
3
+ The most powerful open-source MCP server for web scraping, crawling, and data extraction. **22 tools. Zero API keys required for scraping. One `npx` command to install.**
4
4
 
5
5
  While others charge $19+/month for basic scraping, imperium-crawl gives you **more features for free** — including capabilities that no other MCP server offers at any price.
6
6
 
@@ -24,9 +24,12 @@ While others charge $19+/month for basic scraping, imperium-crawl gives you **mo
24
24
  | Circuit breaker + jitter backoff | **Yes** | No | No | No | No |
25
25
  | URL normalization (11 steps) | **Yes** | No | No | No | No |
26
26
  | Adaptive learning (self-improving) | **Yes** | No | No | No | No |
27
+ | AI-powered data extraction | **Yes** | No | No | No | No |
28
+ | Browser automation + sessions | **Yes** | No | No | No | No |
29
+ | Batch processing with resume | **Yes** | No | No | No | No |
27
30
  | Self-hosted | **Yes** | No | N/A | Yes | No |
28
31
  | Requires external service | **No** | Yes | No | No | Yes |
29
- | Total tools | **16** | 5 | 2 | 2 | 4 |
32
+ | Total tools | **22** | 5 | 2 | 2 | 4 |
30
33
 
31
34
  > **TLDR:** More tools, more features, zero cost, no external dependencies. Self-hosted, open-source, and it runs on your machine.
32
35
 
@@ -56,6 +59,8 @@ Add to your MCP client config (Claude Code, Cursor, VS Code, Windsurf, or any MC
56
59
  "env": {
57
60
  "BRAVE_API_KEY": "your-brave-api-key",
58
61
  "TWOCAPTCHA_API_KEY": "your-2captcha-api-key",
62
+ "LLM_API_KEY": "your-api-key",
63
+ "LLM_PROVIDER": "anthropic",
59
64
  "PROXY_URL": "http://user:pass@proxy:8080",
60
65
  "PROXY_URLS": "http://proxy1:8080,socks5://proxy2:1080"
61
66
  }
@@ -64,12 +69,14 @@ Add to your MCP client config (Claude Code, Cursor, VS Code, Windsurf, or any MC
64
69
  }
65
70
  ```
66
71
 
67
- > **Works out of the box with zero API keys** — 12 tools are fully functional without any configuration. To unlock full power, add 2 optional API keys:
72
+ > **Works out of the box with zero API keys** — 16 tools are fully functional without any configuration (6 scraping + 3 skills + 3 API discovery + 4 batch). To unlock full power, add optional API keys:
68
73
  >
69
74
  > | Key | What it unlocks | Where to get it |
70
75
  > |-----|----------------|-----------------|
71
76
  > | `BRAVE_API_KEY` | 4 search tools (web, news, image, video) | [brave.com/search/api](https://brave.com/search/api/) (free tier available) |
72
77
  > | `TWOCAPTCHA_API_KEY` | Auto CAPTCHA solving (reCAPTCHA v2/v3, hCaptcha, Turnstile) | [2captcha.com](https://2captcha.com/) |
78
+ > | `LLM_API_KEY` | AI-powered data extraction (`ai_extract` tool) | API key for your chosen provider (Anthropic, OpenAI, or MiniMax) |
79
+ > | `CHROME_PROFILE_PATH` | Authenticated browser sessions (use your Chrome cookies) | Path to Chrome user data dir |
73
80
  > | `PROXY_URL` | Route all requests through a proxy (http/https/socks4/socks5) | Any proxy provider |
74
81
 
75
82
  ### Enable full stealth (Level 3 — headless browser)
@@ -81,20 +88,21 @@ npx playwright install chromium
81
88
 
82
89
  ### AI Agent Guide (SKILL.md)
83
90
 
84
- imperium-crawl ships with [`SKILL.md`](./SKILL.md) — a structured guide that teaches AI agents (Claude, GPT, etc.) how to use all 16 tools effectively. It includes 6 proven workflows, decision trees, error recovery strategies, and advanced patterns like manual skill refinement.
91
+ imperium-crawl ships with [`SKILL.md`](./SKILL.md) — a structured guide that teaches AI agents (Claude, GPT, etc.) how to use all 22 tools effectively. It includes 9 proven workflows, decision trees, error recovery strategies, and advanced patterns like manual skill refinement.
85
92
 
86
93
  **Without SKILL.md**, agents can call tools but won't know which tool to try first, when to fallback, or how to chain tools together optimally.
87
94
 
88
95
  **With SKILL.md**, agents follow battle-tested workflows — readability → scrape → extract fallback chains, auto-detect → manual refinement for skills, search → select → deep-scrape for research, and more.
89
96
 
90
- **Two ways to connect SKILL.md to any agent:**
97
+ **Three ways to connect SKILL.md to any agent:**
91
98
 
92
99
  | Method | Setup | Works with |
93
100
  |--------|-------|-----------|
94
101
  | **MCP + SKILL.md** | Add imperium-crawl as MCP server + SKILL.md in agent context | Claude Code, Cursor, Windsurf, any MCP client |
95
102
  | **CLI + SKILL.md** | `npm i -g imperium-crawl` + SKILL.md in agent context | **Any agent with bash access** — OpenClaw, ChatGPT, GPT agents, custom agents, anything |
103
+ | **TUI mode** | `imperium-crawl tui` — interactive slash-command terminal | Direct human use, demos, debugging |
96
104
 
97
- The CLI approach is universal — any agent that can run shell commands can use all 16 tools. No MCP required.
105
+ The CLI approach is universal — any agent that can run shell commands can use all 22 tools. No MCP required.
98
106
 
99
107
  | AI Agent | How to add SKILL.md |
100
108
  |----------|-------------------|
@@ -107,7 +115,7 @@ The CLI approach is universal — any agent that can run shell commands can use
107
115
 
108
116
  ## CLI Mode
109
117
 
110
- imperium-crawl works as both an **MCP server** and a **standalone CLI tool**. All 16 tools are available as subcommands:
118
+ imperium-crawl works as both an **MCP server** and a **standalone CLI tool**. All 22 tools are available as subcommands:
111
119
 
112
120
  ```bash
113
121
  # Scrape a website to markdown
@@ -119,6 +127,18 @@ imperium-crawl crawl --url https://blog.cloudflare.com --max-depth 2 --max-pages
119
127
  # Extract structured data with CSS selectors
120
128
  imperium-crawl extract --url https://news.ycombinator.com --selectors '{"title":".titleline a","score":".score"}' --items-selector ".athing"
121
129
 
130
+ # AI-powered extraction — describe what you want in plain English
131
+ imperium-crawl ai-extract --url https://amazon.com/dp/B0D1XD1ZV3 --schema "extract product name, price, rating, and review count"
132
+
133
+ # Browser automation — interact with pages
134
+ imperium-crawl interact --url https://example.com --actions '[{"type":"click","selector":"#login"},{"type":"type","selector":"#email","text":"user@example.com"}]'
135
+
136
+ # Batch scrape multiple URLs in parallel
137
+ imperium-crawl batch-scrape --urls '["https://site1.com","https://site2.com","https://site3.com"]' --concurrency 3
138
+
139
+ # List batch jobs
140
+ imperium-crawl list-jobs
141
+
122
142
  # Discover hidden APIs on any website
123
143
  imperium-crawl discover-apis --url https://weather.com
124
144
 
@@ -127,6 +147,9 @@ imperium-crawl search --query "latest AI news" --count 5
127
147
 
128
148
  # Take a screenshot
129
149
  imperium-crawl screenshot --url https://github.com --full-page
150
+
151
+ # Interactive setup wizard
152
+ imperium-crawl setup
130
153
  ```
131
154
 
132
155
  ### Output Formats
@@ -151,6 +174,14 @@ imperium-crawl scrape --url https://example.com --pretty
151
174
  imperium-crawl scrape --url https://example.com --output result.json
152
175
  ```
153
176
 
177
+ ### TUI Mode
178
+
179
+ ```bash
180
+ imperium-crawl tui
181
+ ```
182
+
183
+ Interactive slash-command terminal with parameter prompts, table rendering, markdown display, and session state. Use `/save` to export results and `/again` to re-run the last command.
184
+
154
185
  ### Help
155
186
 
156
187
  ```bash
@@ -159,11 +190,11 @@ imperium-crawl scrape --help # Help for specific tool
159
190
  imperium-crawl --version # Show version
160
191
  ```
161
192
 
162
- > **No arguments** = starts as MCP server (stdio). **With subcommand** = runs as CLI tool.
193
+ > **No arguments** = starts as MCP server (stdio). **With subcommand** = runs as CLI tool. **`tui`** = interactive terminal.
163
194
 
164
195
  ---
165
196
 
166
- ## 16 Tools
197
+ ## 22 Tools
167
198
 
168
199
  ### Scraping (no API key needed)
169
200
 
@@ -201,6 +232,27 @@ imperium-crawl --version # Show version
201
232
  | **query_api** | Call any API endpoint directly with stealth headers. Bypass DOM rendering entirely for 10x faster data access. Use after `discover_apis` to hit endpoints directly. |
202
233
  | **monitor_websocket** | Capture real-time WebSocket messages from any page — financial tickers, chat feeds, live dashboards. Returns connection details and message payloads. **No other MCP server does this.** |
203
234
 
235
+ ### AI Extraction (requires LLM API key)
236
+
237
+ | Tool | What It Does |
238
+ |------|-------------|
239
+ | **ai_extract** | AI-powered data extraction — describe what you want in natural language or provide a JSON schema. Supports auto mode (LLM decides what to extract), 3 providers (Anthropic, OpenAI, MiniMax). The `extract` tool also supports `llm_fallback: true` for hybrid CSS→AI extraction. |
240
+
241
+ ### Interaction (no API key needed, requires Playwright)
242
+
243
+ | Tool | What It Does |
244
+ |------|-------------|
245
+ | **interact** | Browser automation with 10 action types (click, type, scroll, wait, screenshot, evaluate, select, hover, press, navigate). Session persistence saves/restores cookies across calls — build login flows and multi-step workflows. |
246
+
247
+ ### Batch Processing (no API key needed)
248
+
249
+ | Tool | What It Does |
250
+ |------|-------------|
251
+ | **batch_scrape** | Parallel URL scraping with configurable concurrency, soft failure (continues on errors), and resume support via job_id. Optional AI extraction per URL. |
252
+ | **list_jobs** | List all batch jobs with status, progress, and timestamps. |
253
+ | **job_status** | Get full results for a specific batch job including per-URL outcomes. |
254
+ | **delete_job** | Clean up completed or failed batch jobs. |
255
+
204
256
  ---
205
257
 
206
258
  ## Stealth Engine
@@ -340,7 +392,7 @@ Turn any website into an API. No documentation needed.
340
392
  - **Per-domain circuit breaker** — 5 consecutive failures opens the circuit for 60s, then half-open probing with automatic recovery
341
393
  - **URL normalization** — 11-step pipeline removes tracking params (utm_*, fbclid, gclid), sorts query params, normalizes encoding
342
394
  - **Concurrency limiting** — per-domain request throttling via p-queue
343
- - **Input validation** — all 16 tool schemas enforce strict bounds (URL length, query size, concurrency limits, body size)
395
+ - **Input validation** — all 22 tool schemas enforce strict bounds (URL length, query size, concurrency limits, body size)
344
396
  - **HTTP transport hardening** — rate limiting (100 req/min), 1MB body limit, 5min request timeout
345
397
  - **Proxy support** — single proxy (`PROXY_URL`) or rotating pool (`PROXY_URLS`) with http/https/socks4/socks5 support
346
398
  - **Browser pool** — keyed by proxy URL, auto-eviction, configurable pool size
@@ -372,8 +424,14 @@ Every tool tested against production websites with real anti-bot defenses:
372
424
  | 🔓 **discover_apis** | Airbnb Paris | **34 hidden APIs** — DataDome anti-bot, Google Maps key, internal APIs |
373
425
  | ⚡ **query_api** | jsonplaceholder | Direct JSON API call with stealth headers |
374
426
  | 📡 **monitor_websocket** | Binance BTC/USDT | **3 WebSocket connections, 23 live messages** — BTC price live |
427
+ | 🧠 **ai_extract** | Amazon product page | AI extracted name, price, rating, review count — natural language schema |
428
+ | 🖱️ **interact** | Login flow | Click → type email → type password → submit — session cookies persisted |
429
+ | 📦 **batch_scrape** | 10 news sites | Parallel scrape with concurrency 3, soft failure, 9/10 succeeded |
430
+ | 📋 **list_jobs** | — | Lists all batch jobs with status and progress |
431
+ | 📊 **job_status** | Batch job | Full per-URL results with timing and extracted data |
432
+ | 🗑️ **delete_job** | Completed job | Cleaned up job data from disk |
375
433
 
376
- > 🏆 **16/16 tools working. 58 hidden APIs discovered. Live crypto feed captured. Zero API keys needed for scraping.**
434
+ > 🏆 **22/22 tools working. 58 hidden APIs discovered. Live crypto feed captured. AI extraction. Browser automation. Zero API keys needed for scraping.**
377
435
 
378
436
  ---
379
437
 
@@ -389,6 +447,12 @@ Every tool tested against production websites with real anti-bot defenses:
389
447
  | `PROXY_URLS` | No | Comma-separated proxy URLs for rotation |
390
448
  | `BROWSER_POOL_SIZE` | No | Max pooled browser instances (default: 3) |
391
449
  | `RESPECT_ROBOTS` | No | Respect robots.txt (default: `true`) |
450
+ | `LLM_API_KEY` | No | API key for the selected `LLM_PROVIDER` — Anthropic, OpenAI, or MiniMax (enables `ai_extract` tool) |
451
+ | `LLM_PROVIDER` | No | `anthropic`, `openai`, or `minimax` (default: `anthropic`) |
452
+ | `LLM_MODEL` | No | Override default LLM model |
453
+ | `CHROME_PROFILE_PATH` | No | Chrome user data dir for authenticated browser sessions |
454
+ | `NO_COLOR` | No | Disable colored output (standard convention) |
455
+ | `CI` | No | Auto-detected; disables TTY features (spinners, colors) |
392
456
 
393
457
  ---
394
458
 
@@ -399,7 +463,7 @@ git clone https://github.com/ceoimperiumprojects/imperium-crawl
399
463
  cd imperium-crawl
400
464
  npm install
401
465
  npm run build
402
- npm test # 285 tests
466
+ npm test # 332 tests
403
467
  npm start
404
468
  ```
405
469
 
@@ -0,0 +1,3 @@
1
+ export type { BatchJob, BatchJobResult } from "./types.js";
2
+ export { JobStore, getJobStore, resetJobStore } from "./job-store.js";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/batch/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAC3D,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export { JobStore, getJobStore, resetJobStore } from "./job-store.js";
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/batch/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { BatchJob } from "./types.js";
2
+ export declare class JobStore {
3
+ private cache;
4
+ private dir;
5
+ constructor(dir?: string);
6
+ private jobPath;
7
+ save(job: BatchJob): Promise<void>;
8
+ load(id: string): Promise<BatchJob | null>;
9
+ delete(id: string): Promise<void>;
10
+ list(): Promise<string[]>;
11
+ }
12
+ export declare function getJobStore(): JobStore;
13
+ /** Reset singleton (for testing) */
14
+ export declare function resetJobStore(): void;
15
+ //# sourceMappingURL=job-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"job-store.d.ts","sourceRoot":"","sources":["../../src/batch/job-store.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3C,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAA+B;IAC5C,OAAO,CAAC,GAAG,CAAS;gBAER,GAAG,CAAC,EAAE,MAAM;IAIxB,OAAO,CAAC,OAAO;IAMT,IAAI,CAAC,GAAG,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IAWlC,IAAI,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC;IAwB1C,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASjC,IAAI,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;CAUhC;AAMD,wBAAgB,WAAW,IAAI,QAAQ,CAKtC;AAED,oCAAoC;AACpC,wBAAgB,aAAa,IAAI,IAAI,CAEpC"}
@@ -0,0 +1,77 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { getJobsDir } from "../config.js";
4
/**
 * True when `err` is a Node.js "no such file or directory" error.
 *
 * @param {unknown} err - Value caught from a failed fs call.
 * @returns {boolean}
 */
function isEnoentError(err) {
    return Boolean(err) && typeof err === "object" && "code" in err && err.code === "ENOENT";
}

/**
 * File-backed store for batch jobs with an in-memory read-through cache.
 *
 * Each job is persisted as `<dir>/<sanitized id>.json`. A write goes to
 * `<file>.tmp` first and is then renamed into place, so a reader never sees
 * a half-written job file.
 *
 * All disk mutations issued through one store instance (save and delete) run
 * one after another. Because every save uses the same temp-file name,
 * overlapping saves of the same job would otherwise race on that temp file
 * and the losing `rename` would fail with ENOENT.
 */
export class JobStore {
    /** Jobs already saved or loaded, keyed by the id the caller passed in. */
    cache = new Map();
    /** Directory containing one JSON file per job. */
    dir;
    /** Tail of the disk-mutation chain; it never rejects. */
    writeQueue = Promise.resolve();

    /**
     * @param {string} [dir] - Directory for job files; defaults to `getJobsDir()`.
     */
    constructor(dir) {
        this.dir = dir ?? getJobsDir();
    }

    /**
     * Map a job id to its file path inside the store directory.
     *
     * @param {string} id - Job id.
     * @returns {string} Path of the job's JSON file.
     */
    jobPath(id) {
        // Sanitize id to prevent path traversal
        const safe = id.replace(/[^a-zA-Z0-9_\-]/g, "_");
        return path.join(this.dir, `${safe}.json`);
    }

    /**
     * Run `task` after every previously enqueued disk mutation has settled.
     *
     * @param {() => Promise<void>} task - Disk operation to run.
     * @returns {Promise<void>} Settles with the outcome of `task`.
     */
    enqueue(task) {
        const run = this.writeQueue.then(task);
        // Keep the chain alive after a failure; the caller still sees `run` reject.
        this.writeQueue = run.catch(() => { });
        return run;
    }

    /**
     * Persist a job, stamping `updated_at` with the current time.
     * The cache is updated immediately; the disk write is queued.
     *
     * @param {object} job - Job to store; `job.id` selects the file.
     * @returns {Promise<void>} Resolves once this snapshot is on disk.
     * @throws Rejects with the underlying fs error if the write fails.
     */
    async save(job) {
        const updated = { ...job, updated_at: new Date().toISOString() };
        this.cache.set(job.id, updated);
        await this.enqueue(async () => {
            await fs.mkdir(this.dir, { recursive: true });
            const filePath = this.jobPath(job.id);
            const tmpPath = filePath + ".tmp";
            try {
                await fs.writeFile(tmpPath, JSON.stringify(updated, null, 2), "utf-8");
                await fs.rename(tmpPath, filePath);
            }
            catch (err) {
                // Do not leave a stray temp file behind after a failed write.
                await fs.unlink(tmpPath).catch(() => { });
                throw err;
            }
        });
    }

    /**
     * Fetch a job from the cache, falling back to its file on disk.
     *
     * @param {string} id - Job id.
     * @returns {Promise<object | null>} The job, or `null` when it is missing
     *   or its file cannot be read or parsed (non-ENOENT failures are logged).
     */
    async load(id) {
        if (this.cache.has(id))
            return this.cache.get(id);
        try {
            const data = await fs.readFile(this.jobPath(id), "utf-8");
            const job = JSON.parse(data);
            this.cache.set(id, job);
            return job;
        }
        catch (err) {
            if (!isEnoentError(err)) {
                console.error("[batch] Failed to load job:", err instanceof Error ? err.message : String(err));
            }
            return null;
        }
    }

    /**
     * Remove a job from the cache and from disk. Never rejects.
     * A missing file is fine; any other failure is logged.
     *
     * @param {string} id - Job id.
     * @returns {Promise<void>}
     */
    async delete(id) {
        this.cache.delete(id);
        const filePath = this.jobPath(id);
        try {
            // Queued behind pending saves so an in-flight write cannot
            // re-create the file after it has been removed.
            await this.enqueue(() => fs.unlink(filePath));
        }
        catch (err) {
            if (!isEnoentError(err)) {
                console.error("[batch] Failed to delete job:", err instanceof Error ? err.message : String(err));
            }
        }
    }

    /**
     * List the ids of all job files in the store directory.
     * Ids are the sanitized file stems, not necessarily the original ids.
     *
     * @returns {Promise<string[]>} File stems; empty when the directory
     *   cannot be read.
     */
    async list() {
        try {
            const files = await fs.readdir(this.dir);
            return files
                .filter((f) => f.endsWith(".json") && !f.endsWith(".tmp.json"))
                .map((f) => f.replace(/\.json$/, ""));
        }
        catch {
            return [];
        }
    }
}
65
+ // ── Singleton ──
66
/** Lazily created, process-wide JobStore instance (null until first use). */
let store = null;

/**
 * Return the shared JobStore, constructing it with default settings on the
 * first call.
 */
export function getJobStore() {
    store ??= new JobStore();
    return store;
}

/** Reset singleton (for testing) */
export function resetJobStore() {
    store = null;
}
77
+ //# sourceMappingURL=job-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"job-store.js","sourceRoot":"","sources":["../../src/batch/job-store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAG1C,MAAM,OAAO,QAAQ;IACX,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAC;IACpC,GAAG,CAAS;IAEpB,YAAY,GAAY;QACtB,IAAI,CAAC,GAAG,GAAG,GAAG,IAAI,UAAU,EAAE,CAAC;IACjC,CAAC;IAEO,OAAO,CAAC,EAAU;QACxB,wCAAwC;QACxC,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,OAAO,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,GAAa;QACtB,MAAM,OAAO,GAAa,EAAE,GAAG,GAAG,EAAE,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;QAC3E,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QAEhC,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtC,MAAM,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;QAClC,MAAM,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACvE,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAU;QACnB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;QAEnD,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,OAAO,CAAC,CAAC;YAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAa,CAAC;YACzC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;YACxB,OAAO,GAAG,CAAC;QACb,CAAC;QAAC,OAAO,GAAY,EAAE,CAAC;YACtB,MAAM,QAAQ,GACZ,GAAG;gBACH,OAAO,GAAG,KAAK,QAAQ;gBACvB,MAAM,IAAI,GAAG;gBACZ,GAA6B,CAAC,IAAI,KAAK,QAAQ,CAAC;YACnD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,KAAK,CACX,6BAA6B,EAC7B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CACjD,CAAC;YACJ,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,EAAU;QACrB,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,C
AAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC;QACpC,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;QACpC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACzC,OAAO,KAAK;iBACT,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;iBAC9D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;CACF;AAED,kBAAkB;AAElB,IAAI,KAAK,GAAoB,IAAI,CAAC;AAElC,MAAM,UAAU,WAAW;IACzB,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,KAAK,GAAG,IAAI,QAAQ,EAAE,CAAC;IACzB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,oCAAoC;AACpC,MAAM,UAAU,aAAa;IAC3B,KAAK,GAAG,IAAI,CAAC;AACf,CAAC"}
@@ -0,0 +1,20 @@
1
+ export interface BatchJobResult {
2
+ url: string;
3
+ success: boolean;
4
+ content?: string;
5
+ data?: unknown;
6
+ error?: string;
7
+ status_code?: number;
8
+ duration_ms: number;
9
+ }
10
+ export interface BatchJob {
11
+ id: string;
12
+ status: "running" | "completed" | "failed";
13
+ urls_total: number;
14
+ urls_completed: number;
15
+ urls_failed: number;
16
+ results: BatchJobResult[];
17
+ created_at: string;
18
+ updated_at: string;
19
+ }
20
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/batch/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAC;IAC3C,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/batch/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Config file management for imperium-crawl CLI.
3
+ *
4
+ * Saves API keys to ~/.imperium-crawl/config.json so users don't
5
+ * need to set environment variables manually.
6
+ *
7
+ * Priority: process.env (system) > config.json
8
+ * applyCliConfig() fills in env vars from config only if not already set.
9
+ */
10
+ export declare function getCliConfigPath(): string;
11
+ export declare function loadCliConfig(): Record<string, string>;
12
+ export declare function saveCliConfig(config: Record<string, string>): void;
13
+ /**
14
+ * Apply config.json values to process.env.
15
+ * System env vars take priority — config values are only applied
16
+ * when the key is not already set.
17
+ *
18
+ * Call this once at startup, before any tool initialization.
19
+ */
20
+ export declare function applyCliConfig(): void;
21
+ //# sourceMappingURL=cli-config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli-config.d.ts","sourceRoot":"","sources":["../src/cli-config.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AASH,wBAAgB,gBAAgB,IAAI,MAAM,CAEzC;AAED,wBAAgB,aAAa,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAWtD;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAIlE;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,IAAI,IAAI,CAOrC"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Config file management for imperium-crawl CLI.
3
+ *
4
+ * Saves API keys to ~/.imperium-crawl/config.json so users don't
5
+ * need to set environment variables manually.
6
+ *
7
+ * Priority: process.env (system) > config.json
8
+ * applyCliConfig() fills in env vars from config only if not already set.
9
+ */
10
+ import path from "node:path";
11
+ import os from "node:os";
12
+ import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
13
+ import { SKILLS_DIR_NAME } from "./constants.js";
14
/** File name of the CLI config inside the imperium-crawl home directory. */
const CONFIG_FILENAME = "config.json";

/**
 * Build the path of the CLI config file:
 * `<user home>/<SKILLS_DIR_NAME>/config.json`.
 *
 * @returns {string} Path to the config file (the file may not exist yet).
 */
export function getCliConfigPath() {
    const homeDir = os.homedir();
    return path.join(homeDir, SKILLS_DIR_NAME, CONFIG_FILENAME);
}
18
/**
 * Read the CLI config file and return its contents.
 *
 * Best effort by design: a missing file, unreadable file, invalid JSON, or a
 * JSON value that is not a plain object (array, null, primitive) all yield an
 * empty config rather than an error.
 *
 * @returns {Record<string, string>} Parsed config object, or `{}`.
 */
export function loadCliConfig() {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(getCliConfigPath(), "utf-8"));
    }
    catch {
        // File doesn't exist or invalid JSON — return empty config
        return {};
    }
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
}
31
/**
 * Write the CLI config to disk as pretty-printed JSON, creating the parent
 * directory if needed.
 *
 * The file holds API keys, so it is created with owner-only permissions
 * (0o600). The mode is applied only when the file is created; an existing
 * file keeps its current permissions. On Windows the mode has little effect.
 *
 * @param {Record<string, string>} config - Key/value pairs to persist.
 * @returns {void}
 * @throws Propagates any fs error from creating the directory or writing the file.
 */
export function saveCliConfig(config) {
    const configPath = getCliConfigPath();
    mkdirSync(path.dirname(configPath), { recursive: true });
    const serialized = JSON.stringify(config, null, 2) + "\n";
    writeFileSync(configPath, serialized, { encoding: "utf-8", mode: 0o600 });
}
36
/**
 * Copy values from config.json into process.env.
 *
 * Environment variables win: a config value is applied only when the
 * variable is currently unset or empty (the check is a truthiness test, so
 * an empty string counts as "not set"). Non-string config values are ignored.
 *
 * Call this once at startup, before any tool initialization.
 */
export function applyCliConfig() {
    for (const [name, value] of Object.entries(loadCliConfig())) {
        const alreadySet = Boolean(process.env[name]);
        if (alreadySet || typeof value !== "string") {
            continue;
        }
        process.env[name] = value;
    }
}
51
+ //# sourceMappingURL=cli-config.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli-config.js","sourceRoot":"","sources":["../src/cli-config.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAEjD,MAAM,eAAe,GAAG,aAAa,CAAC;AAEtC,MAAM,UAAU,gBAAgB;IAC9B,OAAO,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,eAAe,EAAE,eAAe,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,UAAU,aAAa;IAC3B,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,YAAY,CAAC,gBAAgB,EAAE,EAAE,OAAO,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC5C,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACnE,OAAO,MAAgC,CAAC;QAC1C,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,2DAA2D;IAC7D,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAA8B;IAC1D,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAC;IACtC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAC7E,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc;IAC5B,MAAM,MAAM,GAAG,aAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAClD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC3B,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Interactive setup wizard for imperium-crawl CLI (`imperium-crawl setup`).
3
+ *
4
+ * Guides the user through configuring API keys and saves them to
5
+ * ~/.imperium-crawl/config.json for persistent use.
6
+ *
7
+ * @returns A promise that resolves with no value when the wizard finishes.
8
+ */
9
+ export declare function runSetup(): Promise<void>;
10
+ //# sourceMappingURL=cli-onboarding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli-onboarding.d.ts","sourceRoot":"","sources":["../src/cli-onboarding.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAeH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CA2H9C"}
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Interactive setup wizard for imperium-crawl CLI.
3
+ *
4
+ * Usage: imperium-crawl setup
5
+ *
6
+ * Guides the user through configuring API keys and saves them to
7
+ * ~/.imperium-crawl/config.json for persistent use.
8
+ */
9
+ import chalk from "chalk";
10
+ import { input, select, confirm } from "@inquirer/prompts";
11
+ import { loadCliConfig, saveCliConfig, getCliConfigPath } from "./cli-config.js";
12
+ const BANNER = chalk.cyan(`
13
+ ██╗███╗ ███╗██████╗ ███████╗██████╗ ██╗██╗ ██╗███╗ ███╗
14
+ ██║████╗ ████║██╔══██╗██╔════╝██╔══██╗██║██║ ██║████╗ ████║
15
+ ██║██╔████╔██║██████╔╝█████╗ ██████╔╝██║██║ ██║██╔████╔██║
16
+ ██║██║╚██╔╝██║██╔═══╝ ██╔══╝ ██╔══██╗██║██║ ██║██║╚██╔╝██║
17
+ ██║██║ ╚═╝ ██║██║ ███████╗██║ ██║██║╚██████╔╝██║ ╚═╝ ██║
18
+ ╚═╝╚═╝ ╚═╝╚═╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═════╝ ╚═╝ ╚═╝
19
+ `); // Cyan ASCII-art logo; runSetup() prints it before anything else.
20
+ export async function runSetup() {
21
+ console.log(BANNER);
22
+ console.log(chalk.bold(" API Key Setup\n"));
23
+ const existing = loadCliConfig();
24
+ const config = { ...existing };
25
+ // ── Brave Search ──────────────────────────────────────────────────
26
+ const hasBrave = !!(process.env.BRAVE_API_KEY || existing.BRAVE_API_KEY);
27
+ if (hasBrave) {
28
+ console.log(chalk.green(" ✓ BRAVE_API_KEY") +
29
+ chalk.dim(" — already configured (search, news_search, image_search, video_search)"));
30
+ }
31
+ else {
32
+ console.log(chalk.dim(" Brave Search enables 4 search tools. Free tier: https://brave.com/search/api/\n"));
33
+ const braveKey = await input({
34
+ message: "Brave Search API key (press Enter to skip):",
35
+ });
36
+ if (braveKey.trim())
37
+ config.BRAVE_API_KEY = braveKey.trim();
38
+ }
39
+ console.log();
40
+ // ── LLM Provider ─────────────────────────────────────────────────
41
+ const hasLLM = !!(process.env.LLM_API_KEY || existing.LLM_API_KEY);
42
+ if (hasLLM) {
43
+ const currentProvider = process.env.LLM_PROVIDER || existing.LLM_PROVIDER || "anthropic";
44
+ console.log(chalk.green(` ✓ LLM_API_KEY (${currentProvider})`) +
45
+ chalk.dim(" — already configured (ai_extract tool)"));
46
+ }
47
+ else {
48
+ console.log(chalk.dim(" LLM key enables the ai_extract tool — natural language data extraction.\n"));
49
+ const provider = await select({
50
+ message: "LLM provider (for ai_extract tool):",
51
+ choices: [
52
+ {
53
+ name: "Anthropic (Claude Haiku — default)",
54
+ value: "anthropic",
55
+ description: "Fast, affordable. Get key: https://console.anthropic.com",
56
+ },
57
+ {
58
+ name: "OpenAI (GPT-4o mini — default)",
59
+ value: "openai",
60
+ description: "Widely used. Get key: https://platform.openai.com",
61
+ },
62
+ {
63
+ name: "MiniMax (M2.5 — 200K context, reasoning)",
64
+ value: "minimax",
65
+ description: "Strong model, OpenAI-compatible API.",
66
+ },
67
+ {
68
+ name: "Skip for now",
69
+ value: "skip",
70
+ description: "Configure later via env vars or run setup again.",
71
+ },
72
+ ],
73
+ });
74
+ if (provider !== "skip") {
75
+ config.LLM_PROVIDER = provider;
76
+ const providerLabel = provider === "anthropic" ? "Anthropic" : provider === "openai" ? "OpenAI" : "MiniMax";
77
+ const llmKey = await input({
78
+ message: `${providerLabel} API key:`,
79
+ });
80
+ if (llmKey.trim())
81
+ config.LLM_API_KEY = llmKey.trim();
82
+ }
83
+ }
84
+ console.log();
85
+ // ── 2Captcha ─────────────────────────────────────────────────────
86
+ const hasCaptcha = !!(process.env.TWOCAPTCHA_API_KEY ||
87
+ process.env.TWO_CAPTCHA_API_KEY ||
88
+ existing.TWOCAPTCHA_API_KEY);
89
+ if (hasCaptcha) {
90
+ console.log(chalk.green(" ✓ TWOCAPTCHA_API_KEY") +
91
+ chalk.dim(" — already configured (auto CAPTCHA solving in stealth level 3)"));
92
+ }
93
+ else {
94
+ const wantCaptcha = await confirm({
95
+ message: "Configure 2Captcha for automatic CAPTCHA solving? (optional)",
96
+ default: false,
97
+ });
98
+ if (wantCaptcha) {
99
+ const captchaKey = await input({
100
+ message: "2Captcha API key (https://2captcha.com):",
101
+ });
102
+ if (captchaKey.trim())
103
+ config.TWOCAPTCHA_API_KEY = captchaKey.trim();
104
+ }
105
+ }
106
+ // ── Save + Summary ────────────────────────────────────────────────
107
+ saveCliConfig(config);
108
+ console.log("\n" + chalk.bold(" ─────────────────────────────────────────"));
109
+ const enabledTools = [];
110
+ if (config.BRAVE_API_KEY || process.env.BRAVE_API_KEY) {
111
+ enabledTools.push("search, news_search, image_search, video_search");
112
+ }
113
+ if (config.LLM_API_KEY || process.env.LLM_API_KEY) {
114
+ enabledTools.push("ai_extract");
115
+ }
116
+ if (config.TWOCAPTCHA_API_KEY || process.env.TWOCAPTCHA_API_KEY) {
117
+ enabledTools.push("CAPTCHA auto-solve (stealth lvl 3)");
118
+ }
119
+ if (enabledTools.length > 0) {
120
+ console.log(chalk.green(`\n 🚀 Ready! Extra tools enabled: ${enabledTools.join(", ")}`));
121
+ }
122
+ else {
123
+ console.log(chalk.yellow("\n ⚠ No API keys configured.") +
124
+ chalk.dim(" Basic scraping tools work without keys."));
125
+ }
126
+ console.log(chalk.dim(`\n Config saved → ${getCliConfigPath()}\n`));
127
+ }
128
+ //# sourceMappingURL=cli-onboarding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli-onboarding.js","sourceRoot":"","sources":["../src/cli-onboarding.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAEjF,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC;;;;;;;CAOzB,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,QAAQ;IAC5B,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACpB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC;IAE7C,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;IACjC,MAAM,MAAM,GAA2B,EAAE,GAAG,QAAQ,EAAE,CAAC;IAEvD,qEAAqE;IACrE,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,QAAQ,CAAC,aAAa,CAAC,CAAC;IACzE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,KAAK,CAAC,mBAAmB,CAAC;YAC9B,KAAK,CAAC,GAAG,CAAC,yEAAyE,CAAC,CACvF,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,mFAAmF,CAAC,CAAC,CAAC;QAC5G,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC;YAC3B,OAAO,EAAE,6CAA6C;SACvD,CAAC,CAAC;QACH,IAAI,QAAQ,CAAC,IAAI,EAAE;YAAE,MAAM,CAAC,aAAa,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC9D,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,oEAAoE;IACpE,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,QAAQ,CAAC,WAAW,CAAC,CAAC;IACnE,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,QAAQ,CAAC,YAAY,IAAI,WAAW,CAAC;QACzF,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,KAAK,CAAC,oBAAoB,eAAe,GAAG,CAAC;YACjD,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CACvD,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,6EAA6E,CAAC,CAAC,CAAC;QACtG,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC;YAC5B,OAAO,EAAE,qCAAqC;YAC9C,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,oCAAoC;oBAC1C,KAAK,EAAE,WAAW;oBAClB,WAAW,EAAE,0DAA0D;iBACxE;gBACD;oBACE,IAAI,EAAE,gCAAgC;oBACtC,KAAK,EAAE,QAAQ;oBACf,WAAW,EAAE,mDAAmD;iBACjE;gBACD;oBACE,IAAI,EAAE,0CAA0C;oBAChD,KAAK,EAAE,SAAS;oBAChB,WAAW,EAAE,sCAAsC;iBACpD;gBACD;oBACE,IAAI,EAAE,cAAc;oBACpB,KAAK,EAAE,MAAM;oBACb,WAAW,EAAE,kDAAkD;iBAChE;aACF;SACF,CAAC,CAAC;QAEH,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;YACxB,MAAM,CAAC,YAAY,GAAG,QAAQ,CAAC
;YAC/B,MAAM,aAAa,GACjB,QAAQ,KAAK,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;YACxF,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC;gBACzB,OAAO,EAAE,GAAG,aAAa,WAAW;aACrC,CAAC,CAAC;YACH,IAAI,MAAM,CAAC,IAAI,EAAE;gBAAE,MAAM,CAAC,WAAW,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;QACxD,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,oEAAoE;IACpE,MAAM,UAAU,GAAG,CAAC,CAAC,CACnB,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,OAAO,CAAC,GAAG,CAAC,mBAAmB;QAC/B,QAAQ,CAAC,kBAAkB,CAC5B,CAAC;IACF,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,KAAK,CAAC,wBAAwB,CAAC;YACnC,KAAK,CAAC,GAAG,CAAC,iEAAiE,CAAC,CAC/E,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC;YAChC,OAAO,EAAE,8DAA8D;YACvE,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;QACH,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC;gBAC7B,OAAO,EAAE,0CAA0C;aACpD,CAAC,CAAC;YACH,IAAI,UAAU,CAAC,IAAI,EAAE;gBAAE,MAAM,CAAC,kBAAkB,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;QACvE,CAAC;IACH,CAAC;IAED,qEAAqE;IACrE,aAAa,CAAC,MAAM,CAAC,CAAC;IAEtB,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC,CAAC;IAE9E,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,MAAM,CAAC,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC;QACtD,YAAY,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,MAAM,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;QAClD,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,MAAM,CAAC,kBAAkB,IAAI,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChE,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,sCAAsC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5F,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,MAAM,CAAC,+BAA+B,CAAC;YAC3C,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CACxD,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,sBAAsB,gBAAgB,EAAE,IAAI,CAAC,CAAC,CAAC;AACvE,CAAC"}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Full TUI (Terminal User Interface) for imperium-crawl — v3
3
+ *
4
+ * Slash-command-driven UX (Claude Code aesthetic).
5
+ * Uses readline for the main prompt; @clack/prompts only for parameter collection.
6
+ *
7
+ * Activated only when there are no CLI args AND process.stdout.isTTY is set.
8
+ * Non-TTY mode (pipe/CI/agents) is unaffected — MCP server runs as before.
9
+ * CLI subcommands (scrape, crawl, etc.) are unaffected — they bypass this.
10
+ */
11
+ export declare function runTui(): Promise<void>;
12
+ //# sourceMappingURL=cli-tui.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli-tui.d.ts","sourceRoot":"","sources":["../src/cli-tui.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAgjCH,wBAAsB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAsB5C"}