imperium-crawl 2.3.1 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. package/README.md +146 -11
  2. package/dist/cli-explore.d.ts +30 -0
  3. package/dist/cli-explore.d.ts.map +1 -0
  4. package/dist/cli-explore.js +427 -0
  5. package/dist/cli-explore.js.map +1 -0
  6. package/dist/cli-recorder.d.ts +44 -0
  7. package/dist/cli-recorder.d.ts.map +1 -0
  8. package/dist/cli-recorder.js +67 -0
  9. package/dist/cli-recorder.js.map +1 -0
  10. package/dist/cli.d.ts.map +1 -1
  11. package/dist/cli.js +51 -3
  12. package/dist/cli.js.map +1 -1
  13. package/dist/config.d.ts.map +1 -1
  14. package/dist/config.js +3 -0
  15. package/dist/config.js.map +1 -1
  16. package/dist/constants.d.ts +1 -1
  17. package/dist/constants.d.ts.map +1 -1
  18. package/dist/constants.js +31 -1
  19. package/dist/constants.js.map +1 -1
  20. package/dist/flows/engine.d.ts +7 -0
  21. package/dist/flows/engine.d.ts.map +1 -0
  22. package/dist/flows/engine.js +183 -0
  23. package/dist/flows/engine.js.map +1 -0
  24. package/dist/flows/index.d.ts +6 -0
  25. package/dist/flows/index.d.ts.map +1 -0
  26. package/dist/flows/index.js +6 -0
  27. package/dist/flows/index.js.map +1 -0
  28. package/dist/flows/server.d.ts +11 -0
  29. package/dist/flows/server.d.ts.map +1 -0
  30. package/dist/flows/server.js +81 -0
  31. package/dist/flows/server.js.map +1 -0
  32. package/dist/flows/smart-target.d.ts +9 -0
  33. package/dist/flows/smart-target.d.ts.map +1 -0
  34. package/dist/flows/smart-target.js +84 -0
  35. package/dist/flows/smart-target.js.map +1 -0
  36. package/dist/flows/storage.d.ts +26 -0
  37. package/dist/flows/storage.d.ts.map +1 -0
  38. package/dist/flows/storage.js +118 -0
  39. package/dist/flows/storage.js.map +1 -0
  40. package/dist/flows/templates.d.ts +4 -0
  41. package/dist/flows/templates.d.ts.map +1 -0
  42. package/dist/flows/templates.js +35 -0
  43. package/dist/flows/templates.js.map +1 -0
  44. package/dist/flows/types.d.ts +3356 -0
  45. package/dist/flows/types.d.ts.map +1 -0
  46. package/dist/flows/types.js +133 -0
  47. package/dist/flows/types.js.map +1 -0
  48. package/dist/knowledge/index.d.ts +1 -0
  49. package/dist/knowledge/index.d.ts.map +1 -1
  50. package/dist/knowledge/index.js +1 -0
  51. package/dist/knowledge/index.js.map +1 -1
  52. package/dist/knowledge/record-browser.d.ts +17 -0
  53. package/dist/knowledge/record-browser.d.ts.map +1 -0
  54. package/dist/knowledge/record-browser.js +29 -0
  55. package/dist/knowledge/record-browser.js.map +1 -0
  56. package/dist/knowledge/store.d.ts +19 -0
  57. package/dist/knowledge/store.d.ts.map +1 -1
  58. package/dist/knowledge/store.js +63 -4
  59. package/dist/knowledge/store.js.map +1 -1
  60. package/dist/llm/retry.d.ts +4 -2
  61. package/dist/llm/retry.d.ts.map +1 -1
  62. package/dist/llm/retry.js +15 -4
  63. package/dist/llm/retry.js.map +1 -1
  64. package/dist/sessions/browser-connect.d.ts +30 -0
  65. package/dist/sessions/browser-connect.d.ts.map +1 -0
  66. package/dist/sessions/browser-connect.js +68 -0
  67. package/dist/sessions/browser-connect.js.map +1 -0
  68. package/dist/sessions/browser-state.d.ts +35 -0
  69. package/dist/sessions/browser-state.d.ts.map +1 -0
  70. package/dist/sessions/browser-state.js +74 -0
  71. package/dist/sessions/browser-state.js.map +1 -0
  72. package/dist/sessions/index.d.ts +1 -1
  73. package/dist/sessions/index.d.ts.map +1 -1
  74. package/dist/sessions/index.js +1 -1
  75. package/dist/sessions/index.js.map +1 -1
  76. package/dist/sessions/inject-cookies.d.ts +20 -0
  77. package/dist/sessions/inject-cookies.d.ts.map +1 -0
  78. package/dist/sessions/inject-cookies.js +57 -0
  79. package/dist/sessions/inject-cookies.js.map +1 -0
  80. package/dist/sessions/manager.d.ts +31 -1
  81. package/dist/sessions/manager.d.ts.map +1 -1
  82. package/dist/sessions/manager.js +97 -6
  83. package/dist/sessions/manager.js.map +1 -1
  84. package/dist/sessions/types.d.ts +2 -0
  85. package/dist/sessions/types.d.ts.map +1 -1
  86. package/dist/skills/chain.d.ts +61 -0
  87. package/dist/skills/chain.d.ts.map +1 -0
  88. package/dist/skills/chain.js +182 -0
  89. package/dist/skills/chain.js.map +1 -0
  90. package/dist/skills/conditions.d.ts +14 -0
  91. package/dist/skills/conditions.d.ts.map +1 -0
  92. package/dist/skills/conditions.js +208 -0
  93. package/dist/skills/conditions.js.map +1 -0
  94. package/dist/skills/manager.d.ts +47 -2
  95. package/dist/skills/manager.d.ts.map +1 -1
  96. package/dist/skills/manager.js.map +1 -1
  97. package/dist/skills/parameters.d.ts +49 -0
  98. package/dist/skills/parameters.d.ts.map +1 -0
  99. package/dist/skills/parameters.js +157 -0
  100. package/dist/skills/parameters.js.map +1 -0
  101. package/dist/snapshot/store.d.ts +8 -0
  102. package/dist/snapshot/store.d.ts.map +1 -1
  103. package/dist/snapshot/store.js +48 -0
  104. package/dist/snapshot/store.js.map +1 -1
  105. package/dist/stealth/antibot-detector.d.ts +1 -1
  106. package/dist/stealth/antibot-detector.d.ts.map +1 -1
  107. package/dist/stealth/antibot-detector.js +56 -0
  108. package/dist/stealth/antibot-detector.js.map +1 -1
  109. package/dist/stealth/browser-image-extract.d.ts +43 -0
  110. package/dist/stealth/browser-image-extract.d.ts.map +1 -0
  111. package/dist/stealth/browser-image-extract.js +268 -0
  112. package/dist/stealth/browser-image-extract.js.map +1 -0
  113. package/dist/stealth/browser.d.ts +5 -0
  114. package/dist/stealth/browser.d.ts.map +1 -1
  115. package/dist/stealth/browser.js +82 -1
  116. package/dist/stealth/browser.js.map +1 -1
  117. package/dist/stealth/chrome-profile.d.ts +1 -0
  118. package/dist/stealth/chrome-profile.d.ts.map +1 -1
  119. package/dist/stealth/chrome-profile.js +28 -5
  120. package/dist/stealth/chrome-profile.js.map +1 -1
  121. package/dist/stealth/detector.d.ts +10 -1
  122. package/dist/stealth/detector.d.ts.map +1 -1
  123. package/dist/stealth/detector.js +117 -25
  124. package/dist/stealth/detector.js.map +1 -1
  125. package/dist/stealth/headers.d.ts +1 -1
  126. package/dist/stealth/headers.d.ts.map +1 -1
  127. package/dist/stealth/headers.js +94 -2
  128. package/dist/stealth/headers.js.map +1 -1
  129. package/dist/stealth/index.d.ts +5 -0
  130. package/dist/stealth/index.d.ts.map +1 -1
  131. package/dist/stealth/index.js +257 -27
  132. package/dist/stealth/index.js.map +1 -1
  133. package/dist/stealth/proxy.d.ts +40 -1
  134. package/dist/stealth/proxy.d.ts.map +1 -1
  135. package/dist/stealth/proxy.js +90 -6
  136. package/dist/stealth/proxy.js.map +1 -1
  137. package/dist/tools/action-executor.d.ts +66 -0
  138. package/dist/tools/action-executor.d.ts.map +1 -0
  139. package/dist/tools/action-executor.js +403 -0
  140. package/dist/tools/action-executor.js.map +1 -0
  141. package/dist/tools/batch-download.d.ts +33 -0
  142. package/dist/tools/batch-download.d.ts.map +1 -0
  143. package/dist/tools/batch-download.js +208 -0
  144. package/dist/tools/batch-download.js.map +1 -0
  145. package/dist/tools/batch-scrape.d.ts +2 -2
  146. package/dist/tools/browser.d.ts +100 -0
  147. package/dist/tools/browser.d.ts.map +1 -0
  148. package/dist/tools/browser.js +448 -0
  149. package/dist/tools/browser.js.map +1 -0
  150. package/dist/tools/crawl.d.ts +2 -2
  151. package/dist/tools/create-skill.d.ts +2 -2
  152. package/dist/tools/discover-apis.d.ts +1 -1
  153. package/dist/tools/discover-apis.d.ts.map +1 -1
  154. package/dist/tools/discover-apis.js +3 -0
  155. package/dist/tools/discover-apis.js.map +1 -1
  156. package/dist/tools/download.d.ts +39 -6
  157. package/dist/tools/download.d.ts.map +1 -1
  158. package/dist/tools/download.js +248 -44
  159. package/dist/tools/download.js.map +1 -1
  160. package/dist/tools/extract.d.ts +1 -1
  161. package/dist/tools/image-search.d.ts +1 -1
  162. package/dist/tools/index.d.ts.map +1 -1
  163. package/dist/tools/index.js +26 -0
  164. package/dist/tools/index.js.map +1 -1
  165. package/dist/tools/inspect-flow.d.ts +24 -0
  166. package/dist/tools/inspect-flow.d.ts.map +1 -0
  167. package/dist/tools/inspect-flow.js +23 -0
  168. package/dist/tools/inspect-flow.js.map +1 -0
  169. package/dist/tools/instagram.d.ts +2 -2
  170. package/dist/tools/interact.d.ts +91 -50
  171. package/dist/tools/interact.d.ts.map +1 -1
  172. package/dist/tools/interact.js +80 -299
  173. package/dist/tools/interact.js.map +1 -1
  174. package/dist/tools/knowledge.d.ts +24 -0
  175. package/dist/tools/knowledge.d.ts.map +1 -0
  176. package/dist/tools/knowledge.js +99 -0
  177. package/dist/tools/knowledge.js.map +1 -0
  178. package/dist/tools/list-flows.d.ts +21 -0
  179. package/dist/tools/list-flows.d.ts.map +1 -0
  180. package/dist/tools/list-flows.js +18 -0
  181. package/dist/tools/list-flows.js.map +1 -0
  182. package/dist/tools/list-skills.js +1 -1
  183. package/dist/tools/list-skills.js.map +1 -1
  184. package/dist/tools/manifest.d.ts.map +1 -1
  185. package/dist/tools/manifest.js +48 -0
  186. package/dist/tools/manifest.js.map +1 -1
  187. package/dist/tools/monitor-websocket.d.ts +1 -1
  188. package/dist/tools/monitor.d.ts +46 -0
  189. package/dist/tools/monitor.d.ts.map +1 -0
  190. package/dist/tools/monitor.js +213 -0
  191. package/dist/tools/monitor.js.map +1 -0
  192. package/dist/tools/news-search.d.ts +1 -1
  193. package/dist/tools/pdf-extract.d.ts +38 -0
  194. package/dist/tools/pdf-extract.d.ts.map +1 -0
  195. package/dist/tools/pdf-extract.js +244 -0
  196. package/dist/tools/pdf-extract.js.map +1 -0
  197. package/dist/tools/query-api.d.ts +6 -6
  198. package/dist/tools/readability.d.ts +2 -2
  199. package/dist/tools/record-flow.d.ts +39 -0
  200. package/dist/tools/record-flow.d.ts.map +1 -0
  201. package/dist/tools/record-flow.js +406 -0
  202. package/dist/tools/record-flow.js.map +1 -0
  203. package/dist/tools/reddit.d.ts +4 -4
  204. package/dist/tools/run-flow.d.ts +54 -0
  205. package/dist/tools/run-flow.d.ts.map +1 -0
  206. package/dist/tools/run-flow.js +47 -0
  207. package/dist/tools/run-flow.js.map +1 -0
  208. package/dist/tools/run-skill.d.ts +14 -4
  209. package/dist/tools/run-skill.d.ts.map +1 -1
  210. package/dist/tools/run-skill.js +74 -0
  211. package/dist/tools/run-skill.js.map +1 -1
  212. package/dist/tools/scrape.d.ts +9 -6
  213. package/dist/tools/scrape.d.ts.map +1 -1
  214. package/dist/tools/scrape.js +19 -1
  215. package/dist/tools/scrape.js.map +1 -1
  216. package/dist/tools/screenshot.d.ts.map +1 -1
  217. package/dist/tools/screenshot.js +6 -0
  218. package/dist/tools/screenshot.js.map +1 -1
  219. package/dist/tools/search.d.ts +1 -1
  220. package/dist/tools/serve-flow.d.ts +36 -0
  221. package/dist/tools/serve-flow.d.ts.map +1 -0
  222. package/dist/tools/serve-flow.js +42 -0
  223. package/dist/tools/serve-flow.js.map +1 -0
  224. package/dist/tools/snapshot.d.ts +5 -5
  225. package/dist/tools/snapshot.d.ts.map +1 -1
  226. package/dist/tools/snapshot.js +3 -0
  227. package/dist/tools/snapshot.js.map +1 -1
  228. package/dist/tools/validate-flow.d.ts +24 -0
  229. package/dist/tools/validate-flow.d.ts.map +1 -0
  230. package/dist/tools/validate-flow.js +23 -0
  231. package/dist/tools/validate-flow.js.map +1 -0
  232. package/dist/tools/video-search.d.ts +1 -1
  233. package/dist/tools/watch.d.ts +68 -0
  234. package/dist/tools/watch.d.ts.map +1 -0
  235. package/dist/tools/watch.js +224 -0
  236. package/dist/tools/watch.js.map +1 -0
  237. package/dist/tools/youtube.d.ts +2 -2
  238. package/dist/utils/fetcher.d.ts +13 -4
  239. package/dist/utils/fetcher.d.ts.map +1 -1
  240. package/dist/utils/fetcher.js +153 -23
  241. package/dist/utils/fetcher.js.map +1 -1
  242. package/package.json +19 -5
package/README.md CHANGED
@@ -1,20 +1,34 @@
1
1
  <div align="center">
2
2
 
3
+ <img src="assets/hero-banner.png" alt="imperium-crawl — 3-level auto-escalating stealth engine" width="800" />
4
+
3
5
  # imperium-crawl
4
6
 
5
7
  **The most powerful open-source CLI tool for web scraping, crawling, and data extraction.**
6
8
 
7
- 28 tools. Zero API keys required. One `npx` command.
9
+ 39 tools. Zero API keys required. One `npx` command.
8
10
 
9
11
  [![npm version](https://img.shields.io/npm/v/imperium-crawl.svg)](https://www.npmjs.com/package/imperium-crawl)
10
12
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE)
11
- [![Tests](https://img.shields.io/badge/tests-466%20passing-brightgreen.svg)]()
13
+ [![Tests](https://img.shields.io/badge/tests-580%20passing-brightgreen.svg)]()
12
14
  [![npm downloads](https://img.shields.io/npm/dm/imperium-crawl.svg)](https://www.npmjs.com/package/imperium-crawl)
13
15
 
14
16
  </div>
15
17
 
16
18
  ---
17
19
 
20
+ ## What's new in 2.5.0
21
+
22
+ Three new tools for document extraction and content monitoring — all zero-API-key, all native:
23
+
24
+ - **`pdf-extract`** — Pull text, pages, tables, and metadata from any PDF (local or remote) via `pdfjs-dist`. Ideal for regulatory docs, sustainability reports, invoices. Smoke-tested on a 98-page CBAM Guidance PDF (199K chars, confidence 0.99).
25
+ - **`watch`** — Hash-based one-shot change detector for a single URL. Cron-friendly. Fires a webhook on change.
26
+ - **`monitor`** — Multi-URL intelligence digest. Reads a JSON config grouping URLs by topic, emits a markdown digest filtered by minimum change percentage.
27
+
28
+ See [CHANGELOG.md](./CHANGELOG.md) for the full release notes.
29
+
30
+ ---
31
+
18
32
  ## Quick Start
19
33
 
20
34
  Get running in 30 seconds.
@@ -31,7 +45,13 @@ npx -y imperium-crawl scrape --url https://example.com
31
45
  npm install -g imperium-crawl
32
46
  ```
33
47
 
34
- > That's it. 22 of 28 tools work with zero API keys. Add optional keys later to unlock search, AI extraction, and CAPTCHA solving.
48
+ **Install from a local tarball** (e.g. pre-release testing):
49
+
50
+ ```bash
51
+ npm install -g ./imperium-crawl-2.5.0.tgz
52
+ ```
53
+
54
+ > That's it. 33 of 39 tools work with zero API keys. Add optional keys later to unlock search, AI extraction, and CAPTCHA solving.
35
55
 
36
56
  ---
37
57
 
@@ -107,7 +127,7 @@ Scraping 4 URLs (concurrency: 3)...
107
127
  ## Why imperium-crawl?
108
128
 
109
129
  🔓 **Zero API Keys Required**
110
- 22 of 28 tools work out of the box. No accounts, no tokens, no credit cards. Just `npx` and go.
130
+ 33 of 39 tools work out of the box. No accounts, no tokens, no credit cards. Just `npx` and go.
111
131
 
112
132
  🛡️ **3-Level Auto-Escalating Stealth**
113
133
  Headers → TLS fingerprinting → headless browser + CAPTCHA solving. Automatically escalates until it gets through.
@@ -115,7 +135,7 @@ Headers → TLS fingerprinting → headless browser + CAPTCHA solving. Automatic
115
135
  🧠 **Self-Improving**
116
136
  Adaptive learning engine remembers what works per domain. Second visit is 3x faster. The more you use it, the smarter it gets.
117
137
 
118
- 🧰 **28 Tools, 2 Modes**
138
+ 🧰 **39 Tools, 2 Modes**
119
139
  CLI tool or interactive TUI. Scraping, crawling, search, extraction, API discovery, WebSocket monitoring, browser automation, batch processing.
120
140
 
121
141
  📜 **14 Built-in Recipes**
@@ -131,7 +151,7 @@ Teach it once, run forever. Auto-detect patterns on any page, save as reusable s
131
151
  | Feature | **imperium-crawl** | Firecrawl | Crawl4AI | Browserbase | Puppeteer |
132
152
  |---------|:------------------:|:---------:|:--------:|:-----------:|:---------:|
133
153
  | Price | **Free forever** | $19+/month | Free | $0.01/min | Free |
134
- | Total tools | **28** | 5 | 2 | 4 | N/A |
154
+ | Total tools | **39** | 5 | 2 | 4 | N/A |
135
155
  | Stealth levels | **3 (auto-escalate)** | Cloud-based | 1 | Cloud-based | None |
136
156
  | Anti-bot detection | **7 systems** | Partial | Partial | Partial | None |
137
157
  | TLS fingerprinting | **JA3/JA4** | No | No | No | No |
@@ -224,7 +244,7 @@ Second visit to cloudflare.com:
224
244
 
225
245
  ---
226
246
 
227
- ## All 28 Tools
247
+ ## All 39 Tools
228
248
 
229
249
  ### 📄 Scraping (no API key needed)
230
250
 
@@ -272,7 +292,7 @@ Second visit to cloudflare.com:
272
292
 
273
293
  | Tool | What It Does |
274
294
  |------|-------------|
275
- | **interact** | Browser automation with 18 action types (click, type, scroll, wait, screenshot, evaluate, select, hover, press, navigate, drag, upload, storage, cookies, pdf, auth_login). Ref targeting via ARIA snapshot, session encryption, action policy, domain filter, network interception, device emulation. |
295
+ | **interact** | Browser automation with 19 action types (click, type, scroll, wait, screenshot, evaluate, select, hover, press, navigate, drag, upload, storage, cookies, pdf, auth_login, refresh). Ref targeting via ARIA snapshot, session encryption, action policy, domain filter, network interception, device emulation. |
276
296
  | **snapshot** | ARIA-based page snapshot with interactive element refs. Use refs in interact for precise targeting. Annotated screenshots. |
277
297
 
278
298
  ### 📱 Social Media (no API key needed)
@@ -299,6 +319,69 @@ Second visit to cloudflare.com:
299
319
  | **job_status** | Full results for a specific batch job including per-URL outcomes. |
300
320
  | **delete_job** | Clean up completed or failed batch jobs. |
301
321
 
322
+ ### 🧠 Knowledge Engine (no API key needed)
323
+
324
+ | Tool | What It Does |
325
+ |------|-------------|
326
+ | **knowledge** | Dump adaptive knowledge engine stats — per-domain success rates, optimal stealth levels, anti-bot detection history, rate limits. Use to debug scraping issues and understand problematic domains. |
327
+
328
+ ### 📄 Documents (no API key needed)
329
+
330
+ | Tool | What It Does |
331
+ |------|-------------|
332
+ | **pdf_extract** | Extract text, pages, tables, and metadata from a local or remote PDF. Native text-layer strategy via `pdfjs-dist`. OCR + Claude Vision fallbacks deferred to v2.6.0. Use for sustainability reports, invoices, regulatory PDFs. |
333
+
334
+ ```bash
335
+ imperium-crawl pdf-extract --input ./report.pdf --output ./extracted.json
336
+ imperium-crawl pdf-extract --input https://example.com/report.pdf --max-pages 20
337
+ ```
338
+
339
+ ### 👀 Change Tracking (no API key needed)
340
+
341
+ | Tool | What It Does |
342
+ |------|-------------|
343
+ | **watch** | One-shot change detector: scrape a URL, hash its content (readability / markdown / full), compare against the last snapshot, fire a webhook on change. Pair with cron for periodic monitoring. |
344
+ | **monitor** | Portfolio-level change tracker across many URLs grouped by topic. Reads a JSON config, runs `watch` on each URL, emits a markdown digest filtered by minimum change percentage. |
345
+
346
+ ```bash
347
+ # Watch a single URL — run periodically via cron
348
+ imperium-crawl watch --url https://carbonchain.com/pricing \
349
+ --output-dir ./data/watch \
350
+ --webhook https://hooks.example.com/on-change
351
+
352
+ # Monitor many URLs grouped by topic, emit a daily digest
353
+ imperium-crawl monitor --config ./monitor.json --output-dir ./data/monitor
354
+ ```
355
+
356
+ `monitor.json`:
357
+ ```json
358
+ {
359
+ "topics": [
360
+ {
361
+ "name": "Competitor pricing",
362
+ "urls": ["https://carbonchain.com/pricing", "https://spherasolutions.com/cbam"]
363
+ }
364
+ ]
365
+ }
366
+ ```
367
+
368
+ ### 🔁 Imperium Flows (no API key needed; browser workflows may require Playwright)
369
+
370
+ | Tool | What It Does |
371
+ |------|-------------|
372
+ | **record_flow** | Record a headed browser workflow as a generic flow family/variant. Stores smart selector metadata and reusable input placeholders. |
373
+ | **run_flow** | Run a saved flow with runtime JSON input, CAPTCHA policy, browser mode, and evidence collection. |
374
+ | **serve_flow** | Expose saved flows through a local HTTP API. Requires bearer auth when bound publicly. |
375
+ | **list_flows** | List project-local and global flow definitions. |
376
+ | **inspect_flow** | Inspect a saved flow JSON definition. |
377
+ | **validate_flow** | Validate a flow schema and report inputs, steps, and storage path. |
378
+
379
+ ```bash
380
+ imperium-crawl record-flow --family generic-search --variant site-a --url https://example.com
381
+ imperium-crawl run-flow generic-search/site-a --input '{"query":"example"}'
382
+ imperium-crawl serve-flow generic-search --port 8787
383
+ ```
384
+
302
385
  ---
303
386
 
304
387
  ## Setup
@@ -366,6 +449,26 @@ imperium-crawl tui
366
449
 
367
450
  Interactive slash-command terminal with parameter prompts, table rendering, markdown display, and session state. Use `/save` to export results and `/again` to re-run the last command.
368
451
 
452
+ ### Explore REPL
453
+
454
+ Interactively explore a site in a headed browser, then save the session as a reusable skill:
455
+
456
+ ```bash
457
+ imperium-crawl explore https://example.com
458
+ ```
459
+
460
+ ```
461
+ > navigate https://example.com/login
462
+ > type "#email" "user@example.com"
463
+ > type "#password" "{{env:MY_PASSWORD}}"
464
+ > click "#submit"
465
+ > snapshot
466
+ > save-skill my-login
467
+ ✅ Saved skill: my-login (4 actions, 1 parameter detected)
468
+ ```
469
+
470
+ Commands: `navigate`, `click`, `type`, `select`, `hover`, `press`, `scroll`, `wait`, `screenshot`, `snapshot`, `evaluate`, `save-skill`, `history`, `undo`, `status`, `help`, `exit`
471
+
369
472
  ---
370
473
 
371
474
  ## Skills & Recipes
@@ -389,6 +492,37 @@ run_skill({ name: "tc-ai-news" })
389
492
 
390
493
  Skills are saved in `~/.imperium-crawl/skills/` as JSON files — human-readable, editable, portable.
391
494
 
495
+ ### Skill Parameters
496
+
497
+ Use template variables in skills — resolved at run time:
498
+
499
+ ```bash
500
+ # In skill JSON actions:
501
+ { "value": "{{input:query}}" } # passed via --params or prompted
502
+ { "value": "{{env:SITE_PASSWORD}}" } # from environment variable
503
+ { "value": "{{computed:date_today}}" } # auto-computed (date_today, timestamp, random_string, year, month, day)
504
+
505
+ # Run with params:
506
+ imperium-crawl run-skill my-search --params '{"query": "machine learning"}'
507
+ ```
508
+
509
+ ### Skill Chains
510
+
511
+ Chain skills together — output of one step becomes input to the next:
512
+
513
+ ```json
514
+ {
515
+ "type": "chain",
516
+ "name": "search-and-extract",
517
+ "steps": [
518
+ { "skill": "search-results", "output": "search" },
519
+ { "skill": "extract-details", "input": { "url": "$search.results[0].url" }, "output": "details" }
520
+ ]
521
+ }
522
+ ```
523
+
524
+ Variable syntax: `$step_name.field.nested[0]` — simple dot-path access, no eval.
525
+
392
526
  ### Built-in Recipes
393
527
 
394
528
  | Recipe | What It Does |
@@ -435,7 +569,7 @@ Turn any website into an API. No documentation needed.
435
569
 
436
570
  ## AI Agent Guide
437
571
 
438
- imperium-crawl ships with [`SKILL/`](./SKILL/) — a structured guide that teaches AI agents how to use all 28 tools effectively. Includes proven workflows, decision trees, error recovery, and advanced patterns.
572
+ imperium-crawl ships with [`SKILL/`](./SKILL/) — a structured guide that teaches AI agents how to use all 39 tools effectively. Includes proven workflows, decision trees, error recovery, and advanced patterns.
439
573
 
440
574
  ### Two Ways to Connect
441
575
 
@@ -496,13 +630,14 @@ Every tool tested against production websites with real anti-bot defenses:
496
630
  | 📋 **list_jobs** | — | Batch jobs with status and progress |
497
631
  | 📊 **job_status** | Batch job | Full per-URL results with timing |
498
632
  | 🗑️ **delete_job** | Completed job | Cleaned up job data from disk |
633
+ | 🧠 **knowledge** | Local knowledge file | Per-domain stats: stealth levels, success rates, anti-bot systems detected |
499
634
  | 🎬 **youtube** | "web scraping tutorial" | Search results, video details, comments, transcripts — no API key |
500
635
  | 💬 **reddit** | r/webscraping | Subreddit posts, comments, search — public JSON API |
501
636
  | 📸 **instagram** | @nike profile | Profile details, engagement rate, recent posts — internal API |
502
637
  | 📥 **download** | YouTube video, web page images | Auto-detect URL type, download media files — images, video, og:image |
503
638
  | 📡 **rss** | Hacker News RSS | Parsed feed items with title, link, date, author, categories |
504
639
 
505
- > **28/28 tools. 34 hidden APIs on Airbnb. Live BTC feed. Zero API keys for scraping.**
640
+ > **39 tools. 34 hidden APIs on Airbnb. Live BTC feed. Reusable browser flows. Zero API keys for scraping.**
506
641
 
507
642
  ---
508
643
 
@@ -535,7 +670,7 @@ cd imperium-crawl
535
670
  npm install
536
671
  npm run build
537
672
  npm run dev # Watch mode (rebuild on changes)
538
- npm test # 466 tests
673
+ npm test # 580 tests
539
674
  npm start # Start CLI (shows help or TUI)
540
675
  ```
541
676
 
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Explore REPL — interactive browser session with live Playwright.
3
+ *
4
+ * Usage: imperium-crawl explore <url>
5
+ *
6
+ * Opens a headed (visible) browser window and gives the user a readline REPL
7
+ * to execute actions interactively. Every successful action is recorded.
8
+ * At any point, run `save-skill <name>` to export the session as a reusable skill.
9
+ *
10
+ * Commands:
11
+ * navigate <url> Navigate to URL
12
+ * click <selector> Click element
13
+ * type <selector> <text> Fill input field
14
+ * select <selector> <value> Select option
15
+ * wait [ms] Wait N ms (default 1000)
16
+ * screenshot [file] Save screenshot
17
+ * snapshot Show ARIA tree + refs
18
+ * evaluate <script> Run JS in page
19
+ * scroll [up|down] [px] Scroll page
20
+ * hover <selector> Hover element
21
+ * press <key> Press keyboard key
22
+ * save-skill <name> Export recording as skill JSON
23
+ * status Show URL, action count
24
+ * history List recorded actions
25
+ * undo Remove last action
26
+ * help Show command list
27
+ * exit / quit Close browser and exit
28
+ */
29
+ export declare function runExplore(startUrl: string, sessionId?: string): Promise<void>;
30
+ //# sourceMappingURL=cli-explore.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli-explore.d.ts","sourceRoot":"","sources":["../src/cli-explore.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAgGH,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA6RpF"}