webpeel 0.11.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +82 -9
  2. package/dist/cli.js +138 -6
  3. package/dist/cli.js.map +1 -1
  4. package/dist/core/actions.d.ts +28 -0
  5. package/dist/core/actions.d.ts.map +1 -1
  6. package/dist/core/actions.js +60 -0
  7. package/dist/core/actions.js.map +1 -1
  8. package/dist/core/bm25-filter.d.ts +67 -0
  9. package/dist/core/bm25-filter.d.ts.map +1 -0
  10. package/dist/core/bm25-filter.js +289 -0
  11. package/dist/core/bm25-filter.js.map +1 -0
  12. package/dist/core/chunking.d.ts +43 -0
  13. package/dist/core/chunking.d.ts.map +1 -0
  14. package/dist/core/chunking.js +182 -0
  15. package/dist/core/chunking.js.map +1 -0
  16. package/dist/core/content-pruner.d.ts +40 -0
  17. package/dist/core/content-pruner.d.ts.map +1 -0
  18. package/dist/core/content-pruner.js +306 -0
  19. package/dist/core/content-pruner.js.map +1 -0
  20. package/dist/core/markdown.d.ts +4 -1
  21. package/dist/core/markdown.d.ts.map +1 -1
  22. package/dist/core/markdown.js +11 -2
  23. package/dist/core/markdown.js.map +1 -1
  24. package/dist/core/research.d.ts +67 -0
  25. package/dist/core/research.d.ts.map +1 -0
  26. package/dist/core/research.js +254 -0
  27. package/dist/core/research.js.map +1 -0
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +52 -4
  30. package/dist/index.js.map +1 -1
  31. package/dist/mcp/server.js +107 -2
  32. package/dist/mcp/server.js.map +1 -1
  33. package/dist/server/app.d.ts +14 -0
  34. package/dist/server/app.d.ts.map +1 -0
  35. package/dist/server/app.js +189 -0
  36. package/dist/server/app.js.map +1 -0
  37. package/dist/server/auth-store.d.ts +28 -0
  38. package/dist/server/auth-store.d.ts.map +1 -0
  39. package/dist/server/auth-store.js +89 -0
  40. package/dist/server/auth-store.js.map +1 -0
  41. package/dist/server/job-queue.d.ts +93 -0
  42. package/dist/server/job-queue.d.ts.map +1 -0
  43. package/dist/server/job-queue.js +144 -0
  44. package/dist/server/job-queue.js.map +1 -0
  45. package/dist/server/middleware/auth.d.ts +24 -0
  46. package/dist/server/middleware/auth.d.ts.map +1 -0
  47. package/dist/server/middleware/auth.js +152 -0
  48. package/dist/server/middleware/auth.js.map +1 -0
  49. package/dist/server/middleware/rate-limit.d.ts +23 -0
  50. package/dist/server/middleware/rate-limit.d.ts.map +1 -0
  51. package/dist/server/middleware/rate-limit.js +126 -0
  52. package/dist/server/middleware/rate-limit.js.map +1 -0
  53. package/dist/server/middleware/url-validator.d.ts +16 -0
  54. package/dist/server/middleware/url-validator.d.ts.map +1 -0
  55. package/dist/server/middleware/url-validator.js +187 -0
  56. package/dist/server/middleware/url-validator.js.map +1 -0
  57. package/dist/server/pg-auth-store.d.ts +129 -0
  58. package/dist/server/pg-auth-store.d.ts.map +1 -0
  59. package/dist/server/pg-auth-store.js +457 -0
  60. package/dist/server/pg-auth-store.js.map +1 -0
  61. package/dist/server/pg-job-queue.d.ts +60 -0
  62. package/dist/server/pg-job-queue.d.ts.map +1 -0
  63. package/dist/server/pg-job-queue.js +365 -0
  64. package/dist/server/pg-job-queue.js.map +1 -0
  65. package/dist/server/premium/domain-intel.d.ts +17 -0
  66. package/dist/server/premium/domain-intel.d.ts.map +1 -0
  67. package/dist/server/premium/domain-intel.js +134 -0
  68. package/dist/server/premium/domain-intel.js.map +1 -0
  69. package/dist/server/premium/index.d.ts +18 -0
  70. package/dist/server/premium/index.d.ts.map +1 -0
  71. package/dist/server/premium/index.js +36 -0
  72. package/dist/server/premium/index.js.map +1 -0
  73. package/dist/server/premium/swr-cache.d.ts +15 -0
  74. package/dist/server/premium/swr-cache.d.ts.map +1 -0
  75. package/dist/server/premium/swr-cache.js +35 -0
  76. package/dist/server/premium/swr-cache.js.map +1 -0
  77. package/dist/server/routes/activity.d.ts +7 -0
  78. package/dist/server/routes/activity.d.ts.map +1 -0
  79. package/dist/server/routes/activity.js +66 -0
  80. package/dist/server/routes/activity.js.map +1 -0
  81. package/dist/server/routes/agent.d.ts +12 -0
  82. package/dist/server/routes/agent.d.ts.map +1 -0
  83. package/dist/server/routes/agent.js +356 -0
  84. package/dist/server/routes/agent.js.map +1 -0
  85. package/dist/server/routes/answer.d.ts +6 -0
  86. package/dist/server/routes/answer.d.ts.map +1 -0
  87. package/dist/server/routes/answer.js +124 -0
  88. package/dist/server/routes/answer.js.map +1 -0
  89. package/dist/server/routes/batch.d.ts +7 -0
  90. package/dist/server/routes/batch.d.ts.map +1 -0
  91. package/dist/server/routes/batch.js +287 -0
  92. package/dist/server/routes/batch.js.map +1 -0
  93. package/dist/server/routes/cli-usage.d.ts +7 -0
  94. package/dist/server/routes/cli-usage.d.ts.map +1 -0
  95. package/dist/server/routes/cli-usage.js +121 -0
  96. package/dist/server/routes/cli-usage.js.map +1 -0
  97. package/dist/server/routes/compat.d.ts +24 -0
  98. package/dist/server/routes/compat.d.ts.map +1 -0
  99. package/dist/server/routes/compat.js +651 -0
  100. package/dist/server/routes/compat.js.map +1 -0
  101. package/dist/server/routes/extract.d.ts +9 -0
  102. package/dist/server/routes/extract.d.ts.map +1 -0
  103. package/dist/server/routes/extract.js +121 -0
  104. package/dist/server/routes/extract.js.map +1 -0
  105. package/dist/server/routes/fetch.d.ts +7 -0
  106. package/dist/server/routes/fetch.d.ts.map +1 -0
  107. package/dist/server/routes/fetch.js +537 -0
  108. package/dist/server/routes/fetch.js.map +1 -0
  109. package/dist/server/routes/health.d.ts +8 -0
  110. package/dist/server/routes/health.d.ts.map +1 -0
  111. package/dist/server/routes/health.js +36 -0
  112. package/dist/server/routes/health.js.map +1 -0
  113. package/dist/server/routes/jobs.d.ts +8 -0
  114. package/dist/server/routes/jobs.d.ts.map +1 -0
  115. package/dist/server/routes/jobs.js +374 -0
  116. package/dist/server/routes/jobs.js.map +1 -0
  117. package/dist/server/routes/mcp.d.ts +16 -0
  118. package/dist/server/routes/mcp.d.ts.map +1 -0
  119. package/dist/server/routes/mcp.js +475 -0
  120. package/dist/server/routes/mcp.js.map +1 -0
  121. package/dist/server/routes/oauth.d.ts +10 -0
  122. package/dist/server/routes/oauth.d.ts.map +1 -0
  123. package/dist/server/routes/oauth.js +296 -0
  124. package/dist/server/routes/oauth.js.map +1 -0
  125. package/dist/server/routes/screenshot.d.ts +10 -0
  126. package/dist/server/routes/screenshot.d.ts.map +1 -0
  127. package/dist/server/routes/screenshot.js +217 -0
  128. package/dist/server/routes/screenshot.js.map +1 -0
  129. package/dist/server/routes/search.d.ts +7 -0
  130. package/dist/server/routes/search.d.ts.map +1 -0
  131. package/dist/server/routes/search.js +287 -0
  132. package/dist/server/routes/search.js.map +1 -0
  133. package/dist/server/routes/stats.d.ts +7 -0
  134. package/dist/server/routes/stats.d.ts.map +1 -0
  135. package/dist/server/routes/stats.js +65 -0
  136. package/dist/server/routes/stats.js.map +1 -0
  137. package/dist/server/routes/stripe.d.ts +9 -0
  138. package/dist/server/routes/stripe.d.ts.map +1 -0
  139. package/dist/server/routes/stripe.js +233 -0
  140. package/dist/server/routes/stripe.js.map +1 -0
  141. package/dist/server/routes/users.d.ts +9 -0
  142. package/dist/server/routes/users.d.ts.map +1 -0
  143. package/dist/server/routes/users.js +954 -0
  144. package/dist/server/routes/users.js.map +1 -0
  145. package/dist/server/routes/webhooks.d.ts +15 -0
  146. package/dist/server/routes/webhooks.d.ts.map +1 -0
  147. package/dist/server/routes/webhooks.js +73 -0
  148. package/dist/server/routes/webhooks.js.map +1 -0
  149. package/dist/server/sentry.d.ts +14 -0
  150. package/dist/server/sentry.d.ts.map +1 -0
  151. package/dist/server/sentry.js +39 -0
  152. package/dist/server/sentry.js.map +1 -0
  153. package/dist/types.d.ts +22 -0
  154. package/dist/types.d.ts.map +1 -1
  155. package/dist/types.js.map +1 -1
  156. package/package.json +3 -2
package/README.md CHANGED
@@ -87,12 +87,16 @@ First 25 fetches work instantly, no signup. After that, [sign up free](https://a
87
87
  | **Stealth mode** | ✅ v2, all plans | ✅ | ⚠️ Limited | ❌ |
88
88
  | **Browser profiles** | ✅ Persistent sessions | ❌ | ❌ | ❌ |
89
89
  | **Hotel search** | ✅ Multi-source parallel | ❌ | ❌ | ❌ |
90
- | **CSS schema extraction** | ✅ 6 bundled + auto-detect | ❌ | ❌ | ❌ |
90
+ | **CSS schema extraction** | ✅ 7 bundled + auto-detect | ❌ | ❌ | ❌ |
91
91
  | **LLM extraction** | ✅ BYOK, cost tracking | ⚠️ Cloud only | ❌ | ❌ |
92
92
  | **Firecrawl-compatible** | ✅ Drop-in replacement | ✅ Native | ❌ | ❌ |
93
93
  | **Self-hosting** | ✅ Docker compose | ⚠️ Complex | ❌ | N/A |
94
94
  | **Autonomous agent** | ✅ BYOK any LLM | ⚠️ Locked | ❌ | ❌ |
95
- | **MCP tools** | ✅ 11 tools | 3 | 0 | 1 |
95
+ | **Deep research** | ✅ Multi-source + BM25 | ⚠️ Cloud only | | |
96
+ | **Content pruning** | ✅ 2-pass, 15-33% savings | ❌ | ❌ | ❌ |
97
+ | **BM25 filtering** | ✅ Query-focused | ❌ | ❌ | ❌ |
98
+ | **Python SDK** | ✅ `pip install` | ✅ | ❌ | ❌ |
99
+ | **MCP tools** | ✅ 13 tools | ~6 | 0 | 1 |
96
100
  | **License** | ✅ AGPL-3.0 | AGPL-3.0 | Proprietary | MIT |
97
101
  | **Pricing** | **Free / $9 / $29** | $0 / $16 / $83 | Custom | Free |
98
102
 
@@ -179,7 +183,27 @@ Zero dependencies. Pure Python 3.8+. [Full SDK docs →](python-sdk/README.md)
179
183
 
180
184
  > **Where to add this config:** Claude Desktop → `~/Library/Application Support/Claude/claude_desktop_config.json` · Cursor → Settings → MCP Servers · VS Code → `~/.vscode/mcp.json` · Windsurf → `~/.codeium/windsurf/mcp_config.json`
181
185
 
182
- ### Docker (Self-Hosted)
186
+ ### Docker
187
+
188
+ **MCP Server (stdio — for Claude Desktop, Cursor, Windsurf):**
189
+
190
+ ```bash
191
+ docker run -i webpeel/mcp
192
+ ```
193
+
194
+ **MCP Server (HTTP Streamable transport):**
195
+
196
+ ```bash
197
+ docker run -e MCP_HTTP_MODE=true -p 3100:3100 webpeel/mcp
198
+ ```
199
+
200
+ **API Server (Firecrawl-compatible REST API):**
201
+
202
+ ```bash
203
+ docker run -p 3000:3000 webpeel/api
204
+ ```
205
+
206
+ **Self-Hosted (full stack with database):**
183
207
 
184
208
  ```bash
185
209
  git clone https://github.com/webpeel/webpeel.git
@@ -188,6 +212,19 @@ cd webpeel && docker compose up
188
212
 
189
213
  Full API at `http://localhost:3000`. AGPL-3.0 licensed. [Commercial licensing available](mailto:support@webpeel.dev).
190
214
 
215
+ **MCP config for Docker:**
216
+
217
+ ```json
218
+ {
219
+ "mcpServers": {
220
+ "webpeel": {
221
+ "command": "docker",
222
+ "args": ["run", "-i", "--rm", "webpeel/mcp"]
223
+ }
224
+ }
225
+ }
226
+ ```
227
+
191
228
  ## Features
192
229
 
193
230
  ### 🎯 Smart Escalation
@@ -216,6 +253,36 @@ npx webpeel crawl https://docs.example.com --max-pages 100
216
253
  npx webpeel map https://example.com --max-urls 5000
217
254
  ```
218
255
 
256
+ ### 🔬 Deep Research
257
+
258
+ Multi-source research with BM25 relevance ranking. No API key needed for sources mode.
259
+
260
+ ```bash
261
+ # Get ranked sources with relevance scores
262
+ npx webpeel research "best web scraping tools 2025" --max-sources 5
263
+
264
+ # Full synthesis with LLM (BYOK)
265
+ npx webpeel research "compare Firecrawl vs Crawl4AI" --llm-key sk-...
266
+ ```
267
+
268
+ ### 🧹 Token Efficiency
269
+
270
+ Save 15-77% on AI tokens automatically.
271
+
272
+ ```bash
273
+ # Content pruning (default ON — strips nav/footer/sidebar)
274
+ npx webpeel https://en.wikipedia.org/wiki/Web_scraping
275
+
276
+ # Query-focused filtering (BM25)
277
+ npx webpeel https://en.wikipedia.org/wiki/Web_scraping --focus "legal issues"
278
+
279
+ # Token budget (hard cap)
280
+ npx webpeel https://en.wikipedia.org/wiki/Web_scraping --budget 3000
281
+
282
+ # Combined: prune → focus → budget = 77% savings
283
+ npx webpeel https://en.wikipedia.org/wiki/Web_scraping --focus "legal" --budget 3000
284
+ ```
285
+
219
286
  ### 🤖 Autonomous Agent (BYOK)
220
287
 
221
288
  Give it a prompt, it researches the web using your own LLM key.
@@ -228,24 +295,30 @@ npx webpeel agent "Compare pricing of Notion vs Coda" --llm-key sk-...
228
295
 
229
296
  | Feature | CLI | Node.js | Python | API |
230
297
  |---------|:---:|:-------:|:------:|:---:|
298
+ | Web scraping | ✅ | ✅ | ✅ | ✅ |
299
+ | Deep research | ✅ | ✅ | ✅ | ✅ |
300
+ | Content pruning | ✅ | ✅ | ✅ | ✅ |
301
+ | BM25 query filtering | ✅ | ✅ | — | ✅ |
231
302
  | Structured extraction | ✅ | ✅ | ✅ | ✅ |
232
303
  | CSS schema extraction | ✅ | ✅ | — | ✅ |
233
- | LLM extraction (BYOK) | ✅ | ✅ | | ✅ |
304
+ | LLM extraction (BYOK) | ✅ | ✅ | | ✅ |
305
+ | Page actions | ✅ | ✅ | ✅ | ✅ |
234
306
  | Browser profiles | ✅ | ✅ | — | — |
307
+ | Screenshots | ✅ | ✅ | ✅ | ✅ |
308
+ | Crawling | ✅ | ✅ | ✅ | ✅ |
309
+ | Batch fetching | ✅ | ✅ | ✅ | ✅ |
235
310
  | Hotel search | ✅ | — | — | — |
236
- | Screenshots | ✅ | ✅ | | ✅ |
311
+ | Token budget | ✅ | ✅ | | ✅ |
312
+ | Smart chunking | ✅ | ✅ | — | — |
237
313
  | Branding extraction | ✅ | ✅ | — | — |
238
314
  | Change tracking | ✅ | ✅ | — | — |
239
- | Token budget | ✅ | ✅ | ✅ | ✅ |
240
- | Tag filtering | ✅ | ✅ | ✅ | ✅ |
241
- | Image extraction | ✅ | ✅ | — | ✅ |
242
315
  | AI summarization | ✅ | ✅ | — | ✅ |
243
316
  | Batch processing | — | ✅ | — | ✅ |
244
317
  | PDF extraction | ✅ | ✅ | — | — |
245
318
 
246
319
  ## Integrations
247
320
 
248
- Works with **LangChain**, **LlamaIndex**, **CrewAI**, **Dify**, and **n8n**. [Integration docs →](https://webpeel.dev/docs)
321
+ Works with **CrewAI**, **Dify**, and **n8n** via the Firecrawl-compatible API. LangChain & LlamaIndex integrations coming soon. [Integration docs →](https://webpeel.dev/docs)
249
322
 
250
323
  ## Hosted API
251
324
 
package/dist/cli.js CHANGED
@@ -146,6 +146,11 @@ program
146
146
  .option('--include-tags <tags>', 'Comma-separated HTML tags/selectors to include (e.g., "main,article,.content")')
147
147
  .option('--exclude-tags <tags>', 'Comma-separated HTML tags/selectors to exclude (e.g., "nav,footer,aside")')
148
148
  .option('--only-main-content', 'Shortcut for --include-tags main,article')
149
+ .option('--full-content', 'Return full page content (disable automatic content density pruning)')
150
+ .option('--focus <query>', 'Query-focused filtering — only return content relevant to this query (BM25 ranking)')
151
+ .option('--chunk <size>', 'Split content into N-token chunks for LLM processing (default strategy: semantic)', parseInt)
152
+ .option('--chunk-overlap <tokens>', 'Overlap tokens between chunks (default: 200)', parseInt)
153
+ .option('--chunk-strategy <strategy>', 'Chunking strategy: fixed, semantic (default), paragraph')
149
154
  .option('-H, --header <header...>', 'Custom headers (e.g., "Authorization: Bearer token")')
150
155
  .option('--cookie <cookie...>', 'Cookies to set (e.g., "session=abc123")')
151
156
  .option('--cache <ttl>', 'Cache results locally (e.g., "5m", "1h", "1d") — default: 5m')
@@ -169,7 +174,8 @@ program
169
174
  .option('--extract-all', 'Auto-detect and extract repeated listing items (e.g., search results)')
170
175
  .option('--schema <name>', 'Force a specific extraction schema by name or domain (e.g., "booking.com", "amazon")')
171
176
  .option('--list-schemas', 'List all available extraction schemas and their supported domains')
172
- .option('--scroll-extract [count]', 'Scroll page N times to load lazy content, then extract (implies --render)', (v) => parseInt(v, 10))
177
+ .option('--scroll-extract [count]', 'Scroll page N times to load lazy content (bare flag = smart auto-scroll until stable), then extract (implies --render)', (v) => parseInt(v, 10))
178
+ .option('--scroll-extract-timeout <ms>', 'Total timeout in ms for auto-scroll (default: 30000, only used with bare --scroll-extract)', parseInt)
173
179
  .option('--csv', 'Output extraction results as CSV')
174
180
  .option('--table', 'Output extraction results as a formatted table')
175
181
  .option('--pages <n>', 'Follow pagination "Next" links for N pages (max 10)', (v) => parseInt(v, 10))
@@ -453,11 +459,16 @@ program
453
459
  // --stealth auto-enables --render (stealth requires browser)
454
460
  // --action auto-enables --render (actions require browser)
455
461
  // --scroll-extract implies --render (needs browser)
456
- const scrollExtractCount = options.scrollExtract !== undefined
457
- ? (typeof options.scrollExtract === 'number' ? options.scrollExtract : 3)
458
- : 0;
459
- const useRender = options.render || options.stealth || (actions && actions.length > 0) || scrollExtractCount > 0 || false;
460
- // Inject scroll actions when --scroll-extract is used
462
+ //
463
+ // Bare --scroll-extract (no number) → smart autoScroll (detects stable height)
464
+ // --scroll-extract N (with number) → legacy fixed N scrolls via actions
465
+ const scrollExtractRaw = options.scrollExtract;
466
+ const isAutoScroll = scrollExtractRaw !== undefined && typeof scrollExtractRaw !== 'number';
467
+ const scrollExtractCount = isAutoScroll
468
+ ? 0
469
+ : (scrollExtractRaw !== undefined ? scrollExtractRaw : 0);
470
+ const useRender = options.render || options.stealth || (actions && actions.length > 0) || scrollExtractCount > 0 || isAutoScroll || false;
471
+ // Inject scroll actions when --scroll-extract N (fixed count) is used
461
472
  if (scrollExtractCount > 0) {
462
473
  const scrollActions = [];
463
474
  for (let i = 0; i < scrollExtractCount; i++) {
@@ -493,6 +504,11 @@ program
493
504
  headed: options.headed || false,
494
505
  storageState: resolvedStorageState,
495
506
  proxy: options.proxy,
507
+ fullPage: options.fullContent || false,
508
+ // Smart auto-scroll (bare --scroll-extract flag)
509
+ autoScroll: isAutoScroll
510
+ ? { timeout: options.scrollExtractTimeout }
511
+ : undefined,
496
512
  };
497
513
  // Add summary option if requested
498
514
  if (options.summary) {
@@ -569,6 +585,41 @@ program
569
585
  result.tokens = estimateTokens(distilled);
570
586
  }
571
587
  }
588
+ // --- BM25 Query-Focused Filtering ---
589
+ if (options.focus && result.content) {
590
+ const { filterByRelevance } = await import('./core/bm25-filter.js');
591
+ const focusResult = filterByRelevance(result.content, { query: options.focus });
592
+ result.content = focusResult.content;
593
+ result.tokens = estimateTokens(focusResult.content);
594
+ if (isJson) {
595
+ result.focusQuery = options.focus;
596
+ result.focusReduction = focusResult.reductionPercent;
597
+ }
598
+ }
599
+ // --- Smart Chunking ---
600
+ if (options.chunk && options.chunk > 0 && result.content) {
601
+ const { chunkContent } = await import('./core/chunking.js');
602
+ const chunkResult = chunkContent(result.content, {
603
+ chunkSize: options.chunk,
604
+ overlap: options.chunkOverlap || 200,
605
+ strategy: options.chunkStrategy || 'semantic',
606
+ });
607
+ // Replace content with chunked output
608
+ if (isJson) {
609
+ result.chunks = chunkResult.chunks;
610
+ result.totalChunks = chunkResult.totalChunks;
611
+ result.originalTokens = chunkResult.originalTokens;
612
+ // Keep content as first chunk for non-JSON fallback
613
+ result.content = chunkResult.chunks[0]?.content || '';
614
+ result.tokens = chunkResult.chunks[0]?.tokens || 0;
615
+ }
616
+ else {
617
+ // Plain text mode: output chunks separated by markers
618
+ const chunkOutput = chunkResult.chunks.map((c, i) => `--- Chunk ${i + 1}/${chunkResult.totalChunks} (${c.tokens} tokens) ---\n${c.content}`).join('\n\n');
619
+ result.content = chunkOutput;
620
+ result.tokens = chunkResult.totalTokens;
621
+ }
622
+ }
572
623
  // --- #4: Content quality warning ---
573
624
  const isHtmlContent = result.contentType ? result.contentType.toLowerCase().includes('html') : true;
574
625
  const isRedirect = false; // peel() follows redirects — final result is always 200
@@ -3118,6 +3169,87 @@ program
3118
3169
  process.exit(1);
3119
3170
  }
3120
3171
  });
3172
+ // ============================================================
3173
+ // research command — autonomous multi-step web research
3174
+ // ============================================================
3175
+ program
3176
+ .command('research <query>')
3177
+ .description('Conduct autonomous multi-step web research on a topic and synthesize a report')
3178
+ .option('--max-sources <n>', 'Maximum sources to consult (default: 5)', '5')
3179
+ .option('--max-depth <n>', 'Link-following depth (default: 1)', '1')
3180
+ .option('--format <f>', 'Output format: report (default) or sources', 'report')
3181
+ .option('--llm-key <key>', 'LLM API key for synthesis (or env OPENAI_API_KEY)')
3182
+ .option('--llm-model <model>', 'LLM model for synthesis (default: gpt-4o-mini)')
3183
+ .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
3184
+ .option('--timeout <ms>', 'Max research time in ms (default: 60000)', '60000')
3185
+ .option('--json', 'Output result as JSON')
3186
+ .option('-s, --silent', 'Suppress progress output')
3187
+ .action(async (query, options) => {
3188
+ const isSilent = !!options.silent;
3189
+ const isJson = !!options.json;
3190
+ const maxSources = parseInt(options.maxSources) || 5;
3191
+ const maxDepth = parseInt(options.maxDepth) || 1;
3192
+ const timeout = parseInt(options.timeout) || 60000;
3193
+ const outputFormat = options.format === 'sources' ? 'sources' : 'report';
3194
+ const apiKey = options.llmKey || process.env.OPENAI_API_KEY;
3195
+ const model = options.llmModel;
3196
+ const baseUrl = options.llmBaseUrl;
3197
+ const phaseIcons = {
3198
+ searching: '🔍',
3199
+ fetching: '📄',
3200
+ extracting: '🧠',
3201
+ following: '🔗',
3202
+ synthesizing: '✍️',
3203
+ };
3204
+ try {
3205
+ const { research } = await import('./core/research.js');
3206
+ const result = await research({
3207
+ query,
3208
+ maxSources,
3209
+ maxDepth,
3210
+ timeout,
3211
+ outputFormat: outputFormat,
3212
+ apiKey,
3213
+ model,
3214
+ baseUrl,
3215
+ onProgress: (step) => {
3216
+ if (!isSilent && !isJson) {
3217
+ const icon = phaseIcons[step.phase] ?? '⚙️';
3218
+ const extra = step.sourcesFound !== undefined
3219
+ ? ` (found ${step.sourcesFound})`
3220
+ : step.sourcesFetched !== undefined
3221
+ ? ` (${step.sourcesFetched} fetched)`
3222
+ : '';
3223
+ process.stderr.write(`${icon} ${step.message}${extra}...\n`);
3224
+ }
3225
+ },
3226
+ });
3227
+ if (isJson) {
3228
+ await writeStdout(JSON.stringify(result, null, 2) + '\n');
3229
+ }
3230
+ else {
3231
+ await writeStdout(result.report + '\n');
3232
+ if (!isSilent) {
3233
+ const elapsed = (result.elapsed / 1000).toFixed(1);
3234
+ const cost = result.cost !== undefined ? ` | cost: $${result.cost.toFixed(4)}` : '';
3235
+ process.stderr.write(`\n📊 ${result.sourcesConsulted} sources consulted (${result.totalSourcesFound} found) | ${elapsed}s${cost}\n`);
3236
+ }
3237
+ }
3238
+ await cleanup();
3239
+ process.exit(0);
3240
+ }
3241
+ catch (error) {
3242
+ const msg = error instanceof Error ? error.message : 'Unknown error';
3243
+ if (isJson) {
3244
+ await writeStdout(JSON.stringify({ error: msg, code: 'RESEARCH_FAILED' }) + '\n');
3245
+ }
3246
+ else {
3247
+ console.error(`\nError: ${msg}`);
3248
+ }
3249
+ await cleanup();
3250
+ process.exit(1);
3251
+ }
3252
+ });
3121
3253
  program.parse();
3122
3254
  // ============================================================
3123
3255
  // Time formatting helper