webpeel 0.21.88 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/dist/cli/commands/fetch.js +55 -1
  2. package/dist/cli/commands/guide.d.ts +2 -0
  3. package/dist/cli/commands/guide.js +183 -0
  4. package/dist/cli/commands/screenshot.js +10 -1
  5. package/dist/cli/commands/search.js +52 -0
  6. package/dist/cli/utils.js +6 -0
  7. package/dist/cli.js +2 -0
  8. package/dist/core/actions.d.ts +21 -3
  9. package/dist/core/actions.js +198 -11
  10. package/dist/core/auth-detection.d.ts +35 -0
  11. package/dist/core/auth-detection.js +358 -0
  12. package/dist/core/browser-fetch.d.ts +2 -0
  13. package/dist/core/browser-fetch.js +106 -42
  14. package/dist/core/browser-pool.d.ts +21 -1
  15. package/dist/core/browser-pool.js +92 -8
  16. package/dist/core/business-intel.d.ts +47 -0
  17. package/dist/core/business-intel.js +279 -0
  18. package/dist/core/circuit-breaker.d.ts +44 -0
  19. package/dist/core/circuit-breaker.js +85 -0
  20. package/dist/core/cross-verify.d.ts +27 -0
  21. package/dist/core/cross-verify.js +93 -0
  22. package/dist/core/google-serp-parser.d.ts +82 -0
  23. package/dist/core/google-serp-parser.js +287 -0
  24. package/dist/core/http-fetch.d.ts +4 -1
  25. package/dist/core/http-fetch.js +40 -10
  26. package/dist/core/language-detect.d.ts +18 -0
  27. package/dist/core/language-detect.js +135 -0
  28. package/dist/core/local-search.d.ts +60 -0
  29. package/dist/core/local-search.js +308 -0
  30. package/dist/core/ocr.d.ts +12 -0
  31. package/dist/core/ocr.js +33 -0
  32. package/dist/core/pipeline.d.ts +3 -1
  33. package/dist/core/pipeline.js +91 -12
  34. package/dist/core/proxy-config.d.ts +36 -1
  35. package/dist/core/proxy-config.js +93 -1
  36. package/dist/core/safe-browsing.d.ts +9 -1
  37. package/dist/core/safe-browsing.js +38 -15
  38. package/dist/core/search-engines.d.ts +25 -0
  39. package/dist/core/search-engines.js +182 -0
  40. package/dist/core/search-provider.d.ts +12 -1
  41. package/dist/core/search-provider.js +39 -17
  42. package/dist/core/strategies.d.ts +8 -0
  43. package/dist/core/strategies.js +54 -9
  44. package/dist/core/threat-feeds.d.ts +23 -0
  45. package/dist/core/threat-feeds.js +104 -0
  46. package/dist/core/vertical-search.d.ts +53 -0
  47. package/dist/core/vertical-search.js +231 -0
  48. package/dist/ee/extractors/espn.js +7 -2
  49. package/dist/ee/extractors/kalshi.js +7 -1
  50. package/dist/ee/extractors/polymarket.js +285 -89
  51. package/dist/ee/extractors/reddit.js +131 -1
  52. package/dist/ee/extractors/tradingview.js +7 -1
  53. package/dist/index.d.ts +11 -0
  54. package/dist/index.js +55 -0
  55. package/dist/mcp/handlers/definitions.js +61 -2
  56. package/dist/mcp/handlers/extract.js +2 -0
  57. package/dist/mcp/handlers/meta.js +9 -0
  58. package/dist/mcp/handlers/read.js +15 -0
  59. package/dist/server/app.js +31 -1
  60. package/dist/server/routes/cache-warm.js +4 -2
  61. package/dist/server/routes/fetch.js +21 -4
  62. package/dist/server/routes/health.js +23 -0
  63. package/dist/server/routes/search.js +265 -4
  64. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  65. package/dist/server/routes/smart-search/handlers/cars.js +99 -0
  66. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  67. package/dist/server/routes/smart-search/handlers/flights.js +69 -0
  68. package/dist/server/routes/smart-search/handlers/general.d.ts +2 -0
  69. package/dist/server/routes/smart-search/handlers/general.js +390 -0
  70. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  71. package/dist/server/routes/smart-search/handlers/hotels.js +85 -0
  72. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  73. package/dist/server/routes/smart-search/handlers/products.js +213 -0
  74. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  75. package/dist/server/routes/smart-search/handlers/rental.js +151 -0
  76. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  77. package/dist/server/routes/smart-search/handlers/restaurants.js +205 -0
  78. package/dist/server/routes/smart-search/index.d.ts +19 -0
  79. package/dist/server/routes/smart-search/index.js +508 -0
  80. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  81. package/dist/server/routes/smart-search/intent.js +109 -0
  82. package/dist/server/routes/smart-search/llm.d.ts +8 -0
  83. package/dist/server/routes/smart-search/llm.js +101 -0
  84. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  85. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  86. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  87. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  88. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  89. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  90. package/dist/server/routes/smart-search/types.d.ts +30 -0
  91. package/dist/server/routes/smart-search/types.js +1 -0
  92. package/dist/server/routes/smart-search/utils.d.ts +12 -0
  93. package/dist/server/routes/smart-search/utils.js +97 -0
  94. package/dist/server/routes/smart-search.js +343 -112
  95. package/dist/types.d.ts +34 -3
  96. package/llms.txt +78 -45
  97. package/package.json +2 -1
  98. package/dist/core/challenge-solver.d.ts +0 -72
  99. package/dist/core/challenge-solver.js +0 -720
  100. package/dist/core/domain-extractors-basic.d.ts +0 -36
  101. package/dist/core/domain-extractors-basic.js +0 -28
  102. package/dist/core/domain-extractors-public.d.ts +0 -20
  103. package/dist/core/domain-extractors-public.js +0 -35
  104. package/dist/core/domain-extractors.d.ts +0 -48
  105. package/dist/core/domain-extractors.js +0 -6342
  106. package/dist/ee/extractors/utils.d.ts +0 -12
  107. package/dist/ee/extractors/utils.js +0 -84
  108. package/dist/server/premium/challenge.d.ts +0 -1
  109. package/dist/server/premium/challenge.js +0 -1
  110. package/dist/server/premium/domain-intel.d.ts +0 -16
  111. package/dist/server/premium/domain-intel.js +0 -133
  112. package/dist/server/premium/extractors.d.ts +0 -1
  113. package/dist/server/premium/extractors.js +0 -1
  114. package/dist/server/premium/index.d.ts +0 -20
  115. package/dist/server/premium/index.js +0 -50
  116. package/dist/server/premium/spa-detection.d.ts +0 -2
  117. package/dist/server/premium/spa-detection.js +0 -2
  118. package/dist/server/premium/stability.d.ts +0 -4
  119. package/dist/server/premium/stability.js +0 -29
  120. package/dist/server/premium/swr-cache.d.ts +0 -14
  121. package/dist/server/premium/swr-cache.js +0 -34
@@ -747,7 +747,61 @@ export async function runFetch(url, options) {
747
747
  }
748
748
  }
749
749
  if (!options.silent && !options.json && result.tokens && result.tokens < 50 && !options.render) {
750
- console.error(`\x1b[33m💡 Tip: Page returned very little content. Try --render for JavaScript-heavy sites or --stealth if blocked.\x1b[0m`);
750
+ console.error(`\x1b[33m💡 Tip: Very little content extracted. This may be a JavaScript-rendered page.\x1b[0m`);
751
+ console.error(`\x1b[33m Try: webpeel "${url}" --render\x1b[0m`);
752
+ console.error(`\x1b[33m For infinite scroll/SPAs: --action 'scroll:bottom' --action 'wait:2000'\x1b[0m`);
753
+ console.error(`\x1b[33m Or use --stealth if the site blocks bots.\x1b[0m`);
754
+ }
755
+ // Auth wall detection hint
756
+ if (!options.json && result.authRequired) {
757
+ let authHost = url;
758
+ try {
759
+ authHost = new URL(url).hostname.replace('www.', '');
760
+ }
761
+ catch { /* ignore */ }
762
+ console.error('');
763
+ console.error('\x1b[33m🔐 This page requires authentication.\x1b[0m');
764
+ console.error(`\x1b[36m 1. Create a login profile: webpeel profile create ${authHost}\x1b[0m`);
765
+ console.error('\x1b[36m 2. Log in to the site in the browser that opens\x1b[0m');
766
+ console.error('\x1b[36m 3. Press Ctrl+C when done\x1b[0m');
767
+ console.error(`\x1b[36m 4. Re-run with: webpeel "${url}" --profile ${authHost}\x1b[0m`);
768
+ console.error('');
769
+ }
770
+ }
771
+ // Trust & safety warnings — shown prominently in non-JSON mode
772
+ if (!options.silent && !options.json) {
773
+ const trustData = result.trust;
774
+ const sbData = result.safeBrowsing;
775
+ // Unsafe: safe browsing threats detected
776
+ const allThreats = [
777
+ ...(sbData?.threats ?? []),
778
+ ...(trustData?.threatFeeds?.threats ?? []),
779
+ ].filter((t, i, a) => a.indexOf(t) === i);
780
+ if (sbData && !sbData.safe && allThreats.length > 0) {
781
+ console.error(`\x1b[31m🚨 UNSAFE — Threats detected: ${allThreats.join(', ')}\x1b[0m`);
782
+ }
783
+ else if (trustData?.threatFeeds && !trustData.threatFeeds.safe && trustData.threatFeeds.threats.length > 0) {
784
+ console.error(`\x1b[31m🚨 UNSAFE — Threat feeds flagged: ${trustData.threatFeeds.threats.join(', ')}\x1b[0m`);
785
+ if (trustData.threatFeeds.details) {
786
+ console.error(`\x1b[31m ${trustData.threatFeeds.details}\x1b[0m`);
787
+ }
788
+ }
789
+ else if (trustData && trustData.score < 0.5) {
790
+ // Low trust score
791
+ const tier = trustData.source?.tier ?? 'unknown';
792
+ const label = trustData.source?.label ?? '';
793
+ const reason = tier === 'suspicious'
794
+ ? 'Domain shows suspicious signals'
795
+ : tier === 'new'
796
+ ? 'Domain has limited verifiable presence'
797
+ : label || 'Low credibility domain';
798
+ console.error(`\x1b[33m⚠️ Low trust score (${trustData.score.toFixed(2)}) — ${reason}\x1b[0m`);
799
+ }
800
+ // Show any trust warnings
801
+ if (trustData?.warnings && trustData.warnings.length > 0) {
802
+ for (const warn of trustData.warnings) {
803
+ console.error(`\x1b[33m⚠️ ${warn}\x1b[0m`);
804
+ }
751
805
  }
752
806
  }
753
807
  // Show metadata header
@@ -0,0 +1,2 @@
1
+ import type { Command } from 'commander';
2
+ export declare function registerGuideCommand(program: Command): void;
@@ -0,0 +1,183 @@
1
+ const GUIDE = `
2
+ # WebPeel — AI Usage Guide
3
+
4
+ WebPeel is a fast web fetcher built for AI agents. It handles JS rendering, Cloudflare protection,
5
+ and 55+ domain-specific extractors automatically. Run any webpeel command with --help for options.
6
+
7
+ ## Quick Decision Tree
8
+
9
+ - Static page → webpeel <url>
10
+ - JavaScript SPA (React/Vue/Angular) → webpeel <url> --render
11
+ - Bot-protected site (Cloudflare) → webpeel <url> --stealth
12
+ - Infinite scroll / lazy content → webpeel <url> --render --action 'scroll:bottom' --action 'wait:2000'
13
+ - Need to interact (click, type) → webpeel <url> --render --action 'click:.button' --action 'wait:1000'
14
+ - Screenshot → webpeel screenshot <url>
15
+ - Search the web → webpeel search "query"
16
+ - YouTube transcript → webpeel <youtube-url>
17
+ - PDF content → webpeel <pdf-url>
18
+ - Structured data → webpeel <url> --schema product --json
19
+ - Monitor for changes → webpeel watch <url>
20
+
21
+ ## When to Use --render
22
+
23
+ Use for ANY site that:
24
+ - Shows blank/minimal content without JavaScript
25
+ - Is a Single Page Application (React, Vue, Angular, Svelte, Next.js)
26
+ - Returns less than 50 tokens of content
27
+ - Has dynamic/interactive elements
28
+
29
+ Known SPA sites (auto-detected): Google, Airbnb, Booking.com, Expedia, Indeed, Zillow, Polymarket, and more.
30
+ For sites NOT in the auto-list, add --render manually.
31
+
32
+ Tip: If content looks sparse or empty, ALWAYS retry with --render before concluding the page has no content.
33
+
34
+ ## Browser Actions (--action flag)
35
+
36
+ Actions require --render (auto-enabled when you pass --action). Chain multiple actions with repeated --action flags.
37
+
38
+ Available Actions:
39
+ scroll:bottom — scroll to page bottom (for infinite scroll / lazy-loaded content)
40
+ scroll:top — scroll to top
41
+ scroll:down:500 — scroll down 500px
42
+ scroll:0,1500 — scroll to exact coordinates (x,y)
43
+ wait:2000 — wait 2000ms (useful after navigation or clicks)
44
+ click:.selector — click a CSS element
45
+ type:#input:hello — type text into an input field
46
+ waitFor:.selector — wait for a CSS element to appear in the DOM
47
+ hover:.element — hover over an element (for dropdown menus, tooltips)
48
+
49
+ Common Patterns:
50
+
51
+ # Load all lazy content / infinite scroll
52
+ webpeel <url> --render --action 'scroll:bottom' --action 'wait:2000'
53
+
54
+ # Click "Load More" button then extract
55
+ webpeel <url> --render --action 'click:.load-more' --action 'wait:1000'
56
+
57
+ # Fill a search form and submit
58
+ webpeel <url> --render --action 'type:#search:query' --action 'click:.submit' --action 'wait:2000'
59
+
60
+ # Wait for dynamic content to appear
61
+ webpeel <url> --render --action 'waitFor:.results-list' --action 'wait:500'
62
+
63
+ ## Stealth Mode (--stealth)
64
+
65
+ Use when:
66
+ - Site returns a Cloudflare challenge page
67
+ - Site blocks bots with fingerprinting or rate limiting
68
+ - Normal --render fails with access denied / 403
69
+
70
+ webpeel <url> --stealth
71
+
72
+ Stealth mode auto-enables --render.
73
+
74
+ ## Authentication (Login-Protected Pages)
75
+
76
+ Some pages require you to be logged in (e.g. dashboards, profiles, activity feeds).
77
+ WebPeel detects auth walls automatically and tells you what to do.
78
+
79
+ To access login-protected content:
80
+
81
+ 1. Create a browser profile:
82
+ webpeel profile create polymarket
83
+
84
+ 2. A browser opens — log in to the site normally
85
+
86
+ 3. Press Ctrl+C when done (cookies are saved)
87
+
88
+ 4. Fetch with your profile:
89
+ webpeel "https://polymarket.com/@user" --profile polymarket
90
+
91
+ Profiles are saved in ~/.webpeel/profiles/ and can be reused.
92
+
93
+ webpeel profile list — see all saved profiles
94
+ webpeel profile delete <name> — remove a profile
95
+
96
+ ## 55+ Domain Extractors (automatic)
97
+
98
+ These sites get instant structured data via dedicated API — no browser needed:
99
+ Amazon, Reddit, YouTube, GitHub, Wikipedia, ESPN, Polymarket, Kalshi, TradingView,
100
+ Hacker News, NPM, PyPI, Stack Overflow, and 40+ more.
101
+
102
+ If the URL matches a supported domain, WebPeel uses the extractor automatically.
103
+ You never need to configure this.
104
+
105
+ ## Output Options
106
+
107
+ Default → clean markdown (LLM-optimized, 65-98% token reduction)
108
+ --json → full JSON with metadata, token count, method used
109
+ --raw → full page HTML/text, no smart extraction
110
+ --budget N → distill content to N tokens (smart summarization)
111
+ --schema NAME → extract structured data (product, article, recipe, job, event, contact, review)
112
+ --silent → suppress progress spinner (for piping output)
113
+ --question "..." → answer a specific question about the page (BM25, no LLM needed)
114
+
115
+ ## MCP Server
116
+
117
+ For Claude Desktop, Cursor, VS Code — add to your MCP config:
118
+
119
+ {
120
+ "mcpServers": {
121
+ "webpeel": {
122
+ "command": "npx",
123
+ "args": ["-y", "webpeel", "mcp"]
124
+ }
125
+ }
126
+ }
127
+
128
+ Available MCP tools: webpeel (smart), webpeel_read, webpeel_see, webpeel_find,
129
+ webpeel_extract, webpeel_monitor, webpeel_act
130
+
131
+ ## Troubleshooting
132
+
133
+ Very little content? → Add --render
134
+ Still blocked? → Add --stealth (implies --render)
135
+ SPA not loading data? → --render --action 'wait:3000'
136
+ Screenshot fails? → Run: npx playwright install chromium
137
+ Wrong content for /profile pages? → Content is client-side routed. Use --render.
138
+ Need to extract specific fields? → Use --schema or --json with jq
139
+
140
+ ## Examples
141
+
142
+ # Fetch a static page
143
+ webpeel https://example.com
144
+
145
+ # Fetch a React SPA (Polymarket, Airbnb, etc.)
146
+ webpeel https://polymarket.com --render
147
+
148
+ # Scroll and load all predictions on Polymarket
149
+ webpeel https://polymarket.com --render --action 'scroll:bottom' --action 'wait:2000'
150
+
151
+ # Get Cloudflare-protected site
152
+ webpeel https://someprotectedsite.com --stealth
153
+
154
+ # Extract product data from Amazon
155
+ webpeel https://amazon.com/dp/B09X3PRGT7 --schema product --json
156
+
157
+ # Get YouTube transcript
158
+ webpeel https://youtube.com/watch?v=dQw4w9WgXcQ
159
+
160
+ # Screenshot on mobile viewport
161
+ webpeel screenshot https://stripe.com/pricing --viewport mobile
162
+
163
+ # Search and get top results
164
+ webpeel search "best TypeScript ORM 2024"
165
+
166
+ # Watch a page for price changes
167
+ webpeel watch https://store.example.com/product
168
+
169
+ # Limit output to 500 tokens
170
+ webpeel https://longpage.com --budget 500
171
+
172
+ # Silent JSON output (pipe-friendly)
173
+ webpeel https://example.com --json --silent | jq .tokens
174
+ `.trimStart();
175
+ export function registerGuideCommand(program) {
176
+ program
177
+ .command('guide')
178
+ .description('Print AI-optimized usage guide to stdout')
179
+ .action(() => {
180
+ process.stdout.write(GUIDE);
181
+ process.exit(0);
182
+ });
183
+ }
@@ -119,7 +119,16 @@ export function registerScreenshotCommands(program) {
119
119
  spinner.fail('Screenshot failed');
120
120
  }
121
121
  if (error instanceof Error) {
122
- console.error(`\nError: ${error.message}`);
122
+ const msg = error.message;
123
+ // Detect missing browser binary and give an actionable error
124
+ if (msg.includes("Executable doesn't exist") || msg.includes('browserType.launch') || msg.includes('Chromium is not installed')) {
125
+ console.error('\n\x1b[31m❌ Browser not installed.\x1b[0m');
126
+ console.error('\x1b[36m Run: npx playwright install chromium\x1b[0m');
127
+ console.error('\x1b[36m Then retry your screenshot command.\x1b[0m');
128
+ }
129
+ else {
130
+ console.error(`\nError: ${msg}`);
131
+ }
123
132
  }
124
133
  else {
125
134
  console.error('\nError: Unknown error occurred');
@@ -49,6 +49,10 @@ export function registerSearchCommands(program) {
49
49
  .option('-s, --silent', 'Silent mode')
50
50
  .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
51
51
  .option('--fetch', 'Also fetch and include content from each result URL')
52
+ .option('--local', 'Local business search via Google Places / Yelp (requires API key)')
53
+ .option('--location <location>', 'Location for local search (e.g. "Shibuya, Tokyo", "35.6595,139.7004")')
54
+ .option('--language <lang>', 'Language code for local search results (e.g. "ja", "fr")')
55
+ .option('--country <code>', 'ISO 3166-1 alpha-2 country code for local search (e.g. "JP", "FR")')
52
56
  .option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
53
57
  .action(async (query, options) => {
54
58
  // --agent sets sensible defaults for AI agents; explicit flags override
@@ -164,6 +168,54 @@ export function registerSearchCommands(program) {
164
168
  process.exit(1);
165
169
  }
166
170
  }
171
+ // ── --local: local business search via Google Places / Yelp ─────────
172
+ if (options.local) {
173
+ const spinner = isSilent ? null : ora('Searching local businesses...').start();
174
+ try {
175
+ const { localSearch } = await import('../../core/local-search.js');
176
+ const localResults = await localSearch({
177
+ query,
178
+ location: options.location,
179
+ language: options.language,
180
+ country: options.country,
181
+ limit: count,
182
+ });
183
+ if (spinner)
184
+ spinner.succeed(`Found ${localResults.results.length} results (${localResults.source})`);
185
+ if (isJson) {
186
+ await writeStdout(JSON.stringify(localResults, null, 2) + '\n');
187
+ }
188
+ else {
189
+ if (localResults.results.length === 0) {
190
+ await writeStdout('No local results found.\n');
191
+ }
192
+ else {
193
+ await writeStdout(`\n📍 Local results for "${query}"${localResults.location ? ` near ${localResults.location}` : ''}\n`);
194
+ await writeStdout(`Source: ${localResults.source}\n\n`);
195
+ for (const [i, r] of localResults.results.entries()) {
196
+ const rating = r.rating ? `⭐${r.rating}` : '';
197
+ const reviews = r.reviewCount ? `(${r.reviewCount.toLocaleString()})` : '';
198
+ const price = r.priceLevel !== undefined ? ` · ${'$'.repeat(Math.max(1, r.priceLevel))}` : '';
199
+ const open = r.isOpen === true ? ' · 🟢 Open' : r.isOpen === false ? ' · 🔴 Closed' : '';
200
+ await writeStdout(`${i + 1}. ${r.name} ${rating} ${reviews}${price}${open}\n`);
201
+ if (r.address)
202
+ await writeStdout(` ${r.address}\n`);
203
+ if (r.googleMapsUrl)
204
+ await writeStdout(` ${r.googleMapsUrl}\n`);
205
+ await writeStdout('\n');
206
+ }
207
+ }
208
+ }
209
+ process.exit(0);
210
+ }
211
+ catch (err) {
212
+ if (spinner)
213
+ spinner.fail('Local search failed');
214
+ console.error(`Error: ${err instanceof Error ? err.message : 'Unknown error'}`);
215
+ console.error('Hint: Set GOOGLE_PLACES_API_KEY or YELP_API_KEY environment variable for local search.');
216
+ process.exit(1);
217
+ }
218
+ }
167
219
  const spinner = isSilent ? null : ora('Searching...').start();
168
220
  try {
169
221
  // Route search through the WebPeel API when a key is configured
package/dist/cli/utils.js CHANGED
@@ -296,7 +296,9 @@ export async function fetchViaApi(url, options, apiKey, apiUrl) {
296
296
  }
297
297
  }
298
298
  // Map API response to PeelResult shape that the CLI already handles
299
+ // Spread all API fields first, then override with normalized names
299
300
  return {
301
+ ...data,
300
302
  url: data.url || url,
301
303
  title: data.metadata?.title || data.title || '',
302
304
  content: data.content || '',
@@ -660,6 +662,10 @@ export async function outputResult(result, options, extra = {}) {
660
662
  output.focusReduction = result.focusReduction;
661
663
  if (result.extracted)
662
664
  output.extracted = result.extracted;
665
+ if (result.trust)
666
+ output.trust = result.trust;
667
+ if (result.safeBrowsing)
668
+ output.safeBrowsing = result.safeBrowsing;
663
669
  if (extra.cached)
664
670
  output.cached = true;
665
671
  if (extra.truncated)
package/dist/cli.js CHANGED
@@ -37,6 +37,7 @@ import { registerAuthCommands } from './cli/commands/auth.js';
37
37
  import { registerScreenshotCommands } from './cli/commands/screenshot.js';
38
38
  import { registerJobsCommands } from './cli/commands/jobs.js';
39
39
  import { registerMonitorCommands } from './cli/commands/monitor.js';
40
+ import { registerGuideCommand } from './cli/commands/guide.js';
40
41
  // ── Early silent/log-level detection (must happen before any async module code) ──
41
42
  // Set WEBPEEL_LOG_LEVEL early so logger checks see it when async IIFEs fire.
42
43
  if (!process.env.WEBPEEL_LOG_LEVEL && process.argv.includes('--silent')) {
@@ -83,5 +84,6 @@ registerAuthCommands(program);
83
84
  registerScreenshotCommands(program);
84
85
  registerJobsCommands(program);
85
86
  registerMonitorCommands(program);
87
+ registerGuideCommand(program);
86
88
  // ── Parse ─────────────────────────────────────────────────────────────────────
87
89
  program.parse();
@@ -28,20 +28,38 @@ export interface AutoScrollResult {
28
28
  finalHeight: number;
29
29
  /** Whether the page content grew during scrolling */
30
30
  contentGrew: boolean;
31
+ /** Whether a virtual/inner scrollable container was found and used */
32
+ scrollContainerFound?: boolean;
33
+ /** Total number of DOM mutations detected during scrolling */
34
+ mutationsDetected?: number;
31
35
  }
32
36
  /**
33
37
  * Normalize a raw actions array to WebPeel's internal PageAction shape.
34
38
  * Accepts Firecrawl-style fields (milliseconds, text, direction/amount).
35
39
  */
36
40
  export declare function normalizeActions(input?: unknown): PageAction[] | undefined;
41
+ /**
42
+ * Detect the most likely scrollable container on the page.
43
+ * Returns a CSS selector string for the container, or null if only window scrolling is needed.
44
+ *
45
+ * Looks for elements with overflow-y: auto|scroll whose scrollHeight > clientHeight,
46
+ * preferring the largest such element. Used by autoScroll and scrollThrough.
47
+ */
48
+ export declare function detectScrollContainer(page: Page): Promise<string | null>;
37
49
  /**
38
50
  * Intelligently scroll the page to load all lazy/infinite-scroll content.
39
51
  *
40
- * Scrolls to the bottom repeatedly, detecting height changes to determine
41
- * when new content has loaded. Stops when:
42
- * - Page height is stable for 2 consecutive checks
52
+ * Improvements over the basic version:
53
+ * 1. Detects virtual/inner scroll containers (Polymarket, React virtualized lists)
54
+ * 2. Uses MutationObserver to detect DOM additions (not just height changes)
55
+ * 3. Gracefully handles execution context destruction (SPA navigation)
56
+ * 4. Stability requires BOTH no height change AND no DOM mutations
57
+ *
58
+ * Stops when:
59
+ * - Height is stable AND no DOM mutations for 2 consecutive checks
43
60
  * - maxScrolls limit is reached
44
61
  * - Total timeout is exceeded
62
+ * - Execution context is destroyed (SPA navigation)
45
63
  */
46
64
  export declare function autoScroll(page: Page, options?: AutoScrollOptions): Promise<AutoScrollResult>;
47
65
  export declare function executeActions(page: Page, actions: PageAction[], screenshotOptions?: {