webpeel 0.21.88 → 0.21.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/fetch.js +55 -1
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/screenshot.js +10 -1
- package/dist/cli/commands/search.js +52 -0
- package/dist/cli/utils.js +6 -0
- package/dist/cli.js +2 -0
- package/dist/core/actions.d.ts +21 -3
- package/dist/core/actions.js +198 -11
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/browser-fetch.d.ts +2 -0
- package/dist/core/browser-fetch.js +106 -42
- package/dist/core/browser-pool.d.ts +21 -1
- package/dist/core/browser-pool.js +92 -8
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/http-fetch.d.ts +4 -1
- package/dist/core/http-fetch.js +40 -10
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/pipeline.d.ts +3 -1
- package/dist/core/pipeline.js +91 -12
- package/dist/core/proxy-config.d.ts +36 -1
- package/dist/core/proxy-config.js +93 -1
- package/dist/core/safe-browsing.d.ts +9 -1
- package/dist/core/safe-browsing.js +38 -15
- package/dist/core/search-provider.d.ts +7 -0
- package/dist/core/search-provider.js +24 -15
- package/dist/core/strategies.d.ts +8 -0
- package/dist/core/strategies.js +54 -9
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/ee/extractors/espn.js +7 -2
- package/dist/ee/extractors/kalshi.js +7 -1
- package/dist/ee/extractors/polymarket.js +285 -89
- package/dist/ee/extractors/reddit.js +131 -1
- package/dist/ee/extractors/tradingview.js +7 -1
- package/dist/index.d.ts +6 -0
- package/dist/index.js +51 -0
- package/dist/mcp/handlers/definitions.js +61 -2
- package/dist/mcp/handlers/extract.js +2 -0
- package/dist/mcp/handlers/meta.js +9 -0
- package/dist/mcp/handlers/read.js +15 -0
- package/dist/server/app.js +30 -0
- package/dist/server/routes/cache-warm.js +4 -2
- package/dist/server/routes/fetch.js +21 -4
- package/dist/server/routes/health.js +23 -0
- package/dist/server/routes/search.js +67 -2
- package/dist/server/routes/smart-search.js +343 -112
- package/dist/types.d.ts +34 -3
- package/llms.txt +78 -45
- package/package.json +2 -1
- package/dist/core/challenge-solver.d.ts +0 -72
- package/dist/core/challenge-solver.js +0 -720
- package/dist/core/domain-extractors-basic.d.ts +0 -36
- package/dist/core/domain-extractors-basic.js +0 -28
- package/dist/core/domain-extractors-public.d.ts +0 -20
- package/dist/core/domain-extractors-public.js +0 -35
- package/dist/core/domain-extractors.d.ts +0 -48
- package/dist/core/domain-extractors.js +0 -6342
- package/dist/ee/extractors/utils.d.ts +0 -12
- package/dist/ee/extractors/utils.js +0 -84
- package/dist/server/premium/challenge.d.ts +0 -1
- package/dist/server/premium/challenge.js +0 -1
- package/dist/server/premium/domain-intel.d.ts +0 -16
- package/dist/server/premium/domain-intel.js +0 -133
- package/dist/server/premium/extractors.d.ts +0 -1
- package/dist/server/premium/extractors.js +0 -1
- package/dist/server/premium/index.d.ts +0 -20
- package/dist/server/premium/index.js +0 -50
- package/dist/server/premium/spa-detection.d.ts +0 -2
- package/dist/server/premium/spa-detection.js +0 -2
- package/dist/server/premium/stability.d.ts +0 -4
- package/dist/server/premium/stability.js +0 -29
- package/dist/server/premium/swr-cache.d.ts +0 -14
- package/dist/server/premium/swr-cache.js +0 -34
|
@@ -747,7 +747,61 @@ export async function runFetch(url, options) {
|
|
|
747
747
|
}
|
|
748
748
|
}
|
|
749
749
|
if (!options.silent && !options.json && result.tokens && result.tokens < 50 && !options.render) {
|
|
750
|
-
console.error(`\x1b[33m๐ก Tip:
|
|
750
|
+
console.error(`\x1b[33m💡 Tip: Very little content extracted. This may be a JavaScript-rendered page.\x1b[0m`);
|
|
751
|
+
console.error(`\x1b[33m Try: webpeel "${url}" --render\x1b[0m`);
|
|
752
|
+
console.error(`\x1b[33m For infinite scroll/SPAs: --action 'scroll:bottom' --action 'wait:2000'\x1b[0m`);
|
|
753
|
+
console.error(`\x1b[33m Or use --stealth if the site blocks bots.\x1b[0m`);
|
|
754
|
+
}
|
|
755
|
+
// Auth wall detection hint
|
|
756
|
+
if (!options.json && result.authRequired) {
|
|
757
|
+
let authHost = url;
|
|
758
|
+
try {
|
|
759
|
+
authHost = new URL(url).hostname.replace('www.', '');
|
|
760
|
+
}
|
|
761
|
+
catch { /* ignore */ }
|
|
762
|
+
console.error('');
|
|
763
|
+
console.error('\x1b[33m๐ This page requires authentication.\x1b[0m');
|
|
764
|
+
console.error(`\x1b[36m 1. Create a login profile: webpeel profile create ${authHost}\x1b[0m`);
|
|
765
|
+
console.error('\x1b[36m 2. Log in to the site in the browser that opens\x1b[0m');
|
|
766
|
+
console.error('\x1b[36m 3. Press Ctrl+C when done\x1b[0m');
|
|
767
|
+
console.error(`\x1b[36m 4. Re-run with: webpeel "${url}" --profile ${authHost}\x1b[0m`);
|
|
768
|
+
console.error('');
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
// Trust & safety warnings — shown prominently in non-JSON mode
|
|
772
|
+
if (!options.silent && !options.json) {
|
|
773
|
+
const trustData = result.trust;
|
|
774
|
+
const sbData = result.safeBrowsing;
|
|
775
|
+
// Unsafe: safe browsing threats detected
|
|
776
|
+
const allThreats = [
|
|
777
|
+
...(sbData?.threats ?? []),
|
|
778
|
+
...(trustData?.threatFeeds?.threats ?? []),
|
|
779
|
+
].filter((t, i, a) => a.indexOf(t) === i);
|
|
780
|
+
if (sbData && !sbData.safe && allThreats.length > 0) {
|
|
781
|
+
console.error(`\x1b[31m🚨 UNSAFE — Threats detected: ${allThreats.join(', ')}\x1b[0m`);
|
|
782
|
+
}
|
|
783
|
+
else if (trustData?.threatFeeds && !trustData.threatFeeds.safe && trustData.threatFeeds.threats.length > 0) {
|
|
784
|
+
console.error(`\x1b[31m🚨 UNSAFE — Threat feeds flagged: ${trustData.threatFeeds.threats.join(', ')}\x1b[0m`);
|
|
785
|
+
if (trustData.threatFeeds.details) {
|
|
786
|
+
console.error(`\x1b[31m ${trustData.threatFeeds.details}\x1b[0m`);
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
else if (trustData && trustData.score < 0.5) {
|
|
790
|
+
// Low trust score
|
|
791
|
+
const tier = trustData.source?.tier ?? 'unknown';
|
|
792
|
+
const label = trustData.source?.label ?? '';
|
|
793
|
+
const reason = tier === 'suspicious'
|
|
794
|
+
? 'Domain shows suspicious signals'
|
|
795
|
+
: tier === 'new'
|
|
796
|
+
? 'Domain has limited verifiable presence'
|
|
797
|
+
: label || 'Low credibility domain';
|
|
798
|
+
console.error(`\x1b[33m⚠️ Low trust score (${trustData.score.toFixed(2)}) — ${reason}\x1b[0m`);
|
|
799
|
+
}
|
|
800
|
+
// Show any trust warnings
|
|
801
|
+
if (trustData?.warnings && trustData.warnings.length > 0) {
|
|
802
|
+
for (const warn of trustData.warnings) {
|
|
803
|
+
console.error(`\x1b[33m⚠️ ${warn}\x1b[0m`);
|
|
804
|
+
}
|
|
751
805
|
}
|
|
752
806
|
}
|
|
753
807
|
// Show metadata header
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
const GUIDE = `
|
|
2
|
+
# WebPeel — AI Usage Guide
|
|
3
|
+
|
|
4
|
+
WebPeel is a fast web fetcher built for AI agents. It handles JS rendering, Cloudflare protection,
|
|
5
|
+
and 55+ domain-specific extractors automatically. Run any webpeel command with --help for options.
|
|
6
|
+
|
|
7
|
+
## Quick Decision Tree
|
|
8
|
+
|
|
9
|
+
- Static page → webpeel <url>
|
|
10
|
+
- JavaScript SPA (React/Vue/Angular) → webpeel <url> --render
|
|
11
|
+
- Bot-protected site (Cloudflare) → webpeel <url> --stealth
|
|
12
|
+
- Infinite scroll / lazy content → webpeel <url> --render --action 'scroll:bottom' --action 'wait:2000'
|
|
13
|
+
- Need to interact (click, type) → webpeel <url> --render --action 'click:.button' --action 'wait:1000'
|
|
14
|
+
- Screenshot → webpeel screenshot <url>
|
|
15
|
+
- Search the web → webpeel search "query"
|
|
16
|
+
- YouTube transcript → webpeel <youtube-url>
|
|
17
|
+
- PDF content → webpeel <pdf-url>
|
|
18
|
+
- Structured data → webpeel <url> --schema product --json
|
|
19
|
+
- Monitor for changes → webpeel watch <url>
|
|
20
|
+
|
|
21
|
+
## When to Use --render
|
|
22
|
+
|
|
23
|
+
Use for ANY site that:
|
|
24
|
+
- Shows blank/minimal content without JavaScript
|
|
25
|
+
- Is a Single Page Application (React, Vue, Angular, Svelte, Next.js)
|
|
26
|
+
- Returns less than 50 tokens of content
|
|
27
|
+
- Has dynamic/interactive elements
|
|
28
|
+
|
|
29
|
+
Known SPA sites (auto-detected): Google, Airbnb, Booking.com, Expedia, Indeed, Zillow, Polymarket, and more.
|
|
30
|
+
For sites NOT in the auto-list, add --render manually.
|
|
31
|
+
|
|
32
|
+
Tip: If content looks sparse or empty, ALWAYS retry with --render before concluding the page has no content.
|
|
33
|
+
|
|
34
|
+
## Browser Actions (--action flag)
|
|
35
|
+
|
|
36
|
+
Actions require --render (auto-enabled when you pass --action). Chain multiple actions with repeated --action flags.
|
|
37
|
+
|
|
38
|
+
Available Actions:
|
|
39
|
+
scroll:bottom โ scroll to page bottom (for infinite scroll / lazy-loaded content)
|
|
40
|
+
scroll:top โ scroll to top
|
|
41
|
+
scroll:down:500 โ scroll down 500px
|
|
42
|
+
scroll:0,1500 โ scroll to exact coordinates (x,y)
|
|
43
|
+
wait:2000 โ wait 2000ms (useful after navigation or clicks)
|
|
44
|
+
click:.selector โ click a CSS element
|
|
45
|
+
type:#input:hello โ type text into an input field
|
|
46
|
+
waitFor:.selector โ wait for a CSS element to appear in the DOM
|
|
47
|
+
hover:.element โ hover over an element (for dropdown menus, tooltips)
|
|
48
|
+
|
|
49
|
+
Common Patterns:
|
|
50
|
+
|
|
51
|
+
# Load all lazy content / infinite scroll
|
|
52
|
+
webpeel <url> --render --action 'scroll:bottom' --action 'wait:2000'
|
|
53
|
+
|
|
54
|
+
# Click "Load More" button then extract
|
|
55
|
+
webpeel <url> --render --action 'click:.load-more' --action 'wait:1000'
|
|
56
|
+
|
|
57
|
+
# Fill a search form and submit
|
|
58
|
+
webpeel <url> --render --action 'type:#search:query' --action 'click:.submit' --action 'wait:2000'
|
|
59
|
+
|
|
60
|
+
# Wait for dynamic content to appear
|
|
61
|
+
webpeel <url> --render --action 'waitFor:.results-list' --action 'wait:500'
|
|
62
|
+
|
|
63
|
+
## Stealth Mode (--stealth)
|
|
64
|
+
|
|
65
|
+
Use when:
|
|
66
|
+
- Site returns a Cloudflare challenge page
|
|
67
|
+
- Site blocks bots with fingerprinting or rate limiting
|
|
68
|
+
- Normal --render fails with access denied / 403
|
|
69
|
+
|
|
70
|
+
webpeel <url> --stealth
|
|
71
|
+
|
|
72
|
+
Stealth mode auto-enables --render.
|
|
73
|
+
|
|
74
|
+
## Authentication (Login-Protected Pages)
|
|
75
|
+
|
|
76
|
+
Some pages require you to be logged in (e.g. dashboards, profiles, activity feeds).
|
|
77
|
+
WebPeel detects auth walls automatically and tells you what to do.
|
|
78
|
+
|
|
79
|
+
To access login-protected content:
|
|
80
|
+
|
|
81
|
+
1. Create a browser profile:
|
|
82
|
+
webpeel profile create polymarket
|
|
83
|
+
|
|
84
|
+
2. A browser opens — log in to the site normally
|
|
85
|
+
|
|
86
|
+
3. Press Ctrl+C when done (cookies are saved)
|
|
87
|
+
|
|
88
|
+
4. Fetch with your profile:
|
|
89
|
+
webpeel "https://polymarket.com/@user" --profile polymarket
|
|
90
|
+
|
|
91
|
+
Profiles are saved in ~/.webpeel/profiles/ and can be reused.
|
|
92
|
+
|
|
93
|
+
webpeel profile list โ see all saved profiles
|
|
94
|
+
webpeel profile delete <name> โ remove a profile
|
|
95
|
+
|
|
96
|
+
## 55+ Domain Extractors (automatic)
|
|
97
|
+
|
|
98
|
+
These sites get instant structured data via dedicated API โ no browser needed:
|
|
99
|
+
Amazon, Reddit, YouTube, GitHub, Wikipedia, ESPN, Polymarket, Kalshi, TradingView,
|
|
100
|
+
Hacker News, NPM, PyPI, Stack Overflow, and 40+ more.
|
|
101
|
+
|
|
102
|
+
If the URL matches a supported domain, WebPeel uses the extractor automatically.
|
|
103
|
+
You never need to configure this.
|
|
104
|
+
|
|
105
|
+
## Output Options
|
|
106
|
+
|
|
107
|
+
Default โ clean markdown (LLM-optimized, 65-98% token reduction)
|
|
108
|
+
--json โ full JSON with metadata, token count, method used
|
|
109
|
+
--raw โ full page HTML/text, no smart extraction
|
|
110
|
+
--budget N โ distill content to N tokens (smart summarization)
|
|
111
|
+
--schema NAME โ extract structured data (product, article, recipe, job, event, contact, review)
|
|
112
|
+
--silent โ suppress progress spinner (for piping output)
|
|
113
|
+
--question "..." โ answer a specific question about the page (BM25, no LLM needed)
|
|
114
|
+
|
|
115
|
+
## MCP Server
|
|
116
|
+
|
|
117
|
+
For Claude Desktop, Cursor, VS Code — add to your MCP config:
|
|
118
|
+
|
|
119
|
+
{
|
|
120
|
+
"mcpServers": {
|
|
121
|
+
"webpeel": {
|
|
122
|
+
"command": "npx",
|
|
123
|
+
"args": ["-y", "webpeel", "mcp"]
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
Available MCP tools: webpeel (smart), webpeel_read, webpeel_see, webpeel_find,
|
|
129
|
+
webpeel_extract, webpeel_monitor, webpeel_act
|
|
130
|
+
|
|
131
|
+
## Troubleshooting
|
|
132
|
+
|
|
133
|
+
Very little content? โ Add --render
|
|
134
|
+
Still blocked? โ Add --stealth (implies --render)
|
|
135
|
+
SPA not loading data? โ --render --action 'wait:3000'
|
|
136
|
+
Screenshot fails? โ Run: npx playwright install chromium
|
|
137
|
+
Wrong content for /profile pages? โ Content is client-side routed. Use --render.
|
|
138
|
+
Need to extract specific fields? โ Use --schema or --json with jq
|
|
139
|
+
|
|
140
|
+
## Examples
|
|
141
|
+
|
|
142
|
+
# Fetch a static page
|
|
143
|
+
webpeel https://example.com
|
|
144
|
+
|
|
145
|
+
# Fetch a React SPA (Polymarket, Airbnb, etc.)
|
|
146
|
+
webpeel https://polymarket.com --render
|
|
147
|
+
|
|
148
|
+
# Scroll and load all predictions on Polymarket
|
|
149
|
+
webpeel https://polymarket.com --render --action 'scroll:bottom' --action 'wait:2000'
|
|
150
|
+
|
|
151
|
+
# Get Cloudflare-protected site
|
|
152
|
+
webpeel https://someprotectedsite.com --stealth
|
|
153
|
+
|
|
154
|
+
# Extract product data from Amazon
|
|
155
|
+
webpeel https://amazon.com/dp/B09X3PRGT7 --schema product --json
|
|
156
|
+
|
|
157
|
+
# Get YouTube transcript
|
|
158
|
+
webpeel https://youtube.com/watch?v=dQw4w9WgXcQ
|
|
159
|
+
|
|
160
|
+
# Screenshot on mobile viewport
|
|
161
|
+
webpeel screenshot https://stripe.com/pricing --viewport mobile
|
|
162
|
+
|
|
163
|
+
# Search and get top results
|
|
164
|
+
webpeel search "best TypeScript ORM 2024"
|
|
165
|
+
|
|
166
|
+
# Watch a page for price changes
|
|
167
|
+
webpeel watch https://store.example.com/product
|
|
168
|
+
|
|
169
|
+
# Limit output to 500 tokens
|
|
170
|
+
webpeel https://longpage.com --budget 500
|
|
171
|
+
|
|
172
|
+
# Silent JSON output (pipe-friendly)
|
|
173
|
+
webpeel https://example.com --json --silent | jq .tokens
|
|
174
|
+
`.trimStart();
|
|
175
|
+
export function registerGuideCommand(program) {
|
|
176
|
+
program
|
|
177
|
+
.command('guide')
|
|
178
|
+
.description('Print AI-optimized usage guide to stdout')
|
|
179
|
+
.action(() => {
|
|
180
|
+
process.stdout.write(GUIDE);
|
|
181
|
+
process.exit(0);
|
|
182
|
+
});
|
|
183
|
+
}
|
|
@@ -119,7 +119,16 @@ export function registerScreenshotCommands(program) {
|
|
|
119
119
|
spinner.fail('Screenshot failed');
|
|
120
120
|
}
|
|
121
121
|
if (error instanceof Error) {
|
|
122
|
-
|
|
122
|
+
const msg = error.message;
|
|
123
|
+
// Detect missing browser binary and give an actionable error
|
|
124
|
+
if (msg.includes("Executable doesn't exist") || msg.includes('browserType.launch') || msg.includes('Chromium is not installed')) {
|
|
125
|
+
console.error('\n\x1b[31mโ Browser not installed.\x1b[0m');
|
|
126
|
+
console.error('\x1b[36m Run: npx playwright install chromium\x1b[0m');
|
|
127
|
+
console.error('\x1b[36m Then retry your screenshot command.\x1b[0m');
|
|
128
|
+
}
|
|
129
|
+
else {
|
|
130
|
+
console.error(`\nError: ${msg}`);
|
|
131
|
+
}
|
|
123
132
|
}
|
|
124
133
|
else {
|
|
125
134
|
console.error('\nError: Unknown error occurred');
|
|
@@ -49,6 +49,10 @@ export function registerSearchCommands(program) {
|
|
|
49
49
|
.option('-s, --silent', 'Silent mode')
|
|
50
50
|
.option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
|
|
51
51
|
.option('--fetch', 'Also fetch and include content from each result URL')
|
|
52
|
+
.option('--local', 'Local business search via Google Places / Yelp (requires API key)')
|
|
53
|
+
.option('--location <location>', 'Location for local search (e.g. "Shibuya, Tokyo", "35.6595,139.7004")')
|
|
54
|
+
.option('--language <lang>', 'Language code for local search results (e.g. "ja", "fr")')
|
|
55
|
+
.option('--country <code>', 'ISO 3166-1 alpha-2 country code for local search (e.g. "JP", "FR")')
|
|
52
56
|
.option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
|
|
53
57
|
.action(async (query, options) => {
|
|
54
58
|
// --agent sets sensible defaults for AI agents; explicit flags override
|
|
@@ -164,6 +168,54 @@ export function registerSearchCommands(program) {
|
|
|
164
168
|
process.exit(1);
|
|
165
169
|
}
|
|
166
170
|
}
|
|
171
|
+
// โโ --local: local business search via Google Places / Yelp โโโโโโโโโ
|
|
172
|
+
if (options.local) {
|
|
173
|
+
const spinner = isSilent ? null : ora('Searching local businesses...').start();
|
|
174
|
+
try {
|
|
175
|
+
const { localSearch } = await import('../../core/local-search.js');
|
|
176
|
+
const localResults = await localSearch({
|
|
177
|
+
query,
|
|
178
|
+
location: options.location,
|
|
179
|
+
language: options.language,
|
|
180
|
+
country: options.country,
|
|
181
|
+
limit: count,
|
|
182
|
+
});
|
|
183
|
+
if (spinner)
|
|
184
|
+
spinner.succeed(`Found ${localResults.results.length} results (${localResults.source})`);
|
|
185
|
+
if (isJson) {
|
|
186
|
+
await writeStdout(JSON.stringify(localResults, null, 2) + '\n');
|
|
187
|
+
}
|
|
188
|
+
else {
|
|
189
|
+
if (localResults.results.length === 0) {
|
|
190
|
+
await writeStdout('No local results found.\n');
|
|
191
|
+
}
|
|
192
|
+
else {
|
|
193
|
+
await writeStdout(`\n๐ Local results for "${query}"${localResults.location ? ` near ${localResults.location}` : ''}\n`);
|
|
194
|
+
await writeStdout(`Source: ${localResults.source}\n\n`);
|
|
195
|
+
for (const [i, r] of localResults.results.entries()) {
|
|
196
|
+
const rating = r.rating ? `⭐${r.rating}` : '';
|
|
197
|
+
const reviews = r.reviewCount ? `(${r.reviewCount.toLocaleString()})` : '';
|
|
198
|
+
const price = r.priceLevel !== undefined ? ` · ${'$'.repeat(Math.max(1, r.priceLevel))}` : '';
|
|
199
|
+
const open = r.isOpen === true ? ' · 🟢 Open' : r.isOpen === false ? ' · 🔴 Closed' : '';
|
|
200
|
+
await writeStdout(`${i + 1}. ${r.name} ${rating} ${reviews}${price}${open}\n`);
|
|
201
|
+
if (r.address)
|
|
202
|
+
await writeStdout(` ${r.address}\n`);
|
|
203
|
+
if (r.googleMapsUrl)
|
|
204
|
+
await writeStdout(` ${r.googleMapsUrl}\n`);
|
|
205
|
+
await writeStdout('\n');
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
process.exit(0);
|
|
210
|
+
}
|
|
211
|
+
catch (err) {
|
|
212
|
+
if (spinner)
|
|
213
|
+
spinner.fail('Local search failed');
|
|
214
|
+
console.error(`Error: ${err instanceof Error ? err.message : 'Unknown error'}`);
|
|
215
|
+
console.error('Hint: Set GOOGLE_PLACES_API_KEY or YELP_API_KEY environment variable for local search.');
|
|
216
|
+
process.exit(1);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
167
219
|
const spinner = isSilent ? null : ora('Searching...').start();
|
|
168
220
|
try {
|
|
169
221
|
// Route search through the WebPeel API when a key is configured
|
package/dist/cli/utils.js
CHANGED
|
@@ -296,7 +296,9 @@ export async function fetchViaApi(url, options, apiKey, apiUrl) {
|
|
|
296
296
|
}
|
|
297
297
|
}
|
|
298
298
|
// Map API response to PeelResult shape that the CLI already handles
|
|
299
|
+
// Spread all API fields first, then override with normalized names
|
|
299
300
|
return {
|
|
301
|
+
...data,
|
|
300
302
|
url: data.url || url,
|
|
301
303
|
title: data.metadata?.title || data.title || '',
|
|
302
304
|
content: data.content || '',
|
|
@@ -660,6 +662,10 @@ export async function outputResult(result, options, extra = {}) {
|
|
|
660
662
|
output.focusReduction = result.focusReduction;
|
|
661
663
|
if (result.extracted)
|
|
662
664
|
output.extracted = result.extracted;
|
|
665
|
+
if (result.trust)
|
|
666
|
+
output.trust = result.trust;
|
|
667
|
+
if (result.safeBrowsing)
|
|
668
|
+
output.safeBrowsing = result.safeBrowsing;
|
|
663
669
|
if (extra.cached)
|
|
664
670
|
output.cached = true;
|
|
665
671
|
if (extra.truncated)
|
package/dist/cli.js
CHANGED
|
@@ -37,6 +37,7 @@ import { registerAuthCommands } from './cli/commands/auth.js';
|
|
|
37
37
|
import { registerScreenshotCommands } from './cli/commands/screenshot.js';
|
|
38
38
|
import { registerJobsCommands } from './cli/commands/jobs.js';
|
|
39
39
|
import { registerMonitorCommands } from './cli/commands/monitor.js';
|
|
40
|
+
import { registerGuideCommand } from './cli/commands/guide.js';
|
|
40
41
|
// โโ Early silent/log-level detection (must happen before any async module code) โโ
|
|
41
42
|
// Set WEBPEEL_LOG_LEVEL early so logger checks see it when async IIFEs fire.
|
|
42
43
|
if (!process.env.WEBPEEL_LOG_LEVEL && process.argv.includes('--silent')) {
|
|
@@ -83,5 +84,6 @@ registerAuthCommands(program);
|
|
|
83
84
|
registerScreenshotCommands(program);
|
|
84
85
|
registerJobsCommands(program);
|
|
85
86
|
registerMonitorCommands(program);
|
|
87
|
+
registerGuideCommand(program);
|
|
86
88
|
// โโ Parse โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
87
89
|
program.parse();
|
package/dist/core/actions.d.ts
CHANGED
|
@@ -28,20 +28,38 @@ export interface AutoScrollResult {
|
|
|
28
28
|
finalHeight: number;
|
|
29
29
|
/** Whether the page content grew during scrolling */
|
|
30
30
|
contentGrew: boolean;
|
|
31
|
+
/** Whether a virtual/inner scrollable container was found and used */
|
|
32
|
+
scrollContainerFound?: boolean;
|
|
33
|
+
/** Total number of DOM mutations detected during scrolling */
|
|
34
|
+
mutationsDetected?: number;
|
|
31
35
|
}
|
|
32
36
|
/**
|
|
33
37
|
* Normalize a raw actions array to WebPeel's internal PageAction shape.
|
|
34
38
|
* Accepts Firecrawl-style fields (milliseconds, text, direction/amount).
|
|
35
39
|
*/
|
|
36
40
|
export declare function normalizeActions(input?: unknown): PageAction[] | undefined;
|
|
41
|
+
/**
|
|
42
|
+
* Detect the most likely scrollable container on the page.
|
|
43
|
+
* Returns a CSS selector string for the container, or null if only window scrolling is needed.
|
|
44
|
+
*
|
|
45
|
+
* Looks for elements with overflow-y: auto|scroll whose scrollHeight > clientHeight,
|
|
46
|
+
* preferring the largest such element. Used by autoScroll and scrollThrough.
|
|
47
|
+
*/
|
|
48
|
+
export declare function detectScrollContainer(page: Page): Promise<string | null>;
|
|
37
49
|
/**
|
|
38
50
|
* Intelligently scroll the page to load all lazy/infinite-scroll content.
|
|
39
51
|
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
42
|
-
*
|
|
52
|
+
* Improvements over the basic version:
|
|
53
|
+
* 1. Detects virtual/inner scroll containers (Polymarket, React virtualized lists)
|
|
54
|
+
* 2. Uses MutationObserver to detect DOM additions (not just height changes)
|
|
55
|
+
* 3. Gracefully handles execution context destruction (SPA navigation)
|
|
56
|
+
* 4. Stability requires BOTH no height change AND no DOM mutations
|
|
57
|
+
*
|
|
58
|
+
* Stops when:
|
|
59
|
+
* - Height is stable AND no DOM mutations for 2 consecutive checks
|
|
43
60
|
* - maxScrolls limit is reached
|
|
44
61
|
* - Total timeout is exceeded
|
|
62
|
+
* - Execution context is destroyed (SPA navigation)
|
|
45
63
|
*/
|
|
46
64
|
export declare function autoScroll(page: Page, options?: AutoScrollOptions): Promise<AutoScrollResult>;
|
|
47
65
|
export declare function executeActions(page: Page, actions: PageAction[], screenshotOptions?: {
|