webpeel 0.21.72 → 0.21.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -682,13 +682,21 @@ export async function runFetch(url, options) {
682
682
  result = await fetchViaApi(url, peelOptions, fetchApiKey, fetchApiUrl);
683
683
  }
684
684
  else {
685
- // No API key — show helpful message instead of trying local mode
685
+ // No API key — fall back to local peel() mode (runs locally, no API needed)
686
686
  if (spinner)
687
- spinner.fail('Authentication required');
688
- console.error('No API key configured. Run: webpeel auth <your-key>');
689
- console.error('Get a free key at: https://app.webpeel.dev/keys');
690
- await cleanup();
691
- process.exit(2);
687
+ spinner.text = 'Fetching locally (no API key)…';
688
+ const startLocal = Date.now();
689
+ const { peel } = await import('../../index.js');
690
+ const localResult = await peel(url, peelOptions);
691
+ const elapsed = Date.now() - startLocal;
692
+ // Normalize to the shape fetchViaApi returns
693
+ result = {
694
+ ...localResult,
695
+ elapsed: localResult.elapsed ?? elapsed,
696
+ method: localResult.method ?? 'local',
697
+ tokens: localResult.tokens ?? Math.ceil((localResult.content?.length ?? 0) / 4),
698
+ cached: false,
699
+ };
692
700
  }
693
701
  // Update lastUsed timestamp for named profiles
694
702
  if (resolvedProfileName) {
@@ -588,4 +588,100 @@ export function registerSearchCommands(program) {
588
588
  process.exit(1);
589
589
  }
590
590
  });
591
+ // ── extractors command ────────────────────────────────────────────────────
592
+ program
593
+ .command('extractors')
594
+ .alias('list-extractors')
595
+ .description('List all supported domain extractors')
596
+ .option('--json', 'Output as JSON')
597
+ .action((options) => {
598
+ const extractors = [
599
+ // Social
600
+ { domain: 'twitter.com / x.com', category: 'Social', description: 'Tweets, threads, profiles' },
601
+ { domain: 'reddit.com', category: 'Social', description: 'Subreddits, posts, comments' },
602
+ { domain: 'instagram.com', category: 'Social', description: 'Photos, reels, profiles' },
603
+ { domain: 'tiktok.com', category: 'Social', description: 'Video metadata, captions' },
604
+ { domain: 'pinterest.com', category: 'Social', description: 'Pins, boards' },
605
+ { domain: 'linkedin.com', category: 'Social', description: 'Profiles, job listings' },
606
+ { domain: 'facebook.com', category: 'Social', description: 'Marketplace listings' },
607
+ // Video / Audio
608
+ { domain: 'youtube.com', category: 'Video', description: 'Transcripts, metadata, comments' },
609
+ { domain: 'twitch.tv', category: 'Video', description: 'Streams, clips, channel info' },
610
+ { domain: 'soundcloud.com', category: 'Audio', description: 'Tracks, playlists' },
611
+ { domain: 'open.spotify.com', category: 'Audio', description: 'Tracks, albums, playlists' },
612
+ // Tech / Dev
613
+ { domain: 'github.com', category: 'Dev', description: 'Repos, issues, PRs, code' },
614
+ { domain: 'stackoverflow.com', category: 'Dev', description: 'Questions, answers' },
615
+ { domain: 'npmjs.com', category: 'Dev', description: 'Package metadata, readme' },
616
+ { domain: 'pypi.org', category: 'Dev', description: 'Package metadata, readme' },
617
+ { domain: 'dev.to', category: 'Dev', description: 'Articles, comments' },
618
+ // News / Articles
619
+ { domain: 'news.ycombinator.com', category: 'News', description: 'HN posts, comments, Ask/Show HN' },
620
+ { domain: 'medium.com', category: 'Articles', description: 'Articles, publications' },
621
+ { domain: 'substack.com / *.substack.com', category: 'Articles', description: 'Newsletters, posts' },
622
+ { domain: 'nytimes.com', category: 'News', description: 'Articles, headlines' },
623
+ { domain: 'bbc.com', category: 'News', description: 'Articles, headlines' },
624
+ { domain: 'cnn.com', category: 'News', description: 'Articles, headlines' },
625
+ // Shopping / E-commerce
626
+ { domain: 'amazon.com', category: 'Shopping', description: 'Products, prices, reviews' },
627
+ { domain: 'bestbuy.com', category: 'Shopping', description: 'Products, prices, specs' },
628
+ { domain: 'walmart.com', category: 'Shopping', description: 'Products, prices' },
629
+ { domain: 'ebay.com', category: 'Shopping', description: 'Listings, prices' },
630
+ { domain: 'etsy.com', category: 'Shopping', description: 'Handmade listings' },
631
+ // Local / Real Estate
632
+ { domain: 'yelp.com', category: 'Local', description: 'Business info, reviews (needs YELP_API_KEY)' },
633
+ { domain: 'craigslist.org', category: 'Local', description: 'Listings, classifieds' },
634
+ { domain: 'zillow.com', category: 'Real Estate', description: 'Property listings, estimates' },
635
+ { domain: 'redfin.com', category: 'Real Estate', description: 'Property listings, prices' },
636
+ { domain: 'cars.com', category: 'Automotive', description: 'Car listings, prices' },
637
+ // Knowledge / Academic
638
+ { domain: 'en.wikipedia.org', category: 'Knowledge', description: 'Articles, structured data' },
639
+ { domain: 'arxiv.org', category: 'Academic', description: 'Papers, abstracts, metadata' },
640
+ { domain: 'semanticscholar.org', category: 'Academic', description: 'Papers, citations' },
641
+ { domain: 'pubmed.ncbi.nlm.nih.gov', category: 'Academic', description: 'Medical papers, abstracts' },
642
+ { domain: 'imdb.com', category: 'Knowledge', description: 'Movies, TV shows, cast' },
643
+ { domain: 'allrecipes.com', category: 'Knowledge', description: 'Recipes, ingredients, steps' },
644
+ // Finance / Markets
645
+ { domain: 'polymarket.com', category: 'Finance', description: 'Prediction markets' },
646
+ { domain: 'kalshi.com', category: 'Finance', description: 'Prediction markets' },
647
+ { domain: 'tradingview.com', category: 'Finance', description: 'Charts, indicators, ideas' },
648
+ { domain: 'coingecko.com', category: 'Finance', description: 'Crypto prices, market data' },
649
+ { domain: 'coinmarketcap.com', category: 'Finance', description: 'Crypto prices, market data' },
650
+ // Sports / Betting
651
+ { domain: 'espn.com', category: 'Sports', description: 'Scores, stats, news' },
652
+ { domain: 'draftkings.com', category: 'Betting', description: 'Odds, lines' },
653
+ { domain: 'fanduel.com', category: 'Betting', description: 'Odds, lines' },
654
+ { domain: 'betmgm.com', category: 'Betting', description: 'Odds, lines' },
655
+ // Entertainment
656
+ { domain: 'producthunt.com', category: 'Tech', description: 'Product launches, upvotes' },
657
+ // Documents
658
+ { domain: '*.pdf URLs', category: 'Documents', description: 'PDF text extraction' },
659
+ // Weather
660
+ { domain: 'weather.com', category: 'Weather', description: 'Forecasts, conditions' },
661
+ { domain: 'accuweather.com', category: 'Weather', description: 'Forecasts, conditions' },
662
+ { domain: 'api.open-meteo.com', category: 'Weather', description: 'Free weather API' },
663
+ ];
664
+ if (options.json) {
665
+ console.log(JSON.stringify(extractors, null, 2));
666
+ return;
667
+ }
668
+ // Group by category
669
+ const byCategory = new Map();
670
+ for (const e of extractors) {
671
+ if (!byCategory.has(e.category))
672
+ byCategory.set(e.category, []);
673
+ byCategory.get(e.category).push(e);
674
+ }
675
+ console.log(`\n🔌 WebPeel Domain Extractors (${extractors.length} total)\n`);
676
+ for (const [cat, items] of byCategory) {
677
+ console.log(` ${cat}`);
678
+ for (const item of items) {
679
+ const pad = 35;
680
+ const domainPad = item.domain.padEnd(pad);
681
+ console.log(` ${domainPad} ${item.description}`);
682
+ }
683
+ console.log('');
684
+ }
685
+ console.log(' Run `webpeel <url>` to use these automatically based on the URL.');
686
+ });
591
687
  }
package/dist/cli/utils.js CHANGED
@@ -255,7 +255,37 @@ export async function fetchViaApi(url, options, apiKey, apiUrl) {
255
255
  err.statusCode = res.status;
256
256
  throw err;
257
257
  }
258
- const data = await res.json();
258
+ let data = await res.json();
259
+ // Handle async job queue mode — API returns { jobId, pollUrl } and we need to poll
260
+ if (data.jobId && data.pollUrl && !data.content) {
261
+ const pollEndpoint = `${apiUrl}${data.pollUrl}`;
262
+ const maxPollMs = 90_000; // 90s max
263
+ const pollInterval = 1_000; // 1s intervals
264
+ const start = Date.now();
265
+ while (Date.now() - start < maxPollMs) {
266
+ await new Promise(r => setTimeout(r, pollInterval));
267
+ const pollRes = await fetch(pollEndpoint, {
268
+ headers: { Authorization: `Bearer ${apiKey}` },
269
+ signal: AbortSignal.timeout(10_000),
270
+ });
271
+ if (!pollRes.ok) {
272
+ throw new Error(`Job poll failed: HTTP ${pollRes.status}`);
273
+ }
274
+ const pollData = await pollRes.json();
275
+ if (pollData.status === 'completed' || pollData.content) {
276
+ data = pollData.result || pollData;
277
+ break;
278
+ }
279
+ if (pollData.status === 'failed' || pollData.status === 'error') {
280
+ throw new Error(pollData.error?.message || pollData.error || 'Job failed on server');
281
+ }
282
+ // Still processing — keep polling
283
+ }
284
+ // If we exited the loop without a result, throw a timeout error
285
+ if (!data.content && data.jobId) {
286
+ throw new Error('Job timed out waiting for server response. Try again or use local mode (unset WEBPEEL_API_KEY).');
287
+ }
288
+ }
259
289
  // Map API response to PeelResult shape that the CLI already handles
260
290
  return {
261
291
  url: data.url || url,
package/dist/cli.js CHANGED
@@ -14,6 +14,20 @@
14
14
  * npx webpeel --help - Condensed help
15
15
  * npx webpeel --help-all - Full option reference
16
16
  */
17
+ // ── Auto-load .env from cwd (lightweight, no dotenv dependency) ──────────────
18
+ // Must happen BEFORE any imports that read env vars (e.g., WEBPEEL_API_KEY)
19
+ import { readFileSync, existsSync } from 'fs';
20
+ import { resolve } from 'path';
21
+ {
22
+ const envPath = resolve(process.cwd(), '.env');
23
+ if (existsSync(envPath)) {
24
+ for (const line of readFileSync(envPath, 'utf-8').split('\n')) {
25
+ const m = line.match(/^([A-Z_][A-Z0-9_]*)=(.*)$/);
26
+ if (m && !process.env[m[1]])
27
+ process.env[m[1]] = m[2].replace(/^["']|["']$/g, '');
28
+ }
29
+ }
30
+ }
17
31
  import { Command } from 'commander';
18
32
  import { VERB_ALIASES, cliVersion, checkForUpdates, buildCommanderHelp, buildCondensedHelp, } from './cli/utils.js';
19
33
  import { registerFetchCommands } from './cli/commands/fetch.js';
@@ -121,6 +121,8 @@ const REGISTRY = [
121
121
  { match: (h) => h === 'yelp.com' || h === 'www.yelp.com', extractor: yelpExtractor },
122
122
  { match: (h) => h === 'zillow.com' || h === 'www.zillow.com', extractor: zillowExtractor },
123
123
  { match: (h) => h === 'redfin.com' || h === 'www.redfin.com', extractor: redfinExtractor },
124
+ // ── Travel ──────────────────────────────────────────────────────────────
125
+ { match: (h, url = '') => (h === 'www.google.com' || h === 'google.com') && url.includes('/travel/flights'), extractor: googleFlightsExtractor },
124
126
  ];
125
127
  /**
126
128
  * Returns the domain extractor for a URL, or null if none matches.
@@ -5860,3 +5862,155 @@ async function redfinExtractor(_html, url) {
5860
5862
  return null;
5861
5863
  }
5862
5864
  }
5865
+ // ---------------------------------------------------------------------------
5866
+ // Google Flights extractor
5867
+ // ---------------------------------------------------------------------------
5868
+ async function googleFlightsExtractor(_html, url) {
5869
+ if (!url.includes('/travel/flights'))
5870
+ return null;
5871
+ // Google Flights is a SPA. The _html parameter is usually readability-processed markdown
5872
+ // (from the pipeline's post-fetch processing), which looks like:
5873
+ // - 7:15 PM
5874
+ // 7:15 PM on Sat, Apr 4
5875
+ // – 10:29 PM
5876
+ // United
5877
+ // 3 hr 14 min
5878
+ // EWR
5879
+ // ...
5880
+ // $188
5881
+ //
5882
+ // This markdown is much easier to parse than raw HTML.
5883
+ let text = _html;
5884
+ // If this is raw HTML (contains <!DOCTYPE or <html), strip HTML tags
5885
+ if (text.includes('<!DOCTYPE') || text.includes('<html')) {
5886
+ text = text
5887
+ .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
5888
+ .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
5889
+ .replace(/<[^>]+>/g, '\n')
5890
+ .replace(/&amp;/g, '&')
5891
+ .replace(/&lt;/g, '<')
5892
+ .replace(/&gt;/g, '>')
5893
+ .replace(/&#\d+;/g, '')
5894
+ .replace(/\n{2,}/g, '\n');
5895
+ }
5896
+ const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
5897
+ const AIRLINES = ['United', 'Delta', 'American', 'JetBlue', 'Spirit', 'Frontier', 'Southwest', 'Breeze', 'Alaska', 'Hawaiian', 'Sun Country', 'Avelo'];
5898
+ const flights = [];
5899
+ for (let i = 0; i < lines.length; i++) {
5900
+ const line = lines[i];
5901
+ // Detect departure time
5902
+ const departMatch = line.match(/^(?:-\s+)?(\d{1,2}:\d{2}\s*[AP]M)$/);
5903
+ if (!departMatch)
5904
+ continue;
5905
+ const departTime = departMatch[1];
5906
+ let departDate = '', arriveTime = '', airline = '', duration = '';
5907
+ let fromAirport = '', toAirport = '', stops = '', bags = '';
5908
+ let price = 0;
5909
+ for (let j = i + 1; j < Math.min(i + 45, lines.length); j++) {
5910
+ const l = lines[j];
5911
+ // Date
5912
+ const dateM = l.match(/on\s+(\w+,\s+\w+\s+\d+)/);
5913
+ if (dateM && !departDate) {
5914
+ departDate = dateM[1];
5915
+ continue;
5916
+ }
5917
+ // Arrival time
5918
+ const arrM = l.match(/^[–\-–—]\s*(\d{1,2}:\d{2}\s*[AP]M)$/) || l.match(/^(\d{1,2}:\d{2}\s*[AP]M)\s+on\s/);
5919
+ if (arrM && !arriveTime && departTime) {
5920
+ arriveTime = arrM[1];
5921
+ continue;
5922
+ }
5923
+ // Arrival time: also check for "10:29 PM on Sat, Apr 4" pattern (second occurrence)
5924
+ if (!arriveTime && l.match(/^\d{1,2}:\d{2}\s*[AP]M\s+on\s/)) {
5925
+ const m = l.match(/^(\d{1,2}:\d{2}\s*[AP]M)/);
5926
+ if (m) {
5927
+ arriveTime = m[1];
5928
+ continue;
5929
+ }
5930
+ }
5931
+ // Airline
5932
+ if (!airline) {
5933
+ for (const a of AIRLINES) {
5934
+ if (l === a || l.startsWith(a + 'Operated') || l.startsWith(a + ' ')) {
5935
+ airline = a;
5936
+ break;
5937
+ }
5938
+ }
5939
+ if (airline)
5940
+ continue;
5941
+ }
5942
+ // Duration
5943
+ if (!duration && l.match(/^\d+\s+hr\s+\d+\s+min$/)) {
5944
+ duration = l;
5945
+ continue;
5946
+ }
5947
+ // Airport codes
5948
+ if (l.match(/^[A-Z]{3}$/) && !fromAirport) {
5949
+ fromAirport = l;
5950
+ continue;
5951
+ }
5952
+ if (l.match(/^[A-Z]{3}$/) && fromAirport && !toAirport && l !== fromAirport) {
5953
+ toAirport = l;
5954
+ continue;
5955
+ }
5956
+ // Stops
5957
+ if (!stops && (l === 'Nonstop' || l.match(/^\d+\s+stop/))) {
5958
+ stops = l;
5959
+ continue;
5960
+ }
5961
+ // Bags
5962
+ if (l.includes('carry-on bag') && !bags) {
5963
+ bags = l.includes('not included') ? 'Carry-on NOT included (extra fee)' : 'Carry-on included';
5964
+ continue;
5965
+ }
5966
+ // Price — first occurrence only
5967
+ const priceM = l.match(/^\$(\d[\d,]*)$/);
5968
+ if (priceM && !price) {
5969
+ price = parseInt(priceM[1].replace(',', ''));
5970
+ break;
5971
+ }
5972
+ }
5973
+ if (departTime && arriveTime && airline && price) {
5974
+ flights.push({ departTime, arriveTime, departDate, airline, duration, fromAirport, toAirport, stops: stops || 'Unknown', price, priceStr: `$${price}`, bags });
5975
+ }
5976
+ }
5977
+ // Deduplicate
5978
+ const seen = new Set();
5979
+ const unique = flights.filter(f => {
5980
+ const key = `${f.departTime}-${f.airline}-${f.price}`;
5981
+ if (seen.has(key))
5982
+ return false;
5983
+ seen.add(key);
5984
+ return true;
5985
+ });
5986
+ if (unique.length === 0)
5987
+ return null;
5988
+ unique.sort((a, b) => a.price - b.price);
5989
+ // Parse route from URL
5990
+ const u = new URL(url);
5991
+ const query = (u.searchParams.get('q') || '').replace(/Flights?\s+(from\s+)?/i, '').replace(/\s+one\s+way/i, '').trim();
5992
+ const md = [
5993
+ `# ✈️ Flights — ${query || 'Search Results'}`,
5994
+ '',
5995
+ `*${unique.length} flights found · Source: [Google Flights](${url})*`,
5996
+ `*Prices include taxes + fees for 1 adult. Book directly via airline.*`,
5997
+ '',
5998
+ ];
5999
+ for (let idx = 0; idx < unique.length; idx++) {
6000
+ const f = unique[idx];
6001
+ md.push(`## ${idx + 1}. ${f.airline} — ${f.priceStr}`);
6002
+ md.push(`🕐 Depart **${f.departTime}** → Arrive **${f.arriveTime}**${f.departDate ? ` · ${f.departDate}` : ''}`);
6003
+ md.push(`🛫 ${f.fromAirport} → ${f.toAirport} · ${f.duration} · ${f.stops}`);
6004
+ if (f.bags)
6005
+ md.push(`🧳 ${f.bags}`);
6006
+ md.push('');
6007
+ }
6008
+ md.push('---');
6009
+ md.push(`📌 *Prices change frequently. [View live prices on Google Flights](${url})*`);
6010
+ return {
6011
+ domain: 'google.com/travel/flights',
6012
+ type: 'flights',
6013
+ structured: { flights: unique, route: query, source: 'Google Flights', sourceUrl: url },
6014
+ cleanContent: md.join('\n'),
6015
+ };
6016
+ }
@@ -991,7 +991,12 @@ export async function postProcess(ctx) {
991
991
  if (getDomainExtractor(fetchResult.url) && !ctx.domainApiHandled) {
992
992
  try {
993
993
  ctx.timer.mark('domainExtract');
994
- const ddResult = await extractDomainData(fetchResult.html, fetchResult.url);
994
+ // Try raw HTML first, then fall back to readability-processed content
995
+ // (some SPAs like Google Flights have data only after readability processing)
996
+ let ddResult = await extractDomainData(fetchResult.html, fetchResult.url);
997
+ if (!ddResult && ctx.content) {
998
+ ddResult = await extractDomainData(ctx.content, fetchResult.url);
999
+ }
995
1000
  ctx.timer.end('domainExtract');
996
1001
  if (ddResult) {
997
1002
  ctx.domainData = ddResult;
@@ -22,6 +22,7 @@ export interface SearchFallbackResult {
22
22
  }
23
23
  /**
24
24
  * Search for a URL using the best available search provider and extract the snippet.
25
+ * Richer fallback: tries multiple engines if the first returns < 100 tokens.
25
26
  * Returns the title, snippet, and any extracted product data.
26
27
  */
27
28
  export declare function searchFallback(url: string): Promise<SearchFallbackResult>;
@@ -7,7 +7,7 @@
7
7
  * Google CSE API → Brave API → Google stealth → DDG
8
8
  * This avoids direct HTML scraping which is blocked by CAPTCHAs on datacenter IPs.
9
9
  */
10
- import { getBestSearchProvider } from './search-provider.js';
10
+ import { getBestSearchProvider, DuckDuckGoProvider } from './search-provider.js';
11
11
  /**
12
12
  * Detect if a URL is likely a product page.
13
13
  */
@@ -121,8 +121,15 @@ function buildCachedContent(url, title, snippet, productData) {
121
121
  lines.push(`*⚠️ Limited content — original page blocked direct access. For full data, configure GOOGLE_SEARCH_KEY or BRAVE_SEARCH_KEY.*`);
122
122
  return lines.join('\n');
123
123
  }
124
+ /**
125
+ * Count approximate tokens in a string (1 token ≈ 4 chars).
126
+ */
127
+ function countTokens(text) {
128
+ return Math.ceil(text.length / 4);
129
+ }
124
130
  /**
125
131
  * Search for a URL using the best available search provider and extract the snippet.
132
+ * Richer fallback: tries multiple engines if the first returns < 100 tokens.
126
133
  * Returns the title, snippet, and any extracted product data.
127
134
  */
128
135
  export async function searchFallback(url) {
@@ -142,16 +149,42 @@ export async function searchFallback(url) {
142
149
  }
143
150
  const searchQuery = buildSearchQuery(url);
144
151
  const { provider, apiKey } = getBestSearchProvider();
145
- const results = await provider.searchWeb(searchQuery, {
146
- count: 3,
147
- apiKey,
148
- });
149
- if (!results || results.length === 0) {
150
- return emptyResult;
152
+ // Map provider ID to our source type
153
+ const sourceMap = {
154
+ duckduckgo: 'duckduckgo',
155
+ brave: 'google',
156
+ stealth: 'duckduckgo',
157
+ google: 'google',
158
+ };
159
+ // Try the primary (best) provider first
160
+ let results = await provider.searchWeb(searchQuery, { count: 5, apiKey }).catch(() => []);
161
+ // If primary returns sparse results (< 100 tokens), try DDG as secondary
162
+ const combinedSnippets = [];
163
+ let title = '';
164
+ let source = sourceMap[provider.id] ?? 'google';
165
+ if (results.length > 0) {
166
+ title = results[0].title?.trim() || '';
167
+ combinedSnippets.push(...results.map(r => r.snippet?.trim()).filter(Boolean));
168
+ }
169
+ const primaryTokens = countTokens(combinedSnippets.join(' '));
170
+ if (primaryTokens < 100) {
171
+ // Try DDG as a secondary engine to supplement
172
+ try {
173
+ const ddgProvider = new DuckDuckGoProvider();
174
+ const ddgResults = await ddgProvider.searchWeb(searchQuery, { count: 5 });
175
+ if (ddgResults.length > 0) {
176
+ if (!title)
177
+ title = ddgResults[0].title?.trim() || '';
178
+ if (source !== 'duckduckgo')
179
+ source = 'duckduckgo';
180
+ combinedSnippets.push(...ddgResults.map(r => r.snippet?.trim()).filter(Boolean));
181
+ }
182
+ }
183
+ catch { /* ignore secondary failure */ }
151
184
  }
152
- const topResult = results[0];
153
- const title = topResult.title?.trim() || '';
154
- const snippet = topResult.snippet?.trim() || '';
185
+ // Merge and deduplicate the snippets gathered from all engines
186
+ const allSnippets = [...new Set(combinedSnippets)]; // deduplicate
187
+ const snippet = allSnippets.slice(0, 3).join('\n\n');
155
188
  if (!title && !snippet) {
156
189
  return emptyResult;
157
190
  }
@@ -159,14 +192,6 @@ export async function searchFallback(url) {
159
192
  ? extractProductData(title, snippet)
160
193
  : undefined;
161
194
  const cachedContent = buildCachedContent(url, title, snippet, productData);
162
- // Map provider ID to our source type
163
- const sourceMap = {
164
- duckduckgo: 'duckduckgo',
165
- brave: 'google',
166
- stealth: 'duckduckgo',
167
- google: 'google',
168
- };
169
- const source = sourceMap[provider.id] ?? 'google';
170
195
  return {
171
196
  title,
172
197
  snippet,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.72",
3
+ "version": "0.21.74",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",
@@ -8,7 +8,7 @@
8
8
  "main": "./dist/index.js",
9
9
  "types": "./dist/index.d.ts",
10
10
  "bin": {
11
- "webpeel": "dist/cli.bundle.cjs"
11
+ "webpeel": "dist/cli.js"
12
12
  },
13
13
  "exports": {
14
14
  ".": {