webpeel 0.21.89 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. package/dist/core/cross-verify.d.ts +27 -0
  2. package/dist/core/cross-verify.js +93 -0
  3. package/dist/core/google-serp-parser.d.ts +82 -0
  4. package/dist/core/google-serp-parser.js +287 -0
  5. package/dist/core/search-engines.d.ts +25 -0
  6. package/dist/core/search-engines.js +182 -0
  7. package/dist/core/search-provider.d.ts +5 -1
  8. package/dist/core/search-provider.js +15 -2
  9. package/dist/core/vertical-search.d.ts +53 -0
  10. package/dist/core/vertical-search.js +231 -0
  11. package/dist/index.d.ts +5 -0
  12. package/dist/index.js +4 -0
  13. package/dist/server/app.js +1 -1
  14. package/dist/server/routes/search.js +199 -3
  15. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  16. package/dist/server/routes/smart-search/handlers/cars.js +99 -0
  17. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  18. package/dist/server/routes/smart-search/handlers/flights.js +69 -0
  19. package/dist/server/routes/smart-search/handlers/general.d.ts +2 -0
  20. package/dist/server/routes/smart-search/handlers/general.js +390 -0
  21. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  22. package/dist/server/routes/smart-search/handlers/hotels.js +85 -0
  23. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  24. package/dist/server/routes/smart-search/handlers/products.js +213 -0
  25. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  26. package/dist/server/routes/smart-search/handlers/rental.js +151 -0
  27. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  28. package/dist/server/routes/smart-search/handlers/restaurants.js +205 -0
  29. package/dist/server/routes/smart-search/index.d.ts +19 -0
  30. package/dist/server/routes/smart-search/index.js +508 -0
  31. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  32. package/dist/server/routes/smart-search/intent.js +109 -0
  33. package/dist/server/routes/smart-search/llm.d.ts +8 -0
  34. package/dist/server/routes/smart-search/llm.js +101 -0
  35. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  36. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  37. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  38. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  39. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  40. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  41. package/dist/server/routes/smart-search/types.d.ts +30 -0
  42. package/dist/server/routes/smart-search/types.js +1 -0
  43. package/dist/server/routes/smart-search/utils.d.ts +12 -0
  44. package/dist/server/routes/smart-search/utils.js +97 -0
  45. package/package.json +1 -1
@@ -0,0 +1,213 @@
1
+ import { peel } from '../../../../index.js';
2
+ import { getBestSearchProvider } from '../../../../core/search-provider.js';
3
+ import { addAffiliateTag, getStoreInfo, parsePrice, cleanProductTitle } from '../utils.js';
4
+ import { callLLMQuick, sanitizeSearchQuery, PROMPT_INJECTION_DEFENSE } from '../llm.js';
5
/**
 * Brand names recognised in result titles.
 * Hoisted to module scope so the pattern is compiled once instead of once per search result.
 */
const KNOWN_BRANDS = /\b(Sony|Bose|Apple|Samsung|LG|JBL|Sennheiser|Audio-Technica|Beats|Jabra|Anker|Soundcore|AKG|Shure|Skullcandy|Plantronics|HyperX|SteelSeries|Razer|Corsair|Logitech|Dell|HP|Lenovo|Asus|Acer|MSI|Microsoft|Google|Amazon|Kindle|Echo|Ring|Roku|Dyson|iRobot|Roomba|Ninja|KitchenAid|Instant Pot|Keurig|Breville|Philips|Panasonic|Canon|Nikon|GoPro|DJI|Fitbit|Garmin|Xiaomi|OnePlus|Nothing|Motorola|Nokia|TCL|Hisense|Vizio|Sonos|Marshall|Bang & Olufsen|B&O|Nike|Adidas|New Balance|Puma|Under Armour|North Face|Patagonia|Columbia|Levi's|Oakley|Ray-Ban|Gucci|Coach|Kate Spade|Michael Kors|Samsonite|Osprey|Yeti|Hydro Flask|Stanley|Weber|Traeger|DeWalt|Makita|Milwaukee|Bosch|Black\+Decker|Craftsman|Ryobi)\b/i;

/** Marketplaces whose pages are deep-scraped for collectible prices: [domain, display label]. */
const COLLECTIBLE_MARKETPLACES = [
    ['tcgplayer.com', 'TCGPlayer'],
    ['ebay.com', 'eBay'],
    ['amazon.com', 'Amazon'],
    ['etsy.com', 'Etsy'],
    ['mercari.com', 'Mercari'],
];

/** Condition keywords looked for on a scraped listing line. */
const LISTING_CONDITION = /\b(Near Mint|NM|Lightly Played|LP|Moderately Played|MP|Heavily Played|HP|Damaged|DMG|New|Used|Like New|Good|Very Good|Excellent)\b/i;

/**
 * Dollar amount on a scraped line. The first alternative accepts thousands separators
 * ("$1,299.99"); without it such a price would be read as "$1".
 */
const LISTING_PRICE = /\$(\d{1,3}(?:,\d{3})+(?:\.\d{2})?|\d{1,6}(?:\.\d{2})?)/;

/**
 * Returns the display label of the scrapable marketplace that hosts `url`, or null.
 * Matching is done on the hostname (exact or sub-domain) rather than on the whole URL,
 * so a marketplace name appearing in a path or query string is not mistaken for the host.
 *
 * @param {string} url Absolute URL of a search result.
 * @returns {string | null} Marketplace label, or null for other hosts / unparsable URLs.
 */
function marketplaceLabel(url) {
    let hostname;
    try {
        hostname = new URL(url).hostname.toLowerCase();
    }
    catch {
        return null;
    }
    for (const [domain, label] of COLLECTIBLE_MARKETPLACES) {
        if (hostname === domain || hostname.endsWith('.' + domain)) {
            return label;
        }
    }
    return null;
}

/**
 * Extracts a priced listing from one line of scraped page text.
 *
 * @param {string} line A single line of page content.
 * @returns {{ title: string, price: string, priceValue: number, condition: (string | undefined) } | null}
 *   The parsed listing, or null when the line has no usable price (outside $0.50–$50,000)
 *   or fewer than 5 characters of title text.
 */
function parseListingLine(line) {
    const priceMatch = line.match(LISTING_PRICE);
    if (!priceMatch) {
        return null;
    }
    const priceValue = parseFloat(priceMatch[1].replace(/,/g, ''));
    if (priceValue < 0.5 || priceValue > 50000) {
        return null;
    }
    // Title = the line minus every dollar amount, with separator glyphs turned into spaces.
    const title = line.replace(/\$[\d,.]+/g, '').replace(/[|·\-–—]/g, ' ').trim().slice(0, 100);
    if (title.length < 5) {
        return null;
    }
    const conditionMatch = line.match(LISTING_CONDITION);
    return {
        title,
        price: '$' + priceValue.toFixed(2),
        priceValue,
        condition: conditionMatch ? conditionMatch[1] : undefined,
    };
}

/** Unwraps a `Promise.allSettled` entry to its array value, or `[]` when rejected / not an array. */
function settledValueOrEmpty(settled) {
    return settled.status === 'fulfilled' && Array.isArray(settled.value) ? settled.value : [];
}

/**
 * Visits up to four marketplace result pages and extracts priced listings from their text.
 * Pages that fail to load are skipped. Results are sorted by ascending price, de-duplicated
 * per (price, marketplace) pair and capped at six.
 *
 * @param {Array<{ url?: string }>} rawResults Marketplace search results.
 * @returns {Promise<Array<{ title: string, price: string, priceValue: number, url: string, source: string, condition: (string | undefined) }>>}
 */
async function scrapeCollectibleListings(rawResults) {
    const targets = rawResults
        .map(r => ({ url: r.url, source: r.url ? marketplaceLabel(r.url) : null }))
        .filter(target => target.source !== null)
        .slice(0, 4);
    const pages = await Promise.all(targets.map(async ({ url, source }) => {
        try {
            const page = await peel(url, { render: false, timeout: 5000 });
            return { url, source, content: page.content };
        }
        catch {
            // Best-effort: an unreachable page simply contributes no listings.
            return null;
        }
    }));
    const found = [];
    for (const page of pages) {
        if (!page || !page.content) {
            continue;
        }
        for (const line of page.content.split('\n')) {
            const parsed = parseListingLine(line);
            if (parsed) {
                found.push({ ...parsed, url: page.url, source: page.source });
            }
        }
    }
    found.sort((a, b) => a.priceValue - b.priceValue);
    const seenKeys = new Set();
    const unique = [];
    for (const listing of found) {
        const key = listing.price + listing.source;
        if (!seenKeys.has(key)) {
            seenKeys.add(key);
            unique.push(listing);
        }
    }
    return unique.slice(0, 6);
}

/**
 * Handles a product/shopping smart search.
 *
 * Runs several site-restricted web searches in parallel (collectible, grocery or general
 * retail, chosen from the query text), turns the results into structured listings, and —
 * for collectibles — replaces them with prices scraped from the marketplace pages when any
 * are found. When `OLLAMA_URL` is set, an LLM summary is attached as `answer`.
 *
 * @param {{ query: string, params: Record<string, string> }} intent Parsed search intent;
 *   `intent.params.isGrocery === 'true'` forces the grocery search plan.
 * @returns {Promise<object>} Result with `type: 'products'`, markdown `content`,
 *   `structured.listings`, a whitespace-based `tokens` estimate, timing and `sources`.
 */
export async function handleProductSearch(intent) {
    const t0 = Date.now();
    // Build clean product keyword (strip noise words and dollar amounts); fall back to the raw query.
    const keyword = intent.query
        .replace(/\b(buy|shop|shopping|purchase|order|deal|discount|sale|price|cheap|cheapest|best price|under)\b/gi, '')
        .replace(/\$\d[\d,]*/g, '')
        .replace(/\s+/g, ' ')
        .trim() || intent.query;
    const { provider: searchProvider } = getBestSearchProvider();
    const isBulk = /\b(bulk|wholesale|1000|500|case|pallet|box of|pack of|carton)\b/i.test(intent.query);
    const isGrocery = intent.params.isGrocery === 'true' || /\b(grocery|milk|eggs|bread|butter|cheese|chicken|produce)\b/i.test(intent.query);
    const isCollectible = /\b(pokemon|pokémon|magic\s*the\s*gathering|mtg|yu-?gi-?oh|trading\s*card|tcg|baseball\s*card|sports\s*card|collectible\s*card|figurine|funko|hot\s*wheels|lego\s*set|vintage\s*toy|action\s*figure|comic\s*book|vinyl\s*record|rare\s*coin|stamp\s*collection)\b/i.test(intent.query);
    // Search plan: marketplace queries first, the Reddit query always last.
    let marketplaceQueries;
    let redditQuery;
    if (isCollectible) {
        marketplaceQueries = [
            { query: `${keyword} price site:tcgplayer.com`, count: 2 },
            { query: `${keyword} price site:ebay.com sold`, count: 2 },
            { query: `${keyword} price site:etsy.com OR site:mercari.com`, count: 3 },
            { query: `${keyword} price site:facebook.com/marketplace OR site:amazon.com`, count: 3 },
        ];
        redditQuery = { query: `${keyword} cheapest reddit where to buy`, count: 3 };
    }
    else if (isGrocery) {
        marketplaceQueries = [
            { query: `${keyword} price site:instacart.com`, count: 2 },
            { query: `${keyword} price site:walmart.com/grocery OR site:walmart.com`, count: 2 },
            { query: `${keyword} price site:freshdirect.com OR site:wholefoodsmarket.com`, count: 3 },
        ];
        redditQuery = { query: `${keyword} cheapest grocery store reddit`, count: 3 };
    }
    else {
        marketplaceQueries = [
            { query: `${keyword} site:amazon.com ${isBulk ? '' : 'price'}`, count: 3 },
            { query: `${keyword} site:walmart.com price`, count: 2 },
            { query: `${keyword} site:bestbuy.com OR site:target.com price`, count: 2 },
            isBulk
                ? { query: `${keyword} wholesale bulk site:uline.com OR site:alibaba.com OR site:staples.com OR site:webstaurantstore.com`, count: 3 }
                : { query: `${keyword} site:ebay.com OR site:etsy.com price`, count: 3 },
        ];
        redditQuery = { query: `${keyword} reddit review best worth it`, count: 2 };
    }
    const settled = await Promise.allSettled([...marketplaceQueries, redditQuery]
        .map(({ query, count }) => searchProvider.searchWeb(query, { count })));
    const rawResults = settled.slice(0, -1).flatMap(entry => settledValueOrEmpty(entry));
    const redditResults = settledValueOrEmpty(settled[settled.length - 1]);
    // DEEP SCRAPE: visit top marketplace pages to extract real prices (collectibles only).
    const uniqueListings = isCollectible ? await scrapeCollectibleListings(rawResults) : [];
    // Parse structured product listings from the search-result titles/snippets.
    let listings = rawResults
        .flatMap(r => {
            // Only results from stores that getStoreInfo recognises become listings.
            const storeInfo = r.url ? getStoreInfo(r.url) : null;
            if (!storeInfo) {
                return [];
            }
            const snippet = r.snippet || '';
            const ratingMatch = snippet.match(/(\d+(?:\.\d)?)\s*(?:out of 5|stars?|★)/i);
            const reviewMatch = snippet.match(/([\d,]+)\s*(?:ratings?|reviews?)/i);
            const brandMatch = (r.title || '').match(KNOWN_BRANDS);
            return [{
                title: cleanProductTitle(r.title || ''),
                brand: brandMatch ? brandMatch[1] : undefined,
                price: parsePrice(`${r.title || ''} ${r.snippet || ''}`),
                rating: ratingMatch ? parseFloat(ratingMatch[1]) : undefined,
                reviewCount: reviewMatch ? reviewMatch[1].replace(/,/g, '') : undefined,
                url: addAffiliateTag(r.url),
                snippet: r.snippet,
                store: storeInfo.store,
                // Image URL supplied by the search provider, when present.
                image: r.imageUrl ?? undefined,
            }];
        })
        .slice(0, 10);
    // Replace listings with deep-scraped results for collectibles (if any found).
    if (isCollectible && uniqueListings.length > 0) {
        listings = uniqueListings.map(l => ({
            title: l.title,
            brand: undefined,
            price: l.price,
            rating: undefined,
            reviewCount: undefined,
            url: l.url,
            snippet: l.condition ? `Condition: ${l.condition}` : '',
            store: l.source,
            image: undefined,
        }));
    }
    const amazonUrl = addAffiliateTag(`https://www.amazon.com/s?k=${encodeURIComponent(keyword)}`);
    const content = listings.length > 0
        ? `# 🛍️ Products — ${keyword}\n\n${listings.map((l, i) => `${i + 1}. **${l.title}** — ${l.price || 'see price'} [${l.store}](${l.url})\n ${l.snippet || ''}`).join('\n\n')}`
        : `# 🛍️ Products — ${keyword}\n\nNo structured listings found. Try a more specific query.`;
    // AI synthesis: recommend best value option (only when an Ollama endpoint is configured).
    let answer;
    if (process.env.OLLAMA_URL) {
        const productInfo = listings.length > 0
            ? listings.slice(0, 5).map(l => `${l.brand ? l.brand + ' ' : ''}${l.title}: ${l.price || 'N/A'} at ${l.store}${l.rating ? `, ${l.rating}★` : ''}${l.reviewCount ? ` (${l.reviewCount} reviews)` : ''}`).join(', ')
            : 'no specific listings found';
        const redditSnippets = redditResults.slice(0, 2).map(r => `${r.title}: ${r.snippet || ''}`).join('\n');
        const deepPriceInfo = uniqueListings.length > 0
            ? '\n\nReal prices found:\n' + uniqueListings.slice(0, 5).map((l, i) => `${i + 1}. ${l.title} — ${l.price} on ${l.source}${l.condition ? ` (${l.condition})` : ''}`).join('\n')
            : '';
        const aiPrompt = isCollectible
            ? `${PROMPT_INJECTION_DEFENSE}You are a collectibles price expert. The user wants: "${sanitizeSearchQuery(intent.query)}". Products found: ${productInfo}.${deepPriceInfo} Reddit says: ${redditSnippets || 'none'}. List the cheapest options with exact prices, condition (near mint/lightly played/etc), and which store. Be specific with dollar amounts. Max 200 words. Cite sources inline as [1], [2], [3].`
            : `${PROMPT_INJECTION_DEFENSE}You are a shopping advisor. The user wants: "${sanitizeSearchQuery(intent.query)}". Products found: ${productInfo}. Reddit says: ${redditSnippets || 'no reviews'}. ${listings.length > 0 ? 'Recommend the best value option. Mention the brand name, specific model, price, and store. Be specific.' : 'Give general buying advice with specific brand and model recommendations based on Reddit.'} Max 200 words. Cite sources inline as [1], [2], [3].`;
        const aiText = await callLLMQuick(aiPrompt, { maxTokens: 250, timeoutMs: 5000, temperature: 0.4 });
        // Very short replies are treated as a failed synthesis and dropped.
        if (aiText && aiText.length > 20) {
            answer = aiText;
        }
    }
    return {
        type: 'products',
        source: listings.length > 0 ? 'Shopping + Reddit' : 'Web',
        sourceUrl: amazonUrl,
        content,
        title: `${keyword} — Shopping`,
        structured: { listings },
        // Whitespace-delimited word count (newlines included), matching the rental handler.
        tokens: content.split(/\s+/).length,
        fetchTimeMs: Date.now() - t0,
        ...(answer !== undefined ? { answer } : {}),
        sources: [
            { type: 'shopping', count: listings.length },
            { type: 'reddit', threads: redditResults.slice(0, 3).map(r => ({ title: r.title, url: r.url, snippet: r.snippet })) },
        ],
    };
}
@@ -0,0 +1,2 @@
1
+ import type { SearchIntent, SmartSearchResult } from '../types.js';
2
/**
 * Handles a car-rental smart search.
 *
 * The implementation reads the location, optional date range and optional budget from
 * `intent.query`, searches rental aggregators, direct providers and Reddit, and resolves
 * to a result whose `type` is `'rental'`.
 *
 * @param intent Parsed search intent; only `intent.query` is read by the implementation.
 * @returns The rental search result, including structured listings and source metadata.
 */
+ export declare function handleRentalSearch(intent: SearchIntent): Promise<SmartSearchResult>;
@@ -0,0 +1,151 @@
1
+ import { getBestSearchProvider } from '../../../../core/search-provider.js';
2
+ import { addAffiliateTag, parsePrice, extractPriceValue } from '../utils.js';
3
+ import { callLLMQuick, PROMPT_INJECTION_DEFENSE } from '../llm.js';
4
/** Known rental aggregators and direct providers, keyed by registrable domain. */
const RENTAL_SITES = {
    'kayak.com': { name: 'Kayak', type: 'aggregator' },
    'priceline.com': { name: 'Priceline', type: 'aggregator' },
    'cheapflights.com': { name: 'Cheapflights', type: 'aggregator' },
    'momondo.com': { name: 'Momondo', type: 'aggregator' },
    'skyscanner.com': { name: 'Skyscanner', type: 'aggregator' },
    'trip.com': { name: 'Trip.com', type: 'aggregator' },
    'carrentals.com': { name: 'CarRentals.com', type: 'aggregator' },
    'rentalcars.com': { name: 'RentalCars.com', type: 'aggregator' },
    'stressfreecarrental.com': { name: 'StressFree', type: 'aggregator' },
    'happycar.com': { name: 'HappyCar', type: 'aggregator' },
    'enterprise.com': { name: 'Enterprise', type: 'direct' },
    'hertz.com': { name: 'Hertz', type: 'direct' },
    'avis.com': { name: 'Avis', type: 'direct' },
    'budget.com': { name: 'Budget', type: 'direct' },
    'turo.com': { name: 'Turo', type: 'direct' },
    'sixt.com': { name: 'Sixt', type: 'direct' },
    'nationalcar.com': { name: 'National', type: 'direct' },
    'alamo.com': { name: 'Alamo', type: 'direct' },
    'costcotravel.com': { name: 'Costco Travel', type: 'direct' },
    'expedia.com': { name: 'Expedia', type: 'aggregator' },
};

/**
 * Maps a result URL to the rental site that hosts it.
 *
 * @param {string} url Absolute URL of a search result.
 * @returns {{ company: string, siteType: string } | null} Site info, or null when the host
 *   is not a known rental site or the URL cannot be parsed.
 */
function resolveRentalSite(url) {
    try {
        // Strip only a leading "www." — an unanchored replace would also alter hosts that
        // merely contain that substring.
        const hostname = new URL(url).hostname.replace(/^www\./, '');
        for (const [domain, info] of Object.entries(RENTAL_SITES)) {
            if (hostname === domain || hostname.endsWith('.' + domain)) {
                return { company: info.name, siteType: info.type };
            }
        }
        return null;
    }
    catch {
        return null;
    }
}

/**
 * Pulls the location, date range and budget out of a free-text rental query.
 *
 * @param {string} query Raw user query.
 * @returns {{ location: string, dates: ({ from: string, to: string } | null), budget: (string | null) }}
 *   `location` is '' when none is found; `budget` is a digit string without separators.
 */
function parseRentalQuery(query) {
    const dateMatch = query.match(/(?:from|between)\s+(\w+\s+\d+)\s+(?:to|and|through|-)\s+(\w+\s+\d+)/i);
    const dates = dateMatch ? { from: dateMatch[1], to: dateMatch[2] } : null;
    // Remove the date range before looking for the location: "from" is also a location
    // preposition, so "from March 3 to March 10 in Miami" would otherwise yield "March 3".
    const queryWithoutDates = dateMatch
        ? query.replace(dateMatch[0], ' ').replace(/\s+/g, ' ').trim()
        : query;
    const locMatch = queryWithoutDates.match(/\b(?:in|at|near|from|around)\s+(.+?)(?:\s+(?:for|under|from|to|between|\$|cheap|best).*)?$/i);
    const location = locMatch ? locMatch[1].trim() : '';
    // Accept thousands separators so "$1,500" is not truncated to "1".
    const budgetMatch = query.match(/(?:under|\$|budget|max|cheaper than)\s*\$?(\d[\d,]*)/i);
    const budget = budgetMatch ? budgetMatch[1].replace(/,/g, '') : null;
    return { location, dates, budget };
}

/**
 * Handles a car-rental smart search.
 *
 * Searches rental aggregators, direct providers and Reddit in parallel, keeps one result
 * per rental company (preferring URLs that mention the requested location), orders them by
 * extracted price, and appends generic booking links for major companies that did not
 * appear. When `OLLAMA_URL` is set, an LLM recommendation is attached as `answer`.
 *
 * @param {{ query: string }} intent Parsed search intent; only `intent.query` is read.
 * @returns {Promise<object>} Result with `type: 'rental'`, markdown `content`,
 *   `structured.listings`, a whitespace-based `tokens` estimate, timing and `sources`.
 */
export async function handleRentalSearch(intent) {
    const t0 = Date.now();
    const { location, dates, budget } = parseRentalQuery(intent.query);
    const { provider: searchProvider } = getBestSearchProvider();
    // Search for aggregator results that include prices + Reddit tips.
    const [aggregatorSettled, turoSettled, redditSettled] = await Promise.allSettled([
        searchProvider.searchWeb(`car rental ${location || 'near me'} ${dates ? `${dates.from} to ${dates.to}` : ''} price cheapest site:kayak.com OR site:priceline.com OR site:expedia.com`, { count: 8 }),
        searchProvider.searchWeb(`car rental ${location || ''} site:turo.com OR site:enterprise.com OR site:hertz.com`, { count: 3 }),
        searchProvider.searchWeb(`car rental ${location || ''} reddit tips best deal cheapest`, { count: 2 }),
    ]);
    const rentalResults = [
        ...(aggregatorSettled.status === 'fulfilled' ? aggregatorSettled.value : []),
        ...(turoSettled.status === 'fulfilled' ? turoSettled.value : []),
    ];
    const redditResults = redditSettled.status === 'fulfilled' ? redditSettled.value : [];
    // Deduplicate by company. A URL counts as location-specific when it contains the first
    // five characters of the (whitespace-stripped) location.
    const locationPrefix = location.toLowerCase().replace(/\s+/g, '').substring(0, 5);
    const bestByCompany = new Map();
    for (const result of rentalResults) {
        const site = resolveRentalSite(result.url);
        if (!site) {
            continue;
        }
        const isLocationSpecific = locationPrefix !== ''
            && result.url.toLowerCase().replace(/[\s-]/g, '').includes(locationPrefix);
        const current = bestByCompany.get(site.company);
        // Keep the first hit; upgrade only from a generic URL to a location-specific one so an
        // earlier (higher-ranked) location-specific result is never displaced by a later one.
        if (!current || (isLocationSpecific && !current.isLocationSpecific)) {
            bestByCompany.set(site.company, { result, site, isLocationSpecific });
        }
    }
    const listings = [...bestByCompany.values()].map(({ result, site }) => {
        // Extract price from BOTH title and snippet; prefer the title.
        const price = parsePrice(result.title || '') || parsePrice(result.snippet || '');
        return {
            name: result.title?.replace(/\s*[-|–—].*$/, '').trim() || `${site.company} Car Rental`,
            company: site.company,
            siteType: site.siteType,
            url: addAffiliateTag(result.url),
            snippet: result.snippet || '',
            price,
            priceValue: extractPriceValue(price),
        };
    });
    // Sort: lowest extracted price first, listings without a usable price last; ties are
    // broken by putting aggregators ahead of direct providers.
    const sortablePrice = (listing) => (Number.isFinite(listing.priceValue) ? listing.priceValue : Infinity);
    listings.sort((a, b) => {
        const aVal = sortablePrice(a);
        const bVal = sortablePrice(b);
        if (aVal !== bVal) {
            return aVal - bVal;
        }
        if (a.siteType !== b.siteType) {
            return a.siteType === 'aggregator' ? -1 : 1;
        }
        return 0;
    });
    const topListings = listings.slice(0, 6);
    // Also add direct booking links for major providers if they didn't appear in search.
    const searchLocation = encodeURIComponent(location || 'New York');
    const directLinks = [
        { company: 'Kayak', siteType: 'aggregator', url: `https://www.kayak.com/cars/${searchLocation}`, name: 'Compare all rental companies' },
        { company: 'Enterprise', siteType: 'direct', url: `https://www.enterprise.com/en/car-rental/locations/us.html`, name: 'Enterprise Rent-A-Car' },
        { company: 'Hertz', siteType: 'direct', url: `https://www.hertz.com/rentacar/reservation/`, name: 'Hertz Car Rental' },
        { company: 'Avis', siteType: 'direct', url: `https://www.avis.com/en/home`, name: 'Avis Car Rental' },
        { company: 'Budget', siteType: 'direct', url: `https://www.budget.com/en/home`, name: 'Budget Car Rental' },
    ].filter(d => !topListings.some(l => l.company === d.company));
    const allListings = [
        ...topListings,
        ...directLinks.map(d => ({ ...d, snippet: '', price: undefined, priceValue: undefined })),
    ];
    // Build markdown content.
    const content = `# 🔑 Car Rentals${location ? ` — ${location}` : ''}${dates ? ` (${dates.from} to ${dates.to})` : ''}\n\n` +
        allListings.map((l, i) => `${i + 1}. **${l.name}** — ${l.company}${l.price ? ` · ${l.price}/day` : ''}${l.siteType === 'aggregator' ? ' *(compares prices)*' : ''}\n ${l.snippet}`).join('\n\n');
    // AI synthesis: use extracted prices + Reddit tips (only when an Ollama endpoint is configured).
    let answer;
    if (process.env.OLLAMA_URL) {
        const priceInfo = allListings.filter(l => l.price).map(l => `${l.company}: ${l.price}/day`).join(', ');
        const redditContent = redditResults.slice(0, 3).map(r => `${r.title}: ${r.snippet || ''}`).join('\n');
        const aiPrompt = `${PROMPT_INJECTION_DEFENSE}You are a car rental advisor. ONLY use information from the sources below. User wants to rent a car${location ? ' in ' + location : ''}.${dates ? ` Dates: ${dates.from} to ${dates.to}.` : ''}${budget ? ` Budget: $${budget}/day.` : ''} Prices found: ${priceInfo || 'no prices extracted yet — refer to sites below'}. Reddit tips: ${redditContent || 'none'}. Give a 2-3 sentence recommendation based ONLY on sources. Mention the cheapest option and actual price. Max 200 words. Cite sources inline as [1], [2], [3].`;
        const aiText = await callLLMQuick(aiPrompt, { maxTokens: 250, timeoutMs: 5000, temperature: 0.4 });
        // Very short replies are treated as a failed synthesis and dropped.
        if (aiText && aiText.length > 20) {
            answer = aiText;
        }
    }
    return {
        type: 'rental',
        source: 'Car Rentals + Reddit',
        sourceUrl: `https://www.kayak.com/cars/${searchLocation}`,
        content,
        title: `Car Rentals${location ? ` in ${location}` : ''}`,
        structured: { listings: allListings },
        tokens: content.split(/\s+/).length,
        fetchTimeMs: Date.now() - t0,
        loadingMessage: 'Searching for rental cars...',
        ...(answer !== undefined ? { answer } : {}),
        sources: [
            { type: 'rental', count: topListings.length },
            { type: 'reddit', threads: redditResults.slice(0, 3).map(r => ({ title: r.title, url: r.url, snippet: r.snippet })) },
        ],
    };
}
151
+ // ─── Restaurant source fetchers ───────────────────────────────────────────
@@ -0,0 +1,2 @@
1
+ import type { SearchIntent, SmartSearchResult } from '../types.js';
2
/**
 * Handles a restaurant smart search.
 *
 * The implementation queries several sources in parallel (a local/places search when
 * `GOOGLE_PLACES_API_KEY` is set, plus Yelp, Reddit and YouTube), each under its own timeout.
 *
 * @param intent Parsed search intent; `intent.query` supplies the keyword and
 *   `intent.params.location` the location (the implementation falls back to 'New York, NY').
 * @param requestLanguage Optional language forwarded to the local search as its `language` option.
 * @returns The restaurant search result.
 */
+ export declare function handleRestaurantSearch(intent: SearchIntent, requestLanguage?: string): Promise<SmartSearchResult>;
@@ -0,0 +1,205 @@
1
+ import { localSearch } from '../../../../core/local-search.js';
2
+ import { callLLMQuick, sanitizeSearchQuery, PROMPT_INJECTION_DEFENSE } from '../llm.js';
3
+ import { fetchYelpResults } from '../sources/yelp.js';
4
+ import { fetchRedditResults } from '../sources/reddit.js';
5
+ import { fetchYouTubeResults } from '../sources/youtube.js';
6
+ export async function handleRestaurantSearch(intent, requestLanguage) {
7
+ const t0 = Date.now();
8
+ const location = intent.params.location || 'New York, NY';
9
+ const keyword = intent.query
10
+ .replace(/\b(best|top|good|cheap|affordable|near me|near|around|in|find|search|looking for)\b/gi, '')
11
+ .replace(/\s+/g, ' ')
12
+ .trim();
13
+ // ── Run ALL data sources in parallel for speed ──────────────────────────
14
+ // Previously sequential: localSearch → Yelp → Reddit+YouTube = 20-30s
15
+ // Now parallel: everything races at once = 8-10s max
16
+ const hasPlacesKey = !!process.env.GOOGLE_PLACES_API_KEY;
17
+ const [localSearchSettled, yelpSettled, redditSettled, youtubeSettled] = await Promise.allSettled([
18
+ // Google Places (primary when key available)
19
+ hasPlacesKey
20
+ ? Promise.race([
21
+ localSearch({ query: keyword || intent.query, location, language: requestLanguage, limit: 10 }),
22
+ new Promise((_, rej) => setTimeout(() => rej(new Error('local search timeout')), 8000)),
23
+ ])
24
+ : Promise.resolve(null),
25
+ // Yelp (secondary / fallback)
26
+ Promise.race([
27
+ fetchYelpResults(keyword, location).then(v => v),
28
+ new Promise((_, rej) => setTimeout(() => rej(new Error('yelp timeout')), 8000)),
29
+ ]),
30
+ // Reddit (best-effort supplementary)
31
+ Promise.race([
32
+ fetchRedditResults(keyword, location),
33
+ new Promise((_, rej) => setTimeout(() => rej(new Error('reddit timeout')), 6000)),
34
+ ]),
35
+ // YouTube (best-effort supplementary)
36
+ Promise.race([
37
+ fetchYouTubeResults(keyword, location),
38
+ new Promise((_, rej) => setTimeout(() => rej(new Error('youtube timeout')), 5000)),
39
+ ]),
40
+ ]);
41
+ const googlePlacesData = localSearchSettled.status === 'fulfilled' ? localSearchSettled.value : null;
42
+ if (googlePlacesData && googlePlacesData.results?.length > 0) {
43
+ console.log(`[smart-search] localSearch() returned ${googlePlacesData.results.length} results from ${googlePlacesData.source}`);
44
+ }
45
+ // Skip Yelp data if Google Places already has enough results
46
+ const skipYelp = googlePlacesData && googlePlacesData.results?.length >= 5;
47
+ const yelpData = (!skipYelp && yelpSettled.status === 'fulfilled') ? yelpSettled.value : null;
48
+ const redditData = redditSettled.status === 'fulfilled' ? redditSettled.value : null;
49
+ const youtubeData = youtubeSettled.status === 'fulfilled' ? youtubeSettled.value : null;
50
+ // Re-rank: composite score = rating * log2(reviewCount + 1)
51
+ // This naturally surfaces high-rated places with meaningful review volume
52
+ if (yelpData && yelpData.businesses.length > 0) {
53
+ yelpData.businesses.sort((a, b) => {
54
+ const scoreA = (a.rating || 0) * Math.log2((a.reviewCount || 0) + 1);
55
+ const scoreB = (b.rating || 0) * Math.log2((b.reviewCount || 0) + 1);
56
+ return scoreB - scoreA;
57
+ });
58
+ // For "best" queries, filter to minimum 50 reviews
59
+ const isBestQuery = /\b(best|top|highest rated)\b/i.test(intent.query);
60
+ if (isBestQuery) {
61
+ const filtered = yelpData.businesses.filter((b) => (b.reviewCount || 0) >= 50);
62
+ if (filtered.length >= 3) {
63
+ yelpData.businesses = filtered;
64
+ }
65
+ }
66
+ // Remove permanently closed businesses
67
+ yelpData.businesses = yelpData.businesses.filter((b) => !b.isClosed);
68
+ }
69
+ // ── Build markdown content from all sources ──────────────────────────
70
+ const contentParts = [];
71
+ // Google Places section (shown first when available — higher quality data)
72
+ if (googlePlacesData && googlePlacesData.results.length > 0) {
73
+ const priceLevelStr = (lvl) => lvl !== undefined ? '$'.repeat(Math.max(1, lvl)) : '';
74
+ contentParts.push(`## Google Places (${googlePlacesData.results.length} results)`);
75
+ googlePlacesData.results.slice(0, 10).forEach((b, i) => {
76
+ const name = b.name || 'Unknown';
77
+ const rating = b.rating ? `⭐${b.rating}` : '';
78
+ const reviews = b.reviewCount ? `(${b.reviewCount.toLocaleString()} reviews)` : '';
79
+ const price = b.priceLevel !== undefined ? ` · ${priceLevelStr(b.priceLevel)}` : '';
80
+ const openStatus = b.isOpen === true ? ' · 🟢 Open Now' : (b.isOpen === false ? ' · 🔴 Closed' : '');
81
+ const todayHours = b.hours?.length > 0 ? ` · 🕐 ${b.hours[0]}` : '';
82
+ const mapsLink = b.googleMapsUrl ? ` · [📍 Maps](${b.googleMapsUrl})` : '';
83
+ const addr = b.address || '';
84
+ contentParts.push(`${i + 1}. **${name}** ${rating} ${reviews}${price}${openStatus}${todayHours}${mapsLink}${addr ? ` — ${addr}` : ''}`);
85
+ });
86
+ contentParts.push('');
87
+ }
88
+ // Yelp section
89
+ if (yelpData) {
90
+ const businesses = yelpData.businesses;
91
+ if (businesses.length > 0) {
92
+ contentParts.push(`## Yelp (${businesses.length} restaurants)`);
93
+ businesses.slice(0, 10).forEach((b, i) => {
94
+ const name = b.name || b.title || 'Unknown';
95
+ const rating = b.rating ? `⭐${b.rating}` : '';
96
+ const reviews = b.reviewCount ? `(${b.reviewCount.toLocaleString()} reviews)` : '';
97
+ const address = b.address || b.location || '';
98
+ const price = b.price ? ` · ${b.price}` : '';
99
+ const openStatus = b.isClosed ? ' · ⛔ Permanently Closed' : (b.isOpenNow ? ' · 🟢 Open Now' : ' · 🔴 Closed');
100
+ const todayHours = b.todayHours && b.todayHours !== 'Closed today' ? ` · 🕐 ${b.todayHours}` : (b.todayHours === 'Closed today' ? ' · 🕐 Closed today' : '');
101
+ const txns = b.transactions?.length > 0 ? ` · ${b.transactions.map((t) => t === 'delivery' ? '🚗 Delivery' : t === 'pickup' ? '📦 Pickup' : t).join(' ')}` : '';
102
+ const mapsLink = b.googleMapsUrl ? ` · [📍 Google Maps](${b.googleMapsUrl})` : '';
103
+ contentParts.push(`${i + 1}. **${name}** ${rating} ${reviews}${price}${openStatus}${todayHours}${txns}${mapsLink}${address ? ` — ${address}` : ''}`);
104
+ });
105
+ }
106
+ else if (yelpData.content) {
107
+ contentParts.push(`## Yelp\n${yelpData.content.substring(0, 800)}`);
108
+ }
109
+ }
110
+ // Reddit section
111
+ if (redditData) {
112
+ contentParts.push('');
113
+ contentParts.push('## Reddit Recommendations');
114
+ if (redditData.thread) {
115
+ contentParts.push(`**${redditData.thread.title}**`);
116
+ if (redditData.thread.content) {
117
+ contentParts.push(redditData.thread.content.substring(0, 600));
118
+ }
119
+ }
120
+ if (redditData.otherThreads.length > 0) {
121
+ contentParts.push('');
122
+ redditData.otherThreads.slice(0, 3).forEach(t => {
123
+ contentParts.push(`- [${t.title}](${t.url}) — ${t.snippet || ''}`);
124
+ });
125
+ }
126
+ }
127
+ // YouTube section
128
+ if (youtubeData && youtubeData.videos.length > 0) {
129
+ contentParts.push('');
130
+ contentParts.push('## YouTube Reviews');
131
+ youtubeData.videos.forEach(v => {
132
+ contentParts.push(`🎬 [${v.title}](${v.url}) — ${v.snippet || ''}`);
133
+ });
134
+ }
135
+ const combinedContent = contentParts.join('\n');
136
+ // ── Build sources array for dashboard tabs ────────────────────────────
137
+ const sources = [];
138
+ if (googlePlacesData)
139
+ sources.push({ title: 'Google Places', url: `https://maps.google.com/?q=${encodeURIComponent(keyword + ' ' + location)}`, domain: 'google.com' });
140
+ if (yelpData)
141
+ sources.push({ title: 'Yelp', url: yelpData.url, domain: 'yelp.com' });
142
+ if (redditData?.thread)
143
+ sources.push({ title: redditData.thread.title, url: redditData.thread.url, domain: 'reddit.com' });
144
+ if (youtubeData?.videos[0])
145
+ sources.push({ title: youtubeData.videos[0].title, url: youtubeData.videos[0].url, domain: 'youtube.com' });
146
+ // ── AI Synthesis via Qwen/Ollama (optional) ───────────────────────────
147
+ // Build a Yelp-only summary first (fast, doesn't wait for Reddit)
148
+ // then enrich with Reddit/YouTube if they arrived
149
+ let answer;
150
+ const ollamaUrl = process.env.OLLAMA_URL;
151
+ if (ollamaUrl && yelpData && yelpData.businesses.length > 0) {
152
+ try {
153
+ const yelpLines = yelpData.businesses.slice(0, 3).map((b, i) => {
154
+ const openStatus = b.isClosed ? 'PERMANENTLY CLOSED' : (b.isOpenNow ? 'OPEN NOW' : 'Closed right now');
155
+ const txns = b.transactions?.length > 0 ? `Available: ${b.transactions.join(', ')}` : '';
156
+ const googleInfo = b.googleRating ? ` | Google: ⭐${b.googleRating} (${b.googleReviewCount} reviews)` : '';
157
+ return `[${i + 1}] ${b.name} ⭐${b.rating} (${b.reviewCount?.toLocaleString()} reviews) ${b.price || ''} — ${b.address}
158
+ ${openStatus} | Today: ${b.todayHours || 'hours not available'} | ${txns} | Categories: ${b.categories || ''}${googleInfo}
159
+ URL: ${b.url || ''}`;
160
+ }).join('\n');
161
+ const yelpCitations = yelpData.businesses.slice(0, 3).map((b, i) => `[${i + 1}] ${b.url || 'yelp.com'}`).join('\n');
162
+ const redditHint = redditData?.otherThreads?.slice(0, 2).map((t) => t.title).join('; ') || '';
163
+ const systemPrompt = `${PROMPT_INJECTION_DEFENSE}Recommend top 3 restaurants. For each: name with inline citation [1][2][3], why it's good, open/closed status, hours.
164
+ Cite sources inline using [1], [2], [3] notation matching the numbered sources. At the end, list Sources with their URLs.
165
+ Be specific. Max 200 words.
166
+ `;
167
+ const userMessage = `Query: ${sanitizeSearchQuery(intent.query)}\n\nTop restaurants:\n${yelpLines}${redditHint ? '\n\nReddit mentions: ' + redditHint : ''}\n\nSources:\n${yelpCitations}`;
168
+ const text = await callLLMQuick(`${systemPrompt}\n\n${userMessage}`, { maxTokens: 250, timeoutMs: 5000, temperature: 0.3 });
169
+ if (text)
170
+ answer = text;
171
+ }
172
+ catch (err) {
173
+ console.warn('[restaurant-search] LLM synthesis failed (graceful fallback):', err.message);
174
+ }
175
+ }
176
+ // If ALL sources completely failed, surface an error
177
+ if (!googlePlacesData && !yelpData && !redditData && !youtubeData) {
178
+ throw new Error('All restaurant sources failed');
179
+ }
180
+ const yelpUrl = yelpData?.url || `https://www.yelp.com/search?find_desc=${encodeURIComponent(keyword)}&find_loc=${encodeURIComponent(location)}`;
181
+ // Build source label based on what we actually used
182
+ const sourceLabel = [
183
+ googlePlacesData ? 'Google Places' : null,
184
+ yelpData ? 'Yelp' : null,
185
+ redditData ? 'Reddit' : null,
186
+ youtubeData ? 'YouTube' : null,
187
+ ].filter(Boolean).join(' + ') || 'Yelp + Reddit + YouTube';
188
+ // Merge structured data: prefer Google Places, fall back to Yelp
189
+ const structuredData = googlePlacesData
190
+ ? { businesses: googlePlacesData.results, googlePlaces: true }
191
+ : yelpData?.domainData?.structured;
192
+ return {
193
+ type: 'restaurants',
194
+ source: sourceLabel,
195
+ sourceUrl: yelpUrl,
196
+ content: combinedContent,
197
+ title: `${keyword} in ${location}`,
198
+ domainData: googlePlacesData ? { structured: structuredData } : yelpData?.domainData,
199
+ structured: structuredData,
200
+ tokens: combinedContent.split(/\s+/).length,
201
+ fetchTimeMs: Date.now() - t0,
202
+ ...(answer !== undefined ? { answer } : {}),
203
+ ...(sources.length > 0 ? { sources } : {}),
204
+ };
205
+ }
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Smart Search endpoint — intent detection + travel/commerce routing
3
+ * POST /v1/search/smart
4
+ *
5
+ * Detects user intent from natural language and routes to the best source:
6
+ * - cars → Cars.com with browser rendering + structured extraction
7
+ * - flights → Google Flights with browser rendering + flight extractor
8
+ * - hotels → Google Hotels with browser rendering
9
+ * - rental → Kayak with browser rendering + rental extractor
10
+ * - restaurants → Yelp Fusion API extractor, combined with Google Places, Reddit, and YouTube results when available
11
+ * - products → Amazon search with structured extraction
12
+ * - general → SearXNG with smart enrichment (peel() for top 3)
13
+ */
14
+ import { Router } from 'express';
15
+ import '../../types.js';
16
+ import { AuthStore } from '../../auth-store.js';
17
+ export type { SearchIntent, SmartSearchResult } from './types.js';
18
+ export { detectSearchIntent } from './intent.js';
19
+ export declare function createSmartSearchRouter(authStore: AuthStore): Router;