webpeel 0.21.83 → 0.21.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ import { createRateLimitMiddleware, RateLimiter } from './middleware/rate-limit.
18
18
  import { createHealthRouter } from './routes/health.js';
19
19
  import { createFetchRouter } from './routes/fetch.js';
20
20
  import { createSearchRouter } from './routes/search.js';
21
+ import { createSmartSearchRouter } from './routes/smart-search.js';
21
22
  import { createUserRouter } from './routes/users.js';
22
23
  import { createStripeRouter, createBillingPortalRouter } from './routes/stripe.js';
23
24
  import { createOAuthRouter } from './routes/oauth.js';
@@ -315,6 +316,8 @@ export function createApp(config = {}) {
315
316
  app.use('/v1/screenshot', requireScope('full', 'read'));
316
317
  app.use(createScreenshotRouter(authStore));
317
318
  app.use(createSearchRouter(authStore));
319
+ // /v1/search/smart — intent detection + travel/commerce routing (POST)
320
+ app.use(createSmartSearchRouter(authStore));
318
321
  // /v1/research — lightweight research (search → fetch → compile), BYOK LLM optional
319
322
  app.use('/v1/research', requireScope('full', 'read'));
320
323
  app.use(createResearchRouter());
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Smart Search endpoint — intent detection + travel/commerce routing
3
+ * POST /v1/search/smart
4
+ *
5
+ * Detects user intent from natural language and routes to the best source:
6
+ * - cars → Cars.com with browser rendering + structured extraction
7
+ * - flights → Google Flights with browser rendering + flight extractor
8
+ * - hotels → Google Hotels with browser rendering
9
+ * - rental → Kayak with browser rendering + rental extractor
10
+ * - restaurants → Yelp Fusion API extractor
11
+ * - general → SearXNG with smart enrichment (peel() for top 2)
12
+ */
13
+ import { Router } from 'express';
14
+ import '../types.js';
15
+ import { AuthStore } from '../auth-store.js';
16
/**
 * Result of classifying a natural-language query.
 *
 * `location` is optional: the smart-search route attaches it at runtime for
 * restaurant intents when the request body carries a `location` field, so it
 * is declared here to keep the type in step with the object actually passed
 * to the intent handlers.
 */
export interface SearchIntent {
    /** Detected intent category; `general` is the fallback. */
    type: 'cars' | 'flights' | 'hotels' | 'rental' | 'restaurants' | 'general';
    /** The query the intent was detected from. */
    query: string;
    /** Intent-specific parameters (e.g. `maxPrice` and `zip` for `cars`). */
    params: Record<string, string>;
    /** Optional caller-supplied location, set by the route for restaurant searches. */
    location?: string;
}
21
/**
 * Payload returned under `data` by POST /v1/search/smart.
 */
export interface SmartSearchResult {
    // ── Always present ──────────────────────────────────────────────
    /** Intent category that produced this result. */
    type: 'cars' | 'flights' | 'hotels' | 'rental' | 'restaurants' | 'general';
    /** Human-readable name of the source that was queried. */
    source: string;
    /** URL associated with the source query. */
    sourceUrl: string;
    /** Text content of the result. */
    content: string;
    /** Token count reported for `content`. */
    tokens: number;
    /** Wall-clock time spent producing the result, in milliseconds. */
    fetchTimeMs: number;

    // ── Present depending on the intent handler ─────────────────────
    /** Page title, when the handler fetched a page. */
    title?: string;
    /** Domain-specific data returned by the page fetch. */
    domainData?: any;
    /** Structured extraction taken from `domainData`. */
    structured?: any;
    /** Ranked result list (general web search). */
    results?: Array<any>;
    /** Progress text hint for frontends. */
    loadingMessage?: string;
}
34
/**
 * Classify a natural-language query into a {@link SearchIntent}.
 *
 * @param query - The user's search text.
 * @returns The detected intent together with any extracted parameters.
 */
export declare function detectSearchIntent(query: string): SearchIntent;

/**
 * Build the Express router that serves POST /v1/search/smart.
 *
 * @param authStore - Store consulted for usage tracking after a successful search.
 * @returns A router with the smart-search route registered.
 */
export declare function createSmartSearchRouter(authStore: AuthStore): Router;
@@ -0,0 +1,355 @@
1
+ /**
2
+ * Smart Search endpoint — intent detection + travel/commerce routing
3
+ * POST /v1/search/smart
4
+ *
5
+ * Detects user intent from natural language and routes to the best source:
6
+ * - cars → Cars.com with browser rendering + structured extraction
7
+ * - flights → Google Flights with browser rendering + flight extractor
8
+ * - hotels → Google Hotels with browser rendering
9
+ * - rental → Kayak with browser rendering + rental extractor
10
+ * - restaurants → Yelp Fusion API extractor
11
+ * - general → SearXNG with smart enrichment (peel() for top 2)
12
+ */
13
+ import { Router } from 'express';
14
+ import '../types.js'; // Augments Express.Request with requestId, auth
15
+ import { peel } from '../../index.js';
16
+ import { getBestSearchProvider, } from '../../core/search-provider.js';
17
+ import { getSourceCredibility } from '../../core/source-credibility.js';
18
+ // ─── Intent Detection ──────────────────────────────────────────────────────
19
/**
 * Classify a natural-language query into a search intent.
 *
 * The query is lower-cased and tested against keyword patterns in priority
 * order: cars, flights, hotels, rental, restaurants, then the `general`
 * fallback. The returned `query` is the lower-cased input.
 *
 * For `cars`, `params.maxPrice` holds the digits of a detected price ceiling
 * ('' when none was found) and `params.zip` holds a detected 5-digit ZIP
 * (default '10001'). All other intents return empty `params`.
 *
 * @param {string} query Raw user query.
 * @returns {{type: string, query: string, params: Record<string, string>}}
 */
export function detectSearchIntent(query) {
    const q = query.toLowerCase();
    // Rental phrasing ("car rental", "rent a car") necessarily contains a vehicle
    // word. Detect it up front so that e.g. "cheap car rental" is not mistaken
    // for a car-buying query just because it also contains a buy signal.
    const isRental = /\b(rent|rental)\b.*\b(car|vehicle|suv)\b/.test(q) ||
        /\bcar\s+rental\b/.test(q);
    // Cars: vehicle name/type + buying signals
    const hasVehicle = /\b(car|cars|vehicle|sedan|suv|truck|honda|toyota|tesla|bmw|ford|chevy|chevrolet|nissan|hyundai|kia|mazda|subaru|lexus|audi|mercedes|volkswagen|jeep|dodge|ram|buick|cadillac|gmc|chrysler|acura|infiniti|volvo|porsche|mini|fiat|mitsubishi)\b/.test(q);
    const hasBuySignal = /\b(buy|cheap|under|budget|price|used|new|for sale|listing|deal)\b/.test(q);
    if (!isRental && hasVehicle && hasBuySignal) {
        const priceMatch = q.match(/(?:under|\$|budget|max)\s*\$?(\d[\d,]*)/);
        // Blank out the price phrase before looking for a ZIP code; otherwise a
        // 5-digit price such as "under 15000" would also be read as the ZIP.
        const withoutPrice = priceMatch ? q.replace(priceMatch[0], ' ') : q;
        const zipMatch = withoutPrice.match(/\b(\d{5})\b/);
        return {
            type: 'cars',
            query: q,
            params: {
                maxPrice: priceMatch ? priceMatch[1].replace(/,/g, '') : '',
                zip: zipMatch ? zipMatch[1] : '10001',
            },
        };
    }
    // Flights: "flight", "fly", city-to-city patterns with dates
    if (/\b(flight|flights|fly|flying|airline|plane)\b/.test(q) ||
        (/\b(from|to)\b.*\b(to|from)\b/.test(q) && /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|\d{1,2}\/\d{1,2})\b/.test(q))) {
        return { type: 'flights', query: q, params: {} };
    }
    // Hotels: "hotel", "stay", "accommodation", etc. + location signal
    if (/\b(hotel|hotels|motel|stay|accommodation|lodging|inn|resort|airbnb|hostel)\b/.test(q) &&
        /\b(in|near|at|around|cheap|best|book)\b/.test(q)) {
        return { type: 'hotels', query: q, params: {} };
    }
    // Car rental: "rent a car", "car rental", "rental car"
    if (isRental) {
        return { type: 'rental', query: q, params: {} };
    }
    // Restaurants: food/dining + location/quality signal
    if (/\b(restaurant|restaurants|food|eat|dinner|lunch|pizza|sushi|burger|cafe|bar|bistro|brunch|breakfast)\b/.test(q) &&
        /\b(in|near|best|top|good|cheap)\b/.test(q)) {
        return { type: 'restaurants', query: q, params: {} };
    }
    return { type: 'general', query: q, params: {} };
}
57
+ // ─── Intent Handlers ───────────────────────────────────────────────────────
58
/**
 * Run a car-listing search on Cars.com for a `cars` intent.
 *
 * Builds the results URL from the intent (keyword, ZIP, optional price cap),
 * fetches it through `peel` with browser rendering, and maps the response to
 * a smart-search result.
 *
 * @param intent Detected intent; reads `query`, `params.zip`, `params.maxPrice`.
 * @returns Result object tagged `type: 'cars'`.
 * @throws {Error} "Cars.com search failed: …" when the fetch or mapping fails.
 */
async function handleCarSearch(intent) {
    const startedAt = Date.now();
    // Drop generic car/buying vocabulary so mostly the vehicle name remains.
    const fillerWords = /\b(buy|cheap|under|budget|price|used|new|for sale|listing|deal|car|cars)\b/gi;
    const vehicleKeyword = intent.query.replace(fillerWords, '').replace(/\s+/g, ' ').trim();

    const search = new URLSearchParams();
    search.set('keyword', vehicleKeyword);
    search.set('sort', 'list_price');
    search.set('stock_type', 'all');
    search.set('zip', intent.params.zip || '10001');
    search.set('maximum_distance', '50');
    if (intent.params.maxPrice) {
        search.set('list_price_max', intent.params.maxPrice);
    }
    const listingsUrl = `https://www.cars.com/shopping/results/?${search.toString()}`;

    try {
        const page = await peel(listingsUrl, { render: true, timeout: 25000 });
        return {
            type: 'cars',
            source: 'Cars.com',
            sourceUrl: listingsUrl,
            content: page.content,
            title: page.title,
            domainData: page.domainData,
            structured: page.domainData?.structured,
            tokens: page.tokens,
            fetchTimeMs: Date.now() - startedAt,
        };
    }
    catch (failure) {
        throw new Error(`Cars.com search failed: ${failure.message}`);
    }
}
93
/**
 * Run a flight search on Google Flights for a `flights` intent.
 *
 * The whole query is passed as a one-way free-text search and the page is
 * fetched through `peel` with browser rendering.
 *
 * @param intent Detected intent; reads `query`.
 * @returns Result object tagged `type: 'flights'`.
 * @throws {Error} "Google Flights search failed: …" when the fetch or mapping fails.
 */
async function handleFlightSearch(intent) {
    const startedAt = Date.now();
    const encodedQuery = encodeURIComponent(intent.query);
    const flightsUrl = `https://www.google.com/travel/flights?q=Flights+${encodedQuery}+one+way`;
    try {
        const page = await peel(flightsUrl, { render: true, timeout: 30000 });
        return {
            type: 'flights',
            source: 'Google Flights',
            sourceUrl: flightsUrl,
            content: page.content,
            title: page.title,
            domainData: page.domainData,
            structured: page.domainData?.structured,
            tokens: page.tokens,
            fetchTimeMs: Date.now() - startedAt,
        };
    }
    catch (failure) {
        throw new Error(`Google Flights search failed: ${failure.message}`);
    }
}
114
/**
 * Run a hotel search on Google Hotels for a `hotels` intent.
 *
 * The whole query is passed as free text and the page is fetched through
 * `peel` with browser rendering.
 *
 * @param intent Detected intent; reads `query`.
 * @returns Result object tagged `type: 'hotels'`.
 * @throws {Error} "Google Hotels search failed: …" when the fetch or mapping fails.
 */
async function handleHotelSearch(intent) {
    const startedAt = Date.now();
    const encodedQuery = encodeURIComponent(intent.query);
    const hotelsUrl = `https://www.google.com/travel/hotels?q=${encodedQuery}`;
    try {
        const page = await peel(hotelsUrl, { render: true, timeout: 30000 });
        return {
            type: 'hotels',
            source: 'Google Hotels',
            sourceUrl: hotelsUrl,
            content: page.content,
            title: page.title,
            domainData: page.domainData,
            structured: page.domainData?.structured,
            tokens: page.tokens,
            fetchTimeMs: Date.now() - startedAt,
        };
    }
    catch (failure) {
        throw new Error(`Google Hotels search failed: ${failure.message}`);
    }
}
135
/**
 * Run a car-rental search on Kayak for a `rental` intent.
 *
 * Builds a Kayak URL of the form /cars/<location>/<pickup>/<dropoff>/ and
 * fetches it through `peel` with browser rendering. The location is the query
 * with rental vocabulary stripped (falling back to the full query when
 * nothing is left).
 *
 * The rental window is computed from the current date (pickup in 7 days,
 * drop-off 3 days later, as UTC calendar days) rather than being hard-coded,
 * so the generated URL never points at dates in the past.
 *
 * @param intent Detected intent; reads `query`.
 * @returns Result object tagged `type: 'rental'`.
 * @throws {Error} "Kayak car rental search failed: …" when the fetch or mapping fails.
 */
async function handleRentalSearch(intent) {
    const t0 = Date.now();
    const DAY_MS = 24 * 60 * 60 * 1000;
    // YYYY-MM-DD slice of the ISO-8601 timestamp (UTC calendar day).
    const toIsoDay = (epochMs) => new Date(epochMs).toISOString().slice(0, 10);
    const pickupDate = toIsoDay(t0 + 7 * DAY_MS);
    const dropoffDate = toIsoDay(t0 + 10 * DAY_MS);
    // Build Kayak car rental URL: /cars/<location>/<date-range>
    // For simplicity, use a search-style URL that will browser-render fine
    const encodedQuery = encodeURIComponent(intent.query.replace(/\b(rent|rental|car|a|vehicle|suv)\b/gi, '').trim() || intent.query);
    const kayakUrl = `https://www.kayak.com/cars/${encodedQuery}/${pickupDate}/${dropoffDate}/`;
    try {
        const result = await peel(kayakUrl, { render: true, timeout: 30000 });
        return {
            type: 'rental',
            source: 'Kayak',
            sourceUrl: kayakUrl,
            content: result.content,
            title: result.title,
            domainData: result.domainData,
            structured: result.domainData?.structured,
            tokens: result.tokens,
            fetchTimeMs: Date.now() - t0,
        };
    }
    catch (err) {
        throw new Error(`Kayak car rental search failed: ${err.message}`);
    }
}
159
/**
 * Run a restaurant search on Yelp for a `restaurants` intent.
 *
 * The search description is the query with quality/proximity filler words
 * removed. The search location comes from `intent.location` when it is a
 * non-empty string (the route sets it from the request body's `location`
 * field) and falls back to 'New York, NY' otherwise. Previously the
 * caller-supplied location was ignored and New York was always used.
 *
 * @param intent Detected intent; reads `query` and optional `location`.
 * @returns Result object tagged `type: 'restaurants'`.
 * @throws {Error} "Yelp search failed: …" when the fetch or mapping fails.
 */
async function handleRestaurantSearch(intent) {
    const t0 = Date.now();
    const requestedLocation = typeof intent.location === 'string' ? intent.location.trim() : '';
    const findLoc = requestedLocation || 'New York, NY';
    const findDesc = intent.query.replace(/\b(best|top|good|cheap|near me)\b/gi, '').trim();
    const yelpUrl = `https://www.yelp.com/search?find_desc=${encodeURIComponent(findDesc)}&find_loc=${encodeURIComponent(findLoc)}`;
    try {
        const result = await peel(yelpUrl, { render: true, timeout: 25000 });
        return {
            type: 'restaurants',
            source: 'Yelp',
            sourceUrl: yelpUrl,
            content: result.content,
            title: result.title,
            domainData: result.domainData,
            structured: result.domainData?.structured,
            tokens: result.tokens,
            fetchTimeMs: Date.now() - t0,
        };
    }
    catch (err) {
        throw new Error(`Yelp search failed: ${err.message}`);
    }
}
180
/**
 * Fallback web search for queries with no specialised intent.
 *
 * Asks the provider chosen by `getBestSearchProvider()` for 10 results,
 * orders them by source-credibility tier, numbers them, and tries to attach
 * fetched page content (capped at 1500 characters) to the first two.
 * Enrichment failures are ignored; the affected result simply carries no
 * `content`.
 *
 * @param query Search text.
 * @returns Result object tagged `type: 'general'` with a `results` array.
 */
async function handleGeneralSearch(query) {
    const startedAt = Date.now();
    const searchProvider = getBestSearchProvider().provider;
    const hits = await searchProvider.searchWeb(query, { count: 10 });

    // Lower rank sorts first; unknown or missing tiers are treated like 'new'.
    const TIER_RANK = { official: 0, established: 1, community: 2, new: 3, suspicious: 4 };
    const rankOf = (entry) => TIER_RANK[entry.credibility?.tier || 'new'] ?? 3;

    // Hostname without a leading "www."; empty string for unparseable URLs.
    function hostOf(link) {
        try {
            return new URL(link).hostname.replace(/^www\./, '');
        }
        catch {
            return '';
        }
    }

    const scored = hits.map((hit) => {
        const credibility = getSourceCredibility(hit.url);
        return {
            title: hit.title,
            url: hit.url,
            snippet: hit.snippet,
            domain: hostOf(hit.url),
            credibility,
        };
    });
    scored.sort((left, right) => rankOf(left) - rankOf(right));
    const results = scored.map((entry, position) => ({ ...entry, rank: position + 1 }));

    // Enrich top 2 results with peel() for richer content. A failed fetch
    // becomes a rejected outcome, which is skipped below.
    const outcomes = await Promise.allSettled(results.slice(0, 2).map(async (entry) => {
        const page = await peel(entry.url, { render: true, timeout: 15000, maxTokens: 2000 });
        return { url: entry.url, content: page.content?.substring(0, 1500), fetchTimeMs: page.elapsed };
    }));
    for (const outcome of outcomes) {
        if (outcome.status !== 'fulfilled' || !outcome.value.content) {
            continue;
        }
        const target = results.find((entry) => entry.url === outcome.value.url);
        if (!target) {
            continue;
        }
        target.content = outcome.value.content;
        target.fetchTimeMs = outcome.value.fetchTimeMs;
    }

    const lines = results.map((entry, position) => `${position + 1}. **${entry.title}**\n ${entry.url}\n ${entry.snippet}`);
    const content = lines.join('\n\n');
    return {
        type: 'general',
        source: 'Web Search',
        sourceUrl: `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
        content,
        results,
        tokens: content.split(/\s+/).length,
        fetchTimeMs: Date.now() - startedAt,
    };
}
243
+ // ─── Loading message by intent type ────────────────────────────────────────
244
/**
 * Progress text shown by frontends while a smart search is running.
 *
 * @param type Intent type key.
 * @returns The message for that intent, or 'Searching…' when there is none.
 */
function getLoadingMessage(type) {
    const fallback = 'Searching…';
    const messageByIntent = {
        cars: 'Searching cars on Cars.com…',
        flights: 'Finding flights on Google Flights…',
        hotels: 'Looking up hotels on Google Hotels…',
        rental: 'Searching rental cars on Kayak…',
        restaurants: 'Finding restaurants on Yelp…',
        general: 'Searching the web…',
    };
    const message = messageByIntent[type];
    return message ? message : fallback;
}
255
+ // ─── Router ────────────────────────────────────────────────────────────────
256
/**
 * Build the Express router serving POST /v1/search/smart.
 *
 * Request body: `{ q: string, zip?: string | number, location?: string }`.
 * The handler:
 *   1. rejects unauthenticated callers with 401,
 *   2. rejects a missing, empty or non-string `q` with 400 (this now also
 *      covers a request with no parsed body at all, which used to surface
 *      as a 500),
 *   3. detects the intent, applies the optional `zip` / `location` overrides,
 *      and dispatches to the matching intent handler,
 *   4. records usage on `authStore` when it exposes `trackUsage`,
 *   5. responds with `{ success: true, data }` plus X-Intent-Type, X-Source
 *      and X-Processing-Time headers.
 * Any failure in steps 3-5 is logged and returned as a 500
 * `smart_search_failed` error.
 *
 * @param authStore Auth store; `trackBurstUsage` / `trackUsage` are called when present.
 * @returns The configured router.
 */
export function createSmartSearchRouter(authStore) {
    const router = Router();
    // Body fields are untrusted JSON: accept only strings (trimmed) and finite
    // numbers, so objects/arrays never get stringified into an outgoing URL.
    const toCleanText = (value) => {
        if (typeof value === 'string') {
            return value.trim();
        }
        if (typeof value === 'number' && Number.isFinite(value)) {
            return String(value);
        }
        return '';
    };
    router.post('/v1/search/smart', async (req, res) => {
        try {
            // Require authentication
            const authId = req.auth?.keyInfo?.accountId || req.user?.userId;
            if (!authId) {
                res.status(401).json({
                    success: false,
                    error: {
                        type: 'authentication_required',
                        message: 'API key required. Get one free at https://app.webpeel.dev',
                        docs: 'https://webpeel.dev/docs/api-reference#authentication',
                    },
                    requestId: req.requestId,
                });
                return;
            }
            // req.body is undefined when no body was sent/parsed; treat that as
            // an empty object so the caller gets the 400 below, not a 500.
            const { q, location, zip } = req.body ?? {};
            if (!q || typeof q !== 'string' || !q.trim()) {
                res.status(400).json({
                    success: false,
                    error: {
                        type: 'invalid_request',
                        message: 'Missing or invalid "q" field in request body',
                        hint: 'POST /v1/search/smart with JSON body: { "q": "your search query" }',
                        docs: 'https://webpeel.dev/docs/api-reference#smart-search',
                    },
                    requestId: req.requestId,
                });
                return;
            }
            const query = q.trim();
            const intent = detectSearchIntent(query);
            // Override zip from request body if provided
            const zipOverride = toCleanText(zip);
            if (zipOverride && intent.params) {
                intent.params.zip = zipOverride;
            }
            // Attach the caller-supplied location for restaurant searches
            const locationOverride = toCleanText(location);
            if (locationOverride && intent.type === 'restaurants') {
                intent.location = locationOverride;
            }
            let smartResult;
            switch (intent.type) {
                case 'cars':
                    smartResult = await handleCarSearch(intent);
                    break;
                case 'flights':
                    smartResult = await handleFlightSearch(intent);
                    break;
                case 'hotels':
                    smartResult = await handleHotelSearch(intent);
                    break;
                case 'rental':
                    smartResult = await handleRentalSearch(intent);
                    break;
                case 'restaurants':
                    smartResult = await handleRestaurantSearch(intent);
                    break;
                default:
                    smartResult = await handleGeneralSearch(query);
            }
            // Add loading message hint for frontend UX
            smartResult.loadingMessage = getLoadingMessage(intent.type);
            // Track usage (only stores that implement trackUsage; soft-limited
            // requests count toward burst usage but not toward regular usage)
            const pgStore = authStore;
            if (req.auth?.keyInfo?.key && typeof pgStore.trackUsage === 'function') {
                if (typeof pgStore.trackBurstUsage === 'function') {
                    await pgStore.trackBurstUsage(req.auth.keyInfo.key);
                }
                if (!req.auth?.softLimited) {
                    await pgStore.trackUsage(req.auth.keyInfo.key, 'smart-search');
                }
            }
            res.setHeader('X-Intent-Type', intent.type);
            res.setHeader('X-Source', smartResult.source);
            res.setHeader('X-Processing-Time', smartResult.fetchTimeMs.toString());
            res.setHeader('Cache-Control', 'no-store');
            res.json({
                success: true,
                data: smartResult,
            });
        }
        catch (error) {
            // A rejection value is not guaranteed to be an Error (it may even be
            // null/undefined); read its fields defensively so this handler can
            // never throw and leave the request without a response.
            const err = error;
            console.error('Smart search error:', err?.message, err?.stack);
            res.status(500).json({
                success: false,
                error: {
                    type: 'smart_search_failed',
                    message: err?.message || 'Smart search failed. Please try again.',
                    docs: 'https://webpeel.dev/docs/api-reference#smart-search',
                },
                requestId: req.requestId,
            });
        }
    });
    return router;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.83",
3
+ "version": "0.21.84",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",