webpeel 0.20.2 → 0.20.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +384 -0
- package/dist/server/auth-store.d.ts +27 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/email-service.d.ts +21 -0
- package/dist/server/email-service.js +79 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/auth.d.ts +28 -0
- package/dist/server/middleware/auth.js +221 -0
- package/dist/server/middleware/rate-limit.d.ts +24 -0
- package/dist/server/middleware/rate-limit.js +167 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +186 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +132 -0
- package/dist/server/pg-auth-store.js +472 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/premium/domain-intel.d.ts +16 -0
- package/dist/server/premium/domain-intel.js +133 -0
- package/dist/server/premium/index.d.ts +17 -0
- package/dist/server/premium/index.js +35 -0
- package/dist/server/premium/swr-cache.d.ts +14 -0
- package/dist/server/premium/swr-cache.js +34 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +74 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +229 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +8 -0
- package/dist/server/routes/extract.js +235 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +999 -0
- package/dist/server/routes/health.d.ts +7 -0
- package/dist/server/routes/health.js +19 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +573 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +141 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +816 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +303 -0
- package/dist/server/routes/session.d.ts +15 -0
- package/dist/server/routes/session.js +397 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +294 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1671 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +13 -0
- package/dist/server/sentry.js +38 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/package.json +2 -1
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search endpoint with caching — supports DuckDuckGo (default) and Brave (BYOK)
|
|
3
|
+
*/
|
|
4
|
+
import { Router } from 'express';
|
|
5
|
+
import { fetch as undiciFetch } from 'undici';
|
|
6
|
+
import { load } from 'cheerio';
|
|
7
|
+
import { LRUCache } from 'lru-cache';
|
|
8
|
+
import { peel } from '../../index.js';
|
|
9
|
+
import { getSearchProvider, getBestSearchProvider, } from '../../core/search-provider.js';
|
|
10
|
+
/** User-Agent header sent with DuckDuckGo HTML requests. */
const DDG_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36';

/**
 * Reduce a query-string/header value to a single string.
 * Query parsers may hand back arrays (repeated keys) or nested objects;
 * arrays yield their first string element, anything else yields ''.
 */
function queryScalar(value) {
    if (typeof value === 'string') {
        return value;
    }
    if (Array.isArray(value)) {
        return value.find((item) => typeof item === 'string') ?? '';
    }
    return '';
}

/**
 * Reduce a comma-separated query value to one comma-separated string.
 * Repeated keys (arrays) are joined with ','; non-string values yield ''.
 */
function queryCsv(value) {
    if (Array.isArray(value)) {
        return value.filter((item) => typeof item === 'string').join(',');
    }
    return typeof value === 'string' ? value : '';
}

/**
 * Resolve a DuckDuckGo result href to its normalized http(s) target URL.
 * Returns null when the target is not a valid absolute http(s) URL.
 */
function resolveDdgResultUrl(rawUrl) {
    let target = rawUrl;
    try {
        const redirect = new URL(rawUrl, 'https://duckduckgo.com');
        // searchParams.get() already percent-decodes; decoding again would
        // corrupt targets that legitimately contain escapes such as %26.
        const uddg = redirect.searchParams.get('uddg');
        if (uddg) {
            target = uddg;
        }
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'ddg url parse failed:', e instanceof Error ? e.message : e);
    }
    try {
        const parsed = new URL(target);
        if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
            return null;
        }
        return parsed.href;
    }
    catch {
        return null;
    }
}

/** Decide whether a lower-cased URL belongs to the given (lower-cased) category. */
function urlMatchesCategory(urlLower, category) {
    switch (category) {
        case 'github':
            return urlLower.includes('github.com');
        case 'pdf':
            return urlLower.endsWith('.pdf');
        case 'docs':
        case 'documentation':
            return urlLower.includes('/docs') || urlLower.includes('/documentation');
        case 'blog':
            return urlLower.includes('blog') || urlLower.includes('/post/');
        case 'news':
            return urlLower.includes('news') || urlLower.includes('/article/');
        case 'video':
            return urlLower.includes('youtube.com') || urlLower.includes('vimeo.com');
        case 'social':
            return urlLower.includes('twitter.com') || urlLower.includes('x.com') ||
                urlLower.includes('facebook.com') || urlLower.includes('linkedin.com');
        default:
            return urlLower.includes(category);
    }
}

/**
 * Fetch a DuckDuckGo HTML vertical ('news' or 'images') and collect up to
 * `limit` results. `toResult` maps one wrapped `.result` element to a result
 * object, or returns null to skip it. Returns null when the response is not ok.
 */
async function fetchDdgVertical(query, vertical, limit, toResult) {
    const response = await undiciFetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}&t=${vertical}`, {
        headers: {
            'User-Agent': DDG_USER_AGENT,
        },
    });
    if (!response.ok) {
        return null;
    }
    const $ = load(await response.text());
    const collected = [];
    $('.result').each((_i, elem) => {
        if (collected.length >= limit)
            return;
        const item = toResult($(elem));
        if (item) {
            collected.push(item);
        }
    });
    return collected;
}

/** Map one DuckDuckGo `.result` element to a news result, or null to skip it. */
function toDdgNewsResult($result) {
    const title = $result.find('.result__title').text().trim();
    const rawUrl = $result.find('.result__a').attr('href') || '';
    const snippet = $result.find('.result__snippet').text().trim();
    const sourceText = $result.find('.result__extras__url').text().trim();
    if (!title || !rawUrl)
        return null;
    const url = resolveDdgResultUrl(rawUrl);
    if (!url)
        return null;
    return {
        title: title.slice(0, 200),
        url,
        snippet: snippet.slice(0, 500),
        source: sourceText.slice(0, 100),
    };
}

/** Map one DuckDuckGo `.result` element to an image result, or null to skip it. */
function toDdgImageResult($result) {
    const title = $result.find('.result__title').text().trim();
    const thumbnail = $result.find('.result__image img').attr('src') || '';
    const rawUrl = $result.find('.result__a').attr('href') || '';
    const sourceText = $result.find('.result__extras__url').text().trim();
    if (!title || !rawUrl || !thumbnail)
        return null;
    // Same http(s)-only validation as news results.
    const url = resolveDdgResultUrl(rawUrl);
    if (!url)
        return null;
    return {
        title: title.slice(0, 200),
        url,
        thumbnail,
        source: sourceText.slice(0, 100),
    };
}

/**
 * Build the router exposing `GET /v1/search`.
 *
 * The handler requires an authenticated caller (`req.auth.keyInfo.accountId`
 * or `req.user.userId`), validates `q` and `count` (1-10, default 5), and
 * returns `{ success, data }` where `data` may contain `web`, `news` and
 * `images` arrays depending on the `sources` parameter. Responses are kept in
 * an in-memory LRU cache for 15 minutes; cache hits skip usage tracking.
 *
 * @param authStore Store used for usage tracking. Tracking only runs when it
 *   exposes `trackBurstUsage` and the request carries an API key.
 * @returns The configured Express router.
 */
export function createSearchRouter(authStore) {
    const router = Router();
    // LRU cache: 15 minute TTL, max 500 entries, sized by serialized length
    const cache = new LRUCache({
        max: 500,
        ttl: 15 * 60 * 1000, // 15 minutes
        maxSize: 50 * 1024 * 1024, // 50MB
        sizeCalculation: (entry) => JSON.stringify(entry).length,
    });
    router.get('/v1/search', async (req, res) => {
        try {
            // Require authentication
            const searchAuthId = req.auth?.keyInfo?.accountId || req.user?.userId;
            if (!searchAuthId) {
                res.status(401).json({ success: false, error: { type: 'authentication_required', message: 'API key required. Get one free at https://app.webpeel.dev', docs: 'https://webpeel.dev/docs/api-reference#authentication' }, requestId: req.requestId });
                return;
            }
            const { q, count, scrapeResults, sources, categories, tbs, country, location } = req.query;
            // --- Search provider (BYOK Brave support) ---
            // Query values may be arrays/objects, so normalize before calling string methods.
            const providerParam = queryScalar(req.query.provider).toLowerCase() || 'auto';
            const validProviders = ['duckduckgo', 'brave', 'stealth', 'google'];
            const providerId = validProviders.includes(providerParam)
                ? providerParam
                : providerParam === 'auto' ? 'auto' : 'duckduckgo';
            // API key: query param, header, or empty
            const searchApiKey = queryScalar(req.query.searchApiKey) ||
                queryScalar(req.headers['x-search-api-key']);
            // Validate query parameter
            if (!q || typeof q !== 'string') {
                res.status(400).json({ success: false, error: { type: 'invalid_request', message: 'Missing or invalid "q" parameter. Pass a search query: GET /v1/search?q=your+search+terms', hint: 'Example: curl "https://api.webpeel.dev/v1/search?q=latest+AI+news&count=5"', docs: 'https://webpeel.dev/docs/api-reference#search' }, requestId: req.requestId });
                return;
            }
            // Parse and validate count
            const resultCount = count ? Number.parseInt(queryScalar(count), 10) : 5;
            if (Number.isNaN(resultCount) || resultCount < 1 || resultCount > 10) {
                res.status(400).json({ success: false, error: { type: 'invalid_request', message: 'Invalid "count" parameter: must be between 1 and 10', hint: 'Use a count value between 1 and 10', docs: 'https://webpeel.dev/docs/errors#invalid_request' }, requestId: req.requestId });
                return;
            }
            // Parse sources parameter (comma-separated: web,news,images)
            const sourcesStr = queryCsv(sources) || 'web';
            const sourcesArray = sourcesStr.split(',').map(s => s.trim());
            const shouldScrape = scrapeResults === 'true';
            // Parse remaining search parameters
            const categoriesStr = queryCsv(categories);
            const tbsStr = queryScalar(tbs);
            const countryStr = queryScalar(country);
            const locationStr = queryScalar(location);
            // Build cache key from all parameters. JSON-encoding the tuple keeps
            // user text containing ':' from colliding with another parameter set.
            const cacheKey = JSON.stringify(['search', providerId, q, resultCount, sourcesStr, shouldScrape, categoriesStr, tbsStr, countryStr, locationStr]);
            // Check cache
            const cached = cache.get(cacheKey);
            if (cached) {
                res.setHeader('X-Cache', 'HIT');
                res.setHeader('X-Cache-Age', Math.floor((Date.now() - cached.timestamp) / 1000).toString());
                res.json({
                    success: true,
                    data: cached.data,
                });
                return;
            }
            const startTime = Date.now();
            const data = {};
            // Fetch web results via the search-provider abstraction
            if (sourcesArray.includes('web')) {
                // provider=auto (default): let getBestSearchProvider pick the provider
                // and fall back to its API key when the caller supplied none.
                // Otherwise use the requested provider directly.
                let searchProvider;
                let effectiveApiKey;
                if (providerId === 'auto') {
                    const best = getBestSearchProvider();
                    searchProvider = best.provider;
                    effectiveApiKey = searchApiKey || best.apiKey;
                }
                else {
                    searchProvider = getSearchProvider(providerId);
                    effectiveApiKey = searchApiKey || undefined;
                }
                const providerResults = await searchProvider.searchWeb(q, {
                    count: resultCount,
                    apiKey: effectiveApiKey,
                    tbs: tbsStr || undefined,
                    country: countryStr || undefined,
                    location: locationStr || undefined,
                });
                // Map to SearchResult (with optional content field)
                let results = providerResults.map(r => ({
                    title: r.title,
                    url: r.url,
                    snippet: r.snippet,
                }));
                // Apply category filtering if specified. Empty entries (e.g. a
                // trailing comma) are dropped: ''.includes-matching would accept
                // every URL and silently disable the filter.
                const categoryList = categoriesStr
                    .split(',')
                    .map(c => c.trim().toLowerCase())
                    .filter(c => c.length > 0);
                if (categoryList.length > 0) {
                    results = results.filter(result => {
                        const urlLower = typeof result.url === 'string' ? result.url.toLowerCase() : '';
                        return categoryList.some(category => urlMatchesCategory(urlLower, category));
                    });
                }
                // Scrape each result URL if requested (sequentially, one page at a time)
                if (shouldScrape) {
                    for (const result of results) {
                        try {
                            const peelResult = await peel(result.url, {
                                format: 'markdown',
                                maxTokens: 2000,
                            });
                            result.content = peelResult.content;
                        }
                        catch (error) {
                            // Best effort: a failed scrape is reported inline, not fatal
                            result.content = `[Failed to scrape: ${error instanceof Error ? error.message : String(error)}]`;
                        }
                    }
                }
                data.web = results;
            }
            // Fetch news results (DDG only)
            if (sourcesArray.includes('news')) {
                const newsResults = await fetchDdgVertical(q, 'news', resultCount, toDdgNewsResult);
                if (newsResults) {
                    data.news = newsResults;
                }
            }
            // Fetch image results (DDG only)
            if (sourcesArray.includes('images')) {
                const imageResults = await fetchDdgVertical(q, 'images', resultCount, toDdgImageResult);
                if (imageResults) {
                    data.images = imageResults;
                }
            }
            const elapsed = Date.now() - startTime;
            // Track usage
            const isSoftLimited = req.auth?.softLimited === true;
            const hasExtraUsage = req.auth?.extraUsageAvailable === true;
            const pgStore = authStore;
            if (req.auth?.keyInfo?.key && typeof pgStore?.trackBurstUsage === 'function') {
                // Track burst usage (always)
                await pgStore.trackBurstUsage(req.auth.keyInfo.key);
                // If soft-limited with extra usage available, charge to extra usage
                if (isSoftLimited && hasExtraUsage) {
                    const extraResult = await pgStore.trackExtraUsage(req.auth.keyInfo.key, 'search', `search:${q}`, elapsed, 200);
                    if (extraResult.success) {
                        res.setHeader('X-Extra-Usage-Charged', `$${extraResult.cost.toFixed(4)}`);
                        res.setHeader('X-Extra-Usage-New-Balance', extraResult.newBalance.toFixed(2));
                    }
                }
                else if (!isSoftLimited) {
                    // Normal weekly usage tracking
                    await pgStore.trackUsage(req.auth.keyInfo.key, 'search');
                }
            }
            // Cache results
            cache.set(cacheKey, {
                data,
                timestamp: Date.now(),
            });
            // Add headers
            res.setHeader('X-Cache', 'MISS');
            res.setHeader('X-Credits-Used', '1');
            res.setHeader('X-Processing-Time', elapsed.toString());
            res.setHeader('X-Fetch-Type', 'search');
            res.json({
                success: true,
                data,
            });
        }
        catch (error) {
            // SECURITY: Generic error message to prevent information disclosure
            console.error('Search error:', error); // Log full error server-side
            res.status(500).json({
                success: false,
                error: {
                    type: 'search_failed',
                    message: 'Search request failed. If using Brave provider, verify your API key. Otherwise try again.',
                    hint: 'Free search uses DuckDuckGo (no key required). For higher quality, add provider=brave&searchApiKey=YOUR_KEY',
                    docs: 'https://webpeel.dev/docs/api-reference#search',
                },
                requestId: req.requestId,
            });
        }
    });
    return router;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser Session API — stateful Playwright sessions
|
|
3
|
+
*
|
|
4
|
+
* POST /v1/session → create session, returns { sessionId, expiresAt }
|
|
5
|
+
* GET /v1/session/:id → get current page content (Readability text)
|
|
6
|
+
* POST /v1/session/:id/navigate → navigate to URL { url }
|
|
7
|
+
* POST /v1/session/:id/act → execute PageActions array
|
|
8
|
+
* GET /v1/session/:id/screenshot → take screenshot (image/png)
|
|
9
|
+
* DELETE /v1/session/:id → close session
|
|
10
|
+
*
|
|
11
|
+
* Use cases: login flows, multi-step automation, UI testing.
|
|
12
|
+
* This is what Browserbase charges $500/mo for — built into WebPeel.
|
|
13
|
+
*/
|
|
14
|
+
import { Router } from 'express';
|
|
15
|
+
/**
 * Factory for the browser-session routes listed in the header comment of this file.
 * Takes no arguments and returns an Express `Router`.
 */
export declare function createSessionRouter(): Router;
|