webpeel 0.20.2 → 0.20.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/dist/server/app.d.ts +14 -0
  2. package/dist/server/app.js +384 -0
  3. package/dist/server/auth-store.d.ts +27 -0
  4. package/dist/server/auth-store.js +88 -0
  5. package/dist/server/email-service.d.ts +21 -0
  6. package/dist/server/email-service.js +79 -0
  7. package/dist/server/job-queue.d.ts +100 -0
  8. package/dist/server/job-queue.js +145 -0
  9. package/dist/server/logger.d.ts +10 -0
  10. package/dist/server/logger.js +37 -0
  11. package/dist/server/middleware/auth.d.ts +28 -0
  12. package/dist/server/middleware/auth.js +221 -0
  13. package/dist/server/middleware/rate-limit.d.ts +24 -0
  14. package/dist/server/middleware/rate-limit.js +167 -0
  15. package/dist/server/middleware/url-validator.d.ts +15 -0
  16. package/dist/server/middleware/url-validator.js +186 -0
  17. package/dist/server/openapi.yaml +6418 -0
  18. package/dist/server/pg-auth-store.d.ts +132 -0
  19. package/dist/server/pg-auth-store.js +472 -0
  20. package/dist/server/pg-job-queue.d.ts +59 -0
  21. package/dist/server/pg-job-queue.js +375 -0
  22. package/dist/server/premium/domain-intel.d.ts +16 -0
  23. package/dist/server/premium/domain-intel.js +133 -0
  24. package/dist/server/premium/index.d.ts +17 -0
  25. package/dist/server/premium/index.js +35 -0
  26. package/dist/server/premium/swr-cache.d.ts +14 -0
  27. package/dist/server/premium/swr-cache.js +34 -0
  28. package/dist/server/routes/activity.d.ts +6 -0
  29. package/dist/server/routes/activity.js +74 -0
  30. package/dist/server/routes/answer.d.ts +5 -0
  31. package/dist/server/routes/answer.js +125 -0
  32. package/dist/server/routes/ask.d.ts +28 -0
  33. package/dist/server/routes/ask.js +229 -0
  34. package/dist/server/routes/batch.d.ts +6 -0
  35. package/dist/server/routes/batch.js +493 -0
  36. package/dist/server/routes/cli-usage.d.ts +6 -0
  37. package/dist/server/routes/cli-usage.js +127 -0
  38. package/dist/server/routes/compat.d.ts +23 -0
  39. package/dist/server/routes/compat.js +652 -0
  40. package/dist/server/routes/deep-fetch.d.ts +8 -0
  41. package/dist/server/routes/deep-fetch.js +57 -0
  42. package/dist/server/routes/demo.d.ts +24 -0
  43. package/dist/server/routes/demo.js +517 -0
  44. package/dist/server/routes/do.d.ts +8 -0
  45. package/dist/server/routes/do.js +72 -0
  46. package/dist/server/routes/extract.d.ts +8 -0
  47. package/dist/server/routes/extract.js +235 -0
  48. package/dist/server/routes/fetch.d.ts +7 -0
  49. package/dist/server/routes/fetch.js +999 -0
  50. package/dist/server/routes/health.d.ts +7 -0
  51. package/dist/server/routes/health.js +19 -0
  52. package/dist/server/routes/jobs.d.ts +7 -0
  53. package/dist/server/routes/jobs.js +573 -0
  54. package/dist/server/routes/mcp.d.ts +14 -0
  55. package/dist/server/routes/mcp.js +141 -0
  56. package/dist/server/routes/oauth.d.ts +9 -0
  57. package/dist/server/routes/oauth.js +396 -0
  58. package/dist/server/routes/playground.d.ts +17 -0
  59. package/dist/server/routes/playground.js +283 -0
  60. package/dist/server/routes/screenshot.d.ts +22 -0
  61. package/dist/server/routes/screenshot.js +816 -0
  62. package/dist/server/routes/search.d.ts +6 -0
  63. package/dist/server/routes/search.js +303 -0
  64. package/dist/server/routes/session.d.ts +15 -0
  65. package/dist/server/routes/session.js +397 -0
  66. package/dist/server/routes/stats.d.ts +6 -0
  67. package/dist/server/routes/stats.js +71 -0
  68. package/dist/server/routes/stripe.d.ts +15 -0
  69. package/dist/server/routes/stripe.js +294 -0
  70. package/dist/server/routes/users.d.ts +8 -0
  71. package/dist/server/routes/users.js +1671 -0
  72. package/dist/server/routes/watch.d.ts +15 -0
  73. package/dist/server/routes/watch.js +309 -0
  74. package/dist/server/routes/webhooks.d.ts +26 -0
  75. package/dist/server/routes/webhooks.js +170 -0
  76. package/dist/server/routes/youtube.d.ts +6 -0
  77. package/dist/server/routes/youtube.js +130 -0
  78. package/dist/server/sentry.d.ts +13 -0
  79. package/dist/server/sentry.js +38 -0
  80. package/dist/server/types.d.ts +15 -0
  81. package/dist/server/types.js +7 -0
  82. package/dist/server/utils/response.d.ts +44 -0
  83. package/dist/server/utils/response.js +69 -0
  84. package/dist/server/utils/sse.d.ts +22 -0
  85. package/dist/server/utils/sse.js +38 -0
  86. package/package.json +2 -1
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Search endpoint with caching — supports DuckDuckGo (default) and Brave (BYOK)
3
+ */
4
+ import { Router } from 'express';
5
+ import { AuthStore } from '../auth-store.js';
6
/**
 * Builds the Express router that serves `GET /v1/search`.
 *
 * @param authStore Auth store for the request's API key; the implementation
 *   records search usage through it when it exposes the usage-tracking methods.
 * @returns A router with the search route registered.
 */
+ export declare function createSearchRouter(authStore: AuthStore): Router;
@@ -0,0 +1,303 @@
1
+ /**
2
+ * Search endpoint with caching — supports DuckDuckGo (default) and Brave (BYOK)
3
+ */
4
+ import { Router } from 'express';
5
+ import { fetch as undiciFetch } from 'undici';
6
+ import { load } from 'cheerio';
7
+ import { LRUCache } from 'lru-cache';
8
+ import { peel } from '../../index.js';
9
+ import { getSearchProvider, getBestSearchProvider, } from '../../core/search-provider.js';
10
/** User-Agent header sent with DuckDuckGo HTML requests. */
const DDG_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36';

/**
 * Normalizes a query-string or header value to a single string.
 * Repeated parameters arrive as arrays; the first entry is used.
 * Anything that is not a string (absent, nested object) becomes ''.
 */
function firstString(value) {
    const candidate = Array.isArray(value) ? value[0] : value;
    return typeof candidate === 'string' ? candidate : '';
}

/**
 * Tests a result URL against one (lower-cased) category name.
 * Domain-based categories compare the hostname so that e.g. "x.com" does not
 * match "netflix.com"; if the URL cannot be parsed the check falls back to a
 * substring match on the whole URL.
 */
function matchesCategory(url, category) {
    const urlLower = url.toLowerCase();
    let host = '';
    try {
        host = new URL(url).hostname.toLowerCase();
    }
    catch {
        // Unparseable URL: keep host empty and use substring matching below.
    }
    const onDomain = (domain) => host
        ? host === domain || host.endsWith(`.${domain}`)
        : urlLower.includes(domain);
    switch (category) {
        case 'github':
            return onDomain('github.com');
        case 'pdf':
            return urlLower.endsWith('.pdf');
        case 'docs':
        case 'documentation':
            return urlLower.includes('/docs') || urlLower.includes('/documentation');
        case 'blog':
            return urlLower.includes('blog') || urlLower.includes('/post/');
        case 'news':
            return urlLower.includes('news') || urlLower.includes('/article/');
        case 'video':
            return onDomain('youtube.com') || onDomain('vimeo.com');
        case 'social':
            return onDomain('twitter.com') || onDomain('x.com') ||
                onDomain('facebook.com') || onDomain('linkedin.com');
        default:
            // Unknown category: treat it as a free-form substring filter.
            return urlLower.includes(category);
    }
}

/**
 * Resolves the href of a DuckDuckGo HTML result to its target URL.
 * DuckDuckGo wraps targets in a redirect carrying the destination in the
 * `uddg` query parameter. Returns the normalized http(s) URL, or null when the
 * value cannot be parsed or uses another protocol.
 */
function resolveDdgResultUrl(rawUrl) {
    let target = rawUrl;
    try {
        const redirect = new URL(rawUrl, 'https://duckduckgo.com');
        // searchParams.get() already percent-decodes the value; decoding it a
        // second time would corrupt targets containing escapes such as %26.
        const uddg = redirect.searchParams.get('uddg');
        if (uddg) {
            target = uddg;
        }
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'ddg url parse failed:', e instanceof Error ? e.message : e);
    }
    try {
        const parsed = new URL(target);
        return ['http:', 'https:'].includes(parsed.protocol) ? parsed.href : null;
    }
    catch {
        return null;
    }
}

/**
 * Requests the DuckDuckGo HTML endpoint for `query` with the given `t` value
 * and returns the page loaded into cheerio, or null on a non-OK response.
 */
async function fetchDdgPage(query, tParam) {
    const pageUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}&t=${tParam}`;
    const response = await undiciFetch(pageUrl, {
        headers: {
            'User-Agent': DDG_USER_AGENT,
        },
    });
    if (!response.ok) {
        // Release the connection: undici keeps it busy until the body is consumed or cancelled.
        await response.body?.cancel();
        return null;
    }
    return load(await response.text());
}

/** Extracts up to `limit` news entries ({ title, url, snippet, source }) from a DuckDuckGo HTML page. */
function parseDdgNews($, limit) {
    const items = [];
    $('.result').each((_i, elem) => {
        if (items.length >= limit)
            return false; // returning false stops cheerio's each()
        const $result = $(elem);
        const title = $result.find('.result__title').text().trim();
        const rawUrl = $result.find('.result__a').attr('href') || '';
        const snippet = $result.find('.result__snippet').text().trim();
        const sourceText = $result.find('.result__extras__url').text().trim();
        if (!title || !rawUrl)
            return;
        const url = resolveDdgResultUrl(rawUrl);
        if (!url)
            return;
        items.push({
            title: title.slice(0, 200),
            url,
            snippet: snippet.slice(0, 500),
            source: sourceText.slice(0, 100),
        });
    });
    return items;
}

/** Extracts up to `limit` image entries ({ title, url, thumbnail, source }) from a DuckDuckGo HTML page. */
function parseDdgImages($, limit) {
    const items = [];
    $('.result').each((_i, elem) => {
        if (items.length >= limit)
            return false; // returning false stops cheerio's each()
        const $result = $(elem);
        const title = $result.find('.result__title').text().trim();
        const thumbnail = $result.find('.result__image img').attr('src') || '';
        const rawUrl = $result.find('.result__a').attr('href') || '';
        const sourceText = $result.find('.result__extras__url').text().trim();
        if (!title || !rawUrl || !thumbnail)
            return;
        // Same http(s)-only rule as news results.
        const url = resolveDdgResultUrl(rawUrl);
        if (!url)
            return;
        items.push({
            title: title.slice(0, 200),
            url,
            thumbnail,
            source: sourceText.slice(0, 100),
        });
    });
    return items;
}

/**
 * Creates the router for `GET /v1/search`.
 *
 * Query parameters read by the handler: `q` (required), `count` (1-10,
 * default 5), `sources` (comma-separated: web, news, images; default web),
 * `scrapeResults` ('true' to attach page content to web results),
 * `categories`, `tbs`, `country`, `location`, `provider` and `searchApiKey`
 * (the key may also be sent in the `x-search-api-key` header).
 *
 * Responses are cached in memory per parameter combination; cache hits are
 * returned before usage tracking runs.
 *
 * @param authStore Auth store; when it exposes `trackBurstUsage`, usage is
 *   recorded against the request's API key.
 * @returns The configured Express router.
 */
export function createSearchRouter(authStore) {
    const router = Router();
    // LRU cache: 15 minute TTL, max 500 entries, ~50MB total (size measured as JSON string length)
    const cache = new LRUCache({
        max: 500,
        ttl: 15 * 60 * 1000, // 15 minutes
        maxSize: 50 * 1024 * 1024, // 50MB
        sizeCalculation: (entry) => {
            return JSON.stringify(entry).length;
        },
    });
    router.get('/v1/search', async (req, res) => {
        try {
            // Require authentication
            const searchAuthId = req.auth?.keyInfo?.accountId || req.user?.userId;
            if (!searchAuthId) {
                res.status(401).json({ success: false, error: { type: 'authentication_required', message: 'API key required. Get one free at https://app.webpeel.dev', docs: 'https://webpeel.dev/docs/api-reference#authentication' }, requestId: req.requestId });
                return;
            }
            const { q, count, scrapeResults, sources, categories, tbs, country, location } = req.query;
            // --- Search provider (BYOK Brave support) ---
            const providerParam = firstString(req.query.provider).toLowerCase() || 'auto';
            const validProviders = ['duckduckgo', 'brave', 'stealth', 'google'];
            const providerId = validProviders.includes(providerParam)
                ? providerParam
                : providerParam === 'auto' ? 'auto' : 'duckduckgo';
            // API key: query param, header, or empty
            const searchApiKey = firstString(req.query.searchApiKey) ||
                firstString(req.headers['x-search-api-key']);
            // Validate query parameter
            if (!q || typeof q !== 'string') {
                res.status(400).json({ success: false, error: { type: 'invalid_request', message: 'Missing or invalid "q" parameter. Pass a search query: GET /v1/search?q=your+search+terms', hint: 'Example: curl "https://api.webpeel.dev/v1/search?q=latest+AI+news&count=5"', docs: 'https://webpeel.dev/docs/api-reference#search' }, requestId: req.requestId });
                return;
            }
            // Parse and validate count (absent or empty -> default of 5)
            const resultCount = count ? parseInt(firstString(count), 10) : 5;
            if (isNaN(resultCount) || resultCount < 1 || resultCount > 10) {
                res.status(400).json({ success: false, error: { type: 'invalid_request', message: 'Invalid "count" parameter: must be between 1 and 10', hint: 'Use a count value between 1 and 10', docs: 'https://webpeel.dev/docs/errors#invalid_request' }, requestId: req.requestId });
                return;
            }
            // Parse sources parameter (comma-separated: web,news,images)
            const sourcesStr = firstString(sources) || 'web';
            const sourcesArray = sourcesStr.split(',').map(s => s.trim());
            const shouldScrape = scrapeResults === 'true';
            // Remaining optional search parameters
            const categoriesStr = firstString(categories);
            const tbsStr = firstString(tbs);
            const countryStr = firstString(country);
            const locationStr = firstString(location);
            // Build cache key from all parameters. JSON encoding keeps the
            // fields unambiguous even when a value itself contains ':'.
            const cacheKey = JSON.stringify(['search', providerId, q, resultCount, sourcesStr, shouldScrape, categoriesStr, tbsStr, countryStr, locationStr]);
            // Check cache
            const cached = cache.get(cacheKey);
            if (cached) {
                res.setHeader('X-Cache', 'HIT');
                res.setHeader('X-Cache-Age', Math.floor((Date.now() - cached.timestamp) / 1000).toString());
                res.json({
                    success: true,
                    data: cached.data,
                });
                return;
            }
            const startTime = Date.now();
            const data = {};
            // Fetch web results via the search-provider abstraction
            if (sourcesArray.includes('web')) {
                // provider=auto (default): getBestSearchProvider() chooses the
                // provider and may supply its own API key.
                // Otherwise the requested provider is used directly.
                let searchProvider;
                let effectiveApiKey;
                if (providerId === 'auto') {
                    const best = getBestSearchProvider();
                    searchProvider = best.provider;
                    effectiveApiKey = searchApiKey || best.apiKey;
                }
                else {
                    searchProvider = getSearchProvider(providerId);
                    effectiveApiKey = searchApiKey || undefined;
                }
                const providerResults = await searchProvider.searchWeb(q, {
                    count: resultCount,
                    apiKey: effectiveApiKey,
                    tbs: tbsStr || undefined,
                    country: countryStr || undefined,
                    location: locationStr || undefined,
                });
                // Map to SearchResult (with optional content field)
                let results = providerResults.map(r => ({
                    title: r.title,
                    url: r.url,
                    snippet: r.snippet,
                }));
                // Apply category filtering if specified: keep a result when it matches any category
                if (categoriesStr) {
                    const categoryList = categoriesStr.split(',').map(c => c.trim().toLowerCase());
                    results = results.filter(result => categoryList.some(category => matchesCategory(result.url, category)));
                }
                // Scrape each result URL if requested (one at a time)
                if (shouldScrape) {
                    for (const result of results) {
                        try {
                            const peelResult = await peel(result.url, {
                                format: 'markdown',
                                maxTokens: 2000,
                            });
                            result.content = peelResult.content;
                        }
                        catch (error) {
                            // A failed scrape is reported inline instead of failing the whole search.
                            const reason = error instanceof Error ? error.message : String(error);
                            result.content = `[Failed to scrape: ${reason}]`;
                        }
                    }
                }
                data.web = results;
            }
            // Fetch news results (always scraped from DuckDuckGo HTML)
            if (sourcesArray.includes('news')) {
                const $news = await fetchDdgPage(q, 'news');
                if ($news) {
                    data.news = parseDdgNews($news, resultCount);
                }
            }
            // Fetch image results (always scraped from DuckDuckGo HTML)
            if (sourcesArray.includes('images')) {
                const $images = await fetchDdgPage(q, 'images');
                if ($images) {
                    data.images = parseDdgImages($images, resultCount);
                }
            }
            const elapsed = Date.now() - startTime;
            // Track usage
            const isSoftLimited = req.auth?.softLimited === true;
            const hasExtraUsage = req.auth?.extraUsageAvailable === true;
            const pgStore = authStore;
            if (req.auth?.keyInfo?.key && typeof pgStore.trackBurstUsage === 'function') {
                // Track burst usage (always)
                await pgStore.trackBurstUsage(req.auth.keyInfo.key);
                // If soft-limited with extra usage available, charge to extra usage
                if (isSoftLimited && hasExtraUsage) {
                    const extraResult = await pgStore.trackExtraUsage(req.auth.keyInfo.key, 'search', `search:${q}`, elapsed, 200);
                    if (extraResult.success) {
                        res.setHeader('X-Extra-Usage-Charged', `$${extraResult.cost.toFixed(4)}`);
                        res.setHeader('X-Extra-Usage-New-Balance', extraResult.newBalance.toFixed(2));
                    }
                }
                else if (!isSoftLimited) {
                    // Normal weekly usage tracking
                    await pgStore.trackUsage(req.auth.keyInfo.key, 'search');
                }
            }
            // Cache results
            cache.set(cacheKey, {
                data,
                timestamp: Date.now(),
            });
            // Add headers
            res.setHeader('X-Cache', 'MISS');
            res.setHeader('X-Credits-Used', '1');
            res.setHeader('X-Processing-Time', elapsed.toString());
            res.setHeader('X-Fetch-Type', 'search');
            res.json({
                success: true,
                data,
            });
        }
        catch (error) {
            const err = error;
            // SECURITY: Generic error message to prevent information disclosure
            console.error('Search error:', err); // Log full error server-side
            res.status(500).json({
                success: false,
                error: {
                    type: 'search_failed',
                    message: 'Search request failed. If using Brave provider, verify your API key. Otherwise try again.',
                    hint: 'Free search uses DuckDuckGo (no key required). For higher quality, add provider=brave&searchApiKey=YOUR_KEY',
                    docs: 'https://webpeel.dev/docs/api-reference#search',
                },
                requestId: req.requestId,
            });
        }
    });
    return router;
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Browser Session API — stateful Playwright sessions
3
+ *
4
+ * POST /v1/session → create session, returns { sessionId, expiresAt }
5
+ * GET /v1/session/:id → get current page content (Readability text)
6
+ * POST /v1/session/:id/navigate → navigate to URL { url }
7
+ * POST /v1/session/:id/act → execute PageActions array
8
+ * GET /v1/session/:id/screenshot → take screenshot (image/png)
9
+ * DELETE /v1/session/:id → close session
10
+ *
11
+ * Use cases: login flows, multi-step automation, UI testing.
12
+ * This is what Browserbase charges $500/mo for — built into WebPeel.
13
+ */
14
+ import { Router } from 'express';
15
/**
 * Builds the Express router for the `/v1/session` endpoints listed in this
 * file's header comment.
 *
 * @returns The session router.
 */
+ export declare function createSessionRouter(): Router;