webpeel 0.20.2 → 0.20.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/dist/server/app.d.ts +14 -0
  2. package/dist/server/app.js +384 -0
  3. package/dist/server/auth-store.d.ts +27 -0
  4. package/dist/server/auth-store.js +88 -0
  5. package/dist/server/email-service.d.ts +21 -0
  6. package/dist/server/email-service.js +79 -0
  7. package/dist/server/job-queue.d.ts +100 -0
  8. package/dist/server/job-queue.js +145 -0
  9. package/dist/server/logger.d.ts +10 -0
  10. package/dist/server/logger.js +37 -0
  11. package/dist/server/middleware/auth.d.ts +28 -0
  12. package/dist/server/middleware/auth.js +221 -0
  13. package/dist/server/middleware/rate-limit.d.ts +24 -0
  14. package/dist/server/middleware/rate-limit.js +167 -0
  15. package/dist/server/middleware/url-validator.d.ts +15 -0
  16. package/dist/server/middleware/url-validator.js +186 -0
  17. package/dist/server/openapi.yaml +6418 -0
  18. package/dist/server/pg-auth-store.d.ts +132 -0
  19. package/dist/server/pg-auth-store.js +472 -0
  20. package/dist/server/pg-job-queue.d.ts +59 -0
  21. package/dist/server/pg-job-queue.js +375 -0
  22. package/dist/server/premium/domain-intel.d.ts +16 -0
  23. package/dist/server/premium/domain-intel.js +133 -0
  24. package/dist/server/premium/index.d.ts +17 -0
  25. package/dist/server/premium/index.js +35 -0
  26. package/dist/server/premium/swr-cache.d.ts +14 -0
  27. package/dist/server/premium/swr-cache.js +34 -0
  28. package/dist/server/routes/activity.d.ts +6 -0
  29. package/dist/server/routes/activity.js +74 -0
  30. package/dist/server/routes/answer.d.ts +5 -0
  31. package/dist/server/routes/answer.js +125 -0
  32. package/dist/server/routes/ask.d.ts +28 -0
  33. package/dist/server/routes/ask.js +229 -0
  34. package/dist/server/routes/batch.d.ts +6 -0
  35. package/dist/server/routes/batch.js +493 -0
  36. package/dist/server/routes/cli-usage.d.ts +6 -0
  37. package/dist/server/routes/cli-usage.js +127 -0
  38. package/dist/server/routes/compat.d.ts +23 -0
  39. package/dist/server/routes/compat.js +652 -0
  40. package/dist/server/routes/deep-fetch.d.ts +8 -0
  41. package/dist/server/routes/deep-fetch.js +57 -0
  42. package/dist/server/routes/demo.d.ts +24 -0
  43. package/dist/server/routes/demo.js +517 -0
  44. package/dist/server/routes/do.d.ts +8 -0
  45. package/dist/server/routes/do.js +72 -0
  46. package/dist/server/routes/extract.d.ts +8 -0
  47. package/dist/server/routes/extract.js +235 -0
  48. package/dist/server/routes/fetch.d.ts +7 -0
  49. package/dist/server/routes/fetch.js +999 -0
  50. package/dist/server/routes/health.d.ts +7 -0
  51. package/dist/server/routes/health.js +19 -0
  52. package/dist/server/routes/jobs.d.ts +7 -0
  53. package/dist/server/routes/jobs.js +573 -0
  54. package/dist/server/routes/mcp.d.ts +14 -0
  55. package/dist/server/routes/mcp.js +141 -0
  56. package/dist/server/routes/oauth.d.ts +9 -0
  57. package/dist/server/routes/oauth.js +396 -0
  58. package/dist/server/routes/playground.d.ts +17 -0
  59. package/dist/server/routes/playground.js +283 -0
  60. package/dist/server/routes/screenshot.d.ts +22 -0
  61. package/dist/server/routes/screenshot.js +816 -0
  62. package/dist/server/routes/search.d.ts +6 -0
  63. package/dist/server/routes/search.js +303 -0
  64. package/dist/server/routes/session.d.ts +15 -0
  65. package/dist/server/routes/session.js +397 -0
  66. package/dist/server/routes/stats.d.ts +6 -0
  67. package/dist/server/routes/stats.js +71 -0
  68. package/dist/server/routes/stripe.d.ts +15 -0
  69. package/dist/server/routes/stripe.js +294 -0
  70. package/dist/server/routes/users.d.ts +8 -0
  71. package/dist/server/routes/users.js +1671 -0
  72. package/dist/server/routes/watch.d.ts +15 -0
  73. package/dist/server/routes/watch.js +309 -0
  74. package/dist/server/routes/webhooks.d.ts +26 -0
  75. package/dist/server/routes/webhooks.js +170 -0
  76. package/dist/server/routes/youtube.d.ts +6 -0
  77. package/dist/server/routes/youtube.js +130 -0
  78. package/dist/server/sentry.d.ts +13 -0
  79. package/dist/server/sentry.js +38 -0
  80. package/dist/server/types.d.ts +15 -0
  81. package/dist/server/types.js +7 -0
  82. package/dist/server/utils/response.d.ts +44 -0
  83. package/dist/server/utils/response.js +69 -0
  84. package/dist/server/utils/sse.d.ts +22 -0
  85. package/dist/server/utils/sse.js +38 -0
  86. package/package.json +2 -1
@@ -0,0 +1,999 @@
1
+ /**
2
+ * Fetch endpoint with caching
3
+ */
4
+ import { Router } from 'express';
5
+ import '../types.js'; // Augments Express.Request with requestId
6
+ import { peel } from '../../index.js';
7
+ import { normalizeActions } from '../../core/actions.js';
8
+ import { extractInlineJson } from '../../core/extract-inline.js';
9
+ import { LRUCache } from 'lru-cache';
10
+ import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
11
+ import { wantsEnvelope, successResponse } from '../utils/response.js';
12
+ import { getSchemaTemplate } from '../../core/schema-templates.js';
13
+ import { quickAnswer } from '../../core/quick-answer.js';
14
+ import { sendUsageAlertEmail } from '../email-service.js';
15
+ // ── Helper: extractive summarizer (TF-IDF-like sentence scoring) ─────────────
16
/**
 * Build a short extractive summary of `content`.
 *
 * The text is split into sentences at whitespace that follows `.`, `!` or `?`;
 * only sentences between 41 and 599 characters are considered. Each candidate
 * is scored by the average relative frequency of its words (words longer than
 * three characters, case-insensitive), with a bonus of 0.3 for the first
 * candidate and 0.1 for the last. The best-scoring candidates (at most five)
 * are returned in their original order, joined by single spaces.
 *
 * Fallbacks:
 *  - falsy `content` returns `''`;
 *  - no candidate sentences: the first `maxWords` whitespace-separated words,
 *    followed by an ellipsis when the text was cut;
 *  - three or fewer candidates: all of them, joined by spaces.
 *
 * @param {string} content - Text to summarize.
 * @param {number} [maxWords=150] - Target size; selection stops at the first
 *   ranked sentence that would push the total past `maxWords * 1.3` words.
 * @returns {string} The summary text.
 */
function extractSummary(content, maxWords = 150) {
    if (!content)
        return '';
    const sentences = content
        .split(/(?<=[.!?])\s+/)
        .map(s => s.trim())
        .filter(s => s.length > 40 && s.length < 600);
    if (sentences.length === 0) {
        const words = content.split(/\s+/);
        return words.slice(0, maxWords).join(' ') + (words.length > maxWords ? '\u2026' : '');
    }
    if (sentences.length <= 3)
        return sentences.join(' ');
    const tokenize = (text) => text.toLowerCase().split(/\W+/).filter(w => w.length > 3);
    // A Map (not a plain object) holds the counts: page text routinely contains
    // tokens such as "constructor" or "__proto__", which on a plain object
    // resolve to inherited members and turn the counts (and every score) into NaN.
    const wordFreq = new Map();
    // Track the maximum while counting instead of spreading every count into
    // Math.max(), which can exceed the engine's argument limit on huge pages.
    let maxFreq = 1;
    for (const w of tokenize(content)) {
        const count = (wordFreq.get(w) ?? 0) + 1;
        wordFreq.set(w, count);
        if (count > maxFreq)
            maxFreq = count;
    }
    const scored = sentences.map((sentence, idx) => {
        const words = tokenize(sentence);
        const score = words.reduce((sum, w) => sum + (wordFreq.get(w) ?? 0) / maxFreq, 0) / Math.max(1, words.length);
        const posBonus = idx === 0 ? 0.3 : idx === sentences.length - 1 ? 0.1 : 0;
        return { sentence, score: score + posBonus, idx };
    });
    scored.sort((a, b) => b.score - a.score);
    const selected = [];
    let wc = 0;
    for (const item of scored) {
        const itemWc = item.sentence.split(/\s+/).length;
        if (wc + itemWc > maxWords * 1.3)
            break;
        selected.push(item);
        wc += itemWc;
        if (selected.length >= 5)
            break;
    }
    // Restore document order so the summary reads naturally.
    selected.sort((a, b) => a.idx - b.idx);
    return selected.map(s => s.sentence).join(' ');
}
55
+ // ── Helper: check usage and determine if alert email should be sent ───────────
56
/**
 * Look up a user's usage for the current week and report whether a usage-alert
 * email is due.
 *
 * Runs one query through `pgStore.pool.query` that joins the user with their
 * API keys and the matching `weekly_usage` rows. An alert is due when the
 * rounded usage percentage reaches the user's `alert_threshold` and the stored
 * `alert_sent_at` is not within the last seven days.
 *
 * @param {object} pgStore - Store exposing `pool.query(sql, params)`.
 * @param {*} userId - Value bound to `users.id` in the query.
 * @returns {Promise<object>} `{ shouldSendAlert: false }` when the user row is
 *   missing or has no threshold; otherwise the decision together with
 *   `usagePercent`, `used`, `total`, `userEmail`, `userName`, `userTier` and
 *   `alertEmail`.
 */
async function checkAndTriggerAlert(pgStore, userId) {
    // Week label "YYYY-Www", counted relative to January 4th (UTC).
    // NOTE(review): presumably matches the format written to weekly_usage.week
    // elsewhere in the project — confirm before changing the formula.
    const today = new Date();
    const utcYear = today.getUTCFullYear();
    const januaryFourth = new Date(Date.UTC(utcYear, 0, 4));
    const weekIndex = Math.ceil(((today.getTime() - januaryFourth.getTime()) / 86400000 + januaryFourth.getUTCDay() + 1) / 7);
    const weekLabel = `${utcYear}-W${String(weekIndex).padStart(2, '0')}`;

    const usageSql = `SELECT u.email, u.name, u.tier, u.alert_threshold, u.alert_email, u.alert_sent_at,
u.weekly_limit,
COALESCE(SUM(wu.total_count), 0) AS total_used,
u.weekly_limit + COALESCE(MAX(wu.rollover_credits), 0) AS total_available
FROM users u
LEFT JOIN api_keys ak ON ak.user_id = u.id
LEFT JOIN weekly_usage wu ON wu.api_key_id = ak.id AND wu.week = $2
WHERE u.id = $1
GROUP BY u.id, u.email, u.name, u.tier, u.alert_threshold, u.alert_email, u.alert_sent_at, u.weekly_limit`;
    const queryResult = await pgStore.pool.query(usageSql, [userId, weekLabel]);
    const account = queryResult.rows[0];

    // No such user, or alerts not configured: nothing to send.
    if (!account || !account.alert_threshold) {
        return { shouldSendAlert: false };
    }

    const used = parseInt(account.total_used, 10) || 0;
    const total = parseInt(account.total_available, 10) || account.weekly_limit || 999;
    let usagePercent = 0;
    if (total > 0) {
        usagePercent = Math.round((used / total) * 100);
    }

    // Suppress the alert when one was already sent within the last seven days.
    let sentRecently = false;
    if (account.alert_sent_at) {
        const lastSentMs = new Date(account.alert_sent_at).getTime();
        const cutoffMs = Date.now() - 7 * 24 * 60 * 60 * 1000;
        sentRecently = lastSentMs > cutoffMs;
    }
    const thresholdReached = usagePercent >= account.alert_threshold;

    return {
        shouldSendAlert: thresholdReached && !sentRecently,
        usagePercent,
        used,
        total,
        userEmail: account.email,
        userName: account.name || undefined,
        userTier: account.tier,
        alertEmail: account.alert_email || undefined,
    };
}
95
+ const VALID_LLM_PROVIDERS = ['openai', 'anthropic', 'google']; // NOTE(review): presumably the accepted values for the POST body's `llmProvider` field — no use is visible in this part of the file; confirm.
96
+ export function createFetchRouter(authStore) {
97
+ const router = Router();
98
+ // LRU cache: 5 minute TTL, max 500 entries, 100MB total size
99
+ const cache = new LRUCache({
100
+ max: 500,
101
+ ttl: 5 * 60 * 1000, // 5 minutes default
102
+ maxSize: 100 * 1024 * 1024, // 100MB
103
+ sizeCalculation: (entry) => {
104
+ return JSON.stringify(entry).length;
105
+ },
106
+ });
107
+ router.get('/v1/fetch', async (req, res) => {
108
+ try {
109
+ // Require authentication — API key or JWT session
110
+ const userId = req.auth?.keyInfo?.accountId || req.user?.userId;
111
+ if (!userId) {
112
+ res.status(401).json({
113
+ success: false,
114
+ error: {
115
+ type: 'unauthorized',
116
+ message: 'API key required. Get one free at https://app.webpeel.dev/keys',
117
+ hint: 'Get a free API key at https://app.webpeel.dev/keys',
118
+ docs: 'https://webpeel.dev/docs/errors#unauthorized',
119
+ },
120
+ requestId: req.requestId,
121
+ });
122
+ return;
123
+ }
124
+ const { url, render, wait, format, includeTags, excludeTags, images, location, languages, onlyMainContent, actions, maxAge, storeInCache, stream, noCache, cacheTtl, budget, question, summary, readable, stealth, screenshot, maxTokens, selector, exclude, fullPage, raw, lite, timeout, schema, detail, } = req.query;
125
+ const detailMode = detail || 'standard';
126
+ // Validate URL parameter
127
+ if (!url || typeof url !== 'string') {
128
+ res.status(400).json({
129
+ success: false,
130
+ error: {
131
+ type: 'invalid_request',
132
+ message: 'Missing or invalid "url" parameter.',
133
+ hint: 'Pass a URL as a query parameter: GET /v1/fetch?url=https://example.com',
134
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
135
+ },
136
+ requestId: req.requestId,
137
+ });
138
+ return;
139
+ }
140
+ // SECURITY: Validate URL format and length
141
+ if (url.length > 2048) {
142
+ res.status(400).json({
143
+ success: false,
144
+ error: {
145
+ type: 'invalid_url',
146
+ message: 'URL too long (max 2048 characters)',
147
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
148
+ },
149
+ requestId: req.requestId,
150
+ });
151
+ return;
152
+ }
153
+ try {
154
+ const parsed = new URL(url);
155
+ // Normalize URL for consistent caching
156
+ const normalizedUrl = parsed.href;
157
+ // Use normalized URL for cache key
158
+ if (normalizedUrl !== url) {
159
+ // URL was normalized, update for caching
160
+ }
161
+ }
162
+ catch {
163
+ res.status(400).json({
164
+ success: false,
165
+ error: {
166
+ type: 'invalid_url',
167
+ message: 'Invalid URL format',
168
+ hint: 'Ensure the URL includes a scheme (https://) and a valid hostname',
169
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
170
+ },
171
+ requestId: req.requestId,
172
+ });
173
+ return;
174
+ }
175
+ // SECURITY: Validate URL to prevent SSRF attacks
176
+ try {
177
+ validateUrlForSSRF(url);
178
+ }
179
+ catch (error) {
180
+ if (error instanceof SSRFError) {
181
+ res.status(400).json({
182
+ success: false,
183
+ error: {
184
+ type: 'forbidden_url',
185
+ message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
186
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
187
+ },
188
+ requestId: req.requestId,
189
+ });
190
+ return;
191
+ }
192
+ throw error;
193
+ }
194
+ // Parse actions query param (JSON-encoded array)
195
+ let parsedActions;
196
+ if (actions && typeof actions === 'string') {
197
+ try {
198
+ const raw = JSON.parse(actions);
199
+ parsedActions = normalizeActions(raw);
200
+ }
201
+ catch (e) {
202
+ res.status(400).json({
203
+ success: false,
204
+ error: {
205
+ type: 'invalid_request',
206
+ message: 'Invalid "actions" parameter: must be a valid JSON array',
207
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
208
+ },
209
+ requestId: req.requestId,
210
+ });
211
+ return;
212
+ }
213
+ }
214
+ // Build cache key (include new parameters)
215
+ const actionsKey = parsedActions ? JSON.stringify(parsedActions) : '';
216
+ const cacheKey = `fetch:${url}:${render}:${wait}:${format}:${includeTags}:${excludeTags}:${images}:${location}:${languages}:${onlyMainContent}:${stream}:${actionsKey}:${budget}:${question}:${summary}:${readable}:${stealth}:${screenshot}:${maxTokens}:${selector}:${exclude}:${fullPage}:${raw}`;
217
+ // Cache bypass: ?noCache=true or Cache-Control: no-cache header
218
+ const bypassCache = noCache === 'true' || req.headers['cache-control'] === 'no-cache';
219
+ // Per-request TTL (cacheTtl in seconds, default 300s = 5 min)
220
+ const cacheTtlMs = cacheTtl !== undefined
221
+ ? parseInt(cacheTtl, 10) * 1000
222
+ : 5 * 60 * 1000;
223
+ // Check cache (with maxAge support)
224
+ const maxAgeMs = maxAge !== undefined ? parseInt(maxAge, 10) : 172800000; // Default 2 days
225
+ if (!bypassCache) {
226
+ const cached = cache.get(cacheKey);
227
+ if (cached && maxAgeMs > 0) {
228
+ const cacheAge = Date.now() - cached.timestamp;
229
+ if (cacheAge < maxAgeMs && cacheAge < cacheTtlMs) {
230
+ res.setHeader('X-Cache', 'HIT');
231
+ res.setHeader('X-Cache-Age', Math.floor(cacheAge / 1000).toString());
232
+ if (wantsEnvelope(req)) {
233
+ successResponse(res, cached.result, {
234
+ requestId: req.requestId,
235
+ cached: true,
236
+ });
237
+ }
238
+ else {
239
+ res.json(cached.result);
240
+ }
241
+ return;
242
+ }
243
+ }
244
+ }
245
+ // Parse options
246
+ const isSoftLimited = req.auth?.softLimited === true;
247
+ const hasExtraUsage = req.auth?.extraUsageAvailable === true;
248
+ // Parse tag arrays from comma-separated strings
249
+ const includeTagsArray = includeTags
250
+ ? includeTags.split(',').map(t => t.trim()).filter(Boolean)
251
+ : undefined;
252
+ const excludeTagsArray = excludeTags
253
+ ? excludeTags.split(',').map(t => t.trim()).filter(Boolean)
254
+ : undefined;
255
+ const languagesArray = languages
256
+ ? languages.split(',').map(l => l.trim()).filter(Boolean)
257
+ : undefined;
258
+ // onlyMainContent is a shortcut for common include tags
259
+ const finalIncludeTags = onlyMainContent === 'true'
260
+ ? ['main', 'article', '.content', '#content']
261
+ : includeTagsArray;
262
+ // When actions are present, force browser mode (skip HTTP fast path)
263
+ const hasActions = parsedActions && parsedActions.length > 0;
264
+ const shouldRender = hasActions || render === 'true';
265
+ const options = {
266
+ // SOFT LIMIT: When over quota AND no extra usage, force HTTP-only
267
+ // If extra usage is available, allow full functionality
268
+ // Exception: actions always require render
269
+ render: (isSoftLimited && !hasExtraUsage && !hasActions) ? false : shouldRender,
270
+ wait: (isSoftLimited && !hasExtraUsage) ? 0 : (wait ? parseInt(wait, 10) : undefined),
271
+ format: format || 'markdown',
272
+ stream: stream === 'true',
273
+ includeTags: finalIncludeTags,
274
+ excludeTags: excludeTagsArray,
275
+ images: images === 'true',
276
+ actions: parsedActions,
277
+ location: location || languagesArray ? {
278
+ country: location,
279
+ languages: languagesArray,
280
+ } : undefined,
281
+ budget: budget ? parseInt(budget, 10) : undefined,
282
+ question: question,
283
+ readable: readable === 'true',
284
+ stealth: (isSoftLimited && !hasExtraUsage) ? false : stealth === 'true',
285
+ screenshot: (isSoftLimited && !hasExtraUsage) ? false : screenshot === 'true',
286
+ maxTokens: maxTokens ? parseInt(maxTokens, 10) : undefined,
287
+ selector: selector,
288
+ exclude: exclude ? exclude.split(',').map(s => s.trim()).filter(Boolean) : undefined,
289
+ fullPage: fullPage === 'true',
290
+ raw: raw === 'true',
291
+ lite: lite === 'true',
292
+ timeout: timeout ? parseInt(timeout, 10) : undefined,
293
+ };
294
+ // Auto-budget: default to 4000 tokens for API requests when no budget specified
295
+ // Opt-out: budget=0 explicitly disables. Lite mode disables auto-budget.
296
+ if (options.budget === undefined && !options.lite) {
297
+ options.budget = 4000;
298
+ res.setHeader('X-Auto-Budget', '4000');
299
+ }
300
+ // Inform the user if their request was degraded
301
+ if (isSoftLimited && !hasExtraUsage && render === 'true' && !hasActions) {
302
+ res.setHeader('X-Degraded', 'render=true downgraded to HTTP-only (quota exceeded)');
303
+ }
304
+ if (isSoftLimited && !hasExtraUsage && stealth === 'true') {
305
+ res.setHeader('X-Degraded', 'stealth=true downgraded (quota exceeded)');
306
+ }
307
+ if (isSoftLimited && !hasExtraUsage && screenshot === 'true') {
308
+ res.setHeader('X-Degraded', 'screenshot=true downgraded (quota exceeded)');
309
+ }
310
+ // Validate wait parameter
311
+ if (options.wait !== undefined && (isNaN(options.wait) || options.wait < 0 || options.wait > 60000)) {
312
+ res.status(400).json({
313
+ success: false,
314
+ error: {
315
+ type: 'invalid_request',
316
+ message: 'Invalid "wait" parameter: must be between 0 and 60000ms',
317
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
318
+ },
319
+ requestId: req.requestId,
320
+ });
321
+ return;
322
+ }
323
+ // Validate format parameter
324
+ if (!['markdown', 'text', 'html', 'clean'].includes(options.format || '')) {
325
+ res.status(400).json({
326
+ success: false,
327
+ error: {
328
+ type: 'invalid_request',
329
+ message: 'Invalid "format" parameter: must be "markdown", "text", "html", or "clean"',
330
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
331
+ },
332
+ requestId: req.requestId,
333
+ });
334
+ return;
335
+ }
336
+ const shouldStream = options.stream === true;
337
+ if (shouldStream) {
338
+ res.setHeader('X-Stream', 'true');
339
+ if (typeof res.flushHeaders === 'function') {
340
+ res.flushHeaders();
341
+ }
342
+ }
343
+ // Fetch content
344
+ const startTime = Date.now();
345
+ const result = await peel(url, options);
346
+ const elapsed = Date.now() - startTime;
347
+ // --- BM25 Schema Template Extraction (GET, no LLM needed) ---
348
+ if (schema && typeof schema === 'string' && result.content) {
349
+ const template = getSchemaTemplate(schema);
350
+ if (template) {
351
+ const { quickAnswer } = await import('../../core/quick-answer.js');
352
+ const { smartExtractSchemaFields } = await import('../../core/schema-postprocess.js');
353
+ const extracted = smartExtractSchemaFields(result.content, template.fields, quickAnswer, {
354
+ pageTitle: result.title,
355
+ pageUrl: result.url,
356
+ metadata: result.metadata,
357
+ });
358
+ result.extracted = extracted;
359
+ }
360
+ }
361
+ // Determine fetch type from the result method
362
+ const fetchType = result.method === 'stealth' ? 'stealth' :
363
+ result.method === 'browser' ? 'stealth' : 'basic';
364
+ // Log request to database (PostgreSQL only)
365
+ const pgStore = authStore;
366
+ // Log usage for BOTH API key auth AND JWT session auth
367
+ const logUserId = req.auth?.keyInfo?.accountId || req.user?.userId;
368
+ if (logUserId && typeof pgStore.pool !== 'undefined') {
369
+ pgStore.pool.query(`INSERT INTO usage_logs
370
+ (user_id, endpoint, url, method, processing_time_ms, status_code, ip_address, user_agent, tokens_used)
371
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`, [
372
+ logUserId,
373
+ 'fetch',
374
+ url,
375
+ fetchType,
376
+ elapsed,
377
+ 200,
378
+ req.ip || req.socket.remoteAddress,
379
+ req.get('user-agent'),
380
+ result?.tokens || null,
381
+ ]).catch((err) => {
382
+ console.error('Failed to log request to usage_logs:', err);
383
+ });
384
+ }
385
+ // Track usage (check for trackBurstUsage method to detect PostgresAuthStore)
386
+ if (req.auth?.keyInfo?.key && typeof pgStore.trackBurstUsage === 'function') {
387
+ // Track burst usage (always)
388
+ await pgStore.trackBurstUsage(req.auth.keyInfo.key);
389
+ // If soft-limited with extra usage available, charge to extra usage
390
+ if (isSoftLimited && hasExtraUsage) {
391
+ const extraResult = await pgStore.trackExtraUsage(req.auth.keyInfo.key, fetchType, url, elapsed, 200 // PeelResult doesn't include statusCode, assume success
392
+ );
393
+ if (extraResult.success) {
394
+ res.setHeader('X-Extra-Usage-Charged', `$${extraResult.cost.toFixed(4)}`);
395
+ res.setHeader('X-Extra-Usage-New-Balance', extraResult.newBalance.toFixed(2));
396
+ }
397
+ else {
398
+ // Extra usage failed - fall back to soft limit
399
+ res.setHeader('X-Degraded', 'Extra usage insufficient, degraded to soft limit');
400
+ }
401
+ }
402
+ else if (!isSoftLimited) {
403
+ // Normal weekly usage tracking
404
+ await pgStore.trackUsage(req.auth.keyInfo.key, fetchType);
405
+ }
406
+ // If soft-limited WITHOUT extra usage, don't track (already over quota)
407
+ }
408
+ // Check usage alert (fire-and-forget, never block the response)
409
+ if (req.auth?.keyInfo?.accountId && typeof pgStore.pool !== 'undefined') {
410
+ try {
411
+ const alertResult = await checkAndTriggerAlert(pgStore, req.auth.keyInfo.accountId);
412
+ if (alertResult.shouldSendAlert && alertResult.usagePercent !== undefined) {
413
+ await sendUsageAlertEmail({
414
+ toEmail: alertResult.alertEmail || alertResult.userEmail,
415
+ userName: alertResult.userName,
416
+ usagePercent: alertResult.usagePercent,
417
+ used: alertResult.used,
418
+ total: alertResult.total,
419
+ tier: alertResult.userTier,
420
+ });
421
+ // Mark alert as sent so we don't spam (rate-limited to once/week)
422
+ await pgStore.pool.query('UPDATE users SET alert_sent_at = NOW() WHERE id = $1', [req.auth.keyInfo.accountId]);
423
+ }
424
+ }
425
+ catch (alertErr) {
426
+ // Never let alert errors affect the main response
427
+ console.warn('[alert] Failed to check/send alert:', alertErr);
428
+ }
429
+ }
430
+ // Cache result (unless storeInCache is explicitly false or cache bypass requested)
431
+ if (storeInCache !== 'false' && !bypassCache) {
432
+ cache.set(cacheKey, {
433
+ result,
434
+ timestamp: Date.now(),
435
+ }, { ttl: cacheTtlMs });
436
+ }
437
+ // Apply ?detail=brief mode: truncate content and prepend TL;DR
438
+ if (detailMode === 'brief' && result.content) {
439
+ const words = result.content.split(/\s+/);
440
+ const truncatedWords = words.slice(0, 500);
441
+ const truncated = truncatedWords.join(' ');
442
+ // Extract TL;DR from first non-empty paragraph
443
+ const firstPara = result.content
444
+ .split(/\n{2,}/)
445
+ .map((p) => p.replace(/^#+\s*/, '').trim())
446
+ .find((p) => p.length > 40 && !p.startsWith('!') && !p.startsWith('['));
447
+ const tldr = firstPara
448
+ ? firstPara.replace(/\s+/g, ' ').slice(0, 300) + (firstPara.length > 300 ? '...' : '')
449
+ : truncated.slice(0, 200) + '...';
450
+ result.content = `**TL;DR:** ${tldr}\n\n---\n\n${truncated}${words.length > 500 ? '\n\n*[Content truncated — use ?detail=full for complete output]*' : ''}`;
451
+ const tokenEstimate = Math.round(truncatedWords.length * 0.75);
452
+ res.setHeader('X-Detail-Mode', 'brief');
453
+ res.setHeader('X-Token-Estimate', tokenEstimate.toString());
454
+ }
455
+ // --- question → answer field (GET) ---
456
+ // When ?question= is provided, run quickAnswer() on the fetched content
457
+ // and expose the result as an `answer` field in the response.
458
+ const getAnswerResult = (question && typeof question === 'string' && result.content)
459
+ ? quickAnswer({ question, content: result.content, url: result.url })
460
+ : undefined;
461
+ // --- summary field (GET) ---
462
+ // When ?summary=true, return a truncated 500-word summary in a `summary` field.
463
+ const getSummaryText = (summary === 'true' && result.content)
464
+ ? extractSummary(result.content)
465
+ : undefined;
466
+ // Add usage headers (kept for backward compat; also surfaced in envelope metadata)
467
+ res.setHeader('X-Cache', 'MISS');
468
+ res.setHeader('X-Credits-Used', '1');
469
+ res.setHeader('X-Processing-Time', elapsed.toString());
470
+ res.setHeader('X-Fetch-Type', fetchType);
471
+ // Build response — extend result with optional answer/summary fields
472
+ const getResponseBody = { ...result };
473
+ if (getAnswerResult !== undefined)
474
+ getResponseBody.answer = getAnswerResult.answer;
475
+ if (getSummaryText !== undefined)
476
+ getResponseBody.summary = getSummaryText;
477
+ if (wantsEnvelope(req)) {
478
+ successResponse(res, getResponseBody, {
479
+ requestId: req.requestId,
480
+ processingTimeMs: elapsed,
481
+ creditsUsed: 1,
482
+ cached: false,
483
+ fetchType,
484
+ });
485
+ }
486
+ else {
487
+ res.json(getResponseBody);
488
+ }
489
+ }
490
+ catch (error) {
491
+ const err = error;
492
+ // Log error to database (PostgreSQL only)
493
+ const pgStore = authStore;
494
+ if (req.auth?.keyInfo?.accountId && typeof pgStore.pool !== 'undefined') {
495
+ const url = req.query.url;
496
+ const render = req.query.render === 'true';
497
+ const fetchType = render ? 'stealth' : 'basic';
498
+ pgStore.pool.query(`INSERT INTO usage_logs
499
+ (user_id, endpoint, url, method, status_code, error, ip_address, user_agent, tokens_used)
500
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`, [
501
+ req.auth.keyInfo.accountId,
502
+ 'fetch',
503
+ url,
504
+ fetchType,
505
+ 500,
506
+ err.message || 'Unknown error',
507
+ req.ip || req.socket.remoteAddress,
508
+ req.get('user-agent'),
509
+ null,
510
+ ]).catch((logErr) => {
511
+ console.error('Failed to log error to usage_logs:', logErr);
512
+ });
513
+ }
514
+ // SECURITY: Sanitize error messages to prevent information disclosure
515
+ if (err.code) {
516
+ // WebPeelError from core library - safe to expose with helpful context
517
+ const safeMessage = err.message.replace(/[<>"']/g, ''); // Remove HTML chars
518
+ const statusCode = err.code === 'TIMEOUT' ? 504
519
+ : err.code === 'BLOCKED' ? 403
520
+ : err.code === 'NETWORK' ? 502
521
+ : 500;
522
+ const hints = {
523
+ TIMEOUT: 'Try increasing timeout with ?wait=10000, or use render=true for JS-heavy sites.',
524
+ BLOCKED: 'This site blocks automated requests. Try adding render=true or use stealth mode (costs 5 credits).',
525
+ NETWORK: 'Could not reach the target URL. Verify the URL is correct and the site is online.',
526
+ };
527
+ res.status(statusCode).json({
528
+ success: false,
529
+ error: {
530
+ type: err.code,
531
+ message: safeMessage,
532
+ hint: hints[err.code] || undefined,
533
+ docs: 'https://webpeel.dev/docs/api-reference#errors',
534
+ },
535
+ requestId: req.requestId,
536
+ });
537
+ }
538
+ else {
539
+ // Unexpected error - generic message only
540
+ console.error('Fetch error:', err); // Log full error server-side
541
+ res.status(500).json({
542
+ success: false,
543
+ error: {
544
+ type: 'internal_error',
545
+ message: 'An unexpected error occurred while fetching the URL. If this persists, check https://webpeel.dev/status',
546
+ docs: 'https://webpeel.dev/docs/api-reference#errors',
547
+ },
548
+ requestId: req.requestId,
549
+ });
550
+ }
551
+ }
552
+ });
553
+ // -----------------------------------------------------------------------
554
+ // POST /v1/fetch — same as GET but accepts JSON body with extract param
555
+ // POST /v2/scrape — alias with identical behaviour
556
+ // -----------------------------------------------------------------------
557
+ async function handlePostFetch(req, res) {
558
+ try {
559
+ // Require authentication — API key or JWT session
560
+ const postUserId = req.auth?.keyInfo?.accountId || req.user?.userId;
561
+ if (!postUserId) {
562
+ res.status(401).json({
563
+ success: false,
564
+ error: {
565
+ type: 'unauthorized',
566
+ message: 'API key required. Get one free at https://app.webpeel.dev/keys',
567
+ hint: 'Get a free API key at https://app.webpeel.dev/keys',
568
+ docs: 'https://webpeel.dev/docs/errors#unauthorized',
569
+ },
570
+ requestId: req.requestId,
571
+ });
572
+ return;
573
+ }
574
+ const { url, render, wait, format, includeTags, excludeTags, images, location, languages, onlyMainContent, actions: rawActions, storeInCache: storeFlag,
575
+ // Cache control
576
+ noCache: noCacheBody, cacheTtl: cacheTtlBody,
577
+ // Inline extraction (BYOK)
578
+ extract, llmProvider, llmApiKey, llmModel,
579
+ // Firecrawl-compatible formats array
580
+ formats, stream,
581
+ // Extended peel options
582
+ budget, question, summary: summaryParam, readable, stealth, screenshot, maxTokens, selector, exclude, fullPage, raw, lite, timeout, proxies, chunk, device, viewportWidth, viewportHeight, waitUntil, waitSelector, blockResources, cloaked, schema: bodySchema, } = req.body;
583
+ // --- Validate URL -------------------------------------------------------
584
+ if (!url || typeof url !== 'string') {
585
+ res.status(400).json({
586
+ success: false,
587
+ error: {
588
+ type: 'invalid_request',
589
+ message: 'Missing or invalid "url" in request body.',
590
+ hint: 'Send JSON: { "url": "https://example.com" }',
591
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
592
+ },
593
+ requestId: req.requestId,
594
+ });
595
+ return;
596
+ }
597
+ if (url.length > 2048) {
598
+ res.status(400).json({
599
+ success: false,
600
+ error: {
601
+ type: 'invalid_url',
602
+ message: 'URL too long (max 2048 characters)',
603
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
604
+ },
605
+ requestId: req.requestId,
606
+ });
607
+ return;
608
+ }
609
+ try {
610
+ new URL(url);
611
+ }
612
+ catch {
613
+ res.status(400).json({
614
+ success: false,
615
+ error: {
616
+ type: 'invalid_url',
617
+ message: 'Invalid URL format',
618
+ hint: 'Ensure the URL includes a scheme (https://) and a valid hostname',
619
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
620
+ },
621
+ requestId: req.requestId,
622
+ });
623
+ return;
624
+ }
625
+ try {
626
+ validateUrlForSSRF(url);
627
+ }
628
+ catch (error) {
629
+ if (error instanceof SSRFError) {
630
+ res.status(400).json({
631
+ success: false,
632
+ error: {
633
+ type: 'forbidden_url',
634
+ message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
635
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
636
+ },
637
+ requestId: req.requestId,
638
+ });
639
+ return;
640
+ }
641
+ throw error;
642
+ }
643
+ // --- Parse and normalize actions -----------------------------------------
644
+ let postActions;
645
+ if (rawActions !== undefined) {
646
+ try {
647
+ postActions = normalizeActions(rawActions);
648
+ }
649
+ catch (e) {
650
+ res.status(400).json({
651
+ success: false,
652
+ error: {
653
+ type: 'invalid_request',
654
+ message: `Invalid "actions" parameter: ${e.message}`,
655
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
656
+ },
657
+ requestId: req.requestId,
658
+ });
659
+ return;
660
+ }
661
+ }
662
+ // --- Cache bypass and lookup -------------------------------------------
663
+ const postBypassCache = noCacheBody === true || req.headers['cache-control'] === 'no-cache';
664
+ const postCacheTtlMs = typeof cacheTtlBody === 'number' ? cacheTtlBody * 1000 : 5 * 60 * 1000;
665
+ const postActionsKey = postActions ? JSON.stringify(postActions) : '';
666
+ const postCacheKey = `fetch:${url}:${render}:${wait}:${format}:${JSON.stringify(includeTags)}:${JSON.stringify(excludeTags)}:${images}:${location}:${JSON.stringify(languages)}:${onlyMainContent}:${stream}:${postActionsKey}:${budget}:${question}:${summaryParam}:${readable}:${stealth}:${screenshot}:${maxTokens}:${selector}:${JSON.stringify(exclude)}:${fullPage}:${raw}`;
667
+ if (!postBypassCache && !extract) {
668
+ const cached = cache.get(postCacheKey);
669
+ if (cached) {
670
+ const cacheAge = Date.now() - cached.timestamp;
671
+ if (cacheAge < postCacheTtlMs) {
672
+ res.setHeader('X-Cache', 'HIT');
673
+ res.setHeader('X-Cache-Age', Math.floor(cacheAge / 1000).toString());
674
+ if (wantsEnvelope(req)) {
675
+ successResponse(res, cached.result, {
676
+ requestId: req.requestId,
677
+ cached: true,
678
+ });
679
+ }
680
+ else {
681
+ res.json(cached.result);
682
+ }
683
+ return;
684
+ }
685
+ }
686
+ }
687
+ // --- Resolve inline extract from body or Firecrawl-compatible formats ---
688
+ let resolvedExtract = extract;
689
+ if (!resolvedExtract && Array.isArray(formats)) {
690
+ const jsonFormat = formats.find((f) => (typeof f === 'object' && f !== null && f.type === 'json') ||
691
+ (typeof f === 'string' && f === 'json'));
692
+ if (jsonFormat && typeof jsonFormat === 'object' && (jsonFormat.schema || jsonFormat.prompt)) {
693
+ resolvedExtract = {
694
+ schema: jsonFormat.schema,
695
+ prompt: jsonFormat.prompt,
696
+ };
697
+ }
698
+ }
699
+ // Resolve schema template names (e.g. "product", "article") to field objects
700
+ if (resolvedExtract && typeof resolvedExtract.schema === 'string') {
701
+ const tmpl = getSchemaTemplate(resolvedExtract.schema);
702
+ if (tmpl) {
703
+ resolvedExtract = { ...resolvedExtract, schema: tmpl.fields };
704
+ }
705
+ else {
706
+ // Try parsing as JSON string
707
+ try {
708
+ resolvedExtract = { ...resolvedExtract, schema: JSON.parse(resolvedExtract.schema) };
709
+ }
710
+ catch { /* leave as-is */ }
711
+ }
712
+ }
713
+ // Validate LLM params if extraction is requested
714
+ if (resolvedExtract && (resolvedExtract.schema || resolvedExtract.prompt)) {
715
+ if (!llmProvider || !VALID_LLM_PROVIDERS.includes(llmProvider)) {
716
+ res.status(400).json({
717
+ success: false,
718
+ error: {
719
+ type: 'invalid_request',
720
+ message: `"llmProvider" is required for inline extraction and must be one of: ${VALID_LLM_PROVIDERS.join(', ')}`,
721
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
722
+ },
723
+ requestId: req.requestId,
724
+ });
725
+ return;
726
+ }
727
+ if (!llmApiKey || typeof llmApiKey !== 'string' || llmApiKey.trim().length === 0) {
728
+ res.status(400).json({
729
+ success: false,
730
+ error: {
731
+ type: 'invalid_request',
732
+ message: 'Missing or invalid "llmApiKey" (BYOK required for inline extraction)',
733
+ hint: 'Pass your LLM provider API key in the "llmApiKey" field',
734
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
735
+ },
736
+ requestId: req.requestId,
737
+ });
738
+ return;
739
+ }
740
+ }
741
+ // --- Build PeelOptions ---------------------------------------------------
742
+ const isSoftLimited = req.auth?.softLimited === true;
743
+ const hasExtraUsage = req.auth?.extraUsageAvailable === true;
744
+ const includeTagsArray = Array.isArray(includeTags) ? includeTags : undefined;
745
+ const excludeTagsArray = Array.isArray(excludeTags) ? excludeTags : undefined;
746
+ const languagesArray = Array.isArray(languages) ? languages : undefined;
747
+ const finalIncludeTags = onlyMainContent === true
748
+ ? ['main', 'article', '.content', '#content']
749
+ : includeTagsArray;
750
+ const resolvedFormat = format || 'markdown';
751
+ if (!['markdown', 'text', 'html', 'clean'].includes(resolvedFormat)) {
752
+ res.status(400).json({
753
+ success: false,
754
+ error: {
755
+ type: 'invalid_request',
756
+ message: 'Invalid "format" parameter: must be "markdown", "text", "html", or "clean"',
757
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
758
+ },
759
+ requestId: req.requestId,
760
+ });
761
+ return;
762
+ }
763
+ const resolvedWait = typeof wait === 'number' ? wait : undefined;
764
+ if (resolvedWait !== undefined && (isNaN(resolvedWait) || resolvedWait < 0 || resolvedWait > 60000)) {
765
+ res.status(400).json({
766
+ success: false,
767
+ error: {
768
+ type: 'invalid_request',
769
+ message: 'Invalid "wait" parameter: must be between 0 and 60000ms',
770
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
771
+ },
772
+ requestId: req.requestId,
773
+ });
774
+ return;
775
+ }
776
+ // When actions are present, force browser mode
777
+ const postHasActions = postActions && postActions.length > 0;
778
+ const postShouldRender = postHasActions || render === true;
779
+ // Normalize exclude: accept string (comma-separated) or string array
780
+ const excludeArray = exclude
781
+ ? (Array.isArray(exclude) ? exclude : exclude.split(',').map(s => s.trim()).filter(Boolean))
782
+ : undefined;
783
+ const options = {
784
+ render: (isSoftLimited && !hasExtraUsage && !postHasActions) ? false : postShouldRender,
785
+ wait: (isSoftLimited && !hasExtraUsage) ? 0 : resolvedWait,
786
+ format: resolvedFormat,
787
+ stream: stream === true,
788
+ includeTags: finalIncludeTags,
789
+ excludeTags: excludeTagsArray,
790
+ images: images === true,
791
+ actions: postActions,
792
+ location: location || languagesArray ? {
793
+ country: location,
794
+ languages: languagesArray,
795
+ } : undefined,
796
+ budget: typeof budget === 'number' ? budget : undefined,
797
+ question: question,
798
+ readable: readable === true,
799
+ stealth: (isSoftLimited && !hasExtraUsage) ? false : stealth === true,
800
+ screenshot: (isSoftLimited && !hasExtraUsage) ? false : screenshot === true,
801
+ maxTokens: typeof maxTokens === 'number' ? maxTokens : undefined,
802
+ selector: selector,
803
+ exclude: excludeArray,
804
+ fullPage: fullPage === true,
805
+ raw: raw === true,
806
+ lite: lite === true,
807
+ timeout: typeof timeout === 'number' ? timeout : undefined,
808
+ proxies: Array.isArray(proxies) ? proxies : undefined,
809
+ device: device,
810
+ viewportWidth: typeof viewportWidth === 'number' ? viewportWidth : undefined,
811
+ viewportHeight: typeof viewportHeight === 'number' ? viewportHeight : undefined,
812
+ waitUntil: waitUntil,
813
+ waitSelector: waitSelector,
814
+ blockResources: Array.isArray(blockResources) ? blockResources : undefined,
815
+ };
816
+ if (cloaked)
817
+ options.cloaked = cloaked;
818
+ if (chunk)
819
+ options.chunk = chunk === true ? true : chunk;
820
+ // Auto-budget: default to 4000 tokens for API requests when no budget specified
821
+ // Opt-out: budget=0 explicitly disables. Lite mode disables auto-budget.
822
+ if (options.budget === undefined && !options.lite) {
823
+ options.budget = 4000;
824
+ res.setHeader('X-Auto-Budget', '4000');
825
+ }
826
+ if (isSoftLimited && !hasExtraUsage && render === true && !postHasActions) {
827
+ res.setHeader('X-Degraded', 'render=true downgraded to HTTP-only (quota exceeded)');
828
+ }
829
+ if (isSoftLimited && !hasExtraUsage && stealth === true) {
830
+ res.setHeader('X-Degraded', 'stealth=true downgraded (quota exceeded)');
831
+ }
832
+ if (isSoftLimited && !hasExtraUsage && screenshot === true) {
833
+ res.setHeader('X-Degraded', 'screenshot=true downgraded (quota exceeded)');
834
+ }
835
+ const shouldStream = options.stream === true;
836
+ if (shouldStream) {
837
+ res.setHeader('X-Stream', 'true');
838
+ if (typeof res.flushHeaders === 'function') {
839
+ res.flushHeaders();
840
+ }
841
+ }
842
+ // --- Fetch content -------------------------------------------------------
843
+ const startTime = Date.now();
844
+ const result = await peel(url, options);
845
+ const elapsed = Date.now() - startTime;
846
+ // --- BM25 Schema Template Extraction (POST, no LLM needed) ---
847
+ if (bodySchema && typeof bodySchema === 'string' && result.content) {
848
+ const template = getSchemaTemplate(bodySchema);
849
+ if (template) {
850
+ const { quickAnswer } = await import('../../core/quick-answer.js');
851
+ const { smartExtractSchemaFields } = await import('../../core/schema-postprocess.js');
852
+ const extracted = smartExtractSchemaFields(result.content, template.fields, quickAnswer, {
853
+ pageTitle: result.title,
854
+ pageUrl: result.url,
855
+ metadata: result.metadata,
856
+ });
857
+ result.extracted = extracted;
858
+ }
859
+ }
860
+ // --- Inline extraction (post-fetch) -------------------------------------
861
+ let jsonData;
862
+ let extractTokensUsed;
863
+ if (resolvedExtract && (resolvedExtract.schema || resolvedExtract.prompt) && llmApiKey) {
864
+ const extractResult = await extractInlineJson(result.content, {
865
+ schema: resolvedExtract.schema,
866
+ prompt: resolvedExtract.prompt,
867
+ llmProvider: llmProvider,
868
+ llmApiKey: llmApiKey.trim(),
869
+ llmModel,
870
+ });
871
+ jsonData = extractResult.data;
872
+ extractTokensUsed = extractResult.tokensUsed;
873
+ }
874
+ // --- Usage tracking (same as GET) ----------------------------------------
875
+ const fetchType = result.method === 'stealth' ? 'stealth' :
876
+ result.method === 'browser' ? 'stealth' : 'basic';
877
+ const pgStore = authStore;
878
+ if (req.auth?.keyInfo?.accountId && typeof pgStore.pool !== 'undefined') {
879
+ pgStore.pool.query(`INSERT INTO usage_logs
880
+ (user_id, endpoint, url, method, processing_time_ms, status_code, ip_address, user_agent, tokens_used)
881
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`, [
882
+ req.auth.keyInfo.accountId,
883
+ 'fetch',
884
+ url,
885
+ fetchType,
886
+ elapsed,
887
+ 200,
888
+ req.ip || req.socket.remoteAddress,
889
+ req.get('user-agent'),
890
+ result?.tokens || null,
891
+ ]).catch((err) => {
892
+ console.error('Failed to log request to usage_logs:', err);
893
+ });
894
+ }
895
+ if (req.auth?.keyInfo?.key && typeof pgStore.trackBurstUsage === 'function') {
896
+ await pgStore.trackBurstUsage(req.auth.keyInfo.key);
897
+ if (isSoftLimited && hasExtraUsage) {
898
+ const extraResult = await pgStore.trackExtraUsage(req.auth.keyInfo.key, fetchType, url, elapsed, 200);
899
+ if (extraResult.success) {
900
+ res.setHeader('X-Extra-Usage-Charged', `$${extraResult.cost.toFixed(4)}`);
901
+ res.setHeader('X-Extra-Usage-New-Balance', extraResult.newBalance.toFixed(2));
902
+ }
903
+ else {
904
+ res.setHeader('X-Degraded', 'Extra usage insufficient, degraded to soft limit');
905
+ }
906
+ }
907
+ else if (!isSoftLimited) {
908
+ await pgStore.trackUsage(req.auth.keyInfo.key, fetchType);
909
+ }
910
+ }
911
+ // Cache result (skip extraction results — they depend on user's LLM keys)
912
+ if (storeFlag !== false && !postBypassCache && !resolvedExtract) {
913
+ cache.set(postCacheKey, { result, timestamp: Date.now() }, { ttl: postCacheTtlMs });
914
+ }
915
+ // --- question → answer field (POST) ---
916
+ // When question is provided, run quickAnswer() on the fetched content
917
+ // and expose the result as an `answer` field in the response.
918
+ const postAnswerResult = (question && typeof question === 'string' && result.content)
919
+ ? quickAnswer({ question, content: result.content, url: result.url })
920
+ : undefined;
921
+ // --- summary field (POST) ---
922
+ // When summary: true, return a truncated 500-word summary in a `summary` field.
923
+ const postSummaryText = (summaryParam === true && result.content)
924
+ ? extractSummary(result.content)
925
+ : undefined;
926
+ // --- Build response ------------------------------------------------------
927
+ // Headers kept for backward compat; also surfaced in envelope metadata.
928
+ res.setHeader('X-Cache', 'MISS');
929
+ res.setHeader('X-Credits-Used', '1');
930
+ res.setHeader('X-Processing-Time', elapsed.toString());
931
+ res.setHeader('X-Fetch-Type', fetchType);
932
+ const responseBody = { ...result };
933
+ if (jsonData !== undefined) {
934
+ responseBody.json = jsonData;
935
+ }
936
+ if (extractTokensUsed) {
937
+ responseBody.extractTokensUsed = extractTokensUsed;
938
+ }
939
+ if (postAnswerResult !== undefined) {
940
+ responseBody.answer = postAnswerResult.answer;
941
+ }
942
+ if (postSummaryText !== undefined) {
943
+ responseBody.summary = postSummaryText;
944
+ }
945
+ if (wantsEnvelope(req)) {
946
+ successResponse(res, responseBody, {
947
+ requestId: req.requestId,
948
+ processingTimeMs: elapsed,
949
+ creditsUsed: 1,
950
+ cached: false,
951
+ fetchType,
952
+ });
953
+ }
954
+ else {
955
+ res.json(responseBody);
956
+ }
957
+ }
958
+ catch (error) {
959
+ const err = error;
960
+ console.error('POST fetch/scrape error:', err);
961
+ if (err.code) {
962
+ const safeMessage = err.message.replace(/[<>"']/g, '');
963
+ const statusCode = err.code === 'TIMEOUT' ? 504
964
+ : err.code === 'BLOCKED' ? 403
965
+ : err.code === 'NETWORK' ? 502
966
+ : 500;
967
+ const hints = {
968
+ TIMEOUT: 'Try increasing timeout, or set render:true for JS-heavy sites.',
969
+ BLOCKED: 'Site blocks automated requests. Try render:true or stealth mode.',
970
+ NETWORK: 'Could not reach the target URL. Verify it is correct and online.',
971
+ };
972
+ res.status(statusCode).json({
973
+ success: false,
974
+ error: {
975
+ type: err.code,
976
+ message: safeMessage,
977
+ hint: hints[err.code] || undefined,
978
+ docs: 'https://webpeel.dev/docs/api-reference#errors',
979
+ },
980
+ requestId: req.requestId,
981
+ });
982
+ }
983
+ else {
984
+ res.status(500).json({
985
+ success: false,
986
+ error: {
987
+ type: 'internal_error',
988
+ message: 'An unexpected error occurred. If this persists, check https://webpeel.dev/status',
989
+ docs: 'https://webpeel.dev/docs/api-reference#errors',
990
+ },
991
+ requestId: req.requestId,
992
+ });
993
+ }
994
+ }
995
+ }
996
+ router.post('/v1/fetch', handlePostFetch);
997
+ router.post('/v2/scrape', handlePostFetch);
998
+ return router;
999
+ }