webpeel 0.20.2 → 0.20.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/dist/server/app.d.ts +14 -0
  2. package/dist/server/app.js +384 -0
  3. package/dist/server/auth-store.d.ts +27 -0
  4. package/dist/server/auth-store.js +88 -0
  5. package/dist/server/email-service.d.ts +21 -0
  6. package/dist/server/email-service.js +79 -0
  7. package/dist/server/job-queue.d.ts +100 -0
  8. package/dist/server/job-queue.js +145 -0
  9. package/dist/server/logger.d.ts +10 -0
  10. package/dist/server/logger.js +37 -0
  11. package/dist/server/middleware/auth.d.ts +28 -0
  12. package/dist/server/middleware/auth.js +221 -0
  13. package/dist/server/middleware/rate-limit.d.ts +24 -0
  14. package/dist/server/middleware/rate-limit.js +167 -0
  15. package/dist/server/middleware/url-validator.d.ts +15 -0
  16. package/dist/server/middleware/url-validator.js +186 -0
  17. package/dist/server/openapi.yaml +6418 -0
  18. package/dist/server/pg-auth-store.d.ts +132 -0
  19. package/dist/server/pg-auth-store.js +472 -0
  20. package/dist/server/pg-job-queue.d.ts +59 -0
  21. package/dist/server/pg-job-queue.js +375 -0
  22. package/dist/server/premium/domain-intel.d.ts +16 -0
  23. package/dist/server/premium/domain-intel.js +133 -0
  24. package/dist/server/premium/index.d.ts +17 -0
  25. package/dist/server/premium/index.js +35 -0
  26. package/dist/server/premium/swr-cache.d.ts +14 -0
  27. package/dist/server/premium/swr-cache.js +34 -0
  28. package/dist/server/routes/activity.d.ts +6 -0
  29. package/dist/server/routes/activity.js +74 -0
  30. package/dist/server/routes/answer.d.ts +5 -0
  31. package/dist/server/routes/answer.js +125 -0
  32. package/dist/server/routes/ask.d.ts +28 -0
  33. package/dist/server/routes/ask.js +229 -0
  34. package/dist/server/routes/batch.d.ts +6 -0
  35. package/dist/server/routes/batch.js +493 -0
  36. package/dist/server/routes/cli-usage.d.ts +6 -0
  37. package/dist/server/routes/cli-usage.js +127 -0
  38. package/dist/server/routes/compat.d.ts +23 -0
  39. package/dist/server/routes/compat.js +652 -0
  40. package/dist/server/routes/deep-fetch.d.ts +8 -0
  41. package/dist/server/routes/deep-fetch.js +57 -0
  42. package/dist/server/routes/demo.d.ts +24 -0
  43. package/dist/server/routes/demo.js +517 -0
  44. package/dist/server/routes/do.d.ts +8 -0
  45. package/dist/server/routes/do.js +72 -0
  46. package/dist/server/routes/extract.d.ts +8 -0
  47. package/dist/server/routes/extract.js +235 -0
  48. package/dist/server/routes/fetch.d.ts +7 -0
  49. package/dist/server/routes/fetch.js +999 -0
  50. package/dist/server/routes/health.d.ts +7 -0
  51. package/dist/server/routes/health.js +19 -0
  52. package/dist/server/routes/jobs.d.ts +7 -0
  53. package/dist/server/routes/jobs.js +573 -0
  54. package/dist/server/routes/mcp.d.ts +14 -0
  55. package/dist/server/routes/mcp.js +141 -0
  56. package/dist/server/routes/oauth.d.ts +9 -0
  57. package/dist/server/routes/oauth.js +396 -0
  58. package/dist/server/routes/playground.d.ts +17 -0
  59. package/dist/server/routes/playground.js +283 -0
  60. package/dist/server/routes/screenshot.d.ts +22 -0
  61. package/dist/server/routes/screenshot.js +816 -0
  62. package/dist/server/routes/search.d.ts +6 -0
  63. package/dist/server/routes/search.js +303 -0
  64. package/dist/server/routes/session.d.ts +15 -0
  65. package/dist/server/routes/session.js +397 -0
  66. package/dist/server/routes/stats.d.ts +6 -0
  67. package/dist/server/routes/stats.js +71 -0
  68. package/dist/server/routes/stripe.d.ts +15 -0
  69. package/dist/server/routes/stripe.js +294 -0
  70. package/dist/server/routes/users.d.ts +8 -0
  71. package/dist/server/routes/users.js +1671 -0
  72. package/dist/server/routes/watch.d.ts +15 -0
  73. package/dist/server/routes/watch.js +309 -0
  74. package/dist/server/routes/webhooks.d.ts +26 -0
  75. package/dist/server/routes/webhooks.js +170 -0
  76. package/dist/server/routes/youtube.d.ts +6 -0
  77. package/dist/server/routes/youtube.js +130 -0
  78. package/dist/server/sentry.d.ts +13 -0
  79. package/dist/server/sentry.js +38 -0
  80. package/dist/server/types.d.ts +15 -0
  81. package/dist/server/types.js +7 -0
  82. package/dist/server/utils/response.d.ts +44 -0
  83. package/dist/server/utils/response.js +69 -0
  84. package/dist/server/utils/sse.d.ts +22 -0
  85. package/dist/server/utils/sse.js +38 -0
  86. package/package.json +2 -1
@@ -0,0 +1,652 @@
1
+ /**
2
+ * Firecrawl API Compatibility Layer
3
+ *
4
+ * Drop-in replacement for Firecrawl's API - users can switch by ONLY changing the base URL.
5
+ * This is our killer acquisition feature.
6
+ *
7
+ * NOTE: Error responses in this file intentionally use Firecrawl's format:
8
+ * { success: false, error: "Human-readable message" }
9
+ * This is required for Firecrawl drop-in compatibility and differs from the
10
+ * standard WebPeel API error format: { error: "error_code", message: "description" }.
11
+ * Do NOT change this format — it would break Firecrawl-compatible integrations.
12
+ *
13
+ * Implements Firecrawl endpoints:
14
+ * - POST /v1/scrape
15
+ * - POST /v2/scrape (v2 with formats: ["screenshot"] support)
16
+ * - POST /v1/crawl
17
+ * - GET /v1/crawl/:id
18
+ * - POST /v1/search
19
+ * - POST /v1/map
20
+ */
21
+ import { Router } from 'express';
22
+ import { peel } from '../../index.js';
23
+ import { crawl } from '../../core/crawler.js';
24
+ import { mapDomain } from '../../core/map.js';
25
+ import { takeScreenshot } from '../../core/screenshot.js';
26
+ import { normalizeActions } from '../../core/actions.js';
27
+ import { extractInlineJson } from '../../core/extract-inline.js';
28
+ import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
29
+ const VALID_LLM_PROVIDERS = ['openai', 'anthropic', 'google'];
30
/**
 * Translate a Firecrawl-style `actions` array into our PageAction format.
 *
 * Delegates to the shared normalizeActions helper so that action handling
 * stays consistent across every API surface that accepts actions.
 *
 * @param {unknown} actions - raw `actions` value from the request body.
 * @returns {object[]|undefined} normalized actions, or undefined when the
 *   input is missing or not an array.
 */
function mapFirecrawlActions(actions) {
    // Anything that is not a genuine array (including null/undefined)
    // means "no actions requested".
    return Array.isArray(actions) ? normalizeActions(actions) : undefined;
}
40
/**
 * Build the Firecrawl-compatible Express router.
 *
 * @param {object} jobQueue - async job store used by /v1/crawl; this code
 *   calls jobQueue.createJob(type, webhook, ownerId), jobQueue.updateJob(id,
 *   patch) and jobQueue.getJob(id).
 * @returns {import('express').Router} router exposing POST /v1/scrape,
 *   POST /v2/scrape, POST /v1/crawl, GET /v1/crawl/:id, POST /v1/search and
 *   POST /v1/map in Firecrawl's request/response shapes.
 */
export function createCompatRouter(jobQueue) {
    const router = Router();
    /**
     * POST /v1/scrape - Firecrawl's main scrape endpoint
     *
     * Maps to our peel() function
     */
    router.post('/v1/scrape', async (req, res) => {
        try {
            const { url, formats = ['markdown'], onlyMainContent = true, // Firecrawl defaults to true
            includeTags, excludeTags, waitFor, timeout, actions, headers, location,
            // Inline extraction (BYOK)
            extract: extractParam, llmProvider, llmApiKey, llmModel, stream, } = req.body;
            // Validate URL
            if (!url || typeof url !== 'string') {
                res.status(400).json({
                    success: false,
                    error: 'Missing or invalid "url" parameter',
                });
                return;
            }
            // SECURITY: Validate URL to prevent SSRF attacks
            try {
                validateUrlForSSRF(url);
            }
            catch (error) {
                if (error instanceof SSRFError) {
                    // NOTE(review): this SSRF rejection uses the WebPeel shape
                    // (error code + message) rather than the Firecrawl shape the
                    // file header mandates — confirm Firecrawl clients tolerate it.
                    res.status(400).json({
                        success: false,
                        error: 'blocked_url',
                        message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
                    });
                    return;
                }
                throw error;
            }
            // Determine if we need to render based on Firecrawl params
            const needsRender = waitFor !== undefined || actions !== undefined;
            // Map Firecrawl parameters to our PeelOptions
            // onlyMainContent=true (default) → raw=false (use smart extraction)
            // onlyMainContent=false → raw=true (return everything)
            const options = {
                render: needsRender,
                wait: waitFor,
                timeout: timeout || 30000,
                stream: stream === true,
                includeTags: Array.isArray(includeTags) ? includeTags : undefined,
                excludeTags: Array.isArray(excludeTags) ? excludeTags : undefined,
                raw: onlyMainContent === false,
                actions: mapFirecrawlActions(actions),
                headers,
                screenshot: formats.includes('screenshot'),
                images: formats.includes('images'),
                format: 'markdown', // Always use markdown as base
            };
            // If location is provided, map it
            if (location) {
                options.location = {
                    country: location.country,
                    languages: location.languages,
                };
            }
            // Streaming mode: advertise and flush headers before the (slow)
            // peel() call so clients see the stream marker early.
            if (options.stream) {
                res.setHeader('X-Stream', 'true');
                if (typeof res.flushHeaders === 'function') {
                    res.flushHeaders();
                }
            }
            // Execute peel
            const result = await peel(url, options);
            // Build Firecrawl-compatible response
            // (explicit fields below intentionally override spread metadata)
            const data = {
                markdown: result.content,
                metadata: {
                    ...result.metadata,
                    title: result.title,
                    description: result.metadata?.description || '',
                    language: result.metadata?.language || 'en',
                    sourceURL: result.url,
                    statusCode: 200,
                },
            };
            // Add optional formats — note html/rawHtml each trigger a second
            // fetch of the same URL with a different output format.
            if (formats.includes('html')) {
                // Re-fetch with HTML format if requested
                const htmlResult = await peel(url, { ...options, format: 'html' });
                data.html = htmlResult.content;
            }
            if (formats.includes('rawHtml')) {
                const rawResult = await peel(url, { ...options, format: 'html', raw: true });
                data.rawHtml = rawResult.content;
            }
            if (formats.includes('links')) {
                data.links = result.links;
            }
            if (formats.includes('screenshot') && result.screenshot) {
                data.screenshot = `data:image/png;base64,${result.screenshot}`;
            }
            if (formats.includes('images') && result.images) {
                data.images = result.images;
            }
            // --- Inline JSON extraction via LLM (BYOK) ---
            // Resolve extract from: (1) top-level extract param, (2) formats array object
            let resolvedExtract;
            if (extractParam && typeof extractParam === 'object' && (extractParam.schema || extractParam.prompt)) {
                resolvedExtract = extractParam;
            }
            if (!resolvedExtract) {
                const jsonFormatObj = formats.find((f) => typeof f === 'object' && f !== null && f.type === 'json' && (f.schema || f.prompt));
                if (jsonFormatObj) {
                    resolvedExtract = { schema: jsonFormatObj.schema, prompt: jsonFormatObj.prompt };
                }
            }
            // LLM extraction only runs when the caller supplies their own key
            // and a recognized provider; otherwise fall through.
            if (resolvedExtract && llmApiKey && llmProvider && VALID_LLM_PROVIDERS.includes(llmProvider)) {
                const extractResult = await extractInlineJson(result.content, {
                    schema: resolvedExtract.schema,
                    prompt: resolvedExtract.prompt,
                    llmProvider: llmProvider,
                    llmApiKey: llmApiKey.trim(),
                    llmModel,
                });
                data.json = extractResult.data;
                data.extractTokensUsed = extractResult.tokensUsed;
            }
            else if (formats.includes('json')) {
                // Fallback: return structured metadata as JSON (no LLM)
                data.json = result.extracted || result.metadata;
            }
            if (formats.includes('branding')) {
                data.branding = result.branding;
            }
            if (formats.includes('summary')) {
                data.summary = result.summary;
            }
            res.json({
                success: true,
                data,
            });
        }
        catch (error) {
            console.error('Firecrawl /v1/scrape error:', error);
            res.status(500).json({
                success: false,
                error: 'An unexpected error occurred. Please try again.',
            });
        }
    });
    /**
     * POST /v1/crawl - Firecrawl's crawl endpoint (async)
     *
     * Maps to our crawl() function with job queue
     */
    router.post('/v1/crawl', async (req, res) => {
        try {
            const { url, limit = 100, maxDepth = 3, includePaths = [], excludePaths = [], scrapeOptions = {}, webhook, } = req.body;
            // Validate URL
            if (!url || typeof url !== 'string') {
                res.status(400).json({
                    success: false,
                    error: 'Missing or invalid "url" parameter',
                });
                return;
            }
            try {
                new URL(url);
            }
            catch {
                res.status(400).json({
                    success: false,
                    error: 'Invalid URL format',
                });
                return;
            }
            // SECURITY: Validate URL to prevent SSRF attacks
            try {
                validateUrlForSSRF(url);
            }
            catch (error) {
                if (error instanceof SSRFError) {
                    res.status(400).json({
                        success: false,
                        error: 'blocked_url',
                        message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
                    });
                    return;
                }
                throw error;
            }
            // Create job (with owner for authorization)
            const ownerId = req.auth?.keyInfo?.accountId;
            const job = await jobQueue.createJob('crawl', webhook, ownerId);
            // Start crawl in background; the HTTP response returns the job id
            // immediately (see res.json below) and clients poll GET /v1/crawl/:id.
            setImmediate(async () => {
                try {
                    jobQueue.updateJob(job.id, { status: 'processing' });
                    // Build crawl options
                    const crawlOptions = {
                        maxPages: limit,
                        maxDepth,
                        tier: req.auth?.tier,
                        onProgress: (progress) => {
                            const total = progress.crawled + progress.queued;
                            jobQueue.updateJob(job.id, {
                                total,
                                completed: progress.crawled,
                                creditsUsed: progress.crawled,
                            });
                        },
                        // Map scrapeOptions to PeelOptions
                        // NOTE(review): spread comes last, so caller-supplied
                        // scrapeOptions can override maxPages/maxDepth/onProgress —
                        // confirm that is intended.
                        ...scrapeOptions,
                    };
                    // Add path filters if provided
                    if (includePaths.length > 0) {
                        crawlOptions.includePatterns = includePaths;
                    }
                    if (excludePaths.length > 0) {
                        crawlOptions.excludePatterns = excludePaths;
                    }
                    // Run crawl
                    const results = await crawl(url, crawlOptions);
                    // Map results to Firecrawl format
                    const firecrawlResults = results.map(r => ({
                        url: r.url,
                        markdown: r.markdown,
                        metadata: {
                            title: r.title,
                            description: '',
                            sourceURL: r.url,
                            statusCode: 200,
                        },
                        links: r.links,
                    }));
                    // Update job with results
                    jobQueue.updateJob(job.id, {
                        status: 'completed',
                        data: firecrawlResults,
                        total: results.length,
                        completed: results.length,
                        creditsUsed: results.length,
                    });
                }
                catch (error) {
                    jobQueue.updateJob(job.id, {
                        status: 'failed',
                        error: error.message || 'Unknown error',
                    });
                }
            });
            // Return job ID immediately (Firecrawl format)
            res.json({
                success: true,
                id: job.id,
            });
        }
        catch (error) {
            console.error('Firecrawl /v1/crawl error:', error);
            res.status(500).json({
                success: false,
                error: 'An unexpected error occurred. Please try again.',
            });
        }
    });
    /**
     * GET /v1/crawl/:id - Get crawl job status (Firecrawl format)
     */
    router.get('/v1/crawl/:id', async (req, res) => {
        try {
            const id = req.params.id;
            const job = await jobQueue.getJob(id);
            if (!job) {
                res.status(404).json({
                    success: false,
                    error: 'Job not found',
                });
                return;
            }
            // SECURITY: Verify the requester owns this job.
            // Deliberately answers 404 (not 403) so job IDs cannot be probed.
            const requestOwnerId = req.auth?.keyInfo?.accountId;
            if (job.ownerId && requestOwnerId && job.ownerId !== requestOwnerId) {
                res.status(404).json({
                    success: false,
                    error: 'Job not found',
                });
                return;
            }
            // Map our job status to Firecrawl's status format
            const firecrawlStatus = job.status === 'processing' ? 'scraping' : job.status;
            res.json({
                success: true,
                status: firecrawlStatus,
                completed: job.completed || 0,
                total: job.total || 0,
                creditsUsed: job.creditsUsed || 0,
                expiresAt: job.expiresAt,
                data: job.data || [],
            });
        }
        catch (error) {
            console.error('Firecrawl GET /v1/crawl/:id error:', error);
            res.status(500).json({
                success: false,
                error: 'An unexpected error occurred. Please try again.',
            });
        }
    });
    /**
     * POST /v1/search - Firecrawl's search endpoint
     *
     * Uses DuckDuckGo search with optional scraping
     */
    router.post('/v1/search', async (req, res) => {
        try {
            const { query, limit = 5, scrapeOptions = {}, } = req.body;
            // Validate query
            if (!query || typeof query !== 'string') {
                res.status(400).json({
                    success: false,
                    error: 'Missing or invalid "query" parameter',
                });
                return;
            }
            // Use our search route logic (DuckDuckGo HTML scraping)
            const { fetch: undiciFetch } = await import('undici');
            const { load } = await import('cheerio');
            const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
            const response = await undiciFetch(searchUrl, {
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
                },
            });
            if (!response.ok) {
                throw new Error(`Search failed: HTTP ${response.status}`);
            }
            const html = await response.text();
            const $ = load(html);
            const results = [];
            $('.result').each((_i, elem) => {
                // Once we have `limit` results, skip the remaining matches.
                if (results.length >= limit)
                    return;
                const $result = $(elem);
                let title = $result.find('.result__title').text().trim();
                const rawUrl = $result.find('.result__a').attr('href') || '';
                let snippet = $result.find('.result__snippet').text().trim();
                if (!title || !rawUrl)
                    return;
                // Extract actual URL from DuckDuckGo redirect
                // (DDG links go through /l/?uddg=<encoded-target>)
                let url = rawUrl;
                try {
                    const ddgUrl = new URL(rawUrl, 'https://duckduckgo.com');
                    const uddg = ddgUrl.searchParams.get('uddg');
                    if (uddg) {
                        url = decodeURIComponent(uddg);
                    }
                }
                catch (e) {
                    if (process.env.DEBUG)
                        console.debug('[webpeel]', 'ddg url parse failed:', e instanceof Error ? e.message : e);
                }
                // Validate URL
                try {
                    const parsed = new URL(url);
                    if (!['http:', 'https:'].includes(parsed.protocol)) {
                        return;
                    }
                    url = parsed.href;
                }
                catch {
                    return;
                }
                results.push({ title, url, snippet });
            });
            // If scraping is requested, fetch each result
            // (all results are scraped in parallel; failures degrade gracefully)
            const firecrawlResults = await Promise.all(results.map(async (result) => {
                try {
                    // Scrape the URL with provided options
                    const peelResult = await peel(result.url, {
                        format: 'markdown',
                        timeout: 10000,
                        ...scrapeOptions,
                    });
                    return {
                        url: result.url,
                        markdown: peelResult.content,
                        metadata: {
                            title: peelResult.title || result.title,
                            description: result.snippet,
                            sourceURL: result.url,
                            statusCode: 200,
                            ...peelResult.metadata,
                        },
                    };
                }
                catch (error) {
                    // Return basic result if scraping fails
                    return {
                        url: result.url,
                        markdown: '',
                        metadata: {
                            title: result.title,
                            description: result.snippet,
                            sourceURL: result.url,
                            error: error.message,
                        },
                    };
                }
            }));
            res.json({
                success: true,
                data: firecrawlResults,
            });
        }
        catch (error) {
            console.error('Firecrawl /v1/search error:', error);
            res.status(500).json({
                success: false,
                error: 'An unexpected error occurred. Please try again.',
            });
        }
    });
    /**
     * POST /v1/map - Firecrawl's map endpoint
     *
     * Maps to our mapDomain() function
     */
    router.post('/v1/map', async (req, res) => {
        try {
            const { url, limit = 5000, search, } = req.body;
            // Validate URL
            if (!url || typeof url !== 'string') {
                res.status(400).json({
                    success: false,
                    error: 'Missing or invalid "url" parameter',
                });
                return;
            }
            try {
                new URL(url);
            }
            catch {
                res.status(400).json({
                    success: false,
                    error: 'Invalid URL format',
                });
                return;
            }
            // SECURITY: Validate URL to prevent SSRF attacks
            try {
                validateUrlForSSRF(url);
            }
            catch (error) {
                if (error instanceof SSRFError) {
                    res.status(400).json({
                        success: false,
                        error: 'blocked_url',
                        message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
                    });
                    return;
                }
                throw error;
            }
            // Run mapDomain
            const result = await mapDomain(url, {
                maxUrls: limit,
                search,
            });
            res.json({
                success: true,
                links: result.urls,
            });
        }
        catch (error) {
            console.error('Firecrawl /v1/map error:', error);
            res.status(500).json({
                success: false,
                error: 'An unexpected error occurred. Please try again.',
            });
        }
    });
    /**
     * POST /v2/scrape - Firecrawl v2-compatible scrape with screenshot support
     *
     * Same as /v1/scrape but adds first-class screenshot support.
     * When formats includes "screenshot" (and nothing else), returns
     * a screenshot directly; otherwise falls through to peel() like v1.
     */
    router.post('/v2/scrape', async (req, res) => {
        try {
            const { url, formats = ['markdown'], onlyMainContent = true, includeTags, excludeTags, waitFor, timeout, actions, headers, location,
            // Screenshot-specific v2 options
            fullPage, width, height, screenshotFormat, quality, stream, } = req.body;
            // Validate URL
            if (!url || typeof url !== 'string') {
                res.status(400).json({
                    success: false,
                    error: 'Missing or invalid "url" parameter',
                });
                return;
            }
            // SECURITY: Validate URL to prevent SSRF attacks
            try {
                validateUrlForSSRF(url);
            }
            catch (error) {
                if (error instanceof SSRFError) {
                    res.status(400).json({
                        success: false,
                        error: 'blocked_url',
                        message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
                    });
                    return;
                }
                throw error;
            }
            const wantsScreenshot = formats.includes('screenshot') || formats.includes('screenshot@fullPage');
            // If screenshot-only request, use the dedicated screenshot function
            if (wantsScreenshot && formats.length === 1) {
                const result = await takeScreenshot(url, {
                    fullPage: fullPage === true || formats[0] === 'screenshot@fullPage',
                    width: typeof width === 'number' ? width : undefined,
                    height: typeof height === 'number' ? height : undefined,
                    format: screenshotFormat || 'png',
                    quality: typeof quality === 'number' ? quality : undefined,
                    waitFor: typeof waitFor === 'number' ? waitFor : undefined,
                    timeout: typeof timeout === 'number' ? timeout : 30000,
                    actions: mapFirecrawlActions(actions),
                    headers,
                });
                res.json({
                    success: true,
                    data: {
                        screenshot: `data:${result.contentType};base64,${result.screenshot}`,
                        metadata: {
                            sourceURL: result.url,
                            statusCode: 200,
                            format: result.format,
                        },
                    },
                });
                return;
            }
            // Otherwise, fall through to peel() like v1/scrape
            const needsRender = waitFor !== undefined || actions !== undefined || wantsScreenshot;
            const options = {
                render: needsRender,
                wait: waitFor,
                timeout: timeout || 30000,
                stream: stream === true,
                includeTags: Array.isArray(includeTags) ? includeTags : undefined,
                excludeTags: Array.isArray(excludeTags) ? excludeTags : undefined,
                raw: onlyMainContent === false,
                actions: mapFirecrawlActions(actions),
                headers,
                screenshot: wantsScreenshot,
                screenshotFullPage: fullPage === true,
                images: formats.includes('images'),
                format: 'markdown',
            };
            if (location) {
                options.location = {
                    country: location.country,
                    languages: location.languages,
                };
            }
            if (options.stream) {
                res.setHeader('X-Stream', 'true');
                if (typeof res.flushHeaders === 'function') {
                    res.flushHeaders();
                }
            }
            const result = await peel(url, options);
            const data = {
                markdown: result.content,
                metadata: {
                    title: result.title,
                    description: result.metadata.description || '',
                    language: 'en',
                    sourceURL: result.url,
                    statusCode: 200,
                    // NOTE(review): unlike /v1/scrape, result.metadata is spread
                    // LAST here, so it can override sourceURL/statusCode above —
                    // confirm this divergence from v1 is intentional.
                    ...result.metadata,
                },
            };
            if (formats.includes('html')) {
                const htmlResult = await peel(url, { ...options, format: 'html' });
                data.html = htmlResult.content;
            }
            if (formats.includes('rawHtml')) {
                const rawResult = await peel(url, { ...options, format: 'html', raw: true });
                data.rawHtml = rawResult.content;
            }
            if (formats.includes('links')) {
                data.links = result.links;
            }
            if (wantsScreenshot && result.screenshot) {
                data.screenshot = `data:image/png;base64,${result.screenshot}`;
            }
            if (formats.includes('images') && result.images) {
                data.images = result.images;
            }
            res.json({
                success: true,
                data,
            });
        }
        catch (error) {
            console.error('Firecrawl /v2/scrape error:', error);
            res.status(500).json({
                success: false,
                error: 'An unexpected error occurred. Please try again.',
            });
        }
    });
    return router;
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * POST /v1/deep-fetch
3
+ *
4
+ * Deep web intelligence endpoint: search + fetch + synthesize + structure.
5
+ * Body: { query, count?, format?, maxChars? }
6
+ */
7
+ import { Router } from 'express';
8
/** Factory returning the deep-fetch router; implementation lives in deep-fetch.js. */
export declare function createDeepFetchRouter(): Router;