@apteva/integrations 0.3.39 → 0.3.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "slug": "firecrawl",
3
3
  "name": "Firecrawl",
4
- "description": "Web scraping and crawling API that transforms websites into LLM-ready markdown or structured data. Perfect for AI applications, research, and data extraction.",
4
+ "description": "Web scraping and crawling API. Turns any website into LLM-ready markdown, HTML, screenshots, links, or structured JSON. Handles JavaScript rendering, anti-bot bypass, sitemap discovery, web search, and natural-language structured extraction. v2 API.",
5
5
  "logo": "https://www.google.com/s2/favicons?domain=firecrawl.dev&sz=128",
6
6
  "categories": [
7
7
  "scraping",
@@ -10,28 +10,27 @@
10
10
  "markdown",
11
11
  "ai",
12
12
  "llm",
13
- "search"
13
+ "search",
14
+ "extraction"
14
15
  ],
15
- "base_url": "",
16
+ "base_url": "https://api.firecrawl.dev/v2",
16
17
  "auth": {
17
- "types": [
18
- "bearer"
19
- ],
18
+ "types": ["bearer"],
20
19
  "headers": {
21
20
  "Authorization": "Bearer {{token}}"
22
21
  },
23
22
  "credential_fields": [
24
23
  {
25
24
  "name": "token",
26
- "label": "Token"
25
+ "label": "API key (starts with fc-)"
27
26
  }
28
27
  ]
29
28
  },
30
29
  "tools": [
31
30
  {
32
31
  "name": "scrape",
33
- "description": "Scrape a single URL and get its content in LLM-ready format (markdown, HTML, or structured data). Handles JavaScript rendering, anti-bot bypass, and content extraction.",
34
- "method": "GET",
32
+ "description": "Scrape a single URL and return its content in LLM-ready formats (markdown, HTML, links, screenshots, or structured JSON). Handles JavaScript rendering and anti-bot bypass. Use for one-off page extraction. For multiple URLs, use crawl or batch_scrape.",
33
+ "method": "POST",
35
34
  "path": "/scrape",
36
35
  "input_schema": {
37
36
  "type": "object",
@@ -44,74 +43,99 @@
44
43
  "type": "array",
45
44
  "items": {
46
45
  "type": "string",
47
- "enum": [
48
- "markdown",
49
- "html",
50
- "rawHtml",
51
- "links",
52
- "screenshot",
53
- "screenshot@fullPage"
54
- ]
46
+ "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage", "json"]
55
47
  },
56
- "default": [
57
- "markdown"
58
- ],
59
- "description": "Output formats to return"
48
+ "default": ["markdown"],
49
+ "description": "Output formats to return. Default is markdown only."
60
50
  },
61
51
  "onlyMainContent": {
62
52
  "type": "boolean",
63
53
  "default": true,
64
- "description": "Only return the main content, excluding headers, footers, nav, etc."
54
+ "description": "Strip headers, navs, footers, and other boilerplate. Default true."
65
55
  },
66
56
  "includeTags": {
67
57
  "type": "array",
68
- "items": {
69
- "type": "string"
70
- },
71
- "description": "HTML tags to include (e.g., ['article', 'main'])"
58
+ "items": { "type": "string" },
59
+ "description": "HTML tags or CSS selectors to include (e.g. ['article', 'main', '.post-body'])"
72
60
  },
73
61
  "excludeTags": {
74
62
  "type": "array",
75
- "items": {
76
- "type": "string"
77
- },
78
- "description": "HTML tags to exclude (e.g., ['nav', 'footer'])"
63
+ "items": { "type": "string" },
64
+ "description": "HTML tags or CSS selectors to exclude (e.g. ['nav', 'footer', '.ads'])"
65
+ },
66
+ "headers": {
67
+ "type": "object",
68
+ "description": "Custom request headers (cookies, user-agent, etc.)"
79
69
  },
80
70
  "waitFor": {
81
71
  "type": "integer",
82
72
  "default": 0,
83
- "description": "Milliseconds to wait for JavaScript to execute"
73
+ "description": "Milliseconds to wait for JavaScript to finish rendering before scraping"
84
74
  },
85
75
  "mobile": {
86
76
  "type": "boolean",
87
77
  "default": false,
88
- "description": "Use mobile user agent"
78
+ "description": "Emulate a mobile device"
79
+ },
80
+ "skipTlsVerification": {
81
+ "type": "boolean",
82
+ "default": true,
83
+ "description": "Skip TLS certificate verification"
89
84
  },
90
85
  "timeout": {
91
86
  "type": "integer",
92
- "default": 30000,
93
- "description": "Timeout in milliseconds"
87
+ "default": 60000,
88
+ "minimum": 1000,
89
+ "maximum": 300000,
90
+ "description": "Request timeout in milliseconds (1000–300000)"
94
91
  },
95
92
  "removeBase64Images": {
96
93
  "type": "boolean",
97
94
  "default": true,
98
- "description": "Remove base64 encoded images from output"
95
+ "description": "Strip base64-encoded images from the markdown output"
99
96
  },
100
97
  "blockAds": {
101
98
  "type": "boolean",
102
99
  "default": true,
103
- "description": "Block ads and trackers"
100
+ "description": "Block ads, trackers, and cookie-consent popups"
101
+ },
102
+ "proxy": {
103
+ "type": "string",
104
+ "enum": ["basic", "enhanced", "auto"],
105
+ "default": "auto",
106
+ "description": "Proxy type. 'enhanced' is best for anti-bot sites; 'auto' picks per-request."
107
+ },
108
+ "actions": {
109
+ "type": "array",
110
+ "items": { "type": "object" },
111
+ "description": "Pre-scrape browser actions to perform (click, type, scroll, wait, etc.). See Firecrawl docs for the action schema."
112
+ },
113
+ "location": {
114
+ "type": "object",
115
+ "description": "Geographic location settings (country, languages) for proxy and emulation"
116
+ },
117
+ "maxAge": {
118
+ "type": "integer",
119
+ "default": 172800000,
120
+ "description": "Use cached content if it's no older than N milliseconds. Default 48h."
121
+ },
122
+ "storeInCache": {
123
+ "type": "boolean",
124
+ "default": true,
125
+ "description": "Whether to store this scrape in Firecrawl's cache"
126
+ },
127
+ "jsonOptions": {
128
+ "type": "object",
129
+ "description": "When 'json' is in formats, structured-extraction options: { schema, prompt, systemPrompt }"
104
130
  }
105
131
  },
106
- "required": [
107
- "url"
108
- ]
132
+ "required": ["url"]
109
133
  }
110
134
  },
111
135
  {
112
136
  "name": "crawl",
113
- "description": "Start crawling a website to scrape all pages. Returns a job ID to check status. Use firecrawl-crawl-status to get results.",
114
- "method": "GET",
137
+ "description": "Start an asynchronous crawl of an entire website. Returns a job id immediately — poll firecrawl_crawl_status with that id to retrieve scraped pages as they complete. Use for multi-page extraction. For one URL use scrape; for a fixed URL list use batch_scrape.",
138
+ "method": "POST",
115
139
  "path": "/crawl",
116
140
  "input_schema": {
117
141
  "type": "object",
@@ -120,81 +144,116 @@
120
144
  "type": "string",
121
145
  "description": "The starting URL to crawl"
122
146
  },
123
- "maxDepth": {
124
- "type": "integer",
125
- "default": 2,
126
- "description": "Maximum link depth to crawl"
127
- },
128
- "maxDiscoveryDepth": {
129
- "type": "integer",
130
- "description": "Maximum depth for URL discovery (separate from content depth)"
147
+ "prompt": {
148
+ "type": "string",
149
+ "description": "Natural-language description of what to crawl. Firecrawl uses this to auto-derive include/exclude paths and depth."
131
150
  },
132
151
  "limit": {
133
152
  "type": "integer",
134
- "default": 10,
153
+ "default": 10000,
135
154
  "description": "Maximum number of pages to crawl"
136
155
  },
156
+ "maxDiscoveryDepth": {
157
+ "type": "integer",
158
+ "description": "Maximum link-discovery depth (independent from page-content depth)"
159
+ },
137
160
  "includePaths": {
138
161
  "type": "array",
139
- "items": {
140
- "type": "string"
141
- },
142
- "description": "URL patterns to include (e.g., ['/blog/*', '/docs/*'])"
162
+ "items": { "type": "string" },
163
+ "description": "Regex patterns of URL pathnames to include (e.g. ['/blog/.*', '/docs/.*'])"
143
164
  },
144
165
  "excludePaths": {
145
166
  "type": "array",
146
- "items": {
147
- "type": "string"
148
- },
149
- "description": "URL patterns to exclude (e.g., ['/admin/*'])"
167
+ "items": { "type": "string" },
168
+ "description": "Regex patterns of URL pathnames to exclude (e.g. ['/admin/.*', '/login.*'])"
150
169
  },
151
- "ignoreSitemap": {
170
+ "regexOnFullURL": {
152
171
  "type": "boolean",
153
172
  "default": false,
154
- "description": "Ignore the site's sitemap.xml"
173
+ "description": "Match include/exclude regexes against the full URL instead of just the pathname"
155
174
  },
156
- "allowBackwardLinks": {
175
+ "ignoreQueryParameters": {
157
176
  "type": "boolean",
158
177
  "default": false,
159
- "description": "Allow crawling links that go up in the URL hierarchy"
178
+ "description": "Treat URLs that differ only by query string as the same page"
179
+ },
180
+ "sitemap": {
181
+ "type": "string",
182
+ "enum": ["skip", "include", "only"],
183
+ "default": "include",
184
+ "description": "Sitemap handling: 'skip' ignores it, 'include' uses it as a hint, 'only' restricts crawl to sitemap URLs"
185
+ },
186
+ "crawlEntireDomain": {
187
+ "type": "boolean",
188
+ "default": false,
189
+ "description": "Allow following sibling and parent URLs, not just children"
190
+ },
191
+ "allowSubdomains": {
192
+ "type": "boolean",
193
+ "default": false,
194
+ "description": "Follow links into subdomains of the starting URL"
160
195
  },
161
196
  "allowExternalLinks": {
162
197
  "type": "boolean",
163
198
  "default": false,
164
- "description": "Allow crawling external links"
199
+ "description": "Follow links to other domains"
200
+ },
201
+ "delay": {
202
+ "type": "number",
203
+ "description": "Seconds to wait between scrapes — use this to respect rate limits"
204
+ },
205
+ "maxConcurrency": {
206
+ "type": "integer",
207
+ "description": "Maximum number of pages scraped in parallel"
165
208
  },
166
209
  "webhook": {
167
- "type": "string",
168
- "description": "Webhook URL to receive results when crawl completes"
210
+ "type": "object",
211
+ "description": "Webhook config for crawl events: { url, headers, metadata }"
212
+ },
213
+ "scrapeOptions": {
214
+ "type": "object",
215
+ "description": "Per-page scrape options applied to every crawled URL — same shape as the scrape tool's body (formats, onlyMainContent, headers, waitFor, etc.)"
169
216
  }
170
217
  },
171
- "required": [
172
- "url"
173
- ]
218
+ "required": ["url"]
174
219
  }
175
220
  },
176
221
  {
177
222
  "name": "crawl_status",
178
- "description": "Get the status and results of a crawl job. Returns scraped pages when complete.",
223
+ "description": "Poll a running crawl job by id. Returns counts (total/completed), status (scraping/completed/failed), credit usage, and any pages scraped so far. Call repeatedly until status='completed'.",
179
224
  "method": "GET",
180
- "path": "/crawl-status",
225
+ "path": "/crawl/{id}",
226
+ "input_schema": {
227
+ "type": "object",
228
+ "properties": {
229
+ "id": {
230
+ "type": "string",
231
+ "description": "The crawl job id returned by firecrawl_crawl"
232
+ }
233
+ },
234
+ "required": ["id"]
235
+ }
236
+ },
237
+ {
238
+ "name": "crawl_cancel",
239
+ "description": "Cancel a running crawl job by id. Useful when a crawl is taking longer than expected or hitting unexpected pages.",
240
+ "method": "DELETE",
241
+ "path": "/crawl/{id}",
181
242
  "input_schema": {
182
243
  "type": "object",
183
244
  "properties": {
184
- "jobId": {
245
+ "id": {
185
246
  "type": "string",
186
- "description": "The crawl job ID returned from firecrawl-crawl"
247
+ "description": "The crawl job id returned by firecrawl_crawl"
187
248
  }
188
249
  },
189
- "required": [
190
- "jobId"
191
- ]
250
+ "required": ["id"]
192
251
  }
193
252
  },
194
253
  {
195
254
  "name": "map",
196
- "description": "Get all URLs from a website (sitemap discovery). Returns a list of URLs without scraping content. Uses 1 credit per website.",
197
- "method": "GET",
255
+ "description": "Discover all URLs on a website without scraping their content. Fast — uses sitemap.xml + light crawling. Use this to plan a targeted scrape, audit site structure, or filter by keyword via 'search'. 1 credit per website.",
256
+ "method": "POST",
198
257
  "path": "/map",
199
258
  "input_schema": {
200
259
  "type": "object",
@@ -205,73 +264,342 @@
205
264
  },
206
265
  "search": {
207
266
  "type": "string",
208
- "description": "Filter URLs containing this search term"
267
+ "description": "Filter/order returned URLs by relevance to this search query (e.g. 'pricing', 'blog')"
209
268
  },
210
- "ignoreSitemap": {
269
+ "sitemap": {
270
+ "type": "string",
271
+ "enum": ["skip", "include", "only"],
272
+ "default": "include",
273
+ "description": "Sitemap handling: 'skip' bypasses it, 'include' uses it + crawls, 'only' uses sitemap exclusively"
274
+ },
275
+ "includeSubdomains": {
211
276
  "type": "boolean",
212
- "default": false,
213
- "description": "Ignore the site's sitemap.xml and discover URLs by crawling"
277
+ "default": true,
278
+ "description": "Include URLs from subdomains"
214
279
  },
215
- "sitemapOnly": {
280
+ "ignoreQueryParameters": {
216
281
  "type": "boolean",
217
- "default": false,
218
- "description": "Only use sitemap.xml, don't discover URLs by crawling"
282
+ "default": true,
283
+ "description": "Drop query parameters from returned URLs"
219
284
  },
220
- "includeSubdomains": {
285
+ "ignoreCache": {
221
286
  "type": "boolean",
222
287
  "default": false,
223
- "description": "Include URLs from subdomains"
288
+ "description": "Bypass Firecrawl's sitemap cache and re-fetch fresh URLs"
224
289
  },
225
290
  "limit": {
226
291
  "type": "integer",
227
292
  "default": 5000,
228
- "description": "Maximum number of URLs to return"
293
+ "maximum": 100000,
294
+ "description": "Maximum URLs to return"
295
+ },
296
+ "timeout": {
297
+ "type": "integer",
298
+ "description": "Request timeout in milliseconds"
299
+ },
300
+ "location": {
301
+ "type": "object",
302
+ "description": "Geographic location settings (country, languages) for proxy and emulation"
229
303
  }
230
304
  },
231
- "required": [
232
- "url"
233
- ]
305
+ "required": ["url"]
234
306
  }
235
307
  },
236
308
  {
237
309
  "name": "search",
238
- "description": "Search the web and scrape results in one API call. Perfect for research, finding leads, competitive analysis, or SEO. Each result is scraped and returned as markdown.",
239
- "method": "GET",
310
+ "description": "Search the web and optionally scrape result pages in one call. Perfect for research, competitive analysis, lead generation, and SEO. Set scrapeOptions to also receive the full content of each result.",
311
+ "method": "POST",
240
312
  "path": "/search",
241
313
  "input_schema": {
242
314
  "type": "object",
243
315
  "properties": {
244
316
  "query": {
245
317
  "type": "string",
246
- "description": "The search query"
318
+ "description": "Search query (max 500 characters)"
247
319
  },
248
320
  "limit": {
249
321
  "type": "integer",
250
322
  "default": 5,
251
- "description": "Number of search results to scrape (1 credit per result)"
323
+ "minimum": 1,
324
+ "maximum": 100,
325
+ "description": "Number of results to return (1 credit per result if scrapeOptions is set)"
326
+ },
327
+ "sources": {
328
+ "type": "array",
329
+ "items": {
330
+ "type": "object",
331
+ "properties": {
332
+ "type": {
333
+ "type": "string",
334
+ "enum": ["web", "images", "news"]
335
+ }
336
+ }
337
+ },
338
+ "default": [{ "type": "web" }],
339
+ "description": "Search sources to query. Default is web."
252
340
  },
253
- "lang": {
341
+ "categories": {
342
+ "type": "array",
343
+ "items": {
344
+ "type": "string",
345
+ "enum": ["github", "research", "pdf"]
346
+ },
347
+ "description": "Filter results by category"
348
+ },
349
+ "tbs": {
254
350
  "type": "string",
255
- "default": "en",
256
- "description": "Language code (e.g., 'en', 'es', 'fr')"
351
+ "description": "Time-based search filter (Google syntax: 'qdr:d' = past day, 'qdr:w' = past week, 'qdr:m' = past month, 'qdr:y' = past year)"
257
352
  },
258
353
  "country": {
259
354
  "type": "string",
260
- "default": "us",
261
- "description": "Country code for localized results (e.g., 'us', 'uk', 'de')"
355
+ "default": "US",
356
+ "description": "ISO country code for localized results (e.g. 'US', 'GB', 'DE')"
262
357
  },
263
358
  "location": {
264
359
  "type": "string",
265
- "description": "Specific location for local search results"
360
+ "description": "Specific location string for local search results (e.g. 'San Francisco, California, United States')"
266
361
  },
267
- "tbs": {
362
+ "timeout": {
363
+ "type": "integer",
364
+ "default": 60000,
365
+ "description": "Request timeout in milliseconds"
366
+ },
367
+ "ignoreInvalidURLs": {
368
+ "type": "boolean",
369
+ "default": false,
370
+ "description": "Skip results with invalid URLs instead of erroring"
371
+ },
372
+ "scrapeOptions": {
373
+ "type": "object",
374
+ "description": "When set, scrape each search result and return its content. Same shape as the scrape tool's body."
375
+ }
376
+ },
377
+ "required": ["query"]
378
+ }
379
+ },
380
+ {
381
+ "name": "extract",
382
+ "description": "Extract structured JSON from one or more URLs using natural language and/or a JSON schema. Returns a job id — poll firecrawl_extract_status to retrieve results. Use this when you want typed data instead of raw markdown.",
383
+ "method": "POST",
384
+ "path": "/extract",
385
+ "input_schema": {
386
+ "type": "object",
387
+ "properties": {
388
+ "urls": {
389
+ "type": "array",
390
+ "items": { "type": "string" },
391
+ "description": "URLs to extract from. Glob patterns supported (e.g. 'https://example.com/blog/*')."
392
+ },
393
+ "prompt": {
268
394
  "type": "string",
269
- "description": "Time-based search filter (e.g., 'qdr:d' for past day, 'qdr:w' for past week)"
395
+ "description": "Natural-language description of what to extract. Used alongside 'schema' to guide extraction."
396
+ },
397
+ "schema": {
398
+ "type": "object",
399
+ "description": "JSON Schema describing the structure of the desired output"
400
+ },
401
+ "enableWebSearch": {
402
+ "type": "boolean",
403
+ "default": false,
404
+ "description": "Allow Firecrawl to perform supplementary web searches if the target pages don't contain enough info"
405
+ },
406
+ "ignoreSitemap": {
407
+ "type": "boolean",
408
+ "default": false,
409
+ "description": "Bypass sitemap.xml when scanning the target sites"
410
+ },
411
+ "includeSubdomains": {
412
+ "type": "boolean",
413
+ "default": true,
414
+ "description": "Extend extraction to subdomains of the provided URLs"
415
+ },
416
+ "showSources": {
417
+ "type": "boolean",
418
+ "default": false,
419
+ "description": "Include the source URLs that contributed to each extracted field in the response"
420
+ },
421
+ "ignoreInvalidURLs": {
422
+ "type": "boolean",
423
+ "default": true,
424
+ "description": "Skip invalid URLs and continue with the rest"
425
+ },
426
+ "scrapeOptions": {
427
+ "type": "object",
428
+ "description": "Per-page scrape options applied while extracting"
429
+ }
430
+ },
431
+ "required": ["urls"]
432
+ }
433
+ },
434
+ {
435
+ "name": "extract_status",
436
+ "description": "Poll an extract job by id. Returns the structured data once status='completed'.",
437
+ "method": "GET",
438
+ "path": "/extract/{id}",
439
+ "input_schema": {
440
+ "type": "object",
441
+ "properties": {
442
+ "id": {
443
+ "type": "string",
444
+ "description": "The extract job id returned by firecrawl_extract"
270
445
  }
271
446
  },
272
- "required": [
273
- "query"
274
- ]
447
+ "required": ["id"]
448
+ }
449
+ },
450
+ {
451
+ "name": "batch_scrape",
452
+ "description": "Scrape a fixed list of URLs in one job. Returns a job id — poll firecrawl_batch_scrape_status to retrieve results. Use this when you already have the list of URLs (vs crawl, which discovers them).",
453
+ "method": "POST",
454
+ "path": "/batch/scrape",
455
+ "input_schema": {
456
+ "type": "object",
457
+ "properties": {
458
+ "urls": {
459
+ "type": "array",
460
+ "items": { "type": "string" },
461
+ "description": "List of URLs to scrape"
462
+ },
463
+ "formats": {
464
+ "type": "array",
465
+ "items": {
466
+ "type": "string",
467
+ "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage", "json"]
468
+ },
469
+ "default": ["markdown"],
470
+ "description": "Output formats applied to every URL"
471
+ },
472
+ "onlyMainContent": {
473
+ "type": "boolean",
474
+ "default": true,
475
+ "description": "Strip headers, navs, footers, and other boilerplate"
476
+ },
477
+ "includeTags": {
478
+ "type": "array",
479
+ "items": { "type": "string" }
480
+ },
481
+ "excludeTags": {
482
+ "type": "array",
483
+ "items": { "type": "string" }
484
+ },
485
+ "headers": { "type": "object" },
486
+ "waitFor": { "type": "integer", "default": 0 },
487
+ "timeout": { "type": "integer", "default": 60000 },
488
+ "blockAds": { "type": "boolean", "default": true },
489
+ "removeBase64Images": { "type": "boolean", "default": true },
490
+ "ignoreInvalidURLs": {
491
+ "type": "boolean",
492
+ "default": false,
493
+ "description": "Skip invalid URLs instead of failing the whole batch"
494
+ },
495
+ "webhook": {
496
+ "type": "object",
497
+ "description": "Webhook config for batch events: { url, headers, metadata }"
498
+ }
499
+ },
500
+ "required": ["urls"]
501
+ }
502
+ },
503
+ {
504
+ "name": "batch_scrape_status",
505
+ "description": "Poll a batch scrape job by id. Returns counts and any scraped pages so far.",
506
+ "method": "GET",
507
+ "path": "/batch/scrape/{id}",
508
+ "input_schema": {
509
+ "type": "object",
510
+ "properties": {
511
+ "id": {
512
+ "type": "string",
513
+ "description": "The batch scrape job id returned by firecrawl_batch_scrape"
514
+ }
515
+ },
516
+ "required": ["id"]
517
+ }
518
+ },
519
+ {
520
+ "name": "batch_scrape_cancel",
521
+ "description": "Cancel a running batch scrape job.",
522
+ "method": "DELETE",
523
+ "path": "/batch/scrape/{id}",
524
+ "input_schema": {
525
+ "type": "object",
526
+ "properties": {
527
+ "id": {
528
+ "type": "string",
529
+ "description": "The batch scrape job id returned by firecrawl_batch_scrape"
530
+ }
531
+ },
532
+ "required": ["id"]
533
+ }
534
+ },
535
+ {
536
+ "name": "crawl_errors",
537
+ "description": "Retrieve per-page errors for a crawl job. Use after crawl_status reports failures or when total > completed, to see exactly which URLs failed and why.",
538
+ "method": "GET",
539
+ "path": "/crawl/{id}/errors",
540
+ "input_schema": {
541
+ "type": "object",
542
+ "properties": {
543
+ "id": {
544
+ "type": "string",
545
+ "description": "The crawl job id returned by firecrawl_crawl"
546
+ }
547
+ },
548
+ "required": ["id"]
549
+ }
550
+ },
551
+ {
552
+ "name": "batch_scrape_errors",
553
+ "description": "Retrieve per-URL errors for a batch scrape job. Use after batch_scrape_status reports failures.",
554
+ "method": "GET",
555
+ "path": "/batch/scrape/{id}/errors",
556
+ "input_schema": {
557
+ "type": "object",
558
+ "properties": {
559
+ "id": {
560
+ "type": "string",
561
+ "description": "The batch scrape job id returned by firecrawl_batch_scrape"
562
+ }
563
+ },
564
+ "required": ["id"]
565
+ }
566
+ },
567
+ {
568
+ "name": "crawl_active",
569
+ "description": "List all crawl jobs currently running for this account. No arguments. Useful for monitoring without needing to track job ids yourself.",
570
+ "method": "GET",
571
+ "path": "/crawl/active",
572
+ "input_schema": {
573
+ "type": "object",
574
+ "properties": {}
575
+ }
576
+ },
577
+ {
578
+ "name": "crawl_params_preview",
579
+ "description": "Preview what a crawl request would do — Firecrawl resolves the prompt/include/exclude/sitemap config into the actual normalized parameters and an estimated page count without spending credits. Use before launching expensive crawls.",
580
+ "method": "POST",
581
+ "path": "/crawl/params-preview",
582
+ "input_schema": {
583
+ "type": "object",
584
+ "properties": {
585
+ "url": {
586
+ "type": "string",
587
+ "description": "The starting URL to crawl"
588
+ },
589
+ "prompt": {
590
+ "type": "string",
591
+ "description": "Natural-language description of what to crawl"
592
+ },
593
+ "limit": { "type": "integer" },
594
+ "maxDiscoveryDepth": { "type": "integer" },
595
+ "includePaths": { "type": "array", "items": { "type": "string" } },
596
+ "excludePaths": { "type": "array", "items": { "type": "string" } },
597
+ "sitemap": { "type": "string", "enum": ["skip", "include", "only"] },
598
+ "crawlEntireDomain": { "type": "boolean" },
599
+ "allowSubdomains": { "type": "boolean" },
600
+ "allowExternalLinks": { "type": "boolean" }
601
+ },
602
+ "required": ["url"]
275
603
  }
276
604
  }
277
605
  ]