n8n-nodes-firecrawl-v2 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 THE NEXOVA
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,251 @@
1
+ # n8n-nodes-firecrawl-v2
2
+
3
+ Community node for [Firecrawl](https://firecrawl.dev) **v2 API** on n8n. Scrape, crawl, map, search, and extract web content with full JavaScript rendering and AI-powered extraction.
4
+
5
+ Works with both **Firecrawl Cloud** and **self-hosted** instances.
6
+
7
+ Built by [THE NEXOVA](https://thenexova.com). Full guide: [n8n Firecrawl Node: Web Scraping, Crawling, and AI Extraction Guide](https://thenexova.com/n8n-firecrawl-node-web-scraping-crawling-and-ai-extraction-guide/)
8
+
9
+ ## Installation
10
+
11
+ ### n8n Community Nodes
12
+
13
+ 1. Go to **Settings** > **Community Nodes**
14
+ 2. Select **Install**
15
+ 3. Enter `n8n-nodes-firecrawl-v2`
16
+ 4. Agree to the risks and click **Install**
17
+
18
+ ### Manual (Self-Hosted)
19
+
20
+ ```bash
21
+ cd ~/.n8n
22
+ npm install n8n-nodes-firecrawl-v2
23
+ ```
24
+
25
+ Restart n8n after installation.
26
+
27
+ ## Credentials
28
+
29
+ | Field | Default | Description |
30
+ |-------|---------|-------------|
31
+ | Base URL | `https://api.firecrawl.dev/v2` | Change for self-hosted. **Must include `/v2`**. |
32
+ | API Key | | Your Firecrawl API key |
33
+
34
+ Authentication: `Authorization: Bearer {apiKey}`. Tested via `POST /scrape` on `https://example.com`.
35
+
36
+ ## Operations
37
+
38
+ ### 1. Scrape
39
+
40
+ Scrape content from a single URL with JS rendering.
41
+
42
+ **Endpoint:** `POST /scrape`
43
+
44
+ | Parameter | Type | Default | Description |
45
+ |-----------|------|---------|-------------|
46
+ | `url` | String | | Target URL (required) |
47
+
48
+ **Scrape Options:**
49
+
50
+ | Parameter | Default | Description |
51
+ |-----------|---------|-------------|
52
+ | `formats` | `markdown` | `markdown`, `html`, `rawHtml`, `links`, `screenshot`, `json`, `summary`, `images`, `audio`, `changeTracking` |
53
+ | `onlyMainContent` | `true` | Strip headers, nav, footers |
54
+ | `includeTags` | | CSS selectors to keep (e.g., `article, .content`) |
55
+ | `excludeTags` | | CSS selectors to remove (e.g., `nav, .sidebar`) |
56
+ | `waitFor` | `0` | Wait for JS render (ms). Increase for SPA pages. |
57
+ | `timeout` | `30000` | Request timeout (ms), max 300,000 |
58
+ | `mobile` | `false` | Emulate mobile viewport |
59
+ | `blockAds` | `true` | Block ads and cookie popups |
60
+ | `proxy` | `auto` | Proxy: `auto`, `basic`, `enhanced` |
61
+ | `locationCountry` | | ISO country code (e.g., `VN`, `US`) |
62
+ | `locationLanguages` | | Locale codes (e.g., `vi-VN, en-US`) |
63
+
64
+ **Sample output:**
65
+
66
+ ```json
67
+ {
68
+ "markdown": "# Page Title\n\nExtracted content...",
69
+ "metadata": {
70
+ "title": "Page Title",
71
+ "sourceURL": "https://example.com",
72
+ "statusCode": 200
73
+ }
74
+ }
75
+ ```
76
+
77
+ ### 2. Crawl
78
+
79
+ Crawl an entire website. Async job with optional polling.
80
+
81
+ **Endpoint:** `POST /crawl`
82
+
83
+ | Parameter | Default | Description |
84
+ |-----------|---------|-------------|
85
+ | `crawlUrl` | | Starting URL (required) |
86
+ | `waitForCompletion` | `false` | Poll until job finishes |
87
+ | `maxPollTime` | `300` | Max wait in seconds |
88
+
89
+ **Crawl Options:**
90
+
91
+ | Parameter | Default | Description |
92
+ |-----------|---------|-------------|
93
+ | `limit` | `100` | Max pages |
94
+ | `maxDiscoveryDepth` | `2` | Max link depth |
95
+ | `includePaths` | | Regex patterns to include (e.g., `/blog/*`) |
96
+ | `excludePaths` | | Regex patterns to exclude (e.g., `/admin/*`) |
97
+ | `sitemap` | `include` | `include`, `skip`, or `only` |
98
+ | `crawlEntireDomain` | `false` | Follow sibling/parent links |
99
+ | `allowExternalLinks` | `false` | Follow external links |
100
+ | `allowSubdomains` | `false` | Crawl subdomains |
101
+ | `delay` | `0` | Seconds between requests (forces concurrency=1) |
102
+ | `formats` | `markdown` | Output format per page |
103
+ | `onlyMainContent` | `true` | Strip boilerplate |
104
+
105
+ > When `waitForCompletion` is off, output only contains the job `id`. Use **Get Crawl Status** to fetch results. Polling interval: 2 seconds.
106
+
107
+ ### 3. Get Crawl Status
108
+
109
+ **Endpoint:** `GET /crawl/{crawlId}` | **Parameter:** `crawlId` (job ID)
110
+
111
+ ### 4. Cancel Crawl
112
+
113
+ **Endpoint:** `DELETE /crawl/{crawlId}` | **Parameter:** `cancelCrawlId` (job ID)
114
+
115
+ ### 5. Map
116
+
117
+ Discover all URLs on a website without scraping content. Faster than Crawl.
118
+
119
+ **Endpoint:** `POST /map`
120
+
121
+ | Parameter | Default | Description |
122
+ |-----------|---------|-------------|
123
+ | `mapUrl` | | Starting URL (required) |
124
+ | `search` | | Search query to rank by relevance |
125
+ | `includeSubdomains` | `true` | Include subdomain URLs |
126
+ | `limit` | `5000` | Max URLs (max: 100,000) |
127
+ | `ignoreQueryParameters` | `true` | Deduplicate by stripping query strings |
128
+ | `ignoreCache` | `false` | Bypass sitemap cache |
129
+
130
+ ### 6. Search
131
+
132
+ Web search with optional page scraping.
133
+
134
+ **Endpoint:** `POST /search`
135
+
136
+ | Parameter | Default | Description |
137
+ |-----------|---------|-------------|
138
+ | `searchQuery` | | Keywords, max 500 chars (required) |
139
+ | `limit` | `5` | Results count (1-100) |
140
+ | `country` | `US` | ISO country code |
141
+ | `tbs` | Any Time | Time filter: past hour/day/week/month/year |
142
+ | `formats` | `markdown` | Content format for results |
143
+ | `onlyMainContent` | `true` | Strip boilerplate |
144
+
145
+ ### 7. Extract
146
+
147
+ AI-powered structured data extraction using natural language prompts.
148
+
149
+ **Endpoint:** `POST /extract`
150
+
151
+ | Parameter | Default | Description |
152
+ |-----------|---------|-------------|
153
+ | `extractUrls` | | Comma-separated URLs (glob patterns supported: `https://example.com/*`) |
154
+ | `extractPrompt` | | Natural language instruction |
155
+ | `extractSchema` | | Optional JSON Schema for output structure |
156
+ | `extractWaitForCompletion` | `true` | Wait for results (**defaults ON**, unlike Crawl/Batch) |
157
+ | `extractMaxPollTime` | `300` | Max wait in seconds |
158
+
159
+ **Extract Options:**
160
+
161
+ | Parameter | Default | Description |
162
+ |-----------|---------|-------------|
163
+ | `enableWebSearch` | `false` | Use web search for additional data |
164
+ | `showSources` | `false` | Include source URLs |
165
+
166
+ **Example:**
167
+
168
+ ```
169
+ Prompt: "Extract company name, phone, address from this page"
170
+ Schema: {
171
+ "type": "object",
172
+ "properties": {
173
+ "company_name": { "type": "string" },
174
+ "phone": { "type": "string" },
175
+ "address": { "type": "string" }
176
+ }
177
+ }
178
+ ```
179
+
180
+ ### 8. Get Extract Status
181
+
182
+ **Endpoint:** `GET /extract/{extractId}` | **Parameter:** `extractId` (job ID)
183
+
184
+ ### 9. Batch Scrape
185
+
186
+ Scrape multiple URLs asynchronously.
187
+
188
+ **Endpoint:** `POST /batch/scrape`
189
+
190
+ | Parameter | Default | Description |
191
+ |-----------|---------|-------------|
192
+ | `batchUrls` | | Comma-separated URLs |
193
+ | `batchWaitForCompletion` | `false` | Wait for all to finish |
194
+ | `batchMaxPollTime` | `300` | Max wait in seconds |
195
+
196
+ Options: `formats`, `onlyMainContent`, `maxConcurrency`.
197
+
198
+ ### 10. Get Batch Scrape Status
199
+
200
+ **Endpoint:** `GET /batch/scrape/{batchScrapeId}` | **Parameter:** `batchScrapeId` (job ID)
201
+
202
+ ## Technical Notes
203
+
204
+ - **Async operations** (Crawl, Extract, Batch Scrape) return a job ID by default. Enable `waitForCompletion` to get results inline. Polling interval: 2 seconds.
205
+ - **Extract defaults to `waitForCompletion: true`**, while Crawl and Batch Scrape default to `false`.
206
+ - **Scrape supports 10 formats** (including `json`, `summary`, `audio`). Crawl, Search, and Batch Scrape support 5 basic formats.
207
+ - **Comma-separated inputs:** `includeTags`, `excludeTags`, `includePaths`, `excludePaths`, `extractUrls`, `batchUrls` all accept comma-separated lists.
208
+ - **Self-hosted Base URL** must include `/v2` (e.g., `http://firecrawl:3002/v2`).
209
+ - **Error handling:** Supports `continueOnFail`. On error, output is `{ "error": "message" }`.
210
+
211
+ ## Workflow Examples
212
+
213
+ **Competitive intelligence:**
214
+
215
+ ```
216
+ Schedule Trigger (weekly)
217
+ -> Firecrawl: Map (competitor URL)
218
+ -> Firecrawl: Batch Scrape (URLs from Map)
219
+ -> Code Node (diff with last week)
220
+ -> Google Sheets + Slack notification
221
+ ```
222
+
223
+ **AI data extraction:**
224
+
225
+ ```
226
+ Manual Trigger
227
+ -> Firecrawl: Extract (directory URL, prompt, schema)
228
+ -> Google Sheets: Append rows
229
+ ```
230
+
231
+ **Content monitoring:**
232
+
233
+ ```
234
+ Schedule Trigger (daily)
235
+ -> Firecrawl: Scrape (formats: changeTracking)
236
+ -> IF (changes detected) -> Email alert
237
+ ```
238
+
239
+ ## Compatibility
240
+
241
+ - n8n: >= 1.0.0
242
+ - Firecrawl API: v2
243
+ - Tested with self-hosted Firecrawl and Firecrawl Cloud
244
+
245
+ ## About
246
+
247
+ [THE NEXOVA](https://thenexova.com) builds automation infrastructure for businesses. Need custom n8n nodes, self-hosted Firecrawl deployment, or scraping workflow design? [Get in touch](https://thenexova.com/contact/).
248
+
249
+ ## License
250
+
251
+ [MIT](LICENSE)
@@ -0,0 +1,9 @@
1
import { IAuthenticateGeneric, ICredentialTestRequest, ICredentialType, INodeProperties } from 'n8n-workflow';
/**
 * Type declaration for the Firecrawl API credential type used by the
 * n8n-nodes-firecrawl-v2 community node.
 *
 * The compiled implementation (see the accompanying .js file) sets:
 * `name` = 'firecrawlApi', `displayName` = 'Firecrawl API', and
 * `documentationUrl` = 'https://docs.firecrawl.dev'.
 */
export declare class FirecrawlApi implements ICredentialType {
    /** Internal identifier nodes use to reference this credential type. */
    name: string;
    /** Human-readable label shown in the n8n credentials UI. */
    displayName: string;
    /** Link to external documentation shown alongside the credential form. */
    documentationUrl: string;
    /** User-editable fields: Base URL (defaults to https://api.firecrawl.dev/v2) and API Key. */
    properties: INodeProperties[];
    /** Generic authentication: injects an `Authorization: Bearer <apiKey>` header. */
    authenticate: IAuthenticateGeneric;
    /** Credential test definition: a `POST /scrape` of https://example.com against the configured base URL. */
    test: ICredentialTestRequest;
}
@@ -0,0 +1,48 @@
1
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.FirecrawlApi = void 0;
/**
 * n8n credential type for the Firecrawl v2 API.
 *
 * Exposes two user-editable fields (Base URL and API Key), authenticates
 * every request with a Bearer token header, and validates the credential
 * by issuing a minimal `POST /scrape` for https://example.com against the
 * configured base URL.
 */
class FirecrawlApi {
    constructor() {
        // Identifier other nodes use to reference this credential type.
        this.name = 'firecrawlApi';
        // Label shown in the n8n credentials UI.
        this.displayName = 'Firecrawl API';
        this.documentationUrl = 'https://docs.firecrawl.dev';
        // Base URL must include the API version segment (/v2), both for the
        // cloud endpoint and for self-hosted deployments.
        const baseUrlField = {
            displayName: 'Base URL',
            name: 'baseUrl',
            type: 'string',
            default: 'https://api.firecrawl.dev/v2',
            placeholder: 'https://api.firecrawl.dev/v2',
            description: 'Base URL including API version. For self-hosted instances, use your server URL (e.g. http://localhost:3002/v2)',
        };
        // Stored masked in the UI via typeOptions.password.
        const apiKeyField = {
            displayName: 'API Key',
            name: 'apiKey',
            type: 'string',
            typeOptions: { password: true },
            default: '',
            description: 'Your Firecrawl API key',
        };
        this.properties = [baseUrlField, apiKeyField];
        // The leading '=' marks the header value as an n8n expression that is
        // resolved against the stored credential at request time.
        this.authenticate = {
            type: 'generic',
            properties: {
                headers: {
                    Authorization: '=Bearer {{$credentials.apiKey}}',
                },
            },
        };
        // Credential test: scrape example.com as markdown, which exercises
        // both the base URL and the API key in a single call.
        this.test = {
            request: {
                baseURL: '={{$credentials.baseUrl}}',
                url: '/scrape',
                method: 'POST',
                body: {
                    url: 'https://example.com',
                    formats: ['markdown'],
                },
            },
        };
    }
}
exports.FirecrawlApi = FirecrawlApi;
@@ -0,0 +1,5 @@
1
import { IExecuteFunctions, INodeExecutionData, INodeType, INodeTypeDescription } from 'n8n-workflow';
/**
 * Type declaration for the Firecrawl community node.
 *
 * Per the package README, the node exposes Scrape, Crawl, Map, Search,
 * Extract, and Batch Scrape operations (plus status/cancel helpers)
 * against the Firecrawl v2 API.
 */
export declare class Firecrawl implements INodeType {
    /** Node metadata and UI parameter definitions consumed by n8n. */
    description: INodeTypeDescription;
    /** Executes the configured Firecrawl operation and returns the resulting items. */
    execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]>;
}