n8n-nodes-firecrawl-v2 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/dist/credentials/FirecrawlApi.credentials.d.ts +9 -0
- package/dist/credentials/FirecrawlApi.credentials.js +48 -0
- package/dist/nodes/Firecrawl/Firecrawl.node.d.ts +5 -0
- package/dist/nodes/Firecrawl/Firecrawl.node.js +1027 -0
- package/dist/nodes/Firecrawl/firecrawl.svg +7 -0
- package/package.json +61 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 THE NEXOVA
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# n8n-nodes-firecrawl-v2
|
|
2
|
+
|
|
3
|
+
Community node for [Firecrawl](https://firecrawl.dev) **v2 API** on n8n. Scrape, crawl, map, search, and extract web content with full JavaScript rendering and AI-powered extraction.
|
|
4
|
+
|
|
5
|
+
Works with both **Firecrawl Cloud** and **self-hosted** instances.
|
|
6
|
+
|
|
7
|
+
Built by [THE NEXOVA](https://thenexova.com). Full guide: [n8n Firecrawl Node: Web Scraping, Crawling, and AI Extraction Guide](https://thenexova.com/n8n-firecrawl-node-web-scraping-crawling-and-ai-extraction-guide/)
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
### n8n Community Nodes
|
|
12
|
+
|
|
13
|
+
1. Go to **Settings** > **Community Nodes**
|
|
14
|
+
2. Select **Install**
|
|
15
|
+
3. Enter `n8n-nodes-firecrawl-v2`
|
|
16
|
+
4. Agree to the risks and click **Install**
|
|
17
|
+
|
|
18
|
+
### Manual (Self-Hosted)
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
cd ~/.n8n
|
|
22
|
+
npm install n8n-nodes-firecrawl-v2
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Restart n8n after installation.
|
|
26
|
+
|
|
27
|
+
## Credentials
|
|
28
|
+
|
|
29
|
+
| Field | Default | Description |
|
|
30
|
+
|-------|---------|-------------|
|
|
31
|
+
| Base URL | `https://api.firecrawl.dev/v2` | Change for self-hosted. **Must include `/v2`**. |
|
|
32
|
+
| API Key | | Your Firecrawl API key |
|
|
33
|
+
|
|
34
|
+
Authentication: `Authorization: Bearer {apiKey}`. Tested via `POST /scrape` on `https://example.com`.
|
|
35
|
+
|
|
36
|
+
## Operations
|
|
37
|
+
|
|
38
|
+
### 1. Scrape
|
|
39
|
+
|
|
40
|
+
Scrape content from a single URL with JS rendering.
|
|
41
|
+
|
|
42
|
+
**Endpoint:** `POST /scrape`
|
|
43
|
+
|
|
44
|
+
| Parameter | Type | Default | Description |
|
|
45
|
+
|-----------|------|---------|-------------|
|
|
46
|
+
| `url` | String | | Target URL (required) |
|
|
47
|
+
|
|
48
|
+
**Scrape Options:**
|
|
49
|
+
|
|
50
|
+
| Parameter | Default | Description |
|
|
51
|
+
|-----------|---------|-------------|
|
|
52
|
+
| `formats` | `markdown` | `markdown`, `html`, `rawHtml`, `links`, `screenshot`, `json`, `summary`, `images`, `audio`, `changeTracking` |
|
|
53
|
+
| `onlyMainContent` | `true` | Strip headers, nav, footers |
|
|
54
|
+
| `includeTags` | | CSS selectors to keep (e.g., `article, .content`) |
|
|
55
|
+
| `excludeTags` | | CSS selectors to remove (e.g., `nav, .sidebar`) |
|
|
56
|
+
| `waitFor` | `0` | Wait for JS render (ms). Increase for SPA pages. |
|
|
57
|
+
| `timeout` | `30000` | Request timeout (ms), max 300,000 |
|
|
58
|
+
| `mobile` | `false` | Emulate mobile viewport |
|
|
59
|
+
| `blockAds` | `true` | Block ads and cookie popups |
|
|
60
|
+
| `proxy` | `auto` | Proxy: `auto`, `basic`, `enhanced` |
|
|
61
|
+
| `locationCountry` | | ISO country code (e.g., `VN`, `US`) |
|
|
62
|
+
| `locationLanguages` | | Locale codes (e.g., `vi-VN, en-US`) |
|
|
63
|
+
|
|
64
|
+
**Sample output:**
|
|
65
|
+
|
|
66
|
+
```json
|
|
67
|
+
{
|
|
68
|
+
"markdown": "# Page Title\n\nExtracted content...",
|
|
69
|
+
"metadata": {
|
|
70
|
+
"title": "Page Title",
|
|
71
|
+
"sourceURL": "https://example.com",
|
|
72
|
+
"statusCode": 200
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2. Crawl
|
|
78
|
+
|
|
79
|
+
Crawl an entire website. Async job with optional polling.
|
|
80
|
+
|
|
81
|
+
**Endpoint:** `POST /crawl`
|
|
82
|
+
|
|
83
|
+
| Parameter | Default | Description |
|
|
84
|
+
|-----------|---------|-------------|
|
|
85
|
+
| `crawlUrl` | | Starting URL (required) |
|
|
86
|
+
| `waitForCompletion` | `false` | Poll until job finishes |
|
|
87
|
+
| `maxPollTime` | `300` | Max wait in seconds |
|
|
88
|
+
|
|
89
|
+
**Crawl Options:**
|
|
90
|
+
|
|
91
|
+
| Parameter | Default | Description |
|
|
92
|
+
|-----------|---------|-------------|
|
|
93
|
+
| `limit` | `100` | Max pages |
|
|
94
|
+
| `maxDiscoveryDepth` | `2` | Max link depth |
|
|
95
|
+
| `includePaths` | | Regex patterns to include (e.g., `/blog/*`) |
|
|
96
|
+
| `excludePaths` | | Regex patterns to exclude (e.g., `/admin/*`) |
|
|
97
|
+
| `sitemap` | `include` | `include`, `skip`, or `only` |
|
|
98
|
+
| `crawlEntireDomain` | `false` | Follow sibling/parent links |
|
|
99
|
+
| `allowExternalLinks` | `false` | Follow external links |
|
|
100
|
+
| `allowSubdomains` | `false` | Crawl subdomains |
|
|
101
|
+
| `delay` | `0` | Seconds between requests (forces concurrency=1) |
|
|
102
|
+
| `formats` | `markdown` | Output format per page |
|
|
103
|
+
| `onlyMainContent` | `true` | Strip boilerplate |
|
|
104
|
+
|
|
105
|
+
> When `waitForCompletion` is off, output only contains the job `id`. Use **Get Crawl Status** to fetch results. Polling interval: 2 seconds.
|
|
106
|
+
|
|
107
|
+
### 3. Get Crawl Status
|
|
108
|
+
|
|
109
|
+
**Endpoint:** `GET /crawl/{crawlId}` | **Parameter:** `crawlId` (job ID)
|
|
110
|
+
|
|
111
|
+
### 4. Cancel Crawl
|
|
112
|
+
|
|
113
|
+
**Endpoint:** `DELETE /crawl/{crawlId}` | **Parameter:** `cancelCrawlId` (job ID)
|
|
114
|
+
|
|
115
|
+
### 5. Map
|
|
116
|
+
|
|
117
|
+
Discover all URLs on a website without scraping content. Faster than Crawl.
|
|
118
|
+
|
|
119
|
+
**Endpoint:** `POST /map`
|
|
120
|
+
|
|
121
|
+
| Parameter | Default | Description |
|
|
122
|
+
|-----------|---------|-------------|
|
|
123
|
+
| `mapUrl` | | Starting URL (required) |
|
|
124
|
+
| `search` | | Search query to rank by relevance |
|
|
125
|
+
| `includeSubdomains` | `true` | Include subdomain URLs |
|
|
126
|
+
| `limit` | `5000` | Max URLs (max: 100,000) |
|
|
127
|
+
| `ignoreQueryParameters` | `true` | Deduplicate by stripping query strings |
|
|
128
|
+
| `ignoreCache` | `false` | Bypass sitemap cache |
|
|
129
|
+
|
|
130
|
+
### 6. Search
|
|
131
|
+
|
|
132
|
+
Web search with optional page scraping.
|
|
133
|
+
|
|
134
|
+
**Endpoint:** `POST /search`
|
|
135
|
+
|
|
136
|
+
| Parameter | Default | Description |
|
|
137
|
+
|-----------|---------|-------------|
|
|
138
|
+
| `searchQuery` | | Keywords, max 500 chars (required) |
|
|
139
|
+
| `limit` | `5` | Results count (1-100) |
|
|
140
|
+
| `country` | `US` | ISO country code |
|
|
141
|
+
| `tbs` | Any Time | Time filter: past hour/day/week/month/year |
|
|
142
|
+
| `formats` | `markdown` | Content format for results |
|
|
143
|
+
| `onlyMainContent` | `true` | Strip boilerplate |
|
|
144
|
+
|
|
145
|
+
### 7. Extract
|
|
146
|
+
|
|
147
|
+
AI-powered structured data extraction using natural language prompts.
|
|
148
|
+
|
|
149
|
+
**Endpoint:** `POST /extract`
|
|
150
|
+
|
|
151
|
+
| Parameter | Default | Description |
|
|
152
|
+
|-----------|---------|-------------|
|
|
153
|
+
| `extractUrls` | | Comma-separated URLs (glob patterns supported: `https://example.com/*`) |
|
|
154
|
+
| `extractPrompt` | | Natural language instruction |
|
|
155
|
+
| `extractSchema` | | Optional JSON Schema for output structure |
|
|
156
|
+
| `extractWaitForCompletion` | `true` | Wait for results (**defaults ON**, unlike Crawl/Batch) |
|
|
157
|
+
| `extractMaxPollTime` | `300` | Max wait in seconds |
|
|
158
|
+
|
|
159
|
+
**Extract Options:**
|
|
160
|
+
|
|
161
|
+
| Parameter | Default | Description |
|
|
162
|
+
|-----------|---------|-------------|
|
|
163
|
+
| `enableWebSearch` | `false` | Use web search for additional data |
|
|
164
|
+
| `showSources` | `false` | Include source URLs |
|
|
165
|
+
|
|
166
|
+
**Example:**
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
Prompt: "Extract company name, phone, address from this page"
|
|
170
|
+
Schema: {
|
|
171
|
+
"type": "object",
|
|
172
|
+
"properties": {
|
|
173
|
+
"company_name": { "type": "string" },
|
|
174
|
+
"phone": { "type": "string" },
|
|
175
|
+
"address": { "type": "string" }
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### 8. Get Extract Status
|
|
181
|
+
|
|
182
|
+
**Endpoint:** `GET /extract/{extractId}` | **Parameter:** `extractId` (job ID)
|
|
183
|
+
|
|
184
|
+
### 9. Batch Scrape
|
|
185
|
+
|
|
186
|
+
Scrape multiple URLs asynchronously.
|
|
187
|
+
|
|
188
|
+
**Endpoint:** `POST /batch/scrape`
|
|
189
|
+
|
|
190
|
+
| Parameter | Default | Description |
|
|
191
|
+
|-----------|---------|-------------|
|
|
192
|
+
| `batchUrls` | | Comma-separated URLs |
|
|
193
|
+
| `batchWaitForCompletion` | `false` | Wait for all to finish |
|
|
194
|
+
| `batchMaxPollTime` | `300` | Max wait in seconds |
|
|
195
|
+
|
|
196
|
+
Options: `formats`, `onlyMainContent`, `maxConcurrency`.
|
|
197
|
+
|
|
198
|
+
### 10. Get Batch Scrape Status
|
|
199
|
+
|
|
200
|
+
**Endpoint:** `GET /batch/scrape/{batchScrapeId}` | **Parameter:** `batchScrapeId` (job ID)
|
|
201
|
+
|
|
202
|
+
## Technical Notes
|
|
203
|
+
|
|
204
|
+
- **Async operations** (Crawl, Extract, Batch Scrape) return a job ID by default. Enable `waitForCompletion` to get results inline. Polling interval: 2 seconds.
|
|
205
|
+
- **Extract defaults to `waitForCompletion: true`**, while Crawl and Batch Scrape default to `false`.
|
|
206
|
+
- **Scrape supports 10 formats** (including `json`, `summary`, `audio`). Crawl, Search, and Batch Scrape support 5 basic formats.
|
|
207
|
+
- **Comma-separated inputs:** `includeTags`, `excludeTags`, `includePaths`, `excludePaths`, `extractUrls`, `batchUrls` all accept comma-separated lists.
|
|
208
|
+
- **Self-hosted Base URL** must include `/v2` (e.g., `http://firecrawl:3002/v2`).
|
|
209
|
+
- **Error handling:** Supports `continueOnFail`. On error, output is `{ "error": "message" }`.
|
|
210
|
+
|
|
211
|
+
## Workflow Examples
|
|
212
|
+
|
|
213
|
+
**Competitive intelligence:**
|
|
214
|
+
|
|
215
|
+
```
|
|
216
|
+
Schedule Trigger (weekly)
|
|
217
|
+
-> Firecrawl: Map (competitor URL)
|
|
218
|
+
-> Firecrawl: Batch Scrape (URLs from Map)
|
|
219
|
+
-> Code Node (diff with last week)
|
|
220
|
+
-> Google Sheets + Slack notification
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**AI data extraction:**
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
Manual Trigger
|
|
227
|
+
-> Firecrawl: Extract (directory URL, prompt, schema)
|
|
228
|
+
-> Google Sheets: Append rows
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
**Content monitoring:**
|
|
232
|
+
|
|
233
|
+
```
|
|
234
|
+
Schedule Trigger (daily)
|
|
235
|
+
-> Firecrawl: Scrape (formats: changeTracking)
|
|
236
|
+
-> IF (changes detected) -> Email alert
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Compatibility
|
|
240
|
+
|
|
241
|
+
- n8n: >= 1.0.0
|
|
242
|
+
- Firecrawl API: v2
|
|
243
|
+
- Tested with self-hosted Firecrawl and Firecrawl Cloud
|
|
244
|
+
|
|
245
|
+
## About
|
|
246
|
+
|
|
247
|
+
[THE NEXOVA](https://thenexova.com) builds automation infrastructure for businesses. Need custom n8n nodes, self-hosted Firecrawl deployment, or scraping workflow design? [Get in touch](https://thenexova.com/contact/).
|
|
248
|
+
|
|
249
|
+
## License
|
|
250
|
+
|
|
251
|
+
[MIT](LICENSE)
|
|
package/dist/credentials/FirecrawlApi.credentials.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// Compiled type declarations (.d.ts) for the Firecrawl API credential type.
import { IAuthenticateGeneric, ICredentialTestRequest, ICredentialType, INodeProperties } from 'n8n-workflow';
/**
 * n8n credential type for the Firecrawl v2 API.
 * Field values are assigned in the compiled implementation
 * (FirecrawlApi.credentials.js).
 */
export declare class FirecrawlApi implements ICredentialType {
    /** Machine-readable credential identifier. */
    name: string;
    /** Human-readable name shown in the n8n credentials UI. */
    displayName: string;
    /** Link to the provider's documentation. */
    documentationUrl: string;
    /** User-editable credential fields (base URL and API key). */
    properties: INodeProperties[];
    /** How the credential is injected into requests (Bearer auth header). */
    authenticate: IAuthenticateGeneric;
    /** Request n8n issues to verify the credential works. */
    test: ICredentialTestRequest;
}
|
|
package/dist/credentials/FirecrawlApi.credentials.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
// Standard tsc CommonJS emit preamble.
"use strict";
// Mark the module so ES-module interop helpers treat it as a transpiled module.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the named export; assigned after the class definition below.
exports.FirecrawlApi = void 0;
|
|
4
|
+
/**
 * n8n credential type for the Firecrawl v2 API.
 *
 * Exposes two user-editable fields (base URL and API key), injects the key
 * as a Bearer token on every request, and verifies the credential with a
 * minimal markdown scrape of example.com.
 */
class FirecrawlApi {
    constructor() {
        // Identity as shown in the n8n credentials UI.
        this.name = 'firecrawlApi';
        this.displayName = 'Firecrawl API';
        this.documentationUrl = 'https://docs.firecrawl.dev';

        // Base URL must include the API version segment (/v2); the default
        // targets Firecrawl Cloud, self-hosted users override it.
        const baseUrlField = {
            displayName: 'Base URL',
            name: 'baseUrl',
            type: 'string',
            default: 'https://api.firecrawl.dev/v2',
            placeholder: 'https://api.firecrawl.dev/v2',
            description: 'Base URL including API version. For self-hosted instances, use your server URL (e.g. http://localhost:3002/v2)',
        };
        // Secret key; typeOptions.password masks the input in the UI.
        const apiKeyField = {
            displayName: 'API Key',
            name: 'apiKey',
            type: 'string',
            typeOptions: { password: true },
            default: '',
            description: 'Your Firecrawl API key',
        };
        this.properties = [baseUrlField, apiKeyField];

        // Generic auth: every request carries the key as a Bearer token.
        this.authenticate = {
            type: 'generic',
            properties: {
                headers: {
                    Authorization: '=Bearer {{$credentials.apiKey}}',
                },
            },
        };

        // Credential test: POST /scrape against example.com with markdown output.
        this.test = {
            request: {
                baseURL: '={{$credentials.baseUrl}}',
                url: '/scrape',
                method: 'POST',
                body: {
                    url: 'https://example.com',
                    formats: ['markdown'],
                },
            },
        };
    }
}
// Named CommonJS export; completes the binding pre-declared in the preamble.
exports.FirecrawlApi = FirecrawlApi;