n8n-nodes-firecrawl-latest 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +19 -0
- package/README.md +232 -0
- package/dist/credentials/FirecrawlApi.credentials.js +22 -0
- package/dist/icons/flames-icon.svg +144 -0
- package/dist/nodes/Firecrawl/FireCrawlScraper.node.js +156 -0
- package/dist/nodes/Firecrawl/resources/batchScrape/batchScrape.methods.js +253 -0
- package/dist/nodes/Firecrawl/resources/batchScrape/batchScrape.properties.js +205 -0
- package/dist/nodes/Firecrawl/resources/crawler/crawler.methods.js +281 -0
- package/dist/nodes/Firecrawl/resources/crawler/crawler.properties.js +313 -0
- package/dist/nodes/Firecrawl/resources/deepResearch/deepResearch.methods.js +171 -0
- package/dist/nodes/Firecrawl/resources/deepResearch/deepResearch.properties.js +200 -0
- package/dist/nodes/Firecrawl/resources/extract/extract.methods.js +424 -0
- package/dist/nodes/Firecrawl/resources/extract/extract.properties.js +339 -0
- package/dist/nodes/Firecrawl/resources/llmsText/llmsText.methods.js +124 -0
- package/dist/nodes/Firecrawl/resources/llmsText/llmsText.properties.js +87 -0
- package/dist/nodes/Firecrawl/resources/map/map.methods.js +52 -0
- package/dist/nodes/Firecrawl/resources/map/map.properties.js +22 -0
- package/dist/nodes/Firecrawl/resources/scrape/scrape.methods.js +203 -0
- package/dist/nodes/Firecrawl/resources/scrape/scrape.properties.js +348 -0
- package/dist/nodes/HttpBin/HttpBin.node.js +59 -0
- package/dist/nodes/HttpBin/HttpVerbDescription.js +246 -0
- package/dist/nodes/HttpBin/httpbin.svg +18 -0
- package/index.js +7 -0
- package/package.json +58 -0
@@ -0,0 +1,253 @@
|
|
1
|
+
"use strict";
// TypeScript-compiled CommonJS preamble: interop helper so a default-exported
// ES module and a plain CommonJS module can both be consumed via `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.batchScrapeMethods = void 0;
// Firecrawl SDK client (default export) and n8n workflow error type.
const firecrawl_js_1 = __importDefault(require("@mendable/firecrawl-js"));
const n8n_workflow_1 = require("n8n-workflow");
|
9
|
+
// Helper function to parse URLs input.
// Accepts a JSON array in string form ('["a","b"]'), a comma-separated list
// ("a, b"), or a single URL, and normalizes it to an array of trimmed,
// non-empty URL strings.
function parseUrlsInput(urlsInput) {
    if (!urlsInput)
        return [];
    const trimmedInput = urlsInput.trim();
    // Check if input is already a JSON array
    if (trimmedInput.startsWith('[') && trimmedInput.endsWith(']')) {
        try {
            const parsed = JSON.parse(trimmedInput);
            if (Array.isArray(parsed)) {
                // String(url) instead of url.toString(): tolerates null/undefined
                // entries instead of throwing. Blank entries are dropped.
                return parsed
                    .map((url) => String(url).trim())
                    .filter((url) => url.length > 0);
            }
        }
        catch (e) {
            // If JSON parsing fails, continue with other methods
        }
    }
    // Split by comma if it's a comma-separated list; drop empty segments
    // produced by stray or trailing commas (e.g. "a,,b" or "a,b,").
    if (trimmedInput.includes(',')) {
        return trimmedInput
            .split(',')
            .map((url) => url.trim())
            .filter((url) => url.length > 0);
    }
    // Single URL case
    return [trimmedInput];
}
|
32
|
+
// Helper function to generate a JSON Schema from an example value.
// Recursively infers "type" for primitives, "items" for arrays (sampled from
// the first element; empty arrays default to string items), and
// "properties"/"required" for objects (every observed key is required).
function generateSchemaFromExample(jsonExample) {
    if (jsonExample === null) {
        return { type: 'null' };
    }
    if (Array.isArray(jsonExample)) {
        // Empty arrays default to string items; otherwise use the first
        // element as the sample for the items schema.
        const items = jsonExample.length > 0
            ? generateSchemaFromExample(jsonExample[0])
            : { type: 'string' };
        return { type: 'array', items };
    }
    switch (typeof jsonExample) {
        case 'string':
            return { type: 'string' };
        case 'number':
            return { type: 'number' };
        case 'boolean':
            return { type: 'boolean' };
        case 'object': {
            const properties = {};
            for (const key of Object.keys(jsonExample)) {
                properties[key] = generateSchemaFromExample(jsonExample[key]);
            }
            return {
                type: 'object',
                properties,
                required: Object.keys(properties),
            };
        }
        default:
            // Default fallback for unsupported types (undefined, symbol, ...)
            return { type: 'string' };
    }
}
|
74
|
+
exports.batchScrapeMethods = {
|
75
|
+
async execute() {
|
76
|
+
const items = this.getInputData();
|
77
|
+
const returnData = [];
|
78
|
+
// Get credentials
|
79
|
+
const credentials = await this.getCredentials('firecrawlApi');
|
80
|
+
const apiKey = credentials.apiKey;
|
81
|
+
// Initialize Firecrawl app
|
82
|
+
const firecrawl = new firecrawl_js_1.default({ apiKey });
|
83
|
+
// Process each item
|
84
|
+
for (let i = 0; i < items.length; i++) {
|
85
|
+
try {
|
86
|
+
// Get parameters
|
87
|
+
const urlsInput = this.getNodeParameter('urls', i);
|
88
|
+
const urls = parseUrlsInput(urlsInput);
|
89
|
+
const outputFormats = this.getNodeParameter('outputFormats', i);
|
90
|
+
const operationMode = this.getNodeParameter('operationMode', i);
|
91
|
+
const includeExtract = this.getNodeParameter('includeExtract', i, false);
|
92
|
+
const enableDebugLogs = this.getNodeParameter('enableDebugLogs', i, false);
|
93
|
+
if (urls.length === 0) {
|
94
|
+
throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'No valid URLs provided', {
|
95
|
+
itemIndex: i,
|
96
|
+
});
|
97
|
+
}
|
98
|
+
// Prepare the options for batch scraping
|
99
|
+
const options = {
|
100
|
+
formats: outputFormats,
|
101
|
+
};
|
102
|
+
// Add extraction if needed
|
103
|
+
if (includeExtract) {
|
104
|
+
const extractionPrompt = this.getNodeParameter('extractionPrompt', i);
|
105
|
+
const schemaDefinitionType = this.getNodeParameter('schemaDefinitionType', i);
|
106
|
+
options.formats.push('extract');
|
107
|
+
options.extract = {
|
108
|
+
prompt: extractionPrompt,
|
109
|
+
};
|
110
|
+
// Add schema if selected
|
111
|
+
if (schemaDefinitionType === 'example') {
|
112
|
+
const jsonExample = JSON.parse(this.getNodeParameter('jsonExample', i));
|
113
|
+
const schema = generateSchemaFromExample(jsonExample);
|
114
|
+
options.extract.schema = schema;
|
115
|
+
}
|
116
|
+
else if (schemaDefinitionType === 'manual') {
|
117
|
+
const schema = JSON.parse(this.getNodeParameter('schemaDefinition', i));
|
118
|
+
options.extract.schema = schema;
|
119
|
+
}
|
120
|
+
}
|
121
|
+
// Log the batch scrape parameters if debug is enabled
|
122
|
+
if (enableDebugLogs) {
|
123
|
+
console.log('URLs:', urls);
|
124
|
+
console.log('Options:', JSON.stringify(options, null, 2));
|
125
|
+
}
|
126
|
+
// Handle different operation modes
|
127
|
+
if (operationMode === 'sync') {
|
128
|
+
// Synchronous mode - wait for the scraping to complete
|
129
|
+
const results = await firecrawl.batchScrapeUrls(urls, options);
|
130
|
+
// Log the results if debug is enabled
|
131
|
+
if (enableDebugLogs) {
|
132
|
+
console.log('Batch Scrape Results:', JSON.stringify(results, null, 2));
|
133
|
+
}
|
134
|
+
if (!results.success && 'error' in results) {
|
135
|
+
returnData.push({
|
136
|
+
json: {
|
137
|
+
success: results.success,
|
138
|
+
status: 'error',
|
139
|
+
error: results.error,
|
140
|
+
debug: enableDebugLogs ? { urls, options } : undefined,
|
141
|
+
},
|
142
|
+
});
|
143
|
+
}
|
144
|
+
else {
|
145
|
+
returnData.push({
|
146
|
+
json: {
|
147
|
+
success: results.success,
|
148
|
+
status: 'completed',
|
149
|
+
data: 'data' in results ? results.data : undefined,
|
150
|
+
creditsUsed: 'creditsUsed' in results ? results.creditsUsed : undefined,
|
151
|
+
total: 'total' in results ? results.total : undefined,
|
152
|
+
completed: 'completed' in results ? results.completed : undefined,
|
153
|
+
error: 'error' in results ? results.error : undefined,
|
154
|
+
debug: enableDebugLogs ? { urls, options } : undefined,
|
155
|
+
},
|
156
|
+
});
|
157
|
+
}
|
158
|
+
}
|
159
|
+
else {
|
160
|
+
// Asynchronous mode - start the process or check status
|
161
|
+
const jobId = this.getNodeParameter('jobId', i, '');
|
162
|
+
if (jobId && jobId.trim() !== '') {
|
163
|
+
// Check status of an existing job
|
164
|
+
const status = await firecrawl.checkBatchScrapeStatus(jobId);
|
165
|
+
// Log the status if debug is enabled
|
166
|
+
if (enableDebugLogs) {
|
167
|
+
console.log('Batch Scrape Status:', JSON.stringify(status, null, 2));
|
168
|
+
}
|
169
|
+
if (!status.success && 'error' in status) {
|
170
|
+
// It's an error response
|
171
|
+
returnData.push({
|
172
|
+
json: {
|
173
|
+
success: status.success,
|
174
|
+
status: 'error',
|
175
|
+
error: status.error,
|
176
|
+
jobId,
|
177
|
+
debug: enableDebugLogs ? { jobId } : undefined,
|
178
|
+
},
|
179
|
+
});
|
180
|
+
}
|
181
|
+
else {
|
182
|
+
// It's a successful status response
|
183
|
+
returnData.push({
|
184
|
+
json: {
|
185
|
+
success: status.success,
|
186
|
+
status: 'status' in status ? status.status : 'unknown',
|
187
|
+
data: 'data' in status ? status.data : undefined,
|
188
|
+
creditsUsed: 'creditsUsed' in status ? status.creditsUsed : undefined,
|
189
|
+
total: 'total' in status ? status.total : undefined,
|
190
|
+
completed: 'completed' in status ? status.completed : undefined,
|
191
|
+
expiresAt: 'expiresAt' in status ? status.expiresAt : undefined,
|
192
|
+
next: 'next' in status ? status.next : undefined,
|
193
|
+
error: 'error' in status ? status.error : undefined,
|
194
|
+
jobId,
|
195
|
+
debug: enableDebugLogs ? { jobId } : undefined,
|
196
|
+
},
|
197
|
+
});
|
198
|
+
}
|
199
|
+
}
|
200
|
+
else {
|
201
|
+
// Start a new asynchronous job
|
202
|
+
const job = await firecrawl.asyncBatchScrapeUrls(urls, options);
|
203
|
+
// Log the job if debug is enabled
|
204
|
+
if (enableDebugLogs) {
|
205
|
+
console.log('Batch Scrape Job:', JSON.stringify(job, null, 2));
|
206
|
+
}
|
207
|
+
if (!job.success && 'error' in job) {
|
208
|
+
// It's an error response
|
209
|
+
returnData.push({
|
210
|
+
json: {
|
211
|
+
success: job.success,
|
212
|
+
status: 'error',
|
213
|
+
error: job.error,
|
214
|
+
debug: enableDebugLogs ? { urls, options } : undefined,
|
215
|
+
},
|
216
|
+
});
|
217
|
+
}
|
218
|
+
else {
|
219
|
+
// It's a successful job response
|
220
|
+
returnData.push({
|
221
|
+
json: {
|
222
|
+
success: job.success,
|
223
|
+
status: 'started',
|
224
|
+
jobId: 'id' in job ? job.id : undefined,
|
225
|
+
url: 'url' in job ? job.url : undefined,
|
226
|
+
message: 'Batch scraping started successfully. Use the job ID to check status.',
|
227
|
+
debug: enableDebugLogs ? { urls, options } : undefined,
|
228
|
+
},
|
229
|
+
});
|
230
|
+
}
|
231
|
+
}
|
232
|
+
}
|
233
|
+
}
|
234
|
+
catch (error) {
|
235
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
236
|
+
console.error('Batch Scrape error:', errorMessage);
|
237
|
+
if (this.continueOnFail()) {
|
238
|
+
returnData.push({
|
239
|
+
json: {
|
240
|
+
success: false,
|
241
|
+
error: errorMessage,
|
242
|
+
},
|
243
|
+
});
|
244
|
+
continue;
|
245
|
+
}
|
246
|
+
throw new n8n_workflow_1.NodeOperationError(this.getNode(), error, {
|
247
|
+
itemIndex: i,
|
248
|
+
});
|
249
|
+
}
|
250
|
+
}
|
251
|
+
return [returnData];
|
252
|
+
},
|
253
|
+
};
|
@@ -0,0 +1,205 @@
|
|
1
|
+
"use strict";
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
+
exports.batchScrapeProperties = void 0;
|
4
|
+
// Fields for the Batch Scrape resource (n8n INodeProperties descriptors).
// All fields are gated on resource === 'batchScrape' via displayOptions.show.
const batchScrapeFields = [
    // The URL list to scrape; parsed by the methods module as either a
    // comma-separated list or a JSON array in string form.
    {
        displayName: 'URLs',
        name: 'urls',
        type: 'string',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
            },
        },
        default: '',
        required: true,
        description: 'The URLs to scrape. For multiple URLs, provide a comma-separated list or a JSON array in string format.',
        placeholder: 'https://example.com, https://example.org or ["https://example.com", "https://example.org"]',
    },
    // Which content formats Firecrawl should return for each URL.
    {
        displayName: 'Output Formats',
        name: 'outputFormats',
        type: 'multiOptions',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
            },
        },
        options: [
            {
                name: 'Full Page Screenshot',
                value: 'screenshot@fullPage',
                description: 'Return a screenshot of the entire page',
            },
            {
                name: 'HTML',
                value: 'html',
                description: 'Return the content in HTML format (with some cleaning)',
            },
            {
                name: 'Links',
                value: 'links',
                description: 'Return a list of links found on the page',
            },
            {
                name: 'Markdown',
                value: 'markdown',
                description: 'Return the content in Markdown format',
            },
            {
                name: 'Raw HTML',
                value: 'rawHtml',
                description: 'Return the raw HTML content with no modifications',
            },
            {
                name: 'Screenshot',
                value: 'screenshot',
                description: 'Return a screenshot of the visible part of the page',
            },
        ],
        default: ['markdown'],
        description: 'The formats in which to return the scraped content',
    },
    // Toggle that reveals the extraction-related fields below.
    {
        displayName: 'Include Extract',
        name: 'includeExtract',
        type: 'boolean',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
            },
        },
        default: false,
        description: 'Whether to include structured data extraction with the batch scrape',
    },
    // Free-text prompt guiding the LLM extraction; shown only when
    // includeExtract is enabled.
    {
        displayName: 'Extraction Prompt',
        name: 'extractionPrompt',
        type: 'string',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
                includeExtract: [true],
            },
        },
        default: '',
        required: true,
        description: 'The prompt to guide the extraction process',
        placeholder: 'Extract the title and description from each page',
    },
    // Chooses between schema-from-example generation and a hand-written
    // JSON Schema; selects which of the two JSON fields below is shown.
    {
        displayName: 'Schema Definition Type',
        name: 'schemaDefinitionType',
        type: 'options',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
                includeExtract: [true],
            },
        },
        options: [
            {
                name: 'Generate From JSON Example',
                value: 'example',
                description: 'Generate schema from a JSON example',
            },
            {
                name: 'Define Below',
                value: 'manual',
                description: 'Define schema manually in JSON Schema format',
            },
        ],
        default: 'manual',
        description: 'How to define the schema for extraction',
    },
    // Example JSON object from which a schema is inferred
    // (schemaDefinitionType === 'example').
    {
        displayName: 'JSON Example',
        name: 'jsonExample',
        type: 'json',
        typeOptions: {
            alwaysOpenEditWindow: true,
            rows: 8,
        },
        displayOptions: {
            show: {
                resource: ['batchScrape'],
                includeExtract: [true],
                schemaDefinitionType: ['example'],
            },
        },
        default: '{\n  "title": "Example Page Title",\n  "description": "This is an example page description"\n}',
        description: 'A JSON example that represents the data structure you want to extract',
        hint: 'Provide a JSON object that represents the structure you want to extract',
    },
    // Hand-written JSON Schema (schemaDefinitionType === 'manual').
    {
        displayName: 'Schema Definition',
        name: 'schemaDefinition',
        type: 'json',
        typeOptions: {
            alwaysOpenEditWindow: true,
            rows: 12,
        },
        displayOptions: {
            show: {
                resource: ['batchScrape'],
                includeExtract: [true],
                schemaDefinitionType: ['manual'],
            },
        },
        default: '{\n  "type": "object",\n  "properties": {\n    "title": {\n      "type": "string",\n      "description": "The title of the page"\n    },\n    "description": {\n      "type": "string",\n      "description": "The meta description or summary of the page"\n    }\n  },\n  "required": ["title", "description"]\n}',
        description: 'The schema definition in standard JSON Schema format. Define the structure you want to extract.',
        hint: 'Use standard JSON Schema format with "type", "properties", and optional "required" fields',
    },
    // Sync waits for completion; async returns a job ID to poll later.
    {
        displayName: 'Operation Mode',
        name: 'operationMode',
        type: 'options',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
            },
        },
        options: [
            {
                name: 'Synchronous',
                value: 'sync',
                description: 'Wait for the scraping to complete (suitable for smaller batches)',
            },
            {
                name: 'Asynchronous',
                value: 'async',
                description: 'Start the scraping process and return a job ID (suitable for larger batches)',
            },
        ],
        default: 'sync',
        description: 'Whether to wait for the scraping to complete or just start the process',
    },
    // Async-only: when non-empty, the node checks this job's status instead
    // of starting a new batch.
    {
        displayName: 'Job ID',
        name: 'jobId',
        type: 'string',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
                operationMode: ['async'],
            },
        },
        default: '',
        description: 'Job ID for checking the status of an existing batch scrape (leave empty to start a new job)',
    },
    // When enabled, the methods module logs request/response details to the
    // console and echoes them in the output items' "debug" property.
    {
        displayName: 'Enable Debug Logs',
        name: 'enableDebugLogs',
        type: 'boolean',
        displayOptions: {
            show: {
                resource: ['batchScrape'],
            },
        },
        default: false,
        description: 'Whether to enable debug logs in the output',
    },
];
// Export all properties for the Batch Scrape resource
exports.batchScrapeProperties = [...batchScrapeFields];
|