n8n-nodes-firecrawl-latest 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +19 -0
- package/README.md +232 -0
- package/dist/credentials/FirecrawlApi.credentials.js +22 -0
- package/dist/icons/flames-icon.svg +144 -0
- package/dist/nodes/Firecrawl/FireCrawlScraper.node.js +156 -0
- package/dist/nodes/Firecrawl/resources/batchScrape/batchScrape.methods.js +253 -0
- package/dist/nodes/Firecrawl/resources/batchScrape/batchScrape.properties.js +205 -0
- package/dist/nodes/Firecrawl/resources/crawler/crawler.methods.js +281 -0
- package/dist/nodes/Firecrawl/resources/crawler/crawler.properties.js +313 -0
- package/dist/nodes/Firecrawl/resources/deepResearch/deepResearch.methods.js +171 -0
- package/dist/nodes/Firecrawl/resources/deepResearch/deepResearch.properties.js +200 -0
- package/dist/nodes/Firecrawl/resources/extract/extract.methods.js +424 -0
- package/dist/nodes/Firecrawl/resources/extract/extract.properties.js +339 -0
- package/dist/nodes/Firecrawl/resources/llmsText/llmsText.methods.js +124 -0
- package/dist/nodes/Firecrawl/resources/llmsText/llmsText.properties.js +87 -0
- package/dist/nodes/Firecrawl/resources/map/map.methods.js +52 -0
- package/dist/nodes/Firecrawl/resources/map/map.properties.js +22 -0
- package/dist/nodes/Firecrawl/resources/scrape/scrape.methods.js +203 -0
- package/dist/nodes/Firecrawl/resources/scrape/scrape.properties.js +348 -0
- package/dist/nodes/HttpBin/HttpBin.node.js +59 -0
- package/dist/nodes/HttpBin/HttpVerbDescription.js +246 -0
- package/dist/nodes/HttpBin/httpbin.svg +18 -0
- package/index.js +7 -0
- package/package.json +58 -0
@@ -0,0 +1,424 @@
|
|
1
|
+
"use strict";
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
+
};
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
+
exports.extractMethods = void 0;
|
7
|
+
const firecrawl_js_1 = __importDefault(require("@mendable/firecrawl-js"));
|
8
|
+
const n8n_workflow_1 = require("n8n-workflow");
|
9
|
+
// Helper function to parse URLs input
|
10
|
+
/**
 * Normalizes the node's "urls" parameter into a clean list of URL strings.
 *
 * Accepted shapes:
 *   - an actual array (e.g. produced by an expression),
 *   - a JSON array literal such as '["https://a.com", "https://b.com"]',
 *   - a comma- and/or newline-separated list,
 *   - a single URL.
 *
 * Every entry is trimmed; empty, null and undefined entries are discarded so
 * that blank input yields an empty list the caller can reject.
 *
 * @param {string|Array<*>|null|undefined} urlsInput - Raw parameter value.
 * @returns {string[]} Trimmed, non-empty URL strings (possibly empty).
 */
function parseUrlsInput(urlsInput) {
    if (!urlsInput) {
        return [];
    }
    // Shared clean-up: stringify, trim, and drop anything that ends up empty.
    const normalizeEntries = (entries) => entries
        .filter((entry) => entry !== null && entry !== undefined)
        .map((entry) => String(entry).trim())
        .filter((entry) => entry !== '');
    if (Array.isArray(urlsInput)) {
        return normalizeEntries(urlsInput);
    }
    const text = String(urlsInput).trim();
    if (text === '') {
        return [];
    }
    // Input that looks like a JSON array literal is parsed as one first.
    if (text.startsWith('[') && text.endsWith(']')) {
        try {
            const parsed = JSON.parse(text);
            if (Array.isArray(parsed)) {
                return normalizeEntries(parsed);
            }
        }
        catch (e) {
            // Not valid JSON after all; fall back to delimiter splitting below.
        }
    }
    // A string without delimiters splits into a single entry, so this also
    // covers the single-URL case.
    return normalizeEntries(text.split(/[,\n]/));
}
|
32
|
+
// Helper function to generate schema from JSON example
|
33
|
+
/**
 * Derives a JSON-Schema-like description from an example JSON value.
 *
 * Mapping:
 *   - null                      -> { type: 'null' }
 *   - string / number / boolean -> { type: <typeof value> }
 *   - array                     -> { type: 'array', items: <schema of first element> }
 *                                  (empty arrays default to string items)
 *   - object                    -> { type: 'object', properties, required }
 *                                  where every key of the example is marked required
 *   - anything else             -> { type: 'string' }
 *
 * @param {*} jsonExample - Example value, typically the result of JSON.parse.
 * @returns {object} Schema object describing the example's shape.
 */
function generateSchemaFromExample(jsonExample) {
    if (jsonExample === null) {
        return { type: 'null' };
    }
    if (Array.isArray(jsonExample)) {
        // Only the first element is sampled; empty arrays fall back to strings.
        const items = jsonExample.length === 0
            ? { type: 'string' }
            : generateSchemaFromExample(jsonExample[0]);
        return { type: 'array', items };
    }
    switch (typeof jsonExample) {
        case 'string':
        case 'number':
        case 'boolean':
            return { type: typeof jsonExample };
        case 'object': {
            const properties = {};
            for (const [key, value] of Object.entries(jsonExample)) {
                // defineProperty instead of plain assignment: assigning to a key
                // named "__proto__" would replace the prototype rather than
                // create an own property, silently dropping that key.
                Object.defineProperty(properties, key, {
                    value: generateSchemaFromExample(value),
                    enumerable: true,
                    writable: true,
                    configurable: true,
                });
            }
            return {
                type: 'object',
                properties,
                required: Object.keys(properties),
            };
        }
        default:
            // undefined, functions, symbols, bigint: treat as string.
            return { type: 'string' };
    }
}
|
74
|
+
exports.extractMethods = {
|
75
|
+
async execute() {
|
76
|
+
const items = this.getInputData();
|
77
|
+
const returnData = [];
|
78
|
+
// Get credentials
|
79
|
+
const credentials = await this.getCredentials('firecrawlApi');
|
80
|
+
const apiKey = credentials.apiKey;
|
81
|
+
// Initialize Firecrawl app
|
82
|
+
const firecrawl = new firecrawl_js_1.default({ apiKey });
|
83
|
+
// Process each item
|
84
|
+
for (let i = 0; i < items.length; i++) {
|
85
|
+
try {
|
86
|
+
// Get API version
|
87
|
+
const version = this.getNodeParameter('version', i, 'v1');
|
88
|
+
const operationMode = this.getNodeParameter('operationMode', i);
|
89
|
+
const extractionMethod = this.getNodeParameter('extractionMethod', i);
|
90
|
+
const enableDebugLogs = this.getNodeParameter('enableDebugLogs', i, false);
|
91
|
+
const trackChanges = this.getNodeParameter('trackChanges', i, false);
|
92
|
+
// Get URLs (except for URL-less mode)
|
93
|
+
let urls = [];
|
94
|
+
if (operationMode !== 'urlless') {
|
95
|
+
const urlsInput = this.getNodeParameter('urls', i);
|
96
|
+
urls = parseUrlsInput(urlsInput);
|
97
|
+
if (urls.length === 0) {
|
98
|
+
throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'No valid URLs provided', {
|
99
|
+
itemIndex: i,
|
100
|
+
});
|
101
|
+
}
|
102
|
+
}
|
103
|
+
// Optional parameters
|
104
|
+
const enableWebSearch = this.getNodeParameter('enableWebSearch', i, false);
|
105
|
+
// Create extraction options
|
106
|
+
const extractionOptions = {
|
107
|
+
enableWebSearch,
|
108
|
+
};
|
109
|
+
// Add change tracking options if enabled
|
110
|
+
if (trackChanges) {
|
111
|
+
const changeTrackingModes = this.getNodeParameter('changeTrackingMode', i, ['git-diff']);
|
112
|
+
extractionOptions.changeTracking = {
|
113
|
+
modes: changeTrackingModes,
|
114
|
+
};
|
115
|
+
}
|
116
|
+
// Handle extraction method
|
117
|
+
if (extractionMethod === 'simple') {
|
118
|
+
const extractionPrompt = this.getNodeParameter('extractionPrompt', i, '');
|
119
|
+
const systemPrompt = this.getNodeParameter('systemPrompt', i, '');
|
120
|
+
extractionOptions.prompt = extractionPrompt;
|
121
|
+
if (systemPrompt) {
|
122
|
+
extractionOptions.systemPrompt = systemPrompt;
|
123
|
+
}
|
124
|
+
}
|
125
|
+
else if (extractionMethod === 'schema') {
|
126
|
+
const schemaPrompt = this.getNodeParameter('schemaPrompt', i, '');
|
127
|
+
const systemPrompt = this.getNodeParameter('systemPrompt', i, '');
|
128
|
+
const schemaDefinitionType = this.getNodeParameter('schemaDefinitionType', i);
|
129
|
+
if (schemaPrompt) {
|
130
|
+
extractionOptions.prompt = schemaPrompt;
|
131
|
+
}
|
132
|
+
if (systemPrompt) {
|
133
|
+
extractionOptions.systemPrompt = systemPrompt;
|
134
|
+
}
|
135
|
+
let schema;
|
136
|
+
if (schemaDefinitionType === 'example') {
|
137
|
+
const jsonExample = JSON.parse(this.getNodeParameter('jsonExample', i));
|
138
|
+
schema = generateSchemaFromExample(jsonExample);
|
139
|
+
}
|
140
|
+
else {
|
141
|
+
// Manual schema definition
|
142
|
+
schema = JSON.parse(this.getNodeParameter('schemaDefinition', i));
|
143
|
+
}
|
144
|
+
extractionOptions.schema = schema;
|
145
|
+
}
|
146
|
+
// Add FIRE-1 agent options for v2
|
147
|
+
if (version === 'v2') {
|
148
|
+
const useFire1Agent = this.getNodeParameter('useFire1Agent', i, true);
|
149
|
+
if (useFire1Agent) {
|
150
|
+
extractionOptions.agent = {
|
151
|
+
model: 'FIRE-1',
|
152
|
+
};
|
153
|
+
// Check if page actions are enabled
|
154
|
+
const enablePageActions = this.getNodeParameter('enablePageActions', i, false);
|
155
|
+
if (enablePageActions) {
|
156
|
+
const pageActions = JSON.parse(this.getNodeParameter('pageActions', i, '[]'));
|
157
|
+
extractionOptions.agent.actions = pageActions;
|
158
|
+
}
|
159
|
+
}
|
160
|
+
}
|
161
|
+
// Log the extraction parameters if debug is enabled
|
162
|
+
if (enableDebugLogs) {
|
163
|
+
console.log('URLs:', urls);
|
164
|
+
console.log('Extraction options:', JSON.stringify(extractionOptions, null, 2));
|
165
|
+
}
|
166
|
+
// Handle different operation modes
|
167
|
+
if (operationMode === 'urlless') {
|
168
|
+
// URL-less extraction - use search-based extraction
|
169
|
+
// This fixes the error on line 195 by passing an object instead of using urls parameter
|
170
|
+
const extractionResult = await firecrawl.extract({
|
171
|
+
...extractionOptions,
|
172
|
+
enableWebSearch: true, // Always enable web search for URL-less mode
|
173
|
+
});
|
174
|
+
// Log the results if debug is enabled
|
175
|
+
if (enableDebugLogs) {
|
176
|
+
console.log('Extraction results:', JSON.stringify(extractionResult, null, 2));
|
177
|
+
}
|
178
|
+
// Add results to return data
|
179
|
+
if (!extractionResult.success && 'error' in extractionResult) {
|
180
|
+
returnData.push({
|
181
|
+
json: {
|
182
|
+
success: false,
|
183
|
+
error: extractionResult.error,
|
184
|
+
debug: enableDebugLogs ? { options: extractionOptions } : undefined,
|
185
|
+
},
|
186
|
+
});
|
187
|
+
}
|
188
|
+
else {
|
189
|
+
returnData.push({
|
190
|
+
json: {
|
191
|
+
success: true,
|
192
|
+
data: 'data' in extractionResult ? extractionResult.data : undefined,
|
193
|
+
status: 'status' in extractionResult ? extractionResult.status : 'completed',
|
194
|
+
expiresAt: 'expiresAt' in extractionResult ? extractionResult.expiresAt : undefined,
|
195
|
+
debug: enableDebugLogs ? { options: extractionOptions } : undefined,
|
196
|
+
},
|
197
|
+
});
|
198
|
+
}
|
199
|
+
}
|
200
|
+
else if (operationMode === 'single' || urls.length === 1) {
|
201
|
+
// Check if using async mode
|
202
|
+
const useAsyncMode = this.getNodeParameter('useAsyncMode', i, false);
|
203
|
+
if (useAsyncMode) {
|
204
|
+
// Get job ID if provided
|
205
|
+
const jobId = this.getNodeParameter('jobId', i, '');
|
206
|
+
if (jobId && jobId.trim() !== '') {
|
207
|
+
// Check status of an existing job
|
208
|
+
const status = await firecrawl.getExtractStatus(jobId);
|
209
|
+
if (!status.success && 'error' in status) {
|
210
|
+
returnData.push({
|
211
|
+
json: {
|
212
|
+
success: false,
|
213
|
+
error: status.error,
|
214
|
+
jobId,
|
215
|
+
debug: enableDebugLogs ? { jobId } : undefined,
|
216
|
+
},
|
217
|
+
});
|
218
|
+
}
|
219
|
+
else {
|
220
|
+
returnData.push({
|
221
|
+
json: {
|
222
|
+
success: true,
|
223
|
+
status: 'status' in status ? status.status : 'unknown',
|
224
|
+
progress: 'progress' in status ? status.progress : undefined,
|
225
|
+
results: 'results' in status ? status.results : undefined,
|
226
|
+
jobId,
|
227
|
+
debug: enableDebugLogs ? { jobId } : undefined,
|
228
|
+
},
|
229
|
+
});
|
230
|
+
}
|
231
|
+
}
|
232
|
+
else {
|
233
|
+
// Start a new asynchronous job
|
234
|
+
// Use the appropriate endpoint based on version
|
235
|
+
let extractJob;
|
236
|
+
if (version === 'v2') {
|
237
|
+
extractJob = await firecrawl.asyncExtract(urls, {
|
238
|
+
...extractionOptions,
|
239
|
+
v2: true,
|
240
|
+
});
|
241
|
+
}
|
242
|
+
else {
|
243
|
+
extractJob = await firecrawl.asyncExtract(urls, extractionOptions);
|
244
|
+
}
|
245
|
+
if (!extractJob.success && 'error' in extractJob) {
|
246
|
+
returnData.push({
|
247
|
+
json: {
|
248
|
+
success: false,
|
249
|
+
error: extractJob.error,
|
250
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
251
|
+
},
|
252
|
+
});
|
253
|
+
}
|
254
|
+
else {
|
255
|
+
returnData.push({
|
256
|
+
json: {
|
257
|
+
success: true,
|
258
|
+
status: 'started',
|
259
|
+
jobId: 'jobId' in extractJob ? extractJob.jobId : undefined,
|
260
|
+
message: 'Extraction started successfully. Use the job ID to check status.',
|
261
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
262
|
+
},
|
263
|
+
});
|
264
|
+
}
|
265
|
+
}
|
266
|
+
}
|
267
|
+
else {
|
268
|
+
// Synchronous extraction - wait for the result
|
269
|
+
// Use the appropriate endpoint based on version
|
270
|
+
let extractionResult;
|
271
|
+
if (version === 'v2') {
|
272
|
+
extractionResult = await firecrawl.extract(urls, {
|
273
|
+
...extractionOptions,
|
274
|
+
v2: true,
|
275
|
+
});
|
276
|
+
}
|
277
|
+
else {
|
278
|
+
extractionResult = await firecrawl.extract(urls, extractionOptions);
|
279
|
+
}
|
280
|
+
if (!extractionResult.success && 'error' in extractionResult) {
|
281
|
+
returnData.push({
|
282
|
+
json: {
|
283
|
+
success: false,
|
284
|
+
error: extractionResult.error,
|
285
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
286
|
+
},
|
287
|
+
});
|
288
|
+
}
|
289
|
+
else {
|
290
|
+
returnData.push({
|
291
|
+
json: {
|
292
|
+
success: true,
|
293
|
+
data: 'data' in extractionResult ? extractionResult.data : undefined,
|
294
|
+
status: 'status' in extractionResult ? extractionResult.status : 'completed',
|
295
|
+
expiresAt: 'expiresAt' in extractionResult ? extractionResult.expiresAt : undefined,
|
296
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
297
|
+
},
|
298
|
+
});
|
299
|
+
}
|
300
|
+
}
|
301
|
+
}
|
302
|
+
else {
|
303
|
+
// Batch mode - check if async or sync is needed
|
304
|
+
const useAsyncMode = this.getNodeParameter('useAsyncMode', i, false);
|
305
|
+
if (useAsyncMode) {
|
306
|
+
// Get job ID if provided
|
307
|
+
const jobId = this.getNodeParameter('jobId', i, '');
|
308
|
+
if (jobId && jobId.trim() !== '') {
|
309
|
+
// Check status of an existing job
|
310
|
+
const status = await firecrawl.getExtractStatus(jobId);
|
311
|
+
if (!status.success && 'error' in status) {
|
312
|
+
returnData.push({
|
313
|
+
json: {
|
314
|
+
success: false,
|
315
|
+
error: status.error,
|
316
|
+
jobId,
|
317
|
+
debug: enableDebugLogs ? { jobId } : undefined,
|
318
|
+
},
|
319
|
+
});
|
320
|
+
}
|
321
|
+
else {
|
322
|
+
returnData.push({
|
323
|
+
json: {
|
324
|
+
success: true,
|
325
|
+
status: 'status' in status ? status.status : 'unknown',
|
326
|
+
progress: 'progress' in status ? status.progress : undefined,
|
327
|
+
results: 'results' in status ? status.results : undefined,
|
328
|
+
jobId,
|
329
|
+
debug: enableDebugLogs ? { jobId } : undefined,
|
330
|
+
},
|
331
|
+
});
|
332
|
+
}
|
333
|
+
}
|
334
|
+
else {
|
335
|
+
// Start a new asynchronous job
|
336
|
+
// Use the appropriate endpoint based on version
|
337
|
+
let extractJob;
|
338
|
+
if (version === 'v2') {
|
339
|
+
extractJob = await firecrawl.asyncExtract(urls, {
|
340
|
+
...extractionOptions,
|
341
|
+
v2: true,
|
342
|
+
});
|
343
|
+
}
|
344
|
+
else {
|
345
|
+
extractJob = await firecrawl.asyncExtract(urls, extractionOptions);
|
346
|
+
}
|
347
|
+
if (!extractJob.success && 'error' in extractJob) {
|
348
|
+
returnData.push({
|
349
|
+
json: {
|
350
|
+
success: false,
|
351
|
+
error: extractJob.error,
|
352
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
353
|
+
},
|
354
|
+
});
|
355
|
+
}
|
356
|
+
else {
|
357
|
+
returnData.push({
|
358
|
+
json: {
|
359
|
+
success: true,
|
360
|
+
status: 'started',
|
361
|
+
jobId: 'jobId' in extractJob ? extractJob.jobId : undefined,
|
362
|
+
message: 'Extraction started successfully. Use the job ID to check status.',
|
363
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
364
|
+
},
|
365
|
+
});
|
366
|
+
}
|
367
|
+
}
|
368
|
+
}
|
369
|
+
else {
|
370
|
+
// Synchronous batch extraction - wait for all results
|
371
|
+
// Use the appropriate endpoint based on version
|
372
|
+
let extractionResult;
|
373
|
+
if (version === 'v2') {
|
374
|
+
extractionResult = await firecrawl.extract(urls, {
|
375
|
+
...extractionOptions,
|
376
|
+
v2: true,
|
377
|
+
});
|
378
|
+
}
|
379
|
+
else {
|
380
|
+
extractionResult = await firecrawl.extract(urls, extractionOptions);
|
381
|
+
}
|
382
|
+
if (!extractionResult.success && 'error' in extractionResult) {
|
383
|
+
returnData.push({
|
384
|
+
json: {
|
385
|
+
success: false,
|
386
|
+
error: extractionResult.error,
|
387
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
388
|
+
},
|
389
|
+
});
|
390
|
+
}
|
391
|
+
else {
|
392
|
+
returnData.push({
|
393
|
+
json: {
|
394
|
+
success: true,
|
395
|
+
data: 'data' in extractionResult ? extractionResult.data : undefined,
|
396
|
+
status: 'status' in extractionResult ? extractionResult.status : 'completed',
|
397
|
+
expiresAt: 'expiresAt' in extractionResult ? extractionResult.expiresAt : undefined,
|
398
|
+
debug: enableDebugLogs ? { urls, options: extractionOptions } : undefined,
|
399
|
+
},
|
400
|
+
});
|
401
|
+
}
|
402
|
+
}
|
403
|
+
}
|
404
|
+
}
|
405
|
+
catch (error) {
|
406
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
407
|
+
console.error('Extraction error:', errorMessage);
|
408
|
+
if (this.continueOnFail()) {
|
409
|
+
returnData.push({
|
410
|
+
json: {
|
411
|
+
success: false,
|
412
|
+
error: errorMessage,
|
413
|
+
},
|
414
|
+
});
|
415
|
+
continue;
|
416
|
+
}
|
417
|
+
throw new n8n_workflow_1.NodeOperationError(this.getNode(), error, {
|
418
|
+
itemIndex: i,
|
419
|
+
});
|
420
|
+
}
|
421
|
+
}
|
422
|
+
return [returnData];
|
423
|
+
},
|
424
|
+
};
|