firecrawl 1.11.2 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +116 -20
- package/dist/index.d.cts +14 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +116 -20
- package/package.json +1 -1
- package/src/index.js +1002 -0
- package/src/index.ts +137 -22
package/dist/index.cjs
CHANGED
|
@@ -264,23 +264,35 @@ var FirecrawlApp = class {
|
|
|
264
264
|
if ("data" in statusData) {
|
|
265
265
|
let data = statusData.data;
|
|
266
266
|
while (typeof statusData === "object" && "next" in statusData) {
|
|
267
|
+
if (data.length === 0) {
|
|
268
|
+
break;
|
|
269
|
+
}
|
|
267
270
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
268
271
|
data = data.concat(statusData.data);
|
|
269
272
|
}
|
|
270
273
|
allData = data;
|
|
271
274
|
}
|
|
272
275
|
}
|
|
273
|
-
|
|
276
|
+
let resp = {
|
|
274
277
|
success: response.data.success,
|
|
275
278
|
status: response.data.status,
|
|
276
279
|
total: response.data.total,
|
|
277
280
|
completed: response.data.completed,
|
|
278
281
|
creditsUsed: response.data.creditsUsed,
|
|
279
282
|
expiresAt: new Date(response.data.expiresAt),
|
|
280
|
-
|
|
281
|
-
data: allData,
|
|
282
|
-
error: response.data.error
|
|
283
|
+
data: allData
|
|
283
284
|
};
|
|
285
|
+
if (!response.data.success && response.data.error) {
|
|
286
|
+
resp = {
|
|
287
|
+
...resp,
|
|
288
|
+
success: false,
|
|
289
|
+
error: response.data.error
|
|
290
|
+
};
|
|
291
|
+
}
|
|
292
|
+
if (response.data.next) {
|
|
293
|
+
resp.next = response.data.next;
|
|
294
|
+
}
|
|
295
|
+
return resp;
|
|
284
296
|
} else {
|
|
285
297
|
this.handleError(response, "check crawl status");
|
|
286
298
|
}
|
|
@@ -459,23 +471,35 @@ var FirecrawlApp = class {
|
|
|
459
471
|
if ("data" in statusData) {
|
|
460
472
|
let data = statusData.data;
|
|
461
473
|
while (typeof statusData === "object" && "next" in statusData) {
|
|
474
|
+
if (data.length === 0) {
|
|
475
|
+
break;
|
|
476
|
+
}
|
|
462
477
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
463
478
|
data = data.concat(statusData.data);
|
|
464
479
|
}
|
|
465
480
|
allData = data;
|
|
466
481
|
}
|
|
467
482
|
}
|
|
468
|
-
|
|
483
|
+
let resp = {
|
|
469
484
|
success: response.data.success,
|
|
470
485
|
status: response.data.status,
|
|
471
486
|
total: response.data.total,
|
|
472
487
|
completed: response.data.completed,
|
|
473
488
|
creditsUsed: response.data.creditsUsed,
|
|
474
489
|
expiresAt: new Date(response.data.expiresAt),
|
|
475
|
-
|
|
476
|
-
data: allData,
|
|
477
|
-
error: response.data.error
|
|
490
|
+
data: allData
|
|
478
491
|
};
|
|
492
|
+
if (!response.data.success && response.data.error) {
|
|
493
|
+
resp = {
|
|
494
|
+
...resp,
|
|
495
|
+
success: false,
|
|
496
|
+
error: response.data.error
|
|
497
|
+
};
|
|
498
|
+
}
|
|
499
|
+
if (response.data.next) {
|
|
500
|
+
resp.next = response.data.next;
|
|
501
|
+
}
|
|
502
|
+
return resp;
|
|
479
503
|
} else {
|
|
480
504
|
this.handleError(response, "check batch scrape status");
|
|
481
505
|
}
|
|
@@ -509,21 +533,34 @@ var FirecrawlApp = class {
|
|
|
509
533
|
try {
|
|
510
534
|
const response = await this.postRequest(
|
|
511
535
|
this.apiUrl + `/v1/extract`,
|
|
512
|
-
{ ...jsonData, schema: jsonSchema },
|
|
536
|
+
{ ...jsonData, schema: jsonSchema, origin: "api-sdk" },
|
|
513
537
|
headers
|
|
514
538
|
);
|
|
515
539
|
if (response.status === 200) {
|
|
516
|
-
const
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
540
|
+
const jobId = response.data.id;
|
|
541
|
+
let extractStatus;
|
|
542
|
+
do {
|
|
543
|
+
const statusResponse = await this.getRequest(
|
|
544
|
+
`${this.apiUrl}/v1/extract/${jobId}`,
|
|
545
|
+
headers
|
|
546
|
+
);
|
|
547
|
+
extractStatus = statusResponse.data;
|
|
548
|
+
if (extractStatus.status === "completed") {
|
|
549
|
+
if (extractStatus.success) {
|
|
550
|
+
return {
|
|
551
|
+
success: true,
|
|
552
|
+
data: extractStatus.data,
|
|
553
|
+
warning: extractStatus.warning,
|
|
554
|
+
error: extractStatus.error
|
|
555
|
+
};
|
|
556
|
+
} else {
|
|
557
|
+
throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
|
|
558
|
+
}
|
|
559
|
+
} else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
|
|
560
|
+
throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
|
|
561
|
+
}
|
|
562
|
+
await new Promise((resolve) => setTimeout(resolve, 1e3));
|
|
563
|
+
} while (extractStatus.status !== "completed");
|
|
527
564
|
} else {
|
|
528
565
|
this.handleError(response, "extract");
|
|
529
566
|
}
|
|
@@ -532,6 +569,62 @@ var FirecrawlApp = class {
|
|
|
532
569
|
}
|
|
533
570
|
return { success: false, error: "Internal server error." };
|
|
534
571
|
}
|
|
572
|
+
/**
|
|
573
|
+
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
574
|
+
* @param url - The URL to extract data from.
|
|
575
|
+
* @param params - Additional parameters for the extract request.
|
|
576
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
577
|
+
* @returns The response from the extract operation.
|
|
578
|
+
*/
|
|
579
|
+
async asyncExtract(url, params, idempotencyKey) {
|
|
580
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
581
|
+
let jsonData = { url, ...params };
|
|
582
|
+
let jsonSchema;
|
|
583
|
+
try {
|
|
584
|
+
if (params?.schema instanceof zt.ZodType) {
|
|
585
|
+
jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
|
|
586
|
+
} else {
|
|
587
|
+
jsonSchema = params?.schema;
|
|
588
|
+
}
|
|
589
|
+
} catch (error) {
|
|
590
|
+
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
591
|
+
}
|
|
592
|
+
try {
|
|
593
|
+
const response = await this.postRequest(
|
|
594
|
+
this.apiUrl + `/v1/extract`,
|
|
595
|
+
{ ...jsonData, schema: jsonSchema },
|
|
596
|
+
headers
|
|
597
|
+
);
|
|
598
|
+
if (response.status === 200) {
|
|
599
|
+
return response.data;
|
|
600
|
+
} else {
|
|
601
|
+
this.handleError(response, "start extract job");
|
|
602
|
+
}
|
|
603
|
+
} catch (error) {
|
|
604
|
+
throw new FirecrawlError(error.message, 500);
|
|
605
|
+
}
|
|
606
|
+
return { success: false, error: "Internal server error." };
|
|
607
|
+
}
|
|
608
|
+
/**
|
|
609
|
+
* Retrieves the status of an extract job.
|
|
610
|
+
* @param jobId - The ID of the extract job.
|
|
611
|
+
* @returns The status of the extract job.
|
|
612
|
+
*/
|
|
613
|
+
async getExtractStatus(jobId) {
|
|
614
|
+
try {
|
|
615
|
+
const response = await this.getRequest(
|
|
616
|
+
`${this.apiUrl}/v1/extract/${jobId}`,
|
|
617
|
+
this.prepareHeaders()
|
|
618
|
+
);
|
|
619
|
+
if (response.status === 200) {
|
|
620
|
+
return response.data;
|
|
621
|
+
} else {
|
|
622
|
+
this.handleError(response, "get extract status");
|
|
623
|
+
}
|
|
624
|
+
} catch (error) {
|
|
625
|
+
throw new FirecrawlError(error.message, 500);
|
|
626
|
+
}
|
|
627
|
+
}
|
|
535
628
|
/**
|
|
536
629
|
* Prepares the headers for an API request.
|
|
537
630
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -609,6 +702,9 @@ var FirecrawlApp = class {
|
|
|
609
702
|
if ("data" in statusData) {
|
|
610
703
|
let data = statusData.data;
|
|
611
704
|
while (typeof statusData === "object" && "next" in statusData) {
|
|
705
|
+
if (data.length === 0) {
|
|
706
|
+
break;
|
|
707
|
+
}
|
|
612
708
|
statusResponse = await this.getRequest(statusData.next, headers);
|
|
613
709
|
statusData = statusResponse.data;
|
|
614
710
|
data = data.concat(statusData.data);
|
package/dist/index.d.cts
CHANGED
|
@@ -384,6 +384,20 @@ declare class FirecrawlApp {
|
|
|
384
384
|
* @returns The response from the extract operation.
|
|
385
385
|
*/
|
|
386
386
|
extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
387
|
+
/**
|
|
388
|
+
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
389
|
+
* @param url - The URL to extract data from.
|
|
390
|
+
* @param params - Additional parameters for the extract request.
|
|
391
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
392
|
+
* @returns The response from the extract operation.
|
|
393
|
+
*/
|
|
394
|
+
asyncExtract(url: string, params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
|
|
395
|
+
/**
|
|
396
|
+
* Retrieves the status of an extract job.
|
|
397
|
+
* @param jobId - The ID of the extract job.
|
|
398
|
+
* @returns The status of the extract job.
|
|
399
|
+
*/
|
|
400
|
+
getExtractStatus(jobId: string): Promise<any>;
|
|
387
401
|
/**
|
|
388
402
|
* Prepares the headers for an API request.
|
|
389
403
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
package/dist/index.d.ts
CHANGED
|
@@ -384,6 +384,20 @@ declare class FirecrawlApp {
|
|
|
384
384
|
* @returns The response from the extract operation.
|
|
385
385
|
*/
|
|
386
386
|
extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
387
|
+
/**
|
|
388
|
+
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
389
|
+
* @param url - The URL to extract data from.
|
|
390
|
+
* @param params - Additional parameters for the extract request.
|
|
391
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
392
|
+
* @returns The response from the extract operation.
|
|
393
|
+
*/
|
|
394
|
+
asyncExtract(url: string, params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
|
|
395
|
+
/**
|
|
396
|
+
* Retrieves the status of an extract job.
|
|
397
|
+
* @param jobId - The ID of the extract job.
|
|
398
|
+
* @returns The status of the extract job.
|
|
399
|
+
*/
|
|
400
|
+
getExtractStatus(jobId: string): Promise<any>;
|
|
387
401
|
/**
|
|
388
402
|
* Prepares the headers for an API request.
|
|
389
403
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
package/dist/index.js
CHANGED
|
@@ -228,23 +228,35 @@ var FirecrawlApp = class {
|
|
|
228
228
|
if ("data" in statusData) {
|
|
229
229
|
let data = statusData.data;
|
|
230
230
|
while (typeof statusData === "object" && "next" in statusData) {
|
|
231
|
+
if (data.length === 0) {
|
|
232
|
+
break;
|
|
233
|
+
}
|
|
231
234
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
232
235
|
data = data.concat(statusData.data);
|
|
233
236
|
}
|
|
234
237
|
allData = data;
|
|
235
238
|
}
|
|
236
239
|
}
|
|
237
|
-
|
|
240
|
+
let resp = {
|
|
238
241
|
success: response.data.success,
|
|
239
242
|
status: response.data.status,
|
|
240
243
|
total: response.data.total,
|
|
241
244
|
completed: response.data.completed,
|
|
242
245
|
creditsUsed: response.data.creditsUsed,
|
|
243
246
|
expiresAt: new Date(response.data.expiresAt),
|
|
244
|
-
|
|
245
|
-
data: allData,
|
|
246
|
-
error: response.data.error
|
|
247
|
+
data: allData
|
|
247
248
|
};
|
|
249
|
+
if (!response.data.success && response.data.error) {
|
|
250
|
+
resp = {
|
|
251
|
+
...resp,
|
|
252
|
+
success: false,
|
|
253
|
+
error: response.data.error
|
|
254
|
+
};
|
|
255
|
+
}
|
|
256
|
+
if (response.data.next) {
|
|
257
|
+
resp.next = response.data.next;
|
|
258
|
+
}
|
|
259
|
+
return resp;
|
|
248
260
|
} else {
|
|
249
261
|
this.handleError(response, "check crawl status");
|
|
250
262
|
}
|
|
@@ -423,23 +435,35 @@ var FirecrawlApp = class {
|
|
|
423
435
|
if ("data" in statusData) {
|
|
424
436
|
let data = statusData.data;
|
|
425
437
|
while (typeof statusData === "object" && "next" in statusData) {
|
|
438
|
+
if (data.length === 0) {
|
|
439
|
+
break;
|
|
440
|
+
}
|
|
426
441
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
427
442
|
data = data.concat(statusData.data);
|
|
428
443
|
}
|
|
429
444
|
allData = data;
|
|
430
445
|
}
|
|
431
446
|
}
|
|
432
|
-
|
|
447
|
+
let resp = {
|
|
433
448
|
success: response.data.success,
|
|
434
449
|
status: response.data.status,
|
|
435
450
|
total: response.data.total,
|
|
436
451
|
completed: response.data.completed,
|
|
437
452
|
creditsUsed: response.data.creditsUsed,
|
|
438
453
|
expiresAt: new Date(response.data.expiresAt),
|
|
439
|
-
|
|
440
|
-
data: allData,
|
|
441
|
-
error: response.data.error
|
|
454
|
+
data: allData
|
|
442
455
|
};
|
|
456
|
+
if (!response.data.success && response.data.error) {
|
|
457
|
+
resp = {
|
|
458
|
+
...resp,
|
|
459
|
+
success: false,
|
|
460
|
+
error: response.data.error
|
|
461
|
+
};
|
|
462
|
+
}
|
|
463
|
+
if (response.data.next) {
|
|
464
|
+
resp.next = response.data.next;
|
|
465
|
+
}
|
|
466
|
+
return resp;
|
|
443
467
|
} else {
|
|
444
468
|
this.handleError(response, "check batch scrape status");
|
|
445
469
|
}
|
|
@@ -473,21 +497,34 @@ var FirecrawlApp = class {
|
|
|
473
497
|
try {
|
|
474
498
|
const response = await this.postRequest(
|
|
475
499
|
this.apiUrl + `/v1/extract`,
|
|
476
|
-
{ ...jsonData, schema: jsonSchema },
|
|
500
|
+
{ ...jsonData, schema: jsonSchema, origin: "api-sdk" },
|
|
477
501
|
headers
|
|
478
502
|
);
|
|
479
503
|
if (response.status === 200) {
|
|
480
|
-
const
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
504
|
+
const jobId = response.data.id;
|
|
505
|
+
let extractStatus;
|
|
506
|
+
do {
|
|
507
|
+
const statusResponse = await this.getRequest(
|
|
508
|
+
`${this.apiUrl}/v1/extract/${jobId}`,
|
|
509
|
+
headers
|
|
510
|
+
);
|
|
511
|
+
extractStatus = statusResponse.data;
|
|
512
|
+
if (extractStatus.status === "completed") {
|
|
513
|
+
if (extractStatus.success) {
|
|
514
|
+
return {
|
|
515
|
+
success: true,
|
|
516
|
+
data: extractStatus.data,
|
|
517
|
+
warning: extractStatus.warning,
|
|
518
|
+
error: extractStatus.error
|
|
519
|
+
};
|
|
520
|
+
} else {
|
|
521
|
+
throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
|
|
522
|
+
}
|
|
523
|
+
} else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
|
|
524
|
+
throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
|
|
525
|
+
}
|
|
526
|
+
await new Promise((resolve) => setTimeout(resolve, 1e3));
|
|
527
|
+
} while (extractStatus.status !== "completed");
|
|
491
528
|
} else {
|
|
492
529
|
this.handleError(response, "extract");
|
|
493
530
|
}
|
|
@@ -496,6 +533,62 @@ var FirecrawlApp = class {
|
|
|
496
533
|
}
|
|
497
534
|
return { success: false, error: "Internal server error." };
|
|
498
535
|
}
|
|
536
|
+
/**
|
|
537
|
+
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
538
|
+
* @param url - The URL to extract data from.
|
|
539
|
+
* @param params - Additional parameters for the extract request.
|
|
540
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
541
|
+
* @returns The response from the extract operation.
|
|
542
|
+
*/
|
|
543
|
+
async asyncExtract(url, params, idempotencyKey) {
|
|
544
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
545
|
+
let jsonData = { url, ...params };
|
|
546
|
+
let jsonSchema;
|
|
547
|
+
try {
|
|
548
|
+
if (params?.schema instanceof zt.ZodType) {
|
|
549
|
+
jsonSchema = zodToJsonSchema(params.schema);
|
|
550
|
+
} else {
|
|
551
|
+
jsonSchema = params?.schema;
|
|
552
|
+
}
|
|
553
|
+
} catch (error) {
|
|
554
|
+
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
555
|
+
}
|
|
556
|
+
try {
|
|
557
|
+
const response = await this.postRequest(
|
|
558
|
+
this.apiUrl + `/v1/extract`,
|
|
559
|
+
{ ...jsonData, schema: jsonSchema },
|
|
560
|
+
headers
|
|
561
|
+
);
|
|
562
|
+
if (response.status === 200) {
|
|
563
|
+
return response.data;
|
|
564
|
+
} else {
|
|
565
|
+
this.handleError(response, "start extract job");
|
|
566
|
+
}
|
|
567
|
+
} catch (error) {
|
|
568
|
+
throw new FirecrawlError(error.message, 500);
|
|
569
|
+
}
|
|
570
|
+
return { success: false, error: "Internal server error." };
|
|
571
|
+
}
|
|
572
|
+
/**
|
|
573
|
+
* Retrieves the status of an extract job.
|
|
574
|
+
* @param jobId - The ID of the extract job.
|
|
575
|
+
* @returns The status of the extract job.
|
|
576
|
+
*/
|
|
577
|
+
async getExtractStatus(jobId) {
|
|
578
|
+
try {
|
|
579
|
+
const response = await this.getRequest(
|
|
580
|
+
`${this.apiUrl}/v1/extract/${jobId}`,
|
|
581
|
+
this.prepareHeaders()
|
|
582
|
+
);
|
|
583
|
+
if (response.status === 200) {
|
|
584
|
+
return response.data;
|
|
585
|
+
} else {
|
|
586
|
+
this.handleError(response, "get extract status");
|
|
587
|
+
}
|
|
588
|
+
} catch (error) {
|
|
589
|
+
throw new FirecrawlError(error.message, 500);
|
|
590
|
+
}
|
|
591
|
+
}
|
|
499
592
|
/**
|
|
500
593
|
* Prepares the headers for an API request.
|
|
501
594
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -573,6 +666,9 @@ var FirecrawlApp = class {
|
|
|
573
666
|
if ("data" in statusData) {
|
|
574
667
|
let data = statusData.data;
|
|
575
668
|
while (typeof statusData === "object" && "next" in statusData) {
|
|
669
|
+
if (data.length === 0) {
|
|
670
|
+
break;
|
|
671
|
+
}
|
|
576
672
|
statusResponse = await this.getRequest(statusData.next, headers);
|
|
577
673
|
statusData = statusResponse.data;
|
|
578
674
|
data = data.concat(statusData.data);
|