firecrawl 1.11.2 → 1.13.0
This diff shows the contents of publicly released package versions as they appear in their public registry; it is provided for informational purposes only.
- package/dist/index.cjs +116 -20
- package/dist/index.d.cts +14 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +116 -20
- package/package.json +1 -1
- package/src/index.ts +137 -22
package/dist/index.cjs
CHANGED
@@ -264,23 +264,35 @@ var FirecrawlApp = class {
           if ("data" in statusData) {
             let data = statusData.data;
             while (typeof statusData === "object" && "next" in statusData) {
+              if (data.length === 0) {
+                break;
+              }
               statusData = (await this.getRequest(statusData.next, headers)).data;
               data = data.concat(statusData.data);
             }
             allData = data;
           }
         }
-        return {
+        let resp = {
           success: response.data.success,
           status: response.data.status,
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
           expiresAt: new Date(response.data.expiresAt),
-          next: response.data.next,
-          data: allData,
-          error: response.data.error
+          data: allData
         };
+        if (!response.data.success && response.data.error) {
+          resp = {
+            ...resp,
+            success: false,
+            error: response.data.error
+          };
+        }
+        if (response.data.next) {
+          resp.next = response.data.next;
+        }
+        return resp;
       } else {
         this.handleError(response, "check crawl status");
       }
@@ -459,23 +471,35 @@ var FirecrawlApp = class {
           if ("data" in statusData) {
             let data = statusData.data;
             while (typeof statusData === "object" && "next" in statusData) {
+              if (data.length === 0) {
+                break;
+              }
               statusData = (await this.getRequest(statusData.next, headers)).data;
               data = data.concat(statusData.data);
             }
             allData = data;
           }
         }
-        return {
+        let resp = {
           success: response.data.success,
           status: response.data.status,
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
           expiresAt: new Date(response.data.expiresAt),
-          next: response.data.next,
-          data: allData,
-          error: response.data.error
+          data: allData
         };
+        if (!response.data.success && response.data.error) {
+          resp = {
+            ...resp,
+            success: false,
+            error: response.data.error
+          };
+        }
+        if (response.data.next) {
+          resp.next = response.data.next;
+        }
+        return resp;
       } else {
         this.handleError(response, "check batch scrape status");
       }
@@ -509,21 +533,34 @@ var FirecrawlApp = class {
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema },
+        { ...jsonData, schema: jsonSchema, origin: "api-sdk" },
         headers
       );
       if (response.status === 200) {
-        const
-
-
-
-
-
-
-
-
-
-
+        const jobId = response.data.id;
+        let extractStatus;
+        do {
+          const statusResponse = await this.getRequest(
+            `${this.apiUrl}/v1/extract/${jobId}`,
+            headers
+          );
+          extractStatus = statusResponse.data;
+          if (extractStatus.status === "completed") {
+            if (extractStatus.success) {
+              return {
+                success: true,
+                data: extractStatus.data,
+                warning: extractStatus.warning,
+                error: extractStatus.error
+              };
+            } else {
+              throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
+            }
+          } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
+            throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
+          }
+          await new Promise((resolve) => setTimeout(resolve, 1e3));
+        } while (extractStatus.status !== "completed");
       } else {
         this.handleError(response, "extract");
       }
@@ -532,6 +569,62 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
+  /**
+   * Initiates an asynchronous extract job for a URL using the Firecrawl API.
+   * @param url - The URL to extract data from.
+   * @param params - Additional parameters for the extract request.
+   * @param idempotencyKey - Optional idempotency key for the request.
+   * @returns The response from the extract operation.
+   */
+  async asyncExtract(urls, params, idempotencyKey) {
+    const headers = this.prepareHeaders(idempotencyKey);
+    let jsonData = { urls, ...params };
+    let jsonSchema;
+    try {
+      if (params?.schema instanceof zt.ZodType) {
+        jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
+      } else {
+        jsonSchema = params?.schema;
+      }
+    } catch (error) {
+      throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
+    }
+    try {
+      const response = await this.postRequest(
+        this.apiUrl + `/v1/extract`,
+        { ...jsonData, schema: jsonSchema },
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "start extract job");
+      }
+    } catch (error) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+  /**
+   * Retrieves the status of an extract job.
+   * @param jobId - The ID of the extract job.
+   * @returns The status of the extract job.
+   */
+  async getExtractStatus(jobId) {
+    try {
+      const response = await this.getRequest(
+        `${this.apiUrl}/v1/extract/${jobId}`,
+        this.prepareHeaders()
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "get extract status");
+      }
+    } catch (error) {
+      throw new FirecrawlError(error.message, 500);
+    }
+  }
   /**
    * Prepares the headers for an API request.
    * @param idempotencyKey - Optional key to ensure idempotency.
@@ -609,6 +702,9 @@ var FirecrawlApp = class {
       if ("data" in statusData) {
         let data = statusData.data;
         while (typeof statusData === "object" && "next" in statusData) {
+          if (data.length === 0) {
+            break;
+          }
           statusResponse = await this.getRequest(statusData.next, headers);
           statusData = statusResponse.data;
           data = data.concat(statusData.data);
package/dist/index.d.cts
CHANGED
@@ -384,6 +384,20 @@ declare class FirecrawlApp {
      * @returns The response from the extract operation.
      */
     extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
+    /**
+     * Initiates an asynchronous extract job for a URL using the Firecrawl API.
+     * @param url - The URL to extract data from.
+     * @param params - Additional parameters for the extract request.
+     * @param idempotencyKey - Optional idempotency key for the request.
+     * @returns The response from the extract operation.
+     */
+    asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
+    /**
+     * Retrieves the status of an extract job.
+     * @param jobId - The ID of the extract job.
+     * @returns The status of the extract job.
+     */
+    getExtractStatus(jobId: string): Promise<any>;
     /**
      * Prepares the headers for an API request.
      * @param idempotencyKey - Optional key to ensure idempotency.
package/dist/index.d.ts
CHANGED
@@ -384,6 +384,20 @@ declare class FirecrawlApp {
      * @returns The response from the extract operation.
      */
     extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
+    /**
+     * Initiates an asynchronous extract job for a URL using the Firecrawl API.
+     * @param url - The URL to extract data from.
+     * @param params - Additional parameters for the extract request.
+     * @param idempotencyKey - Optional idempotency key for the request.
+     * @returns The response from the extract operation.
+     */
+    asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
+    /**
+     * Retrieves the status of an extract job.
+     * @param jobId - The ID of the extract job.
+     * @returns The status of the extract job.
+     */
+    getExtractStatus(jobId: string): Promise<any>;
     /**
      * Prepares the headers for an API request.
      * @param idempotencyKey - Optional key to ensure idempotency.
package/dist/index.js
CHANGED
@@ -228,23 +228,35 @@ var FirecrawlApp = class {
           if ("data" in statusData) {
             let data = statusData.data;
             while (typeof statusData === "object" && "next" in statusData) {
+              if (data.length === 0) {
+                break;
+              }
               statusData = (await this.getRequest(statusData.next, headers)).data;
               data = data.concat(statusData.data);
             }
             allData = data;
           }
         }
-        return {
+        let resp = {
           success: response.data.success,
           status: response.data.status,
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
           expiresAt: new Date(response.data.expiresAt),
-          next: response.data.next,
-          data: allData,
-          error: response.data.error
+          data: allData
         };
+        if (!response.data.success && response.data.error) {
+          resp = {
+            ...resp,
+            success: false,
+            error: response.data.error
+          };
+        }
+        if (response.data.next) {
+          resp.next = response.data.next;
+        }
+        return resp;
       } else {
         this.handleError(response, "check crawl status");
       }
@@ -423,23 +435,35 @@ var FirecrawlApp = class {
           if ("data" in statusData) {
             let data = statusData.data;
             while (typeof statusData === "object" && "next" in statusData) {
+              if (data.length === 0) {
+                break;
+              }
               statusData = (await this.getRequest(statusData.next, headers)).data;
               data = data.concat(statusData.data);
             }
             allData = data;
           }
         }
-        return {
+        let resp = {
           success: response.data.success,
           status: response.data.status,
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
           expiresAt: new Date(response.data.expiresAt),
-          next: response.data.next,
-          data: allData,
-          error: response.data.error
+          data: allData
         };
+        if (!response.data.success && response.data.error) {
+          resp = {
+            ...resp,
+            success: false,
+            error: response.data.error
+          };
+        }
+        if (response.data.next) {
+          resp.next = response.data.next;
+        }
+        return resp;
       } else {
         this.handleError(response, "check batch scrape status");
       }
@@ -473,21 +497,34 @@ var FirecrawlApp = class {
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema },
+        { ...jsonData, schema: jsonSchema, origin: "api-sdk" },
         headers
       );
       if (response.status === 200) {
-        const
-
-
-
-
-
-
-
-
-
-
+        const jobId = response.data.id;
+        let extractStatus;
+        do {
+          const statusResponse = await this.getRequest(
+            `${this.apiUrl}/v1/extract/${jobId}`,
+            headers
+          );
+          extractStatus = statusResponse.data;
+          if (extractStatus.status === "completed") {
+            if (extractStatus.success) {
+              return {
+                success: true,
+                data: extractStatus.data,
+                warning: extractStatus.warning,
+                error: extractStatus.error
+              };
+            } else {
+              throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
+            }
+          } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
+            throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
+          }
+          await new Promise((resolve) => setTimeout(resolve, 1e3));
+        } while (extractStatus.status !== "completed");
       } else {
         this.handleError(response, "extract");
       }
@@ -496,6 +533,62 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
+  /**
+   * Initiates an asynchronous extract job for a URL using the Firecrawl API.
+   * @param url - The URL to extract data from.
+   * @param params - Additional parameters for the extract request.
+   * @param idempotencyKey - Optional idempotency key for the request.
+   * @returns The response from the extract operation.
+   */
+  async asyncExtract(urls, params, idempotencyKey) {
+    const headers = this.prepareHeaders(idempotencyKey);
+    let jsonData = { urls, ...params };
+    let jsonSchema;
+    try {
+      if (params?.schema instanceof zt.ZodType) {
+        jsonSchema = zodToJsonSchema(params.schema);
+      } else {
+        jsonSchema = params?.schema;
+      }
+    } catch (error) {
+      throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
+    }
+    try {
+      const response = await this.postRequest(
+        this.apiUrl + `/v1/extract`,
+        { ...jsonData, schema: jsonSchema },
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "start extract job");
+      }
+    } catch (error) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+  /**
+   * Retrieves the status of an extract job.
+   * @param jobId - The ID of the extract job.
+   * @returns The status of the extract job.
+   */
+  async getExtractStatus(jobId) {
+    try {
+      const response = await this.getRequest(
+        `${this.apiUrl}/v1/extract/${jobId}`,
+        this.prepareHeaders()
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "get extract status");
+      }
+    } catch (error) {
+      throw new FirecrawlError(error.message, 500);
+    }
+  }
   /**
    * Prepares the headers for an API request.
    * @param idempotencyKey - Optional key to ensure idempotency.
@@ -573,6 +666,9 @@ var FirecrawlApp = class {
       if ("data" in statusData) {
         let data = statusData.data;
         while (typeof statusData === "object" && "next" in statusData) {
+          if (data.length === 0) {
+            break;
+          }
           statusResponse = await this.getRequest(statusData.next, headers);
           statusData = statusResponse.data;
           data = data.concat(statusData.data);
package/package.json
CHANGED
package/src/index.ts
CHANGED
@@ -565,23 +565,39 @@ export default class FirecrawlApp {
           if ("data" in statusData) {
             let data = statusData.data;
             while (typeof statusData === 'object' && 'next' in statusData) {
+              if (data.length === 0) {
+                break
+              }
               statusData = (await this.getRequest(statusData.next, headers)).data;
               data = data.concat(statusData.data);
             }
             allData = data;
           }
         }
-        return ({
+
+        let resp: CrawlStatusResponse | ErrorResponse = {
           success: response.data.success,
           status: response.data.status,
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
           expiresAt: new Date(response.data.expiresAt),
-          next: response.data.next,
-          data: allData,
-          error: response.data.error
-        })
+          data: allData
+        }
+
+        if (!response.data.success && response.data.error) {
+          resp = {
+            ...resp,
+            success: false,
+            error: response.data.error
+          } as ErrorResponse;
+        }
+
+        if (response.data.next) {
+          (resp as CrawlStatusResponse).next = response.data.next;
+        }
+
+        return resp;
       } else {
         this.handleError(response, "check crawl status");
       }
@@ -799,23 +815,39 @@ export default class FirecrawlApp {
           if ("data" in statusData) {
             let data = statusData.data;
             while (typeof statusData === 'object' && 'next' in statusData) {
+              if (data.length === 0) {
+                break
+              }
               statusData = (await this.getRequest(statusData.next, headers)).data;
               data = data.concat(statusData.data);
             }
             allData = data;
           }
         }
-        return ({
+
+        let resp: BatchScrapeStatusResponse | ErrorResponse = {
           success: response.data.success,
           status: response.data.status,
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
           expiresAt: new Date(response.data.expiresAt),
-          next: response.data.next,
-          data: allData,
-          error: response.data.error
-        })
+          data: allData
+        }
+
+        if (!response.data.success && response.data.error) {
+          resp = {
+            ...resp,
+            success: false,
+            error: response.data.error
+          } as ErrorResponse;
+        }
+
+        if (response.data.next) {
+          (resp as BatchScrapeStatusResponse).next = response.data.next;
+        }
+
+        return resp;
       } else {
         this.handleError(response, "check batch scrape status");
       }
@@ -852,21 +884,35 @@ export default class FirecrawlApp {
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema },
+        { ...jsonData, schema: jsonSchema, origin: "api-sdk" },
         headers
       );
+
       if (response.status === 200) {
-        const
-
-
-
-
-
-
-
-
-
-
+        const jobId = response.data.id;
+        let extractStatus;
+        do {
+          const statusResponse: AxiosResponse = await this.getRequest(
+            `${this.apiUrl}/v1/extract/${jobId}`,
+            headers
+          );
+          extractStatus = statusResponse.data;
+          if (extractStatus.status === "completed") {
+            if (extractStatus.success) {
+              return {
+                success: true,
+                data: extractStatus.data,
+                warning: extractStatus.warning,
+                error: extractStatus.error
+              };
+            } else {
+              throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
+            }
+          } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
+            throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
+          }
+          await new Promise(resolve => setTimeout(resolve, 1000)); // Polling interval
+        } while (extractStatus.status !== "completed");
       } else {
         this.handleError(response, "extract");
       }
@@ -876,6 +922,72 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }

+  /**
+   * Initiates an asynchronous extract job for a URL using the Firecrawl API.
+   * @param url - The URL to extract data from.
+   * @param params - Additional parameters for the extract request.
+   * @param idempotencyKey - Optional idempotency key for the request.
+   * @returns The response from the extract operation.
+   */
+  async asyncExtract(
+    urls: string[],
+    params?: ExtractParams,
+    idempotencyKey?: string
+  ): Promise<ExtractResponse | ErrorResponse> {
+    const headers = this.prepareHeaders(idempotencyKey);
+    let jsonData: any = { urls, ...params };
+    let jsonSchema: any;
+
+    try {
+      if (params?.schema instanceof zt.ZodType) {
+        jsonSchema = zodToJsonSchema(params.schema);
+      } else {
+        jsonSchema = params?.schema;
+      }
+    } catch (error: any) {
+      throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
+    }
+
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        this.apiUrl + `/v1/extract`,
+        { ...jsonData, schema: jsonSchema },
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "start extract job");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
+  /**
+   * Retrieves the status of an extract job.
+   * @param jobId - The ID of the extract job.
+   * @returns The status of the extract job.
+   */
+  async getExtractStatus(jobId: string): Promise<any> {
+    try {
+      const response: AxiosResponse = await this.getRequest(
+        `${this.apiUrl}/v1/extract/${jobId}`,
+        this.prepareHeaders()
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "get extract status");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+  }
+
   /**
    * Prepares the headers for an API request.
    * @param idempotencyKey - Optional key to ensure idempotency.
@@ -971,6 +1083,9 @@ export default class FirecrawlApp {
       if ("data" in statusData) {
         let data = statusData.data;
         while (typeof statusData === 'object' && 'next' in statusData) {
+          if (data.length === 0) {
+            break
+          }
           statusResponse = await this.getRequest(statusData.next, headers);
           statusData = statusResponse.data;
           data = data.concat(statusData.data);