firecrawl 1.13.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +26 -4
- package/dist/index.d.cts +8 -2
- package/dist/index.d.ts +8 -2
- package/dist/index.js +26 -4
- package/package.json +1 -1
- package/src/index.ts +28 -4
package/dist/index.cjs
CHANGED
|
@@ -245,16 +245,26 @@ var FirecrawlApp = class {
|
|
|
245
245
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
246
246
|
* @param id - The ID of the crawl operation.
|
|
247
247
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
248
|
+
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
249
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
250
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
248
251
|
* @returns The response containing the job status.
|
|
249
252
|
*/
|
|
250
|
-
async checkCrawlStatus(id, getAllData = false) {
|
|
253
|
+
async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
|
|
251
254
|
if (!id) {
|
|
252
255
|
throw new FirecrawlError("No crawl ID provided", 400);
|
|
253
256
|
}
|
|
254
257
|
const headers = this.prepareHeaders();
|
|
258
|
+
const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
|
|
259
|
+
if (skip !== void 0) {
|
|
260
|
+
targetURL.searchParams.set("skip", skip.toString());
|
|
261
|
+
}
|
|
262
|
+
if (limit !== void 0) {
|
|
263
|
+
targetURL.searchParams.set("limit", limit.toString());
|
|
264
|
+
}
|
|
255
265
|
try {
|
|
256
266
|
const response = await this.getRequest(
|
|
257
|
-
`${this.apiUrl}/v1/crawl/${id}`,
|
|
267
|
+
targetURL.href,
|
|
258
268
|
headers
|
|
259
269
|
);
|
|
260
270
|
if (response.status === 200) {
|
|
@@ -279,6 +289,7 @@ var FirecrawlApp = class {
|
|
|
279
289
|
total: response.data.total,
|
|
280
290
|
completed: response.data.completed,
|
|
281
291
|
creditsUsed: response.data.creditsUsed,
|
|
292
|
+
next: getAllData ? void 0 : response.data.next,
|
|
282
293
|
expiresAt: new Date(response.data.expiresAt),
|
|
283
294
|
data: allData
|
|
284
295
|
};
|
|
@@ -452,16 +463,26 @@ var FirecrawlApp = class {
|
|
|
452
463
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
453
464
|
* @param id - The ID of the batch scrape operation.
|
|
454
465
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
466
|
+
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
467
|
+
* @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
|
|
468
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
455
469
|
* @returns The response containing the job status.
|
|
456
470
|
*/
|
|
457
|
-
async checkBatchScrapeStatus(id, getAllData = false) {
|
|
471
|
+
async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
|
|
458
472
|
if (!id) {
|
|
459
473
|
throw new FirecrawlError("No batch scrape ID provided", 400);
|
|
460
474
|
}
|
|
461
475
|
const headers = this.prepareHeaders();
|
|
476
|
+
const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
|
|
477
|
+
if (skip !== void 0) {
|
|
478
|
+
targetURL.searchParams.set("skip", skip.toString());
|
|
479
|
+
}
|
|
480
|
+
if (limit !== void 0) {
|
|
481
|
+
targetURL.searchParams.set("limit", limit.toString());
|
|
482
|
+
}
|
|
462
483
|
try {
|
|
463
484
|
const response = await this.getRequest(
|
|
464
|
-
`${this.apiUrl}/v1/batch/scrape/${id}`,
|
|
485
|
+
targetURL.href,
|
|
465
486
|
headers
|
|
466
487
|
);
|
|
467
488
|
if (response.status === 200) {
|
|
@@ -486,6 +507,7 @@ var FirecrawlApp = class {
|
|
|
486
507
|
total: response.data.total,
|
|
487
508
|
completed: response.data.completed,
|
|
488
509
|
creditsUsed: response.data.creditsUsed,
|
|
510
|
+
next: getAllData ? void 0 : response.data.next,
|
|
489
511
|
expiresAt: new Date(response.data.expiresAt),
|
|
490
512
|
data: allData
|
|
491
513
|
};
|
package/dist/index.d.cts
CHANGED
|
@@ -326,9 +326,12 @@ declare class FirecrawlApp {
|
|
|
326
326
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
327
327
|
* @param id - The ID of the crawl operation.
|
|
328
328
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
329
|
+
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
330
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
331
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
329
332
|
* @returns The response containing the job status.
|
|
330
333
|
*/
|
|
331
|
-
checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
334
|
+
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
332
335
|
/**
|
|
333
336
|
* Cancels a crawl job using the Firecrawl API.
|
|
334
337
|
* @param id - The ID of the crawl operation.
|
|
@@ -373,9 +376,12 @@ declare class FirecrawlApp {
|
|
|
373
376
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
374
377
|
* @param id - The ID of the batch scrape operation.
|
|
375
378
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
379
|
+
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
380
|
+
* @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
|
|
381
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
376
382
|
* @returns The response containing the job status.
|
|
377
383
|
*/
|
|
378
|
-
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
384
|
+
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
379
385
|
/**
|
|
380
386
|
* Extracts information from URLs using the Firecrawl API.
|
|
381
387
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
package/dist/index.d.ts
CHANGED
|
@@ -326,9 +326,12 @@ declare class FirecrawlApp {
|
|
|
326
326
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
327
327
|
* @param id - The ID of the crawl operation.
|
|
328
328
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
329
|
+
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
330
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
331
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
329
332
|
* @returns The response containing the job status.
|
|
330
333
|
*/
|
|
331
|
-
checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
334
|
+
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
332
335
|
/**
|
|
333
336
|
* Cancels a crawl job using the Firecrawl API.
|
|
334
337
|
* @param id - The ID of the crawl operation.
|
|
@@ -373,9 +376,12 @@ declare class FirecrawlApp {
|
|
|
373
376
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
374
377
|
* @param id - The ID of the batch scrape operation.
|
|
375
378
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
379
|
+
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
380
|
+
* @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
|
|
381
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
376
382
|
* @returns The response containing the job status.
|
|
377
383
|
*/
|
|
378
|
-
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
384
|
+
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
379
385
|
/**
|
|
380
386
|
* Extracts information from URLs using the Firecrawl API.
|
|
381
387
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
package/dist/index.js
CHANGED
|
@@ -209,16 +209,26 @@ var FirecrawlApp = class {
|
|
|
209
209
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
210
210
|
* @param id - The ID of the crawl operation.
|
|
211
211
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
212
|
+
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
213
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
214
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
212
215
|
* @returns The response containing the job status.
|
|
213
216
|
*/
|
|
214
|
-
async checkCrawlStatus(id, getAllData = false) {
|
|
217
|
+
async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
|
|
215
218
|
if (!id) {
|
|
216
219
|
throw new FirecrawlError("No crawl ID provided", 400);
|
|
217
220
|
}
|
|
218
221
|
const headers = this.prepareHeaders();
|
|
222
|
+
const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
|
|
223
|
+
if (skip !== void 0) {
|
|
224
|
+
targetURL.searchParams.set("skip", skip.toString());
|
|
225
|
+
}
|
|
226
|
+
if (limit !== void 0) {
|
|
227
|
+
targetURL.searchParams.set("limit", limit.toString());
|
|
228
|
+
}
|
|
219
229
|
try {
|
|
220
230
|
const response = await this.getRequest(
|
|
221
|
-
`${this.apiUrl}/v1/crawl/${id}`,
|
|
231
|
+
targetURL.href,
|
|
222
232
|
headers
|
|
223
233
|
);
|
|
224
234
|
if (response.status === 200) {
|
|
@@ -243,6 +253,7 @@ var FirecrawlApp = class {
|
|
|
243
253
|
total: response.data.total,
|
|
244
254
|
completed: response.data.completed,
|
|
245
255
|
creditsUsed: response.data.creditsUsed,
|
|
256
|
+
next: getAllData ? void 0 : response.data.next,
|
|
246
257
|
expiresAt: new Date(response.data.expiresAt),
|
|
247
258
|
data: allData
|
|
248
259
|
};
|
|
@@ -416,16 +427,26 @@ var FirecrawlApp = class {
|
|
|
416
427
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
417
428
|
* @param id - The ID of the batch scrape operation.
|
|
418
429
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
430
|
+
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
431
|
+
* @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
|
|
432
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
419
433
|
* @returns The response containing the job status.
|
|
420
434
|
*/
|
|
421
|
-
async checkBatchScrapeStatus(id, getAllData = false) {
|
|
435
|
+
async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
|
|
422
436
|
if (!id) {
|
|
423
437
|
throw new FirecrawlError("No batch scrape ID provided", 400);
|
|
424
438
|
}
|
|
425
439
|
const headers = this.prepareHeaders();
|
|
440
|
+
const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
|
|
441
|
+
if (skip !== void 0) {
|
|
442
|
+
targetURL.searchParams.set("skip", skip.toString());
|
|
443
|
+
}
|
|
444
|
+
if (limit !== void 0) {
|
|
445
|
+
targetURL.searchParams.set("limit", limit.toString());
|
|
446
|
+
}
|
|
426
447
|
try {
|
|
427
448
|
const response = await this.getRequest(
|
|
428
|
-
`${this.apiUrl}/v1/batch/scrape/${id}`,
|
|
449
|
+
targetURL.href,
|
|
429
450
|
headers
|
|
430
451
|
);
|
|
431
452
|
if (response.status === 200) {
|
|
@@ -450,6 +471,7 @@ var FirecrawlApp = class {
|
|
|
450
471
|
total: response.data.total,
|
|
451
472
|
completed: response.data.completed,
|
|
452
473
|
creditsUsed: response.data.creditsUsed,
|
|
474
|
+
next: getAllData ? void 0 : response.data.next,
|
|
453
475
|
expiresAt: new Date(response.data.expiresAt),
|
|
454
476
|
data: allData
|
|
455
477
|
};
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -545,17 +545,28 @@ export default class FirecrawlApp {
|
|
|
545
545
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
546
546
|
* @param id - The ID of the crawl operation.
|
|
547
547
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
548
|
+
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
549
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
550
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
548
551
|
* @returns The response containing the job status.
|
|
549
552
|
*/
|
|
550
|
-
async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
553
|
+
async checkCrawlStatus(id?: string, getAllData = false, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
551
554
|
if (!id) {
|
|
552
555
|
throw new FirecrawlError("No crawl ID provided", 400);
|
|
553
556
|
}
|
|
554
557
|
|
|
555
558
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
559
|
+
const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
|
|
560
|
+
if (skip !== undefined) {
|
|
561
|
+
targetURL.searchParams.set("skip", skip.toString());
|
|
562
|
+
}
|
|
563
|
+
if (limit !== undefined) {
|
|
564
|
+
targetURL.searchParams.set("limit", limit.toString());
|
|
565
|
+
}
|
|
566
|
+
|
|
556
567
|
try {
|
|
557
568
|
const response: AxiosResponse = await this.getRequest(
|
|
558
|
-
`${this.apiUrl}/v1/crawl/${id}`,
|
|
569
|
+
targetURL.href,
|
|
559
570
|
headers
|
|
560
571
|
);
|
|
561
572
|
if (response.status === 200) {
|
|
@@ -581,6 +592,7 @@ export default class FirecrawlApp {
|
|
|
581
592
|
total: response.data.total,
|
|
582
593
|
completed: response.data.completed,
|
|
583
594
|
creditsUsed: response.data.creditsUsed,
|
|
595
|
+
next: getAllData ? undefined : response.data.next,
|
|
584
596
|
expiresAt: new Date(response.data.expiresAt),
|
|
585
597
|
data: allData
|
|
586
598
|
}
|
|
@@ -795,17 +807,28 @@ export default class FirecrawlApp {
|
|
|
795
807
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
796
808
|
* @param id - The ID of the batch scrape operation.
|
|
797
809
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
810
|
+
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
811
|
+
* @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
|
|
812
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
798
813
|
* @returns The response containing the job status.
|
|
799
814
|
*/
|
|
800
|
-
async checkBatchScrapeStatus(id?: string, getAllData = false): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
815
|
+
async checkBatchScrapeStatus(id?: string, getAllData = false, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
801
816
|
if (!id) {
|
|
802
817
|
throw new FirecrawlError("No batch scrape ID provided", 400);
|
|
803
818
|
}
|
|
804
819
|
|
|
805
820
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
821
|
+
const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
|
|
822
|
+
if (skip !== undefined) {
|
|
823
|
+
targetURL.searchParams.set("skip", skip.toString());
|
|
824
|
+
}
|
|
825
|
+
if (limit !== undefined) {
|
|
826
|
+
targetURL.searchParams.set("limit", limit.toString());
|
|
827
|
+
}
|
|
828
|
+
|
|
806
829
|
try {
|
|
807
830
|
const response: AxiosResponse = await this.getRequest(
|
|
808
|
-
`${this.apiUrl}/v1/batch/scrape/${id}`,
|
|
831
|
+
targetURL.href,
|
|
809
832
|
headers
|
|
810
833
|
);
|
|
811
834
|
if (response.status === 200) {
|
|
@@ -831,6 +854,7 @@ export default class FirecrawlApp {
|
|
|
831
854
|
total: response.data.total,
|
|
832
855
|
completed: response.data.completed,
|
|
833
856
|
creditsUsed: response.data.creditsUsed,
|
|
857
|
+
next: getAllData ? undefined : response.data.next,
|
|
834
858
|
expiresAt: new Date(response.data.expiresAt),
|
|
835
859
|
data: allData
|
|
836
860
|
}
|