firecrawl 1.9.2 → 1.9.4
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/index.cjs +6 -6
- package/dist/index.d.cts +5 -3
- package/dist/index.d.ts +5 -3
- package/dist/index.js +6 -6
- package/package.json +1 -1
- package/src/index.ts +11 -4
package/dist/index.cjs
CHANGED
|
@@ -295,9 +295,9 @@ var FirecrawlApp = class {
|
|
|
295
295
|
* @param webhook - Optional webhook for the batch scrape.
|
|
296
296
|
* @returns The response from the crawl operation.
|
|
297
297
|
*/
|
|
298
|
-
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
|
|
298
|
+
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
299
299
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
300
|
-
let jsonData = { urls, ...params };
|
|
300
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
301
301
|
if (jsonData?.extract?.schema) {
|
|
302
302
|
let schema = jsonData.extract.schema;
|
|
303
303
|
try {
|
|
@@ -333,9 +333,9 @@ var FirecrawlApp = class {
|
|
|
333
333
|
}
|
|
334
334
|
return { success: false, error: "Internal server error." };
|
|
335
335
|
}
|
|
336
|
-
async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
|
|
336
|
+
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
337
337
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
338
|
-
let jsonData = { urls, ...params ?? {} };
|
|
338
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
|
|
339
339
|
try {
|
|
340
340
|
const response = await this.postRequest(
|
|
341
341
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -363,8 +363,8 @@ var FirecrawlApp = class {
|
|
|
363
363
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
364
364
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
365
365
|
*/
|
|
366
|
-
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
|
|
367
|
-
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
|
|
366
|
+
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
367
|
+
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
|
|
368
368
|
if (crawl.success && crawl.id) {
|
|
369
369
|
const id = crawl.id;
|
|
370
370
|
return new CrawlWatcher(id, this);
|
package/dist/index.d.cts
CHANGED
|
@@ -147,6 +147,7 @@ interface CrawlParams {
|
|
|
147
147
|
webhook?: string | {
|
|
148
148
|
url: string;
|
|
149
149
|
headers?: Record<string, string>;
|
|
150
|
+
metadata?: Record<string, string>;
|
|
150
151
|
};
|
|
151
152
|
deduplicateSimilarURLs?: boolean;
|
|
152
153
|
ignoreQueryParameters?: boolean;
|
|
@@ -170,6 +171,7 @@ interface BatchScrapeResponse {
|
|
|
170
171
|
url?: string;
|
|
171
172
|
success: true;
|
|
172
173
|
error?: string;
|
|
174
|
+
invalidURLs?: string[];
|
|
173
175
|
}
|
|
174
176
|
/**
|
|
175
177
|
* Response interface for job status checks.
|
|
@@ -328,8 +330,8 @@ declare class FirecrawlApp {
|
|
|
328
330
|
* @param webhook - Optional webhook for the batch scrape.
|
|
329
331
|
* @returns The response from the crawl operation.
|
|
330
332
|
*/
|
|
331
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
332
|
-
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
333
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
334
|
+
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
333
335
|
/**
|
|
334
336
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
|
335
337
|
* @param urls - The URL to scrape.
|
|
@@ -337,7 +339,7 @@ declare class FirecrawlApp {
|
|
|
337
339
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
338
340
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
339
341
|
*/
|
|
340
|
-
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
|
342
|
+
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
|
|
341
343
|
/**
|
|
342
344
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
343
345
|
* @param id - The ID of the batch scrape operation.
|
package/dist/index.d.ts
CHANGED
|
@@ -147,6 +147,7 @@ interface CrawlParams {
|
|
|
147
147
|
webhook?: string | {
|
|
148
148
|
url: string;
|
|
149
149
|
headers?: Record<string, string>;
|
|
150
|
+
metadata?: Record<string, string>;
|
|
150
151
|
};
|
|
151
152
|
deduplicateSimilarURLs?: boolean;
|
|
152
153
|
ignoreQueryParameters?: boolean;
|
|
@@ -170,6 +171,7 @@ interface BatchScrapeResponse {
|
|
|
170
171
|
url?: string;
|
|
171
172
|
success: true;
|
|
172
173
|
error?: string;
|
|
174
|
+
invalidURLs?: string[];
|
|
173
175
|
}
|
|
174
176
|
/**
|
|
175
177
|
* Response interface for job status checks.
|
|
@@ -328,8 +330,8 @@ declare class FirecrawlApp {
|
|
|
328
330
|
* @param webhook - Optional webhook for the batch scrape.
|
|
329
331
|
* @returns The response from the crawl operation.
|
|
330
332
|
*/
|
|
331
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
332
|
-
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
333
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
334
|
+
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
333
335
|
/**
|
|
334
336
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
|
335
337
|
* @param urls - The URL to scrape.
|
|
@@ -337,7 +339,7 @@ declare class FirecrawlApp {
|
|
|
337
339
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
338
340
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
339
341
|
*/
|
|
340
|
-
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
|
342
|
+
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
|
|
341
343
|
/**
|
|
342
344
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
343
345
|
* @param id - The ID of the batch scrape operation.
|
package/dist/index.js
CHANGED
|
@@ -259,9 +259,9 @@ var FirecrawlApp = class {
|
|
|
259
259
|
* @param webhook - Optional webhook for the batch scrape.
|
|
260
260
|
* @returns The response from the crawl operation.
|
|
261
261
|
*/
|
|
262
|
-
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
|
|
262
|
+
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
263
263
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
264
|
-
let jsonData = { urls, ...params };
|
|
264
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
265
265
|
if (jsonData?.extract?.schema) {
|
|
266
266
|
let schema = jsonData.extract.schema;
|
|
267
267
|
try {
|
|
@@ -297,9 +297,9 @@ var FirecrawlApp = class {
|
|
|
297
297
|
}
|
|
298
298
|
return { success: false, error: "Internal server error." };
|
|
299
299
|
}
|
|
300
|
-
async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
|
|
300
|
+
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
301
301
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
302
|
-
let jsonData = { urls, ...params ?? {} };
|
|
302
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
|
|
303
303
|
try {
|
|
304
304
|
const response = await this.postRequest(
|
|
305
305
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -327,8 +327,8 @@ var FirecrawlApp = class {
|
|
|
327
327
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
328
328
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
329
329
|
*/
|
|
330
|
-
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
|
|
331
|
-
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
|
|
330
|
+
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
331
|
+
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
|
|
332
332
|
if (crawl.success && crawl.id) {
|
|
333
333
|
const id = crawl.id;
|
|
334
334
|
return new CrawlWatcher(id, this);
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -157,6 +157,7 @@ export interface CrawlParams {
|
|
|
157
157
|
webhook?: string | {
|
|
158
158
|
url: string;
|
|
159
159
|
headers?: Record<string, string>;
|
|
160
|
+
metadata?: Record<string, string>;
|
|
160
161
|
};
|
|
161
162
|
deduplicateSimilarURLs?: boolean;
|
|
162
163
|
ignoreQueryParameters?: boolean;
|
|
@@ -182,6 +183,7 @@ export interface BatchScrapeResponse {
|
|
|
182
183
|
url?: string;
|
|
183
184
|
success: true;
|
|
184
185
|
error?: string;
|
|
186
|
+
invalidURLs?: string[];
|
|
185
187
|
}
|
|
186
188
|
|
|
187
189
|
/**
|
|
@@ -575,9 +577,10 @@ export default class FirecrawlApp {
|
|
|
575
577
|
pollInterval: number = 2,
|
|
576
578
|
idempotencyKey?: string,
|
|
577
579
|
webhook?: CrawlParams["webhook"],
|
|
580
|
+
ignoreInvalidURLs?: boolean,
|
|
578
581
|
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
579
582
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
580
|
-
let jsonData: any = { urls, ...params };
|
|
583
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
581
584
|
if (jsonData?.extract?.schema) {
|
|
582
585
|
let schema = jsonData.extract.schema;
|
|
583
586
|
|
|
@@ -620,10 +623,12 @@ export default class FirecrawlApp {
|
|
|
620
623
|
async asyncBatchScrapeUrls(
|
|
621
624
|
urls: string[],
|
|
622
625
|
params?: ScrapeParams,
|
|
623
|
-
idempotencyKey?: string
|
|
626
|
+
idempotencyKey?: string,
|
|
627
|
+
webhook?: CrawlParams["webhook"],
|
|
628
|
+
ignoreInvalidURLs?: boolean,
|
|
624
629
|
): Promise<BatchScrapeResponse | ErrorResponse> {
|
|
625
630
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
626
|
-
let jsonData: any = { urls, ...(params ?? {}) };
|
|
631
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
|
|
627
632
|
try {
|
|
628
633
|
const response: AxiosResponse = await this.postRequest(
|
|
629
634
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -656,8 +661,10 @@ export default class FirecrawlApp {
|
|
|
656
661
|
urls: string[],
|
|
657
662
|
params?: ScrapeParams,
|
|
658
663
|
idempotencyKey?: string,
|
|
664
|
+
webhook?: CrawlParams["webhook"],
|
|
665
|
+
ignoreInvalidURLs?: boolean,
|
|
659
666
|
) {
|
|
660
|
-
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
|
|
667
|
+
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
|
|
661
668
|
|
|
662
669
|
if (crawl.success && crawl.id) {
|
|
663
670
|
const id = crawl.id;
|