firecrawl 1.9.3 → 1.9.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- package/dist/index.cjs +6 -6
- package/dist/index.d.cts +5 -3
- package/dist/index.d.ts +5 -3
- package/dist/index.js +6 -6
- package/package.json +1 -1
- package/src/index.ts +11 -4
package/dist/index.cjs
CHANGED
|
@@ -295,9 +295,9 @@ var FirecrawlApp = class {
|
|
|
295
295
|
* @param webhook - Optional webhook for the batch scrape.
|
|
296
296
|
* @returns The response from the crawl operation.
|
|
297
297
|
*/
|
|
298
|
-
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
|
|
298
|
+
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
299
299
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
300
|
-
let jsonData = { urls, ...params };
|
|
300
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
301
301
|
if (jsonData?.extract?.schema) {
|
|
302
302
|
let schema = jsonData.extract.schema;
|
|
303
303
|
try {
|
|
@@ -333,9 +333,9 @@ var FirecrawlApp = class {
|
|
|
333
333
|
}
|
|
334
334
|
return { success: false, error: "Internal server error." };
|
|
335
335
|
}
|
|
336
|
-
async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
|
|
336
|
+
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
337
337
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
338
|
-
let jsonData = { urls, ...params ?? {} };
|
|
338
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
|
|
339
339
|
try {
|
|
340
340
|
const response = await this.postRequest(
|
|
341
341
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -363,8 +363,8 @@ var FirecrawlApp = class {
|
|
|
363
363
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
364
364
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
365
365
|
*/
|
|
366
|
-
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
|
|
367
|
-
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
|
|
366
|
+
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
367
|
+
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
|
|
368
368
|
if (crawl.success && crawl.id) {
|
|
369
369
|
const id = crawl.id;
|
|
370
370
|
return new CrawlWatcher(id, this);
|
package/dist/index.d.cts
CHANGED
|
@@ -171,6 +171,7 @@ interface BatchScrapeResponse {
|
|
|
171
171
|
url?: string;
|
|
172
172
|
success: true;
|
|
173
173
|
error?: string;
|
|
174
|
+
invalidURLs?: string[];
|
|
174
175
|
}
|
|
175
176
|
/**
|
|
176
177
|
* Response interface for job status checks.
|
|
@@ -229,6 +230,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
|
229
230
|
schema?: LLMSchema;
|
|
230
231
|
systemPrompt?: string;
|
|
231
232
|
allowExternalLinks?: boolean;
|
|
233
|
+
includeSubdomains?: boolean;
|
|
232
234
|
}
|
|
233
235
|
/**
|
|
234
236
|
* Response interface for extracting information from URLs.
|
|
@@ -329,8 +331,8 @@ declare class FirecrawlApp {
|
|
|
329
331
|
* @param webhook - Optional webhook for the batch scrape.
|
|
330
332
|
* @returns The response from the crawl operation.
|
|
331
333
|
*/
|
|
332
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
333
|
-
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
334
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
335
|
+
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
334
336
|
/**
|
|
335
337
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
|
336
338
|
* @param urls - The URL to scrape.
|
|
@@ -338,7 +340,7 @@ declare class FirecrawlApp {
|
|
|
338
340
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
339
341
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
340
342
|
*/
|
|
341
|
-
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
|
343
|
+
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
|
|
342
344
|
/**
|
|
343
345
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
344
346
|
* @param id - The ID of the batch scrape operation.
|
package/dist/index.d.ts
CHANGED
|
@@ -171,6 +171,7 @@ interface BatchScrapeResponse {
|
|
|
171
171
|
url?: string;
|
|
172
172
|
success: true;
|
|
173
173
|
error?: string;
|
|
174
|
+
invalidURLs?: string[];
|
|
174
175
|
}
|
|
175
176
|
/**
|
|
176
177
|
* Response interface for job status checks.
|
|
@@ -229,6 +230,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
|
229
230
|
schema?: LLMSchema;
|
|
230
231
|
systemPrompt?: string;
|
|
231
232
|
allowExternalLinks?: boolean;
|
|
233
|
+
includeSubdomains?: boolean;
|
|
232
234
|
}
|
|
233
235
|
/**
|
|
234
236
|
* Response interface for extracting information from URLs.
|
|
@@ -329,8 +331,8 @@ declare class FirecrawlApp {
|
|
|
329
331
|
* @param webhook - Optional webhook for the batch scrape.
|
|
330
332
|
* @returns The response from the crawl operation.
|
|
331
333
|
*/
|
|
332
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
333
|
-
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
334
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
335
|
+
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
334
336
|
/**
|
|
335
337
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
|
336
338
|
* @param urls - The URL to scrape.
|
|
@@ -338,7 +340,7 @@ declare class FirecrawlApp {
|
|
|
338
340
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
339
341
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
340
342
|
*/
|
|
341
|
-
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
|
343
|
+
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
|
|
342
344
|
/**
|
|
343
345
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
344
346
|
* @param id - The ID of the batch scrape operation.
|
package/dist/index.js
CHANGED
|
@@ -259,9 +259,9 @@ var FirecrawlApp = class {
|
|
|
259
259
|
* @param webhook - Optional webhook for the batch scrape.
|
|
260
260
|
* @returns The response from the crawl operation.
|
|
261
261
|
*/
|
|
262
|
-
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
|
|
262
|
+
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
263
263
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
264
|
-
let jsonData = { urls, ...params };
|
|
264
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
265
265
|
if (jsonData?.extract?.schema) {
|
|
266
266
|
let schema = jsonData.extract.schema;
|
|
267
267
|
try {
|
|
@@ -297,9 +297,9 @@ var FirecrawlApp = class {
|
|
|
297
297
|
}
|
|
298
298
|
return { success: false, error: "Internal server error." };
|
|
299
299
|
}
|
|
300
|
-
async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
|
|
300
|
+
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
301
301
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
302
|
-
let jsonData = { urls, ...params ?? {} };
|
|
302
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
|
|
303
303
|
try {
|
|
304
304
|
const response = await this.postRequest(
|
|
305
305
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -327,8 +327,8 @@ var FirecrawlApp = class {
|
|
|
327
327
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
328
328
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
329
329
|
*/
|
|
330
|
-
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
|
|
331
|
-
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
|
|
330
|
+
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
331
|
+
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
|
|
332
332
|
if (crawl.success && crawl.id) {
|
|
333
333
|
const id = crawl.id;
|
|
334
334
|
return new CrawlWatcher(id, this);
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -183,6 +183,7 @@ export interface BatchScrapeResponse {
|
|
|
183
183
|
url?: string;
|
|
184
184
|
success: true;
|
|
185
185
|
error?: string;
|
|
186
|
+
invalidURLs?: string[];
|
|
186
187
|
}
|
|
187
188
|
|
|
188
189
|
/**
|
|
@@ -246,6 +247,7 @@ export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
|
246
247
|
schema?: LLMSchema;
|
|
247
248
|
systemPrompt?: string;
|
|
248
249
|
allowExternalLinks?: boolean;
|
|
250
|
+
includeSubdomains?: boolean;
|
|
249
251
|
}
|
|
250
252
|
|
|
251
253
|
/**
|
|
@@ -576,9 +578,10 @@ export default class FirecrawlApp {
|
|
|
576
578
|
pollInterval: number = 2,
|
|
577
579
|
idempotencyKey?: string,
|
|
578
580
|
webhook?: CrawlParams["webhook"],
|
|
581
|
+
ignoreInvalidURLs?: boolean,
|
|
579
582
|
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
580
583
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
581
|
-
let jsonData: any = { urls, ...params };
|
|
584
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
582
585
|
if (jsonData?.extract?.schema) {
|
|
583
586
|
let schema = jsonData.extract.schema;
|
|
584
587
|
|
|
@@ -621,10 +624,12 @@ export default class FirecrawlApp {
|
|
|
621
624
|
async asyncBatchScrapeUrls(
|
|
622
625
|
urls: string[],
|
|
623
626
|
params?: ScrapeParams,
|
|
624
|
-
idempotencyKey?: string
|
|
627
|
+
idempotencyKey?: string,
|
|
628
|
+
webhook?: CrawlParams["webhook"],
|
|
629
|
+
ignoreInvalidURLs?: boolean,
|
|
625
630
|
): Promise<BatchScrapeResponse | ErrorResponse> {
|
|
626
631
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
627
|
-
let jsonData: any = { urls, ...(params ?? {}) };
|
|
632
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
|
|
628
633
|
try {
|
|
629
634
|
const response: AxiosResponse = await this.postRequest(
|
|
630
635
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -657,8 +662,10 @@ export default class FirecrawlApp {
|
|
|
657
662
|
urls: string[],
|
|
658
663
|
params?: ScrapeParams,
|
|
659
664
|
idempotencyKey?: string,
|
|
665
|
+
webhook?: CrawlParams["webhook"],
|
|
666
|
+
ignoreInvalidURLs?: boolean,
|
|
660
667
|
) {
|
|
661
|
-
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
|
|
668
|
+
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
|
|
662
669
|
|
|
663
670
|
if (crawl.success && crawl.id) {
|
|
664
671
|
const id = crawl.id;
|