firecrawl 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.cts +32 -4
- package/dist/index.d.ts +32 -4
- package/package.json +1 -1
- package/src/index.ts +33 -3
package/README.md
CHANGED
|
@@ -147,7 +147,7 @@ watch.addEventListener("done", state => {
|
|
|
147
147
|
|
|
148
148
|
### Batch scraping multiple URLs
|
|
149
149
|
|
|
150
|
-
To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the
|
|
150
|
+
To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.
|
|
151
151
|
|
|
152
152
|
```js
|
|
153
153
|
const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], {
|
|
@@ -158,10 +158,10 @@ const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev',
|
|
|
158
158
|
|
|
159
159
|
#### Asynchronous batch scrape
|
|
160
160
|
|
|
161
|
-
To initiate an asynchronous batch scrape, utilize the `
|
|
161
|
+
To initiate an asynchronous batch scrape, use the `asyncBatchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats. On success, the method returns an ID, which you need in order to check the status of the batch scrape later.
|
|
162
162
|
|
|
163
163
|
```js
|
|
164
|
-
const
|
|
164
|
+
const asyncBatchScrapeResult = await app.asyncBatchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
|
|
165
165
|
```
|
|
166
166
|
|
|
167
167
|
#### Batch scrape with WebSockets
|
package/dist/index.d.cts
CHANGED
|
@@ -77,6 +77,10 @@ interface CrawlScrapeOptions {
|
|
|
77
77
|
onlyMainContent?: boolean;
|
|
78
78
|
waitFor?: number;
|
|
79
79
|
timeout?: number;
|
|
80
|
+
location?: {
|
|
81
|
+
country?: string;
|
|
82
|
+
languages?: string[];
|
|
83
|
+
};
|
|
80
84
|
}
|
|
81
85
|
type Action = {
|
|
82
86
|
type: "wait";
|
|
@@ -142,6 +146,16 @@ interface CrawlResponse {
|
|
|
142
146
|
success: true;
|
|
143
147
|
error?: string;
|
|
144
148
|
}
|
|
149
|
+
/**
|
|
150
|
+
* Response interface for batch scrape operations.
|
|
151
|
+
* Defines the structure of the response received after initiating a batch scrape.
|
|
152
|
+
*/
|
|
153
|
+
interface BatchScrapeResponse {
|
|
154
|
+
id?: string;
|
|
155
|
+
url?: string;
|
|
156
|
+
success: true;
|
|
157
|
+
error?: string;
|
|
158
|
+
}
|
|
145
159
|
/**
|
|
146
160
|
* Response interface for job status checks.
|
|
147
161
|
* Provides detailed status of a crawl job including progress and results.
|
|
@@ -156,6 +170,20 @@ interface CrawlStatusResponse {
|
|
|
156
170
|
next?: string;
|
|
157
171
|
data: FirecrawlDocument<undefined>[];
|
|
158
172
|
}
|
|
173
|
+
/**
|
|
174
|
+
* Response interface for batch scrape job status checks.
|
|
175
|
+
* Provides detailed status of a batch scrape job including progress and results.
|
|
176
|
+
*/
|
|
177
|
+
interface BatchScrapeStatusResponse {
|
|
178
|
+
success: true;
|
|
179
|
+
status: "scraping" | "completed" | "failed" | "cancelled";
|
|
180
|
+
completed: number;
|
|
181
|
+
total: number;
|
|
182
|
+
creditsUsed: number;
|
|
183
|
+
expiresAt: Date;
|
|
184
|
+
next?: string;
|
|
185
|
+
data: FirecrawlDocument<undefined>[];
|
|
186
|
+
}
|
|
159
187
|
/**
|
|
160
188
|
* Parameters for mapping operations.
|
|
161
189
|
* Defines options for mapping URLs during a crawl.
|
|
@@ -263,8 +291,8 @@ declare class FirecrawlApp {
|
|
|
263
291
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
264
292
|
* @returns The response from the batch scrape operation.
|
|
265
293
|
*/
|
|
266
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string): Promise<
|
|
267
|
-
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<
|
|
294
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
295
|
+
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
268
296
|
/**
|
|
269
297
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
|
270
298
|
* @param urls - The URLs to scrape.
|
|
@@ -279,7 +307,7 @@ declare class FirecrawlApp {
|
|
|
279
307
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
280
308
|
* @returns The response containing the job status.
|
|
281
309
|
*/
|
|
282
|
-
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<
|
|
310
|
+
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
283
311
|
/**
|
|
284
312
|
* Prepares the headers for an API request.
|
|
285
313
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -344,4 +372,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
344
372
|
close(): void;
|
|
345
373
|
}
|
|
346
374
|
|
|
347
|
-
export { type Action, type ActionsResult, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
|
|
375
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -77,6 +77,10 @@ interface CrawlScrapeOptions {
|
|
|
77
77
|
onlyMainContent?: boolean;
|
|
78
78
|
waitFor?: number;
|
|
79
79
|
timeout?: number;
|
|
80
|
+
location?: {
|
|
81
|
+
country?: string;
|
|
82
|
+
languages?: string[];
|
|
83
|
+
};
|
|
80
84
|
}
|
|
81
85
|
type Action = {
|
|
82
86
|
type: "wait";
|
|
@@ -142,6 +146,16 @@ interface CrawlResponse {
|
|
|
142
146
|
success: true;
|
|
143
147
|
error?: string;
|
|
144
148
|
}
|
|
149
|
+
/**
|
|
150
|
+
* Response interface for batch scrape operations.
|
|
151
|
+
* Defines the structure of the response received after initiating a batch scrape.
|
|
152
|
+
*/
|
|
153
|
+
interface BatchScrapeResponse {
|
|
154
|
+
id?: string;
|
|
155
|
+
url?: string;
|
|
156
|
+
success: true;
|
|
157
|
+
error?: string;
|
|
158
|
+
}
|
|
145
159
|
/**
|
|
146
160
|
* Response interface for job status checks.
|
|
147
161
|
* Provides detailed status of a crawl job including progress and results.
|
|
@@ -156,6 +170,20 @@ interface CrawlStatusResponse {
|
|
|
156
170
|
next?: string;
|
|
157
171
|
data: FirecrawlDocument<undefined>[];
|
|
158
172
|
}
|
|
173
|
+
/**
|
|
174
|
+
* Response interface for batch scrape job status checks.
|
|
175
|
+
* Provides detailed status of a batch scrape job including progress and results.
|
|
176
|
+
*/
|
|
177
|
+
interface BatchScrapeStatusResponse {
|
|
178
|
+
success: true;
|
|
179
|
+
status: "scraping" | "completed" | "failed" | "cancelled";
|
|
180
|
+
completed: number;
|
|
181
|
+
total: number;
|
|
182
|
+
creditsUsed: number;
|
|
183
|
+
expiresAt: Date;
|
|
184
|
+
next?: string;
|
|
185
|
+
data: FirecrawlDocument<undefined>[];
|
|
186
|
+
}
|
|
159
187
|
/**
|
|
160
188
|
* Parameters for mapping operations.
|
|
161
189
|
* Defines options for mapping URLs during a crawl.
|
|
@@ -263,8 +291,8 @@ declare class FirecrawlApp {
|
|
|
263
291
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
264
292
|
* @returns The response from the batch scrape operation.
|
|
265
293
|
*/
|
|
266
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string): Promise<
|
|
267
|
-
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<
|
|
294
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
295
|
+
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
268
296
|
/**
|
|
269
297
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
|
270
298
|
* @param urls - The URLs to scrape.
|
|
@@ -279,7 +307,7 @@ declare class FirecrawlApp {
|
|
|
279
307
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
280
308
|
* @returns The response containing the job status.
|
|
281
309
|
*/
|
|
282
|
-
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<
|
|
310
|
+
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
283
311
|
/**
|
|
284
312
|
* Prepares the headers for an API request.
|
|
285
313
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -344,4 +372,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
344
372
|
close(): void;
|
|
345
373
|
}
|
|
346
374
|
|
|
347
|
-
export { type Action, type ActionsResult, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
|
|
375
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -82,6 +82,10 @@ export interface CrawlScrapeOptions {
|
|
|
82
82
|
onlyMainContent?: boolean;
|
|
83
83
|
waitFor?: number;
|
|
84
84
|
timeout?: number;
|
|
85
|
+
location?: {
|
|
86
|
+
country?: string;
|
|
87
|
+
languages?: string[];
|
|
88
|
+
};
|
|
85
89
|
}
|
|
86
90
|
|
|
87
91
|
export type Action = {
|
|
@@ -154,6 +158,17 @@ export interface CrawlResponse {
|
|
|
154
158
|
error?: string;
|
|
155
159
|
}
|
|
156
160
|
|
|
161
|
+
/**
|
|
162
|
+
* Response interface for batch scrape operations.
|
|
163
|
+
* Defines the structure of the response received after initiating a batch scrape.
|
|
164
|
+
*/
|
|
165
|
+
export interface BatchScrapeResponse {
|
|
166
|
+
id?: string;
|
|
167
|
+
url?: string;
|
|
168
|
+
success: true;
|
|
169
|
+
error?: string;
|
|
170
|
+
}
|
|
171
|
+
|
|
157
172
|
/**
|
|
158
173
|
* Response interface for job status checks.
|
|
159
174
|
* Provides detailed status of a crawl job including progress and results.
|
|
@@ -169,6 +184,21 @@ export interface CrawlStatusResponse {
|
|
|
169
184
|
data: FirecrawlDocument<undefined>[];
|
|
170
185
|
};
|
|
171
186
|
|
|
187
|
+
/**
|
|
188
|
+
* Response interface for batch scrape job status checks.
|
|
189
|
+
* Provides detailed status of a batch scrape job including progress and results.
|
|
190
|
+
*/
|
|
191
|
+
export interface BatchScrapeStatusResponse {
|
|
192
|
+
success: true;
|
|
193
|
+
status: "scraping" | "completed" | "failed" | "cancelled";
|
|
194
|
+
completed: number;
|
|
195
|
+
total: number;
|
|
196
|
+
creditsUsed: number;
|
|
197
|
+
expiresAt: Date;
|
|
198
|
+
next?: string;
|
|
199
|
+
data: FirecrawlDocument<undefined>[];
|
|
200
|
+
};
|
|
201
|
+
|
|
172
202
|
/**
|
|
173
203
|
* Parameters for mapping operations.
|
|
174
204
|
* Defines options for mapping URLs during a crawl.
|
|
@@ -506,7 +536,7 @@ export default class FirecrawlApp {
|
|
|
506
536
|
params?: ScrapeParams,
|
|
507
537
|
pollInterval: number = 2,
|
|
508
538
|
idempotencyKey?: string
|
|
509
|
-
): Promise<
|
|
539
|
+
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
510
540
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
511
541
|
let jsonData: any = { urls, ...(params ?? {}) };
|
|
512
542
|
try {
|
|
@@ -535,7 +565,7 @@ export default class FirecrawlApp {
|
|
|
535
565
|
urls: string[],
|
|
536
566
|
params?: ScrapeParams,
|
|
537
567
|
idempotencyKey?: string
|
|
538
|
-
): Promise<
|
|
568
|
+
): Promise<BatchScrapeResponse | ErrorResponse> {
|
|
539
569
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
540
570
|
let jsonData: any = { urls, ...(params ?? {}) };
|
|
541
571
|
try {
|
|
@@ -587,7 +617,7 @@ export default class FirecrawlApp {
|
|
|
587
617
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
588
618
|
* @returns The response containing the job status.
|
|
589
619
|
*/
|
|
590
|
-
async checkBatchScrapeStatus(id?: string, getAllData = false): Promise<
|
|
620
|
+
async checkBatchScrapeStatus(id?: string, getAllData = false): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
591
621
|
if (!id) {
|
|
592
622
|
throw new FirecrawlError("No batch scrape ID provided", 400);
|
|
593
623
|
}
|