firecrawl 1.10.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +98 -34
- package/dist/index.d.cts +32 -5
- package/dist/index.d.ts +32 -5
- package/dist/index.js +98 -34
- package/package.json +1 -1
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +39 -2
- package/src/index.ts +140 -42
package/dist/index.cjs
CHANGED
|
@@ -117,13 +117,73 @@ var FirecrawlApp = class {
|
|
|
117
117
|
return { success: false, error: "Internal server error." };
|
|
118
118
|
}
|
|
119
119
|
/**
|
|
120
|
-
*
|
|
120
|
+
* Searches using the Firecrawl API and optionally scrapes the results.
|
|
121
121
|
* @param query - The search query string.
|
|
122
|
-
* @param params -
|
|
123
|
-
* @returns
|
|
122
|
+
* @param params - Optional parameters for the search request.
|
|
123
|
+
* @returns The response from the search operation.
|
|
124
124
|
*/
|
|
125
125
|
async search(query, params) {
|
|
126
|
-
|
|
126
|
+
const headers = {
|
|
127
|
+
"Content-Type": "application/json",
|
|
128
|
+
Authorization: `Bearer ${this.apiKey}`
|
|
129
|
+
};
|
|
130
|
+
let jsonData = {
|
|
131
|
+
query,
|
|
132
|
+
limit: params?.limit ?? 5,
|
|
133
|
+
tbs: params?.tbs,
|
|
134
|
+
filter: params?.filter,
|
|
135
|
+
lang: params?.lang ?? "en",
|
|
136
|
+
country: params?.country ?? "us",
|
|
137
|
+
location: params?.location,
|
|
138
|
+
origin: params?.origin ?? "api",
|
|
139
|
+
timeout: params?.timeout ?? 6e4,
|
|
140
|
+
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
141
|
+
};
|
|
142
|
+
if (jsonData?.scrapeOptions?.extract?.schema) {
|
|
143
|
+
let schema = jsonData.scrapeOptions.extract.schema;
|
|
144
|
+
try {
|
|
145
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
146
|
+
} catch (error) {
|
|
147
|
+
}
|
|
148
|
+
jsonData = {
|
|
149
|
+
...jsonData,
|
|
150
|
+
scrapeOptions: {
|
|
151
|
+
...jsonData.scrapeOptions,
|
|
152
|
+
extract: {
|
|
153
|
+
...jsonData.scrapeOptions.extract,
|
|
154
|
+
schema
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
try {
|
|
160
|
+
const response = await this.postRequest(
|
|
161
|
+
this.apiUrl + `/v1/search`,
|
|
162
|
+
jsonData,
|
|
163
|
+
headers
|
|
164
|
+
);
|
|
165
|
+
if (response.status === 200) {
|
|
166
|
+
const responseData = response.data;
|
|
167
|
+
if (responseData.success) {
|
|
168
|
+
return {
|
|
169
|
+
success: true,
|
|
170
|
+
data: responseData.data,
|
|
171
|
+
warning: responseData.warning
|
|
172
|
+
};
|
|
173
|
+
} else {
|
|
174
|
+
throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
|
|
175
|
+
}
|
|
176
|
+
} else {
|
|
177
|
+
this.handleError(response, "search");
|
|
178
|
+
}
|
|
179
|
+
} catch (error) {
|
|
180
|
+
if (error.response?.data?.error) {
|
|
181
|
+
throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
|
|
182
|
+
} else {
|
|
183
|
+
throw new FirecrawlError(error.message, 500);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
return { success: false, error: "Internal server error.", data: [] };
|
|
127
187
|
}
|
|
128
188
|
/**
|
|
129
189
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
@@ -202,7 +262,7 @@ var FirecrawlApp = class {
|
|
|
202
262
|
let statusData = response.data;
|
|
203
263
|
if ("data" in statusData) {
|
|
204
264
|
let data = statusData.data;
|
|
205
|
-
while ("next" in statusData) {
|
|
265
|
+
while (typeof statusData === "object" && "next" in statusData) {
|
|
206
266
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
207
267
|
data = data.concat(statusData.data);
|
|
208
268
|
}
|
|
@@ -397,7 +457,7 @@ var FirecrawlApp = class {
|
|
|
397
457
|
let statusData = response.data;
|
|
398
458
|
if ("data" in statusData) {
|
|
399
459
|
let data = statusData.data;
|
|
400
|
-
while ("next" in statusData) {
|
|
460
|
+
while (typeof statusData === "object" && "next" in statusData) {
|
|
401
461
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
402
462
|
data = data.concat(statusData.data);
|
|
403
463
|
}
|
|
@@ -533,40 +593,44 @@ var FirecrawlApp = class {
|
|
|
533
593
|
* @returns The final job status or data.
|
|
534
594
|
*/
|
|
535
595
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
if ("
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
596
|
+
try {
|
|
597
|
+
while (true) {
|
|
598
|
+
let statusResponse = await this.getRequest(
|
|
599
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
600
|
+
headers
|
|
601
|
+
);
|
|
602
|
+
if (statusResponse.status === 200) {
|
|
603
|
+
let statusData = statusResponse.data;
|
|
604
|
+
if (statusData.status === "completed") {
|
|
605
|
+
if ("data" in statusData) {
|
|
606
|
+
let data = statusData.data;
|
|
607
|
+
while (typeof statusData === "object" && "next" in statusData) {
|
|
608
|
+
statusResponse = await this.getRequest(statusData.next, headers);
|
|
609
|
+
statusData = statusResponse.data;
|
|
610
|
+
data = data.concat(statusData.data);
|
|
611
|
+
}
|
|
612
|
+
statusData.data = data;
|
|
613
|
+
return statusData;
|
|
614
|
+
} else {
|
|
615
|
+
throw new FirecrawlError("Crawl job completed but no data was returned", 500);
|
|
550
616
|
}
|
|
551
|
-
|
|
552
|
-
|
|
617
|
+
} else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
|
|
618
|
+
checkInterval = Math.max(checkInterval, 2);
|
|
619
|
+
await new Promise(
|
|
620
|
+
(resolve) => setTimeout(resolve, checkInterval * 1e3)
|
|
621
|
+
);
|
|
553
622
|
} else {
|
|
554
|
-
throw new FirecrawlError(
|
|
623
|
+
throw new FirecrawlError(
|
|
624
|
+
`Crawl job failed or was stopped. Status: ${statusData.status}`,
|
|
625
|
+
500
|
|
626
|
+
);
|
|
555
627
|
}
|
|
556
|
-
} else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
|
|
557
|
-
checkInterval = Math.max(checkInterval, 2);
|
|
558
|
-
await new Promise(
|
|
559
|
-
(resolve) => setTimeout(resolve, checkInterval * 1e3)
|
|
560
|
-
);
|
|
561
628
|
} else {
|
|
562
|
-
|
|
563
|
-
`Crawl job failed or was stopped. Status: ${statusData.status}`,
|
|
564
|
-
500
|
|
565
|
-
);
|
|
629
|
+
this.handleError(statusResponse, "check crawl status");
|
|
566
630
|
}
|
|
567
|
-
} else {
|
|
568
|
-
this.handleError(statusResponse, "check crawl status");
|
|
569
631
|
}
|
|
632
|
+
} catch (error) {
|
|
633
|
+
throw new FirecrawlError(error, 500);
|
|
570
634
|
}
|
|
571
635
|
}
|
|
572
636
|
/**
|
package/dist/index.d.cts
CHANGED
|
@@ -64,6 +64,8 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
64
64
|
screenshot?: string;
|
|
65
65
|
metadata?: FirecrawlDocumentMetadata;
|
|
66
66
|
actions: ActionsSchema;
|
|
67
|
+
title?: string;
|
|
68
|
+
description?: string;
|
|
67
69
|
}
|
|
68
70
|
/**
|
|
69
71
|
* Parameters for scraping operations.
|
|
@@ -258,6 +260,31 @@ declare class FirecrawlError extends Error {
|
|
|
258
260
|
statusCode: number;
|
|
259
261
|
constructor(message: string, statusCode: number);
|
|
260
262
|
}
|
|
263
|
+
/**
|
|
264
|
+
* Parameters for search operations.
|
|
265
|
+
* Defines options for searching and scraping search results.
|
|
266
|
+
*/
|
|
267
|
+
interface SearchParams {
|
|
268
|
+
limit?: number;
|
|
269
|
+
tbs?: string;
|
|
270
|
+
filter?: string;
|
|
271
|
+
lang?: string;
|
|
272
|
+
country?: string;
|
|
273
|
+
location?: string;
|
|
274
|
+
origin?: string;
|
|
275
|
+
timeout?: number;
|
|
276
|
+
scrapeOptions?: ScrapeParams;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Response interface for search operations.
|
|
280
|
+
* Defines the structure of the response received after a search operation.
|
|
281
|
+
*/
|
|
282
|
+
interface SearchResponse {
|
|
283
|
+
success: boolean;
|
|
284
|
+
data: FirecrawlDocument<undefined>[];
|
|
285
|
+
warning?: string;
|
|
286
|
+
error?: string;
|
|
287
|
+
}
|
|
261
288
|
/**
|
|
262
289
|
* Main class for interacting with the Firecrawl API.
|
|
263
290
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -279,12 +306,12 @@ declare class FirecrawlApp {
|
|
|
279
306
|
*/
|
|
280
307
|
scrapeUrl<T extends zt.ZodSchema, ActionsSchema extends (Action[] | undefined) = undefined>(url: string, params?: ScrapeParams<T, ActionsSchema>): Promise<ScrapeResponse<zt.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;
|
|
281
308
|
/**
|
|
282
|
-
*
|
|
309
|
+
* Searches using the Firecrawl API and optionally scrapes the results.
|
|
283
310
|
* @param query - The search query string.
|
|
284
|
-
* @param params -
|
|
285
|
-
* @returns
|
|
311
|
+
* @param params - Optional parameters for the search request.
|
|
312
|
+
* @returns The response from the search operation.
|
|
286
313
|
*/
|
|
287
|
-
search(query: string, params?: any): Promise<
|
|
314
|
+
search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse>;
|
|
288
315
|
/**
|
|
289
316
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
290
317
|
* @param url - The URL to crawl.
|
|
@@ -422,4 +449,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
422
449
|
close(): void;
|
|
423
450
|
}
|
|
424
451
|
|
|
425
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
|
|
452
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -64,6 +64,8 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
64
64
|
screenshot?: string;
|
|
65
65
|
metadata?: FirecrawlDocumentMetadata;
|
|
66
66
|
actions: ActionsSchema;
|
|
67
|
+
title?: string;
|
|
68
|
+
description?: string;
|
|
67
69
|
}
|
|
68
70
|
/**
|
|
69
71
|
* Parameters for scraping operations.
|
|
@@ -258,6 +260,31 @@ declare class FirecrawlError extends Error {
|
|
|
258
260
|
statusCode: number;
|
|
259
261
|
constructor(message: string, statusCode: number);
|
|
260
262
|
}
|
|
263
|
+
/**
|
|
264
|
+
* Parameters for search operations.
|
|
265
|
+
* Defines options for searching and scraping search results.
|
|
266
|
+
*/
|
|
267
|
+
interface SearchParams {
|
|
268
|
+
limit?: number;
|
|
269
|
+
tbs?: string;
|
|
270
|
+
filter?: string;
|
|
271
|
+
lang?: string;
|
|
272
|
+
country?: string;
|
|
273
|
+
location?: string;
|
|
274
|
+
origin?: string;
|
|
275
|
+
timeout?: number;
|
|
276
|
+
scrapeOptions?: ScrapeParams;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Response interface for search operations.
|
|
280
|
+
* Defines the structure of the response received after a search operation.
|
|
281
|
+
*/
|
|
282
|
+
interface SearchResponse {
|
|
283
|
+
success: boolean;
|
|
284
|
+
data: FirecrawlDocument<undefined>[];
|
|
285
|
+
warning?: string;
|
|
286
|
+
error?: string;
|
|
287
|
+
}
|
|
261
288
|
/**
|
|
262
289
|
* Main class for interacting with the Firecrawl API.
|
|
263
290
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -279,12 +306,12 @@ declare class FirecrawlApp {
|
|
|
279
306
|
*/
|
|
280
307
|
scrapeUrl<T extends zt.ZodSchema, ActionsSchema extends (Action[] | undefined) = undefined>(url: string, params?: ScrapeParams<T, ActionsSchema>): Promise<ScrapeResponse<zt.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;
|
|
281
308
|
/**
|
|
282
|
-
*
|
|
309
|
+
* Searches using the Firecrawl API and optionally scrapes the results.
|
|
283
310
|
* @param query - The search query string.
|
|
284
|
-
* @param params -
|
|
285
|
-
* @returns
|
|
311
|
+
* @param params - Optional parameters for the search request.
|
|
312
|
+
* @returns The response from the search operation.
|
|
286
313
|
*/
|
|
287
|
-
search(query: string, params?: any): Promise<
|
|
314
|
+
search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse>;
|
|
288
315
|
/**
|
|
289
316
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
290
317
|
* @param url - The URL to crawl.
|
|
@@ -422,4 +449,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
422
449
|
close(): void;
|
|
423
450
|
}
|
|
424
451
|
|
|
425
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
|
|
452
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.js
CHANGED
|
@@ -81,13 +81,73 @@ var FirecrawlApp = class {
|
|
|
81
81
|
return { success: false, error: "Internal server error." };
|
|
82
82
|
}
|
|
83
83
|
/**
|
|
84
|
-
*
|
|
84
|
+
* Searches using the Firecrawl API and optionally scrapes the results.
|
|
85
85
|
* @param query - The search query string.
|
|
86
|
-
* @param params -
|
|
87
|
-
* @returns
|
|
86
|
+
* @param params - Optional parameters for the search request.
|
|
87
|
+
* @returns The response from the search operation.
|
|
88
88
|
*/
|
|
89
89
|
async search(query, params) {
|
|
90
|
-
|
|
90
|
+
const headers = {
|
|
91
|
+
"Content-Type": "application/json",
|
|
92
|
+
Authorization: `Bearer ${this.apiKey}`
|
|
93
|
+
};
|
|
94
|
+
let jsonData = {
|
|
95
|
+
query,
|
|
96
|
+
limit: params?.limit ?? 5,
|
|
97
|
+
tbs: params?.tbs,
|
|
98
|
+
filter: params?.filter,
|
|
99
|
+
lang: params?.lang ?? "en",
|
|
100
|
+
country: params?.country ?? "us",
|
|
101
|
+
location: params?.location,
|
|
102
|
+
origin: params?.origin ?? "api",
|
|
103
|
+
timeout: params?.timeout ?? 6e4,
|
|
104
|
+
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
105
|
+
};
|
|
106
|
+
if (jsonData?.scrapeOptions?.extract?.schema) {
|
|
107
|
+
let schema = jsonData.scrapeOptions.extract.schema;
|
|
108
|
+
try {
|
|
109
|
+
schema = zodToJsonSchema(schema);
|
|
110
|
+
} catch (error) {
|
|
111
|
+
}
|
|
112
|
+
jsonData = {
|
|
113
|
+
...jsonData,
|
|
114
|
+
scrapeOptions: {
|
|
115
|
+
...jsonData.scrapeOptions,
|
|
116
|
+
extract: {
|
|
117
|
+
...jsonData.scrapeOptions.extract,
|
|
118
|
+
schema
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
};
|
|
122
|
+
}
|
|
123
|
+
try {
|
|
124
|
+
const response = await this.postRequest(
|
|
125
|
+
this.apiUrl + `/v1/search`,
|
|
126
|
+
jsonData,
|
|
127
|
+
headers
|
|
128
|
+
);
|
|
129
|
+
if (response.status === 200) {
|
|
130
|
+
const responseData = response.data;
|
|
131
|
+
if (responseData.success) {
|
|
132
|
+
return {
|
|
133
|
+
success: true,
|
|
134
|
+
data: responseData.data,
|
|
135
|
+
warning: responseData.warning
|
|
136
|
+
};
|
|
137
|
+
} else {
|
|
138
|
+
throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
|
|
139
|
+
}
|
|
140
|
+
} else {
|
|
141
|
+
this.handleError(response, "search");
|
|
142
|
+
}
|
|
143
|
+
} catch (error) {
|
|
144
|
+
if (error.response?.data?.error) {
|
|
145
|
+
throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
|
|
146
|
+
} else {
|
|
147
|
+
throw new FirecrawlError(error.message, 500);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
return { success: false, error: "Internal server error.", data: [] };
|
|
91
151
|
}
|
|
92
152
|
/**
|
|
93
153
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
@@ -166,7 +226,7 @@ var FirecrawlApp = class {
|
|
|
166
226
|
let statusData = response.data;
|
|
167
227
|
if ("data" in statusData) {
|
|
168
228
|
let data = statusData.data;
|
|
169
|
-
while ("next" in statusData) {
|
|
229
|
+
while (typeof statusData === "object" && "next" in statusData) {
|
|
170
230
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
171
231
|
data = data.concat(statusData.data);
|
|
172
232
|
}
|
|
@@ -361,7 +421,7 @@ var FirecrawlApp = class {
|
|
|
361
421
|
let statusData = response.data;
|
|
362
422
|
if ("data" in statusData) {
|
|
363
423
|
let data = statusData.data;
|
|
364
|
-
while ("next" in statusData) {
|
|
424
|
+
while (typeof statusData === "object" && "next" in statusData) {
|
|
365
425
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
366
426
|
data = data.concat(statusData.data);
|
|
367
427
|
}
|
|
@@ -497,40 +557,44 @@ var FirecrawlApp = class {
|
|
|
497
557
|
* @returns The final job status or data.
|
|
498
558
|
*/
|
|
499
559
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
if ("
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
560
|
+
try {
|
|
561
|
+
while (true) {
|
|
562
|
+
let statusResponse = await this.getRequest(
|
|
563
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
564
|
+
headers
|
|
565
|
+
);
|
|
566
|
+
if (statusResponse.status === 200) {
|
|
567
|
+
let statusData = statusResponse.data;
|
|
568
|
+
if (statusData.status === "completed") {
|
|
569
|
+
if ("data" in statusData) {
|
|
570
|
+
let data = statusData.data;
|
|
571
|
+
while (typeof statusData === "object" && "next" in statusData) {
|
|
572
|
+
statusResponse = await this.getRequest(statusData.next, headers);
|
|
573
|
+
statusData = statusResponse.data;
|
|
574
|
+
data = data.concat(statusData.data);
|
|
575
|
+
}
|
|
576
|
+
statusData.data = data;
|
|
577
|
+
return statusData;
|
|
578
|
+
} else {
|
|
579
|
+
throw new FirecrawlError("Crawl job completed but no data was returned", 500);
|
|
514
580
|
}
|
|
515
|
-
|
|
516
|
-
|
|
581
|
+
} else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
|
|
582
|
+
checkInterval = Math.max(checkInterval, 2);
|
|
583
|
+
await new Promise(
|
|
584
|
+
(resolve) => setTimeout(resolve, checkInterval * 1e3)
|
|
585
|
+
);
|
|
517
586
|
} else {
|
|
518
|
-
throw new FirecrawlError(
|
|
587
|
+
throw new FirecrawlError(
|
|
588
|
+
`Crawl job failed or was stopped. Status: ${statusData.status}`,
|
|
589
|
+
500
|
|
590
|
+
);
|
|
519
591
|
}
|
|
520
|
-
} else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
|
|
521
|
-
checkInterval = Math.max(checkInterval, 2);
|
|
522
|
-
await new Promise(
|
|
523
|
-
(resolve) => setTimeout(resolve, checkInterval * 1e3)
|
|
524
|
-
);
|
|
525
592
|
} else {
|
|
526
|
-
|
|
527
|
-
`Crawl job failed or was stopped. Status: ${statusData.status}`,
|
|
528
|
-
500
|
|
529
|
-
);
|
|
593
|
+
this.handleError(statusResponse, "check crawl status");
|
|
530
594
|
}
|
|
531
|
-
} else {
|
|
532
|
-
this.handleError(statusResponse, "check crawl status");
|
|
533
595
|
}
|
|
596
|
+
} catch (error) {
|
|
597
|
+
throw new FirecrawlError(error, 500);
|
|
534
598
|
}
|
|
535
599
|
}
|
|
536
600
|
/**
|
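package/package.json
CHANGED
|
[hunk not captured in this extract; +1 -1 per the file summary above]
|
package/src/__tests__/v1/e2e_withAuth/index.test.ts
CHANGED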
|
@@ -381,8 +381,45 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
381
381
|
expect(filteredLinks?.length).toBeGreaterThan(0);
|
|
382
382
|
}, 30000); // 30 seconds timeout
|
|
383
383
|
|
|
384
|
-
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
test('should search with string query', async () => {
|
|
385
387
|
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
|
|
386
|
-
await
|
|
388
|
+
const response = await app.search("firecrawl");
|
|
389
|
+
expect(response.success).toBe(true);
|
|
390
|
+
console.log(response.data);
|
|
391
|
+
expect(response.data?.length).toBeGreaterThan(0);
|
|
392
|
+
expect(response.data?.[0]?.markdown).toBeDefined();
|
|
393
|
+
expect(response.data?.[0]?.metadata).toBeDefined();
|
|
394
|
+
expect(response.data?.[0]?.metadata?.title).toBeDefined();
|
|
395
|
+
expect(response.data?.[0]?.metadata?.description).toBeDefined();
|
|
396
|
+
});
|
|
397
|
+
|
|
398
|
+
test('should search with params object', async () => {
|
|
399
|
+
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
|
|
400
|
+
const response = await app.search("firecrawl", {
|
|
401
|
+
limit: 3,
|
|
402
|
+
lang: 'en',
|
|
403
|
+
country: 'us',
|
|
404
|
+
scrapeOptions: {
|
|
405
|
+
formats: ['markdown', 'html', 'links'],
|
|
406
|
+
onlyMainContent: true
|
|
407
|
+
}
|
|
408
|
+
});
|
|
409
|
+
expect(response.success).toBe(true);
|
|
410
|
+
expect(response.data.length).toBeLessThanOrEqual(3);
|
|
411
|
+
for (const doc of response.data) {
|
|
412
|
+
expect(doc.markdown).toBeDefined();
|
|
413
|
+
expect(doc.html).toBeDefined();
|
|
414
|
+
expect(doc.links).toBeDefined();
|
|
415
|
+
expect(doc.metadata).toBeDefined();
|
|
416
|
+
expect(doc.metadata?.title).toBeDefined();
|
|
417
|
+
expect(doc.metadata?.description).toBeDefined();
|
|
418
|
+
}
|
|
419
|
+
});
|
|
420
|
+
|
|
421
|
+
test('should handle invalid API key for search', async () => {
|
|
422
|
+
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" });
|
|
423
|
+
await expect(app.search("test query")).rejects.toThrow("Request failed with status code 404");
|
|
387
424
|
});
|
|
388
425
|
});
|
package/src/index.ts
CHANGED
|
@@ -68,6 +68,9 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
68
68
|
screenshot?: string;
|
|
69
69
|
metadata?: FirecrawlDocumentMetadata;
|
|
70
70
|
actions: ActionsSchema;
|
|
71
|
+
// v1 search only
|
|
72
|
+
title?: string;
|
|
73
|
+
description?: string;
|
|
71
74
|
}
|
|
72
75
|
|
|
73
76
|
/**
|
|
@@ -282,6 +285,33 @@ export class FirecrawlError extends Error {
|
|
|
282
285
|
}
|
|
283
286
|
}
|
|
284
287
|
|
|
288
|
+
/**
|
|
289
|
+
* Parameters for search operations.
|
|
290
|
+
* Defines options for searching and scraping search results.
|
|
291
|
+
*/
|
|
292
|
+
export interface SearchParams {
|
|
293
|
+
limit?: number;
|
|
294
|
+
tbs?: string;
|
|
295
|
+
filter?: string;
|
|
296
|
+
lang?: string;
|
|
297
|
+
country?: string;
|
|
298
|
+
location?: string;
|
|
299
|
+
origin?: string;
|
|
300
|
+
timeout?: number;
|
|
301
|
+
scrapeOptions?: ScrapeParams;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
/**
|
|
305
|
+
* Response interface for search operations.
|
|
306
|
+
* Defines the structure of the response received after a search operation.
|
|
307
|
+
*/
|
|
308
|
+
export interface SearchResponse {
|
|
309
|
+
success: boolean;
|
|
310
|
+
data: FirecrawlDocument<undefined>[];
|
|
311
|
+
warning?: string;
|
|
312
|
+
error?: string;
|
|
313
|
+
}
|
|
314
|
+
|
|
285
315
|
/**
|
|
286
316
|
* Main class for interacting with the Firecrawl API.
|
|
287
317
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -369,16 +399,80 @@ export default class FirecrawlApp {
|
|
|
369
399
|
}
|
|
370
400
|
|
|
371
401
|
/**
|
|
372
|
-
*
|
|
402
|
+
* Searches using the Firecrawl API and optionally scrapes the results.
|
|
373
403
|
* @param query - The search query string.
|
|
374
|
-
* @param params -
|
|
375
|
-
* @returns
|
|
404
|
+
* @param params - Optional parameters for the search request.
|
|
405
|
+
* @returns The response from the search operation.
|
|
376
406
|
*/
|
|
377
|
-
async search(
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
407
|
+
async search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse> {
|
|
408
|
+
const headers: AxiosRequestHeaders = {
|
|
409
|
+
"Content-Type": "application/json",
|
|
410
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
411
|
+
} as AxiosRequestHeaders;
|
|
412
|
+
|
|
413
|
+
let jsonData: any = {
|
|
414
|
+
query,
|
|
415
|
+
limit: params?.limit ?? 5,
|
|
416
|
+
tbs: params?.tbs,
|
|
417
|
+
filter: params?.filter,
|
|
418
|
+
lang: params?.lang ?? "en",
|
|
419
|
+
country: params?.country ?? "us",
|
|
420
|
+
location: params?.location,
|
|
421
|
+
origin: params?.origin ?? "api",
|
|
422
|
+
timeout: params?.timeout ?? 60000,
|
|
423
|
+
scrapeOptions: params?.scrapeOptions ?? { formats: [] },
|
|
424
|
+
};
|
|
425
|
+
|
|
426
|
+
if (jsonData?.scrapeOptions?.extract?.schema) {
|
|
427
|
+
let schema = jsonData.scrapeOptions.extract.schema;
|
|
428
|
+
|
|
429
|
+
// Try parsing the schema as a Zod schema
|
|
430
|
+
try {
|
|
431
|
+
schema = zodToJsonSchema(schema);
|
|
432
|
+
} catch (error) {
|
|
433
|
+
|
|
434
|
+
}
|
|
435
|
+
jsonData = {
|
|
436
|
+
...jsonData,
|
|
437
|
+
scrapeOptions: {
|
|
438
|
+
...jsonData.scrapeOptions,
|
|
439
|
+
extract: {
|
|
440
|
+
...jsonData.scrapeOptions.extract,
|
|
441
|
+
schema: schema,
|
|
442
|
+
},
|
|
443
|
+
},
|
|
444
|
+
};
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
try {
|
|
448
|
+
const response: AxiosResponse = await this.postRequest(
|
|
449
|
+
this.apiUrl + `/v1/search`,
|
|
450
|
+
jsonData,
|
|
451
|
+
headers
|
|
452
|
+
);
|
|
453
|
+
|
|
454
|
+
if (response.status === 200) {
|
|
455
|
+
const responseData = response.data;
|
|
456
|
+
if (responseData.success) {
|
|
457
|
+
return {
|
|
458
|
+
success: true,
|
|
459
|
+
data: responseData.data as FirecrawlDocument<any>[],
|
|
460
|
+
warning: responseData.warning,
|
|
461
|
+
};
|
|
462
|
+
} else {
|
|
463
|
+
throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
|
|
464
|
+
}
|
|
465
|
+
} else {
|
|
466
|
+
this.handleError(response, "search");
|
|
467
|
+
}
|
|
468
|
+
} catch (error: any) {
|
|
469
|
+
if (error.response?.data?.error) {
|
|
470
|
+
throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
|
|
471
|
+
} else {
|
|
472
|
+
throw new FirecrawlError(error.message, 500);
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
return { success: false, error: "Internal server error.", data: [] };
|
|
382
476
|
}
|
|
383
477
|
|
|
384
478
|
/**
|
|
@@ -470,7 +564,7 @@ export default class FirecrawlApp {
|
|
|
470
564
|
let statusData = response.data
|
|
471
565
|
if ("data" in statusData) {
|
|
472
566
|
let data = statusData.data;
|
|
473
|
-
while ('next' in statusData) {
|
|
567
|
+
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
474
568
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
475
569
|
data = data.concat(statusData.data);
|
|
476
570
|
}
|
|
@@ -704,7 +798,7 @@ export default class FirecrawlApp {
|
|
|
704
798
|
let statusData = response.data
|
|
705
799
|
if ("data" in statusData) {
|
|
706
800
|
let data = statusData.data;
|
|
707
|
-
while ('next' in statusData) {
|
|
801
|
+
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
708
802
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
709
803
|
data = data.concat(statusData.data);
|
|
710
804
|
}
|
|
@@ -863,42 +957,46 @@ export default class FirecrawlApp {
|
|
|
863
957
|
headers: AxiosRequestHeaders,
|
|
864
958
|
checkInterval: number
|
|
865
959
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
if ("
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
960
|
+
try {
|
|
961
|
+
while (true) {
|
|
962
|
+
let statusResponse: AxiosResponse = await this.getRequest(
|
|
963
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
964
|
+
headers
|
|
965
|
+
);
|
|
966
|
+
if (statusResponse.status === 200) {
|
|
967
|
+
let statusData = statusResponse.data;
|
|
968
|
+
if (statusData.status === "completed") {
|
|
969
|
+
if ("data" in statusData) {
|
|
970
|
+
let data = statusData.data;
|
|
971
|
+
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
972
|
+
statusResponse = await this.getRequest(statusData.next, headers);
|
|
973
|
+
statusData = statusResponse.data;
|
|
974
|
+
data = data.concat(statusData.data);
|
|
975
|
+
}
|
|
976
|
+
statusData.data = data;
|
|
977
|
+
return statusData;
|
|
978
|
+
} else {
|
|
979
|
+
throw new FirecrawlError("Crawl job completed but no data was returned", 500);
|
|
880
980
|
}
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
981
|
+
} else if (
|
|
982
|
+
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
|
|
983
|
+
) {
|
|
984
|
+
checkInterval = Math.max(checkInterval, 2);
|
|
985
|
+
await new Promise((resolve) =>
|
|
986
|
+
setTimeout(resolve, checkInterval * 1000)
|
|
987
|
+
);
|
|
988
|
+
} else {
|
|
989
|
+
throw new FirecrawlError(
|
|
990
|
+
`Crawl job failed or was stopped. Status: ${statusData.status}`,
|
|
991
|
+
500
|
|
992
|
+
);
|
|
993
|
+
}
|
|
893
994
|
} else {
|
|
894
|
-
|
|
895
|
-
`Crawl job failed or was stopped. Status: ${statusData.status}`,
|
|
896
|
-
500
|
|
897
|
-
);
|
|
995
|
+
this.handleError(statusResponse, "check crawl status");
|
|
898
996
|
}
|
|
899
|
-
} else {
|
|
900
|
-
this.handleError(statusResponse, "check crawl status");
|
|
901
997
|
}
|
|
998
|
+
} catch (error: any) {
|
|
999
|
+
throw new FirecrawlError(error, 500);
|
|
902
1000
|
}
|
|
903
1001
|
}
|
|
904
1002
|
|