@mendable/firecrawl-js 1.0.4 → 1.2.0
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +33 -9
- package/build/cjs/index.js +140 -133
- package/build/esm/index.js +138 -133
- package/package.json +3 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -1
- package/src/index.ts +223 -306
- package/types/index.d.ts +54 -162
package/types/index.d.ts
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
import { TypedEventTarget } from "typescript-event-target";
|
|
3
4
|
/**
|
|
4
5
|
* Configuration interface for FirecrawlApp.
|
|
5
6
|
* @param apiKey - Optional API key for authentication.
|
|
6
7
|
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
|
7
|
-
* @param version - API version, either 'v0' or 'v1'.
|
|
8
8
|
*/
|
|
9
9
|
export interface FirecrawlAppConfig {
|
|
10
10
|
apiKey?: string | null;
|
|
11
11
|
apiUrl?: string | null;
|
|
12
|
-
version?: "v0" | "v1";
|
|
13
12
|
}
|
|
14
13
|
/**
|
|
15
14
|
* Metadata for a Firecrawl document.
|
|
@@ -50,15 +49,6 @@ export interface FirecrawlDocumentMetadata {
|
|
|
50
49
|
error?: string;
|
|
51
50
|
[key: string]: any;
|
|
52
51
|
}
|
|
53
|
-
/**
|
|
54
|
-
* Metadata for a Firecrawl document on v0.
|
|
55
|
-
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
|
|
56
|
-
*/
|
|
57
|
-
export interface FirecrawlDocumentMetadataV0 {
|
|
58
|
-
pageStatusCode?: number;
|
|
59
|
-
pageError?: string;
|
|
60
|
-
[key: string]: any;
|
|
61
|
-
}
|
|
62
52
|
/**
|
|
63
53
|
* Document interface for Firecrawl.
|
|
64
54
|
* Represents a document retrieved or processed by Firecrawl.
|
|
@@ -69,65 +59,26 @@ export interface FirecrawlDocument {
|
|
|
69
59
|
html?: string;
|
|
70
60
|
rawHtml?: string;
|
|
71
61
|
links?: string[];
|
|
62
|
+
extract?: Record<any, any>;
|
|
72
63
|
screenshot?: string;
|
|
73
|
-
metadata
|
|
74
|
-
}
|
|
75
|
-
/**
|
|
76
|
-
* Document interface for Firecrawl on v0.
|
|
77
|
-
* Represents a document specifically for API version v0 with additional properties.
|
|
78
|
-
*/
|
|
79
|
-
export interface FirecrawlDocumentV0 {
|
|
80
|
-
id?: string;
|
|
81
|
-
url?: string;
|
|
82
|
-
content: string;
|
|
83
|
-
markdown?: string;
|
|
84
|
-
html?: string;
|
|
85
|
-
llm_extraction?: Record<string, any>;
|
|
86
|
-
createdAt?: Date;
|
|
87
|
-
updatedAt?: Date;
|
|
88
|
-
type?: string;
|
|
89
|
-
metadata: FirecrawlDocumentMetadataV0;
|
|
90
|
-
childrenLinks?: string[];
|
|
91
|
-
provider?: string;
|
|
92
|
-
warning?: string;
|
|
93
|
-
index?: number;
|
|
64
|
+
metadata?: FirecrawlDocumentMetadata;
|
|
94
65
|
}
|
|
95
66
|
/**
|
|
96
67
|
* Parameters for scraping operations.
|
|
97
68
|
* Defines the options and configurations available for scraping web content.
|
|
98
69
|
*/
|
|
99
70
|
export interface ScrapeParams {
|
|
100
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
|
71
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
|
101
72
|
headers?: Record<string, string>;
|
|
102
73
|
includeTags?: string[];
|
|
103
74
|
excludeTags?: string[];
|
|
104
75
|
onlyMainContent?: boolean;
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
/**
|
|
110
|
-
* Parameters for scraping operations on v0.
|
|
111
|
-
* Includes page and extractor options specific to API version v0.
|
|
112
|
-
*/
|
|
113
|
-
export interface ScrapeParamsV0 {
|
|
114
|
-
pageOptions?: {
|
|
115
|
-
headers?: Record<string, string>;
|
|
116
|
-
includeHtml?: boolean;
|
|
117
|
-
includeRawHtml?: boolean;
|
|
118
|
-
onlyIncludeTags?: string[];
|
|
119
|
-
onlyMainContent?: boolean;
|
|
120
|
-
removeTags?: string[];
|
|
121
|
-
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
122
|
-
screenshot?: boolean;
|
|
123
|
-
fullPageScreenshot?: boolean;
|
|
124
|
-
waitFor?: number;
|
|
125
|
-
};
|
|
126
|
-
extractorOptions?: {
|
|
127
|
-
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
|
|
128
|
-
extractionPrompt?: string;
|
|
129
|
-
extractionSchema?: Record<string, any> | z.ZodSchema | any;
|
|
76
|
+
extract?: {
|
|
77
|
+
prompt?: string;
|
|
78
|
+
schema?: z.ZodSchema | any;
|
|
79
|
+
systemPrompt?: string;
|
|
130
80
|
};
|
|
81
|
+
waitFor?: number;
|
|
131
82
|
timeout?: number;
|
|
132
83
|
}
|
|
133
84
|
/**
|
|
@@ -135,19 +86,10 @@ export interface ScrapeParamsV0 {
|
|
|
135
86
|
* Defines the structure of the response received after a scraping operation.
|
|
136
87
|
*/
|
|
137
88
|
export interface ScrapeResponse extends FirecrawlDocument {
|
|
138
|
-
success:
|
|
89
|
+
success: true;
|
|
139
90
|
warning?: string;
|
|
140
91
|
error?: string;
|
|
141
92
|
}
|
|
142
|
-
/**
|
|
143
|
-
* Response interface for scraping operations on v0.
|
|
144
|
-
* Similar to ScrapeResponse but tailored for responses from API version v0.
|
|
145
|
-
*/
|
|
146
|
-
export interface ScrapeResponseV0 {
|
|
147
|
-
success: boolean;
|
|
148
|
-
data?: FirecrawlDocumentV0;
|
|
149
|
-
error?: string;
|
|
150
|
-
}
|
|
151
93
|
/**
|
|
152
94
|
* Parameters for crawling operations.
|
|
153
95
|
* Includes options for both scraping and mapping during a crawl.
|
|
@@ -162,36 +104,6 @@ export interface CrawlParams {
|
|
|
162
104
|
ignoreSitemap?: boolean;
|
|
163
105
|
scrapeOptions?: ScrapeParams;
|
|
164
106
|
}
|
|
165
|
-
/**
|
|
166
|
-
* Parameters for crawling operations on v0.
|
|
167
|
-
* Tailored for API version v0, includes specific options for crawling.
|
|
168
|
-
*/
|
|
169
|
-
export interface CrawlParamsV0 {
|
|
170
|
-
crawlerOptions?: {
|
|
171
|
-
includes?: string[];
|
|
172
|
-
excludes?: string[];
|
|
173
|
-
generateImgAltText?: boolean;
|
|
174
|
-
returnOnlyUrls?: boolean;
|
|
175
|
-
maxDepth?: number;
|
|
176
|
-
mode?: "default" | "fast";
|
|
177
|
-
ignoreSitemap?: boolean;
|
|
178
|
-
limit?: number;
|
|
179
|
-
allowBackwardCrawling?: boolean;
|
|
180
|
-
allowExternalContentLinks?: boolean;
|
|
181
|
-
};
|
|
182
|
-
pageOptions?: {
|
|
183
|
-
headers?: Record<string, string>;
|
|
184
|
-
includeHtml?: boolean;
|
|
185
|
-
includeRawHtml?: boolean;
|
|
186
|
-
onlyIncludeTags?: string[];
|
|
187
|
-
onlyMainContent?: boolean;
|
|
188
|
-
removeTags?: string[];
|
|
189
|
-
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
190
|
-
screenshot?: boolean;
|
|
191
|
-
fullPageScreenshot?: boolean;
|
|
192
|
-
waitFor?: number;
|
|
193
|
-
};
|
|
194
|
-
}
|
|
195
107
|
/**
|
|
196
108
|
* Response interface for crawling operations.
|
|
197
109
|
* Defines the structure of the response received after initiating a crawl.
|
|
@@ -199,16 +111,7 @@ export interface CrawlParamsV0 {
|
|
|
199
111
|
export interface CrawlResponse {
|
|
200
112
|
id?: string;
|
|
201
113
|
url?: string;
|
|
202
|
-
success:
|
|
203
|
-
error?: string;
|
|
204
|
-
}
|
|
205
|
-
/**
|
|
206
|
-
* Response interface for crawling operations on v0.
|
|
207
|
-
* Similar to CrawlResponse but tailored for responses from API version v0.
|
|
208
|
-
*/
|
|
209
|
-
export interface CrawlResponseV0 {
|
|
210
|
-
jobId?: string;
|
|
211
|
-
success: boolean;
|
|
114
|
+
success: true;
|
|
212
115
|
error?: string;
|
|
213
116
|
}
|
|
214
117
|
/**
|
|
@@ -216,7 +119,7 @@ export interface CrawlResponseV0 {
|
|
|
216
119
|
* Provides detailed status of a crawl job including progress and results.
|
|
217
120
|
*/
|
|
218
121
|
export interface CrawlStatusResponse {
|
|
219
|
-
success:
|
|
122
|
+
success: true;
|
|
220
123
|
total: number;
|
|
221
124
|
completed: number;
|
|
222
125
|
creditsUsed: number;
|
|
@@ -226,21 +129,6 @@ export interface CrawlStatusResponse {
|
|
|
226
129
|
data?: FirecrawlDocument[];
|
|
227
130
|
error?: string;
|
|
228
131
|
}
|
|
229
|
-
/**
|
|
230
|
-
* Response interface for job status checks on v0.
|
|
231
|
-
* Tailored for API version v0, provides status and partial data of a crawl job.
|
|
232
|
-
*/
|
|
233
|
-
export interface CrawlStatusResponseV0 {
|
|
234
|
-
success: boolean;
|
|
235
|
-
status: string;
|
|
236
|
-
current?: number;
|
|
237
|
-
current_url?: string;
|
|
238
|
-
current_step?: string;
|
|
239
|
-
total?: number;
|
|
240
|
-
data?: FirecrawlDocumentV0[];
|
|
241
|
-
partial_data?: FirecrawlDocumentV0[];
|
|
242
|
-
error?: string;
|
|
243
|
-
}
|
|
244
132
|
/**
|
|
245
133
|
* Parameters for mapping operations.
|
|
246
134
|
* Defines options for mapping URLs during a crawl.
|
|
@@ -256,78 +144,62 @@ export interface MapParams {
|
|
|
256
144
|
* Defines the structure of the response received after a mapping operation.
|
|
257
145
|
*/
|
|
258
146
|
export interface MapResponse {
|
|
259
|
-
success:
|
|
147
|
+
success: true;
|
|
260
148
|
links?: string[];
|
|
261
149
|
error?: string;
|
|
262
150
|
}
|
|
263
151
|
/**
|
|
264
|
-
*
|
|
265
|
-
*
|
|
152
|
+
* Error response interface.
|
|
153
|
+
* Defines the structure of the response received when an error occurs.
|
|
266
154
|
*/
|
|
267
|
-
export interface
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
fetchPageContent?: boolean;
|
|
271
|
-
includeHtml?: boolean;
|
|
272
|
-
includeRawHtml?: boolean;
|
|
273
|
-
};
|
|
274
|
-
searchOptions?: {
|
|
275
|
-
limit?: number;
|
|
276
|
-
};
|
|
277
|
-
}
|
|
278
|
-
/**
|
|
279
|
-
* Response interface for searching operations on v0.
|
|
280
|
-
* Defines the structure of the response received after a search operation on v0.
|
|
281
|
-
*/
|
|
282
|
-
export interface SearchResponseV0 {
|
|
283
|
-
success: boolean;
|
|
284
|
-
data?: FirecrawlDocumentV0[];
|
|
285
|
-
error?: string;
|
|
155
|
+
export interface ErrorResponse {
|
|
156
|
+
success: false;
|
|
157
|
+
error: string;
|
|
286
158
|
}
|
|
287
159
|
/**
|
|
288
160
|
* Main class for interacting with the Firecrawl API.
|
|
289
161
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
290
162
|
*/
|
|
291
|
-
export default class FirecrawlApp
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
version: T;
|
|
163
|
+
export default class FirecrawlApp {
|
|
164
|
+
apiKey: string;
|
|
165
|
+
apiUrl: string;
|
|
295
166
|
/**
|
|
296
167
|
* Initializes a new instance of the FirecrawlApp class.
|
|
297
168
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
298
169
|
*/
|
|
299
|
-
constructor({ apiKey, apiUrl
|
|
170
|
+
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
|
|
300
171
|
/**
|
|
301
172
|
* Scrapes a URL using the Firecrawl API.
|
|
302
173
|
* @param url - The URL to scrape.
|
|
303
174
|
* @param params - Additional parameters for the scrape request.
|
|
304
175
|
* @returns The response from the scrape operation.
|
|
305
176
|
*/
|
|
306
|
-
scrapeUrl(url: string, params?: ScrapeParams
|
|
177
|
+
scrapeUrl(url: string, params?: ScrapeParams): Promise<ScrapeResponse | ErrorResponse>;
|
|
307
178
|
/**
|
|
308
|
-
*
|
|
309
|
-
* @param query - The query
|
|
310
|
-
* @param params - Additional parameters for the search
|
|
311
|
-
* @returns
|
|
179
|
+
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
|
180
|
+
* @param query - The search query string.
|
|
181
|
+
* @param params - Additional parameters for the search.
|
|
182
|
+
* @returns Throws an error advising to use version 0 of the API.
|
|
312
183
|
*/
|
|
313
|
-
search(query: string, params?:
|
|
184
|
+
search(query: string, params?: any): Promise<any>;
|
|
314
185
|
/**
|
|
315
186
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
316
187
|
* @param url - The URL to crawl.
|
|
317
188
|
* @param params - Additional parameters for the crawl request.
|
|
318
|
-
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
319
189
|
* @param pollInterval - Time in seconds for job status checks.
|
|
320
190
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
321
191
|
* @returns The response from the crawl operation.
|
|
322
192
|
*/
|
|
323
|
-
crawlUrl(url: string, params?:
|
|
193
|
+
crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
194
|
+
asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlResponse | ErrorResponse>;
|
|
324
195
|
/**
|
|
325
196
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
326
197
|
* @param id - The ID of the crawl operation.
|
|
327
198
|
* @returns The response containing the job status.
|
|
328
199
|
*/
|
|
329
|
-
checkCrawlStatus(id?: string): Promise<
|
|
330
|
-
|
|
200
|
+
checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
201
|
+
crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
|
202
|
+
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
|
|
331
203
|
/**
|
|
332
204
|
* Prepares the headers for an API request.
|
|
333
205
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -357,7 +229,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
357
229
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
|
358
230
|
* @returns The final job status or data.
|
|
359
231
|
*/
|
|
360
|
-
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number
|
|
232
|
+
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse>;
|
|
361
233
|
/**
|
|
362
234
|
* Handles errors from API responses.
|
|
363
235
|
* @param {AxiosResponse} response - The response from the API.
|
|
@@ -365,3 +237,23 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
365
237
|
*/
|
|
366
238
|
handleError(response: AxiosResponse, action: string): void;
|
|
367
239
|
}
|
|
240
|
+
interface CrawlWatcherEvents {
|
|
241
|
+
document: CustomEvent<FirecrawlDocument>;
|
|
242
|
+
done: CustomEvent<{
|
|
243
|
+
status: CrawlStatusResponse["status"];
|
|
244
|
+
data: FirecrawlDocument[];
|
|
245
|
+
}>;
|
|
246
|
+
error: CustomEvent<{
|
|
247
|
+
status: CrawlStatusResponse["status"];
|
|
248
|
+
data: FirecrawlDocument[];
|
|
249
|
+
error: string;
|
|
250
|
+
}>;
|
|
251
|
+
}
|
|
252
|
+
export declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
253
|
+
private ws;
|
|
254
|
+
data: FirecrawlDocument[];
|
|
255
|
+
status: CrawlStatusResponse["status"];
|
|
256
|
+
constructor(id: string, app: FirecrawlApp);
|
|
257
|
+
close(): void;
|
|
258
|
+
}
|
|
259
|
+
export {};
|