@mendable/firecrawl-js 0.0.35 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tsconfig.json CHANGED
@@ -11,7 +11,7 @@
11
11
  // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
12
12
 
13
13
  /* Language and Environment */
14
- "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
14
+ "target": "es2020", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
15
15
  // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
16
16
  // "jsx": "preserve", /* Specify what JSX code is generated. */
17
17
  // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
@@ -25,9 +25,9 @@
25
25
  // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
26
26
 
27
27
  /* Modules */
28
- "module": "NodeNext", /* Specify what module code is generated. */
28
+ "module": "commonjs", /* Specify what module code is generated. */
29
29
  "rootDir": "./src", /* Specify the root folder within your source files. */
30
- "moduleResolution": "nodenext", /* Specify how TypeScript looks up a file from a given module specifier. */
30
+ "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
31
31
  // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
32
32
  // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
33
33
  // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
package/types/index.d.ts CHANGED
@@ -2,13 +2,18 @@ import { AxiosResponse, AxiosRequestHeaders } from "axios";
2
2
  import { z } from "zod";
3
3
  /**
4
4
  * Configuration interface for FirecrawlApp.
5
+ * @param apiKey - Optional API key for authentication.
6
+ * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
7
+ * @param version - API version, either 'v0' or 'v1'.
5
8
  */
6
9
  export interface FirecrawlAppConfig {
7
10
  apiKey?: string | null;
8
11
  apiUrl?: string | null;
12
+ version?: "v0" | "v1";
9
13
  }
10
14
  /**
11
15
  * Metadata for a Firecrawl document.
16
+ * Includes various optional properties for document metadata.
12
17
  */
13
18
  export interface FirecrawlDocumentMetadata {
14
19
  title?: string;
@@ -41,14 +46,37 @@ export interface FirecrawlDocumentMetadata {
41
46
  articleTag?: string;
42
47
  articleSection?: string;
43
48
  sourceURL?: string;
49
+ statusCode?: number;
50
+ error?: string;
51
+ [key: string]: any;
52
+ }
53
+ /**
54
+ * Metadata for a Firecrawl document on v0.
55
+ * Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
56
+ */
57
+ export interface FirecrawlDocumentMetadataV0 {
44
58
  pageStatusCode?: number;
45
59
  pageError?: string;
46
60
  [key: string]: any;
47
61
  }
48
62
  /**
49
63
  * Document interface for Firecrawl.
64
+ * Represents a document retrieved or processed by Firecrawl.
50
65
  */
51
66
  export interface FirecrawlDocument {
67
+ url?: string;
68
+ markdown?: string;
69
+ html?: string;
70
+ rawHtml?: string;
71
+ links?: string[];
72
+ screenshot?: string;
73
+ metadata: FirecrawlDocumentMetadata;
74
+ }
75
+ /**
76
+ * Document interface for Firecrawl on v0.
77
+ * Represents a document specifically for API version v0 with additional properties.
78
+ */
79
+ export interface FirecrawlDocumentV0 {
52
80
  id?: string;
53
81
  url?: string;
54
82
  content: string;
@@ -58,132 +86,283 @@ export interface FirecrawlDocument {
58
86
  createdAt?: Date;
59
87
  updatedAt?: Date;
60
88
  type?: string;
61
- metadata: FirecrawlDocumentMetadata;
89
+ metadata: FirecrawlDocumentMetadataV0;
62
90
  childrenLinks?: string[];
63
91
  provider?: string;
64
92
  warning?: string;
65
93
  index?: number;
66
94
  }
95
+ /**
96
+ * Parameters for scraping operations.
97
+ * Defines the options and configurations available for scraping web content.
98
+ */
99
+ export interface ScrapeParams {
100
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
101
+ headers?: Record<string, string>;
102
+ includeTags?: string[];
103
+ excludeTags?: string[];
104
+ onlyMainContent?: boolean;
105
+ screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
106
+ waitFor?: number;
107
+ timeout?: number;
108
+ }
109
+ /**
110
+ * Parameters for scraping operations on v0.
111
+ * Includes page and extractor options specific to API version v0.
112
+ */
113
+ export interface ScrapeParamsV0 {
114
+ pageOptions?: {
115
+ headers?: Record<string, string>;
116
+ includeHtml?: boolean;
117
+ includeRawHtml?: boolean;
118
+ onlyIncludeTags?: string[];
119
+ onlyMainContent?: boolean;
120
+ removeTags?: string[];
121
+ replaceAllPathsWithAbsolutePaths?: boolean;
122
+ screenshot?: boolean;
123
+ fullPageScreenshot?: boolean;
124
+ waitFor?: number;
125
+ };
126
+ extractorOptions?: {
127
+ mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
128
+ extractionPrompt?: string;
129
+ extractionSchema?: Record<string, any> | z.ZodSchema | any;
130
+ };
131
+ timeout?: number;
132
+ }
67
133
  /**
68
134
  * Response interface for scraping operations.
135
+ * Defines the structure of the response received after a scraping operation.
69
136
  */
70
- export interface ScrapeResponse {
137
+ export interface ScrapeResponse extends FirecrawlDocument {
71
138
  success: boolean;
72
- data?: FirecrawlDocument;
139
+ warning?: string;
73
140
  error?: string;
74
141
  }
75
142
  /**
76
- * Response interface for searching operations.
143
+ * Response interface for scraping operations on v0.
144
+ * Similar to ScrapeResponse but tailored for responses from API version v0.
77
145
  */
78
- export interface SearchResponse {
146
+ export interface ScrapeResponseV0 {
79
147
  success: boolean;
80
- data?: FirecrawlDocument[];
148
+ data?: FirecrawlDocumentV0;
81
149
  error?: string;
82
150
  }
151
+ /**
152
+ * Parameters for crawling operations.
153
+ * Includes per-page scrape options and crawler options for a crawl.
154
+ */
155
+ export interface CrawlParams {
156
+ scrapeOptions?: ScrapeParams;
157
+ crawlerOptions?: {
158
+ includePaths?: string[];
159
+ excludePaths?: string[];
160
+ maxDepth?: number;
161
+ limit?: number;
162
+ allowBackwardLinks?: boolean;
163
+ allowExternalLinks?: boolean;
164
+ ignoreSitemap?: boolean;
165
+ };
166
+ }
167
+ /**
168
+ * Parameters for crawling operations on v0.
169
+ * Tailored for API version v0; includes crawler and page options specific to that version.
170
+ */
171
+ export interface CrawlParamsV0 {
172
+ crawlerOptions?: {
173
+ includes?: string[];
174
+ excludes?: string[];
175
+ generateImgAltText?: boolean;
176
+ returnOnlyUrls?: boolean;
177
+ maxDepth?: number;
178
+ mode?: "default" | "fast";
179
+ ignoreSitemap?: boolean;
180
+ limit?: number;
181
+ allowBackwardCrawling?: boolean;
182
+ allowExternalContentLinks?: boolean;
183
+ };
184
+ pageOptions?: {
185
+ headers?: Record<string, string>;
186
+ includeHtml?: boolean;
187
+ includeRawHtml?: boolean;
188
+ onlyIncludeTags?: string[];
189
+ onlyMainContent?: boolean;
190
+ removeTags?: string[];
191
+ replaceAllPathsWithAbsolutePaths?: boolean;
192
+ screenshot?: boolean;
193
+ fullPageScreenshot?: boolean;
194
+ waitFor?: number;
195
+ };
196
+ }
83
197
  /**
84
198
  * Response interface for crawling operations.
199
+ * Defines the structure of the response received after initiating a crawl.
85
200
  */
86
201
  export interface CrawlResponse {
202
+ id?: string;
203
+ url?: string;
87
204
  success: boolean;
205
+ error?: string;
206
+ }
207
+ /**
208
+ * Response interface for crawling operations on v0.
209
+ * Similar to CrawlResponse but tailored for responses from API version v0.
210
+ */
211
+ export interface CrawlResponseV0 {
88
212
  jobId?: string;
89
- data?: FirecrawlDocument[];
213
+ success: boolean;
90
214
  error?: string;
91
215
  }
92
216
  /**
93
217
  * Response interface for job status checks.
218
+ * Provides detailed status of a crawl job including progress and results.
94
219
  */
95
- export interface JobStatusResponse {
220
+ export interface CrawlStatusResponse {
221
+ success: boolean;
222
+ total: number;
223
+ completed: number;
224
+ creditsUsed: number;
225
+ expiresAt: Date;
226
+ status: "scraping" | "completed" | "failed";
227
+ next: string;
228
+ data?: FirecrawlDocument[];
229
+ error?: string;
230
+ }
231
+ /**
232
+ * Response interface for job status checks on v0.
233
+ * Tailored for API version v0; provides the status and partial data of a crawl job.
234
+ */
235
+ export interface CrawlStatusResponseV0 {
96
236
  success: boolean;
97
237
  status: string;
98
238
  current?: number;
99
239
  current_url?: string;
100
240
  current_step?: string;
101
241
  total?: number;
102
- jobId?: string;
103
- data?: FirecrawlDocument[];
104
- partial_data?: FirecrawlDocument[];
242
+ data?: FirecrawlDocumentV0[];
243
+ partial_data?: FirecrawlDocumentV0[];
105
244
  error?: string;
106
245
  }
107
246
  /**
108
- * Generic parameter interface.
247
+ * Parameters for mapping operations.
248
+ * Defines options for mapping URLs during a crawl.
109
249
  */
110
- export interface Params {
111
- [key: string]: any;
112
- extractorOptions?: {
113
- extractionSchema: z.ZodSchema | any;
114
- mode?: "llm-extraction";
115
- extractionPrompt?: string;
250
+ export interface MapParams {
251
+ includePaths?: string[];
252
+ excludePaths?: string[];
253
+ maxDepth?: number;
254
+ limit?: number;
255
+ allowBackwardLinks?: boolean;
256
+ allowExternalLinks?: boolean;
257
+ ignoreSitemap?: boolean;
258
+ }
259
+ /**
260
+ * Response interface for mapping operations.
261
+ * Defines the structure of the response received after a mapping operation.
262
+ */
263
+ export interface MapResponse {
264
+ success: boolean;
265
+ links?: string[];
266
+ error?: string;
267
+ }
268
+ /**
269
+ * Parameters for searching operations on v0.
270
+ * Tailored for API version v0, includes specific options for searching content.
271
+ */
272
+ export interface SearchParamsV0 {
273
+ pageOptions?: {
274
+ onlyMainContent?: boolean;
275
+ fetchPageContent?: boolean;
276
+ includeHtml?: boolean;
277
+ includeRawHtml?: boolean;
116
278
  };
279
+ searchOptions?: {
280
+ limit?: number;
281
+ };
282
+ }
283
+ /**
284
+ * Response interface for searching operations on v0.
285
+ * Defines the structure of the response received after a search operation on v0.
286
+ */
287
+ export interface SearchResponseV0 {
288
+ success: boolean;
289
+ data?: FirecrawlDocumentV0[];
290
+ error?: string;
117
291
  }
118
292
  /**
119
293
  * Main class for interacting with the Firecrawl API.
294
+ * Provides methods for scraping, searching, crawling, and mapping web content.
120
295
  */
121
- export default class FirecrawlApp {
296
+ export default class FirecrawlApp<T extends "v0" | "v1"> {
122
297
  private apiKey;
123
298
  private apiUrl;
299
+ version: T;
124
300
  /**
125
301
  * Initializes a new instance of the FirecrawlApp class.
126
- * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
302
+ * @param config - Configuration options for the FirecrawlApp instance.
127
303
  */
128
- constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
304
+ constructor({ apiKey, apiUrl, version }: FirecrawlAppConfig);
129
305
  /**
130
306
  * Scrapes a URL using the Firecrawl API.
131
- * @param {string} url - The URL to scrape.
132
- * @param {Params | null} params - Additional parameters for the scrape request.
133
- * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
307
+ * @param url - The URL to scrape.
308
+ * @param params - Additional parameters for the scrape request.
309
+ * @returns The response from the scrape operation.
134
310
  */
135
- scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
311
+ scrapeUrl(url: string, params?: ScrapeParams | ScrapeParamsV0): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse>;
136
312
  /**
137
313
  * Searches for a query using the Firecrawl API.
138
- * @param {string} query - The query to search for.
139
- * @param {Params | null} params - Additional parameters for the search request.
140
- * @returns {Promise<SearchResponse>} The response from the search operation.
314
+ * @param query - The query to search for.
315
+ * @param params - Additional parameters for the search request.
316
+ * @returns The response from the search operation.
141
317
  */
142
- search(query: string, params?: Params | null): Promise<SearchResponse>;
318
+ search(query: string, params?: SearchParamsV0): Promise<SearchResponseV0>;
143
319
  /**
144
320
  * Initiates a crawl job for a URL using the Firecrawl API.
145
- * @param {string} url - The URL to crawl.
146
- * @param {Params | null} params - Additional parameters for the crawl request.
147
- * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
148
- * @param {number} pollInterval - Time in seconds for job status checks.
149
- * @param {string} idempotencyKey - Optional idempotency key for the request.
150
- * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
321
+ * @param url - The URL to crawl.
322
+ * @param params - Additional parameters for the crawl request.
323
+ * @param waitUntilDone - Whether to wait for the crawl job to complete.
324
+ * @param pollInterval - Interval in seconds between job status checks.
325
+ * @param idempotencyKey - Optional idempotency key for the request.
326
+ * @returns The response from the crawl operation.
151
327
  */
152
- crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
328
+ crawlUrl(url: string, params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<this['version'] extends 'v0' ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlResponse | CrawlStatusResponse>;
153
329
  /**
154
330
  * Checks the status of a crawl job using the Firecrawl API.
155
- * @param {string} jobId - The job ID of the crawl operation.
156
- * @returns {Promise<JobStatusResponse>} The response containing the job status.
331
+ * @param id - The ID of the crawl operation.
332
+ * @returns The response containing the job status.
157
333
  */
158
- checkCrawlStatus(jobId: string): Promise<JobStatusResponse>;
334
+ checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse>;
335
+ mapUrl(url: string, params?: MapParams): Promise<MapResponse>;
159
336
  /**
160
337
  * Prepares the headers for an API request.
161
- * @returns {AxiosRequestHeaders} The prepared headers.
338
+ * @param idempotencyKey - Optional key to ensure idempotency.
339
+ * @returns The prepared headers.
162
340
  */
163
341
  prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
164
342
  /**
165
343
  * Sends a POST request to the specified URL.
166
- * @param {string} url - The URL to send the request to.
167
- * @param {Params} data - The data to send in the request.
168
- * @param {AxiosRequestHeaders} headers - The headers for the request.
169
- * @returns {Promise<AxiosResponse>} The response from the POST request.
344
+ * @param url - The URL to send the request to.
345
+ * @param data - The data to send in the request.
346
+ * @param headers - The headers for the request.
347
+ * @returns The response from the POST request.
170
348
  */
171
- postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
349
+ postRequest(url: string, data: any, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
172
350
  /**
173
351
  * Sends a GET request to the specified URL.
174
- * @param {string} url - The URL to send the request to.
175
- * @param {AxiosRequestHeaders} headers - The headers for the request.
176
- * @returns {Promise<AxiosResponse>} The response from the GET request.
352
+ * @param url - The URL to send the request to.
353
+ * @param headers - The headers for the request.
354
+ * @returns The response from the GET request.
177
355
  */
178
356
  getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
179
357
  /**
180
358
  * Monitors the status of a crawl job until completion or failure.
181
- * @param {string} jobId - The job ID of the crawl operation.
182
- * @param {AxiosRequestHeaders} headers - The headers for the request.
183
- * @param {number} timeout - Timeout in seconds for job status checks.
184
- * @returns {Promise<any>} The final job status or data.
359
+ * @param id - The ID of the crawl operation.
360
+ * @param headers - The headers for the request.
361
+ * @param checkInterval - Interval in seconds for job status checks.
362
+ * @param checkUrl - Optional URL to check the status (used for the v1 API).
363
+ * @returns The final job status or data.
185
364
  */
186
- monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<any>;
365
+ monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number, checkUrl?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse>;
187
366
  /**
188
367
  * Handles errors from API responses.
189
368
  * @param {AxiosResponse} response - The response from the API.