firecrawl 1.10.1 → 1.11.1

This diff compares the publicly available contents of the two package versions as released to their public registry. It is provided for informational purposes only.
package/dist/index.cjs CHANGED
@@ -49,16 +49,20 @@ var FirecrawlError = class extends Error {
 var FirecrawlApp = class {
   apiKey;
   apiUrl;
+  isCloudService(url) {
+    return url.includes("api.firecrawl.dev");
+  }
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
    */
   constructor({ apiKey = null, apiUrl = null }) {
-    if (typeof apiKey !== "string") {
+    const baseUrl = apiUrl || "https://api.firecrawl.dev";
+    if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
       throw new FirecrawlError("No API key provided", 401);
     }
-    this.apiKey = apiKey;
-    this.apiUrl = apiUrl || "https://api.firecrawl.dev";
+    this.apiKey = apiKey || "";
+    this.apiUrl = baseUrl;
   }
   /**
    * Scrapes a URL using the Firecrawl API.
@@ -113,13 +117,73 @@ var FirecrawlApp = class {
     return { success: false, error: "Internal server error." };
   }
   /**
-   * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+   * Searches using the Firecrawl API and optionally scrapes the results.
    * @param query - The search query string.
-   * @param params - Additional parameters for the search.
-   * @returns Throws an error advising to use version 0 of the API.
+   * @param params - Optional parameters for the search request.
+   * @returns The response from the search operation.
    */
   async search(query, params) {
-    throw new FirecrawlError("Search is not supported in v1, please downgrade Firecrawl to 0.0.36.", 400);
+    const headers = {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${this.apiKey}`
+    };
+    let jsonData = {
+      query,
+      limit: params?.limit ?? 5,
+      tbs: params?.tbs,
+      filter: params?.filter,
+      lang: params?.lang ?? "en",
+      country: params?.country ?? "us",
+      location: params?.location,
+      origin: params?.origin ?? "api",
+      timeout: params?.timeout ?? 6e4,
+      scrapeOptions: params?.scrapeOptions ?? { formats: [] }
+    };
+    if (jsonData?.scrapeOptions?.extract?.schema) {
+      let schema = jsonData.scrapeOptions.extract.schema;
+      try {
+        schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
+      } catch (error) {
+      }
+      jsonData = {
+        ...jsonData,
+        scrapeOptions: {
+          ...jsonData.scrapeOptions,
+          extract: {
+            ...jsonData.scrapeOptions.extract,
+            schema
+          }
+        }
+      };
+    }
+    try {
+      const response = await this.postRequest(
+        this.apiUrl + `/v1/search`,
+        jsonData,
+        headers
+      );
+      if (response.status === 200) {
+        const responseData = response.data;
+        if (responseData.success) {
+          return {
+            success: true,
+            data: responseData.data,
+            warning: responseData.warning
+          };
+        } else {
+          throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
+        }
+      } else {
+        this.handleError(response, "search");
+      }
+    } catch (error) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error.", data: [] };
   }
   /**
    * Initiates a crawl job for a URL using the Firecrawl API.
@@ -295,9 +359,9 @@ var FirecrawlApp = class {
    * @param webhook - Optional webhook for the batch scrape.
    * @returns The response from the crawl operation.
    */
-  async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
+  async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, ...params };
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
       try {
@@ -333,9 +397,9 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
-  async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
+  async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, ...params ?? {} };
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -363,8 +427,8 @@ var FirecrawlApp = class {
    * @param idempotencyKey - Optional idempotency key for the request.
    * @returns A CrawlWatcher instance to monitor the crawl job.
    */
-  async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
-    const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
+  async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
+    const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
     if (crawl.success && crawl.id) {
       const id = crawl.id;
       return new CrawlWatcher(id, this);
@@ -428,9 +492,6 @@ var FirecrawlApp = class {
    */
   async extract(urls, params) {
     const headers = this.prepareHeaders();
-    if (!params?.prompt) {
-      throw new FirecrawlError("Prompt is required", 400);
-    }
     let jsonData = { urls, ...params };
     let jsonSchema;
     try {
@@ -593,8 +654,10 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
   ws;
   data;
   status;
+  id;
   constructor(id, app) {
     super();
+    this.id = id;
     this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
     this.status = "scraping";
     this.data = [];
@@ -604,7 +667,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
       this.dispatchTypedEvent("done", new CustomEvent("done", {
         detail: {
           status: this.status,
-          data: this.data
+          data: this.data,
+          id: this.id
         }
       }));
     } else if (msg.type === "error") {
@@ -613,7 +677,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
         detail: {
           status: this.status,
           data: this.data,
-          error: msg.error
+          error: msg.error,
+          id: this.id
         }
       }));
     } else if (msg.type === "catchup") {
@@ -621,12 +686,18 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
       this.data.push(...msg.data.data ?? []);
       for (const doc of this.data) {
         this.dispatchTypedEvent("document", new CustomEvent("document", {
-          detail: doc
+          detail: {
+            ...doc,
+            id: this.id
+          }
         }));
       }
     } else if (msg.type === "document") {
       this.dispatchTypedEvent("document", new CustomEvent("document", {
-        detail: msg.data
+        detail: {
+          ...msg.data,
+          id: this.id
+        }
       }));
     }
   };
@@ -635,12 +706,20 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
       this.ws.close();
       return;
     }
-    const msg = JSON.parse(ev.data);
-    messageHandler(msg);
+    try {
+      const msg = JSON.parse(ev.data);
+      messageHandler(msg);
+    } catch (error) {
+      console.error("Error on message", error);
+    }
   }).bind(this);
   this.ws.onclose = ((ev) => {
-    const msg = JSON.parse(ev.reason);
-    messageHandler(msg);
+    try {
+      const msg = JSON.parse(ev.reason);
+      messageHandler(msg);
+    } catch (error) {
+      console.error("Error on close", error);
+    }
  }).bind(this);
  this.ws.onerror = ((_) => {
    this.status = "failed";
@@ -648,7 +727,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
       detail: {
         status: this.status,
         data: this.data,
-        error: "WebSocket error"
+        error: "WebSocket error",
+        id: this.id
       }
     }));
   }).bind(this);
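
Taken together, the constructor changes above make the API key mandatory only when the client targets the hosted endpoint. A minimal sketch of both modes; the self-hosted URL and the environment variable name are illustrative, not part of this diff:

    import FirecrawlApp from "firecrawl";

    // Cloud endpoint (api.firecrawl.dev): a string API key is still required,
    // otherwise the constructor throws FirecrawlError("No API key provided", 401).
    const cloud = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY ?? "" });

    // Self-hosted endpoint: the key check is skipped and apiKey falls back to "".
    const selfHosted = new FirecrawlApp({ apiKey: null, apiUrl: "http://localhost:3002" });
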
package/dist/index.d.cts CHANGED
@@ -64,6 +64,8 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
   screenshot?: string;
   metadata?: FirecrawlDocumentMetadata;
   actions: ActionsSchema;
+  title?: string;
+  description?: string;
 }
 /**
  * Parameters for scraping operations.
@@ -171,6 +173,7 @@ interface BatchScrapeResponse {
   url?: string;
   success: true;
   error?: string;
+  invalidURLs?: string[];
 }
 /**
  * Response interface for job status checks.
@@ -225,10 +228,11 @@ interface MapResponse {
  * Defines options for extracting information from URLs.
  */
 interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
-  prompt: string;
+  prompt?: string;
   schema?: LLMSchema;
   systemPrompt?: string;
   allowExternalLinks?: boolean;
+  includeSubdomains?: boolean;
 }
 /**
  * Response interface for extracting information from URLs.
@@ -256,6 +260,31 @@ declare class FirecrawlError extends Error {
   statusCode: number;
   constructor(message: string, statusCode: number);
 }
+/**
+ * Parameters for search operations.
+ * Defines options for searching and scraping search results.
+ */
+interface SearchParams {
+  limit?: number;
+  tbs?: string;
+  filter?: string;
+  lang?: string;
+  country?: string;
+  location?: string;
+  origin?: string;
+  timeout?: number;
+  scrapeOptions?: ScrapeParams;
+}
+/**
+ * Response interface for search operations.
+ * Defines the structure of the response received after a search operation.
+ */
+interface SearchResponse {
+  success: boolean;
+  data: FirecrawlDocument<undefined>[];
+  warning?: string;
+  error?: string;
+}
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -263,6 +292,7 @@ declare class FirecrawlError extends Error {
 declare class FirecrawlApp {
   apiKey: string;
   apiUrl: string;
+  private isCloudService;
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
@@ -276,12 +306,12 @@ declare class FirecrawlApp {
    */
   scrapeUrl<T extends zt.ZodSchema, ActionsSchema extends (Action[] | undefined) = undefined>(url: string, params?: ScrapeParams<T, ActionsSchema>): Promise<ScrapeResponse<zt.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;
   /**
-   * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+   * Searches using the Firecrawl API and optionally scrapes the results.
    * @param query - The search query string.
-   * @param params - Additional parameters for the search.
-   * @returns Throws an error advising to use version 0 of the API.
+   * @param params - Optional parameters for the search request.
+   * @returns The response from the search operation.
    */
-  search(query: string, params?: any): Promise<any>;
+  search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse>;
   /**
    * Initiates a crawl job for a URL using the Firecrawl API.
    * @param url - The URL to crawl.
@@ -329,8 +359,8 @@ declare class FirecrawlApp {
    * @param webhook - Optional webhook for the batch scrape.
    * @returns The response from the crawl operation.
    */
-  batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
-  asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
+  batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
+  asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
   /**
    * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
    * @param urls - The URL to scrape.
@@ -338,7 +368,7 @@ declare class FirecrawlApp {
    * @param idempotencyKey - Optional idempotency key for the request.
    * @returns A CrawlWatcher instance to monitor the crawl job.
    */
-  batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
+  batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
   /**
    * Checks the status of a batch scrape job using the Firecrawl API.
    * @param id - The ID of the batch scrape operation.
@@ -414,8 +444,9 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
   private ws;
   data: FirecrawlDocument<undefined>[];
   status: CrawlStatusResponse["status"];
+  id: string;
   constructor(id: string, app: FirecrawlApp);
   close(): void;
 }
 
-export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
+export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
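
With these typings, search() becomes a first-class call instead of a stub that threw. A short sketch against the declarations above; the query and options are illustrative:

    import FirecrawlApp, { SearchParams, SearchResponse } from "firecrawl";

    async function demoSearch(app: FirecrawlApp): Promise<void> {
      // Client-side defaults mirror the implementation: limit 5, lang "en",
      // country "us", origin "api", timeout 60000 ms.
      const plain: SearchResponse = await app.search("firecrawl");
      console.log(plain.data.length);

      // Passing scrapeOptions also scrapes each search hit.
      const params: SearchParams = {
        limit: 3,
        scrapeOptions: { formats: ["markdown", "links"] },
      };
      const scraped = await app.search("web scraping sdk", params);
      if (scraped.success) {
        for (const doc of scraped.data) {
          // title and description are the new search-only document fields.
          console.log(doc.title, doc.description);
        }
      }
    }
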
package/dist/index.d.ts CHANGED
@@ -64,6 +64,8 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
   screenshot?: string;
   metadata?: FirecrawlDocumentMetadata;
   actions: ActionsSchema;
+  title?: string;
+  description?: string;
 }
 /**
  * Parameters for scraping operations.
@@ -171,6 +173,7 @@ interface BatchScrapeResponse {
   url?: string;
   success: true;
   error?: string;
+  invalidURLs?: string[];
 }
 /**
  * Response interface for job status checks.
@@ -225,10 +228,11 @@ interface MapResponse {
  * Defines options for extracting information from URLs.
  */
 interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
-  prompt: string;
+  prompt?: string;
   schema?: LLMSchema;
   systemPrompt?: string;
   allowExternalLinks?: boolean;
+  includeSubdomains?: boolean;
 }
 /**
  * Response interface for extracting information from URLs.
@@ -256,6 +260,31 @@ declare class FirecrawlError extends Error {
   statusCode: number;
   constructor(message: string, statusCode: number);
 }
+/**
+ * Parameters for search operations.
+ * Defines options for searching and scraping search results.
+ */
+interface SearchParams {
+  limit?: number;
+  tbs?: string;
+  filter?: string;
+  lang?: string;
+  country?: string;
+  location?: string;
+  origin?: string;
+  timeout?: number;
+  scrapeOptions?: ScrapeParams;
+}
+/**
+ * Response interface for search operations.
+ * Defines the structure of the response received after a search operation.
+ */
+interface SearchResponse {
+  success: boolean;
+  data: FirecrawlDocument<undefined>[];
+  warning?: string;
+  error?: string;
+}
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -263,6 +292,7 @@ declare class FirecrawlError extends Error {
 declare class FirecrawlApp {
   apiKey: string;
   apiUrl: string;
+  private isCloudService;
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
@@ -276,12 +306,12 @@ declare class FirecrawlApp {
    */
   scrapeUrl<T extends zt.ZodSchema, ActionsSchema extends (Action[] | undefined) = undefined>(url: string, params?: ScrapeParams<T, ActionsSchema>): Promise<ScrapeResponse<zt.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;
   /**
-   * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+   * Searches using the Firecrawl API and optionally scrapes the results.
    * @param query - The search query string.
-   * @param params - Additional parameters for the search.
-   * @returns Throws an error advising to use version 0 of the API.
+   * @param params - Optional parameters for the search request.
+   * @returns The response from the search operation.
    */
-  search(query: string, params?: any): Promise<any>;
+  search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse>;
   /**
    * Initiates a crawl job for a URL using the Firecrawl API.
    * @param url - The URL to crawl.
@@ -329,8 +359,8 @@ declare class FirecrawlApp {
    * @param webhook - Optional webhook for the batch scrape.
    * @returns The response from the crawl operation.
    */
-  batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
-  asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
+  batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
+  asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
   /**
    * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
    * @param urls - The URL to scrape.
@@ -338,7 +368,7 @@ declare class FirecrawlApp {
    * @param idempotencyKey - Optional idempotency key for the request.
    * @returns A CrawlWatcher instance to monitor the crawl job.
    */
-  batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
+  batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
   /**
    * Checks the status of a batch scrape job using the Firecrawl API.
    * @param id - The ID of the batch scrape operation.
@@ -414,8 +444,9 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
   private ws;
   data: FirecrawlDocument<undefined>[];
   status: CrawlStatusResponse["status"];
+  id: string;
   constructor(id: string, app: FirecrawlApp);
   close(): void;
 }
 
-export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
+export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
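
The batch-scrape trio now threads webhook and ignoreInvalidURLs through to the /v1/batch/scrape request, and BatchScrapeResponse gains invalidURLs. A sketch of the async variant; the exact server-side behavior of ignoreInvalidURLs is not shown in this diff, so the comment states only the apparent intent:

    import FirecrawlApp from "firecrawl";

    async function demoBatch(app: FirecrawlApp): Promise<void> {
      const started = await app.asyncBatchScrapeUrls(
        ["https://firecrawl.dev", "not a url"],
        { formats: ["markdown"] },
        undefined, // idempotencyKey
        undefined, // webhook?: CrawlParams["webhook"]
        true       // ignoreInvalidURLs: presumably proceed past bad entries
      );
      if (started.success) {
        console.log("job id:", started.id);
        console.log("rejected entries:", started.invalidURLs); // new optional field
      }
    }
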
package/dist/index.js CHANGED
@@ -13,16 +13,20 @@ var FirecrawlError = class extends Error {
 var FirecrawlApp = class {
   apiKey;
   apiUrl;
+  isCloudService(url) {
+    return url.includes("api.firecrawl.dev");
+  }
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
    */
   constructor({ apiKey = null, apiUrl = null }) {
-    if (typeof apiKey !== "string") {
+    const baseUrl = apiUrl || "https://api.firecrawl.dev";
+    if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
       throw new FirecrawlError("No API key provided", 401);
     }
-    this.apiKey = apiKey;
-    this.apiUrl = apiUrl || "https://api.firecrawl.dev";
+    this.apiKey = apiKey || "";
+    this.apiUrl = baseUrl;
   }
   /**
    * Scrapes a URL using the Firecrawl API.
@@ -77,13 +81,73 @@ var FirecrawlApp = class {
     return { success: false, error: "Internal server error." };
   }
   /**
-   * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+   * Searches using the Firecrawl API and optionally scrapes the results.
    * @param query - The search query string.
-   * @param params - Additional parameters for the search.
-   * @returns Throws an error advising to use version 0 of the API.
+   * @param params - Optional parameters for the search request.
+   * @returns The response from the search operation.
    */
   async search(query, params) {
-    throw new FirecrawlError("Search is not supported in v1, please downgrade Firecrawl to 0.0.36.", 400);
+    const headers = {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${this.apiKey}`
+    };
+    let jsonData = {
+      query,
+      limit: params?.limit ?? 5,
+      tbs: params?.tbs,
+      filter: params?.filter,
+      lang: params?.lang ?? "en",
+      country: params?.country ?? "us",
+      location: params?.location,
+      origin: params?.origin ?? "api",
+      timeout: params?.timeout ?? 6e4,
+      scrapeOptions: params?.scrapeOptions ?? { formats: [] }
+    };
+    if (jsonData?.scrapeOptions?.extract?.schema) {
+      let schema = jsonData.scrapeOptions.extract.schema;
+      try {
+        schema = zodToJsonSchema(schema);
+      } catch (error) {
+      }
+      jsonData = {
+        ...jsonData,
+        scrapeOptions: {
+          ...jsonData.scrapeOptions,
+          extract: {
+            ...jsonData.scrapeOptions.extract,
+            schema
+          }
+        }
+      };
+    }
+    try {
+      const response = await this.postRequest(
+        this.apiUrl + `/v1/search`,
+        jsonData,
+        headers
+      );
+      if (response.status === 200) {
+        const responseData = response.data;
+        if (responseData.success) {
+          return {
+            success: true,
+            data: responseData.data,
+            warning: responseData.warning
+          };
+        } else {
+          throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
+        }
+      } else {
+        this.handleError(response, "search");
+      }
+    } catch (error) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error.", data: [] };
   }
   /**
    * Initiates a crawl job for a URL using the Firecrawl API.
@@ -259,9 +323,9 @@ var FirecrawlApp = class {
    * @param webhook - Optional webhook for the batch scrape.
    * @returns The response from the crawl operation.
    */
-  async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
+  async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, ...params };
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
       try {
@@ -297,9 +361,9 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
-  async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
+  async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, ...params ?? {} };
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -327,8 +391,8 @@ var FirecrawlApp = class {
    * @param idempotencyKey - Optional idempotency key for the request.
    * @returns A CrawlWatcher instance to monitor the crawl job.
    */
-  async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
-    const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
+  async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
+    const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
     if (crawl.success && crawl.id) {
       const id = crawl.id;
       return new CrawlWatcher(id, this);
@@ -392,9 +456,6 @@ var FirecrawlApp = class {
    */
   async extract(urls, params) {
     const headers = this.prepareHeaders();
-    if (!params?.prompt) {
-      throw new FirecrawlError("Prompt is required", 400);
-    }
     let jsonData = { urls, ...params };
     let jsonSchema;
     try {
@@ -557,8 +618,10 @@ var CrawlWatcher = class extends TypedEventTarget {
   ws;
   data;
   status;
+  id;
   constructor(id, app) {
     super();
+    this.id = id;
     this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
     this.status = "scraping";
     this.data = [];
@@ -568,7 +631,8 @@ var CrawlWatcher = class extends TypedEventTarget {
       this.dispatchTypedEvent("done", new CustomEvent("done", {
         detail: {
           status: this.status,
-          data: this.data
+          data: this.data,
+          id: this.id
         }
       }));
     } else if (msg.type === "error") {
@@ -577,7 +641,8 @@ var CrawlWatcher = class extends TypedEventTarget {
        detail: {
          status: this.status,
          data: this.data,
-          error: msg.error
+          error: msg.error,
+          id: this.id
        }
      }));
    } else if (msg.type === "catchup") {
@@ -585,12 +650,18 @@ var CrawlWatcher = class extends TypedEventTarget {
       this.data.push(...msg.data.data ?? []);
       for (const doc of this.data) {
         this.dispatchTypedEvent("document", new CustomEvent("document", {
-          detail: doc
+          detail: {
+            ...doc,
+            id: this.id
+          }
         }));
       }
     } else if (msg.type === "document") {
       this.dispatchTypedEvent("document", new CustomEvent("document", {
-        detail: msg.data
+        detail: {
+          ...msg.data,
+          id: this.id
+        }
       }));
     }
   };
@@ -599,12 +670,20 @@ var CrawlWatcher = class extends TypedEventTarget {
       this.ws.close();
       return;
     }
-    const msg = JSON.parse(ev.data);
-    messageHandler(msg);
+    try {
+      const msg = JSON.parse(ev.data);
+      messageHandler(msg);
+    } catch (error) {
+      console.error("Error on message", error);
+    }
   }).bind(this);
   this.ws.onclose = ((ev) => {
-    const msg = JSON.parse(ev.reason);
-    messageHandler(msg);
+    try {
+      const msg = JSON.parse(ev.reason);
+      messageHandler(msg);
+    } catch (error) {
+      console.error("Error on close", error);
+    }
  }).bind(this);
  this.ws.onerror = ((_) => {
    this.status = "failed";
@@ -612,7 +691,8 @@ var CrawlWatcher = class extends TypedEventTarget {
       detail: {
         status: this.status,
         data: this.data,
-        error: "WebSocket error"
+        error: "WebSocket error",
+        id: this.id
       }
     }));
   }).bind(this);
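
Since every CrawlWatcher event detail now carries the job id, one set of handlers can be shared across several concurrent watchers. A sketch using the watch variant, with the event payload fields as added above:

    import FirecrawlApp from "firecrawl";

    async function demoWatch(app: FirecrawlApp, urls: string[]): Promise<void> {
      const watcher = await app.batchScrapeUrlsAndWatch(urls, { formats: ["markdown"] });
      watcher.addEventListener("document", (ev) => {
        console.log(`job ${ev.detail.id}: document received`);
      });
      watcher.addEventListener("done", (ev) => {
        console.log(`job ${ev.detail.id}: done with ${ev.detail.data.length} documents`);
      });
      watcher.addEventListener("error", (ev) => {
        console.error(`job ${ev.detail.id}: ${ev.detail.error}`);
      });
    }
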
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl",
-  "version": "1.10.1",
+  "version": "1.11.1",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -1,9 +1,9 @@
-import { describe, test, expect, jest } from '@jest/globals';
-import axios from 'axios';
-import FirecrawlApp from '../index';
+import { describe, expect, jest, test } from '@jest/globals';
 
-import { readFile } from 'fs/promises';
+import FirecrawlApp from '../index';
+import axios from 'axios';
 import { join } from 'path';
+import { readFile } from 'fs/promises';
 
 // Mock jest and set the type
 jest.mock('axios');
@@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
   return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
 }
 
+const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
+
 describe('the firecrawl JS SDK', () => {
 
-  test('Should require an API key to instantiate FirecrawlApp', async () => {
-    const fn = () => {
-      new FirecrawlApp({ apiKey: undefined });
-    };
-    expect(fn).toThrow('No API key provided');
+  test('Should require an API key only for cloud service', async () => {
+    if (API_URL.includes('api.firecrawl.dev')) {
+      // Should throw for cloud service
+      expect(() => {
+        new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
+      }).toThrow('No API key provided');
+    } else {
+      // Should not throw for self-hosted
+      expect(() => {
+        new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
+      }).not.toThrow();
+    }
   });
 
   test('Should return scraped data from a /scrape API call', async () => {
@@ -9,15 +9,28 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
 const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
 
 describe('FirecrawlApp E2E Tests', () => {
-  test.concurrent('should throw error for no API key', async () => {
-    expect(() => {
-      new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
-    }).toThrow("No API key provided");
+  test.concurrent('should throw error for no API key only for cloud service', async () => {
+    if (API_URL.includes('api.firecrawl.dev')) {
+      // Should throw for cloud service
+      expect(() => {
+        new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
+      }).toThrow("No API key provided");
+    } else {
+      // Should not throw for self-hosted
+      expect(() => {
+        new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
+      }).not.toThrow();
+    }
   });
 
   test.concurrent('should throw error for invalid API key on scrape', async () => {
-    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-    await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
+    if (API_URL.includes('api.firecrawl.dev')) {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
+    } else {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
+    }
   });
 
   test.concurrent('should throw error for blocklisted URL on scrape', async () => {
@@ -155,14 +168,13 @@ describe('FirecrawlApp E2E Tests', () => {
   }, 30000); // 30 seconds timeout
 
   test.concurrent('should throw error for invalid API key on crawl', async () => {
-    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-    await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
-  });
-
-  test.concurrent('should throw error for blocklisted URL on crawl', async () => {
-    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-    const blocklistedUrl = "https://twitter.com/fake-test";
-    await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
+    if (API_URL.includes('api.firecrawl.dev')) {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
+    } else {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
+    }
   });
 
   test.concurrent('should return successful response for crawl and wait for completion', async () => {
@@ -337,8 +349,13 @@ describe('FirecrawlApp E2E Tests', () => {
   }, 60000); // 60 seconds timeout
 
   test.concurrent('should throw error for invalid API key on map', async () => {
-    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-    await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
+    if (API_URL.includes('api.firecrawl.dev')) {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
+    } else {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
+    }
   });
 
   test.concurrent('should throw error for blocklisted URL on map', async () => {
@@ -355,8 +372,7 @@ describe('FirecrawlApp E2E Tests', () => {
   }, 30000); // 30 seconds timeout
 
   test.concurrent('should return successful response for valid map', async () => {
-    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-    const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
+    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
     expect(response).not.toBeNull();
 
     expect(response.links?.length).toBeGreaterThan(0);
@@ -365,8 +381,45 @@ describe('FirecrawlApp E2E Tests', () => {
     expect(filteredLinks?.length).toBeGreaterThan(0);
   }, 30000); // 30 seconds timeout
 
-  test('should throw NotImplementedError for search on v1', async () => {
+
+
+  test('should search with string query', async () => {
     const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
-    await expect(app.search("test query")).rejects.toThrow("Search is not supported in v1");
+    const response = await app.search("firecrawl");
+    expect(response.success).toBe(true);
+    console.log(response.data);
+    expect(response.data?.length).toBeGreaterThan(0);
+    expect(response.data?.[0]?.markdown).toBeDefined();
+    expect(response.data?.[0]?.metadata).toBeDefined();
+    expect(response.data?.[0]?.metadata?.title).toBeDefined();
+    expect(response.data?.[0]?.metadata?.description).toBeDefined();
+  });
+
+  test('should search with params object', async () => {
+    const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
+    const response = await app.search("firecrawl", {
+      limit: 3,
+      lang: 'en',
+      country: 'us',
+      scrapeOptions: {
+        formats: ['markdown', 'html', 'links'],
+        onlyMainContent: true
+      }
+    });
+    expect(response.success).toBe(true);
+    expect(response.data.length).toBeLessThanOrEqual(3);
+    for (const doc of response.data) {
+      expect(doc.markdown).toBeDefined();
+      expect(doc.html).toBeDefined();
+      expect(doc.links).toBeDefined();
+      expect(doc.metadata).toBeDefined();
+      expect(doc.metadata?.title).toBeDefined();
+      expect(doc.metadata?.description).toBeDefined();
+    }
+  });
+
+  test('should handle invalid API key for search', async () => {
+    const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" });
+    await expect(app.search("test query")).rejects.toThrow("Request failed with status code 404");
   });
 });
package/src/index.ts CHANGED
@@ -68,6 +68,9 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
   screenshot?: string;
   metadata?: FirecrawlDocumentMetadata;
   actions: ActionsSchema;
+  // v1 search only
+  title?: string;
+  description?: string;
 }
 
 /**
@@ -183,6 +186,7 @@ export interface BatchScrapeResponse {
   url?: string;
   success: true;
   error?: string;
+  invalidURLs?: string[];
 }
 
 /**
@@ -242,10 +246,11 @@ export interface MapResponse {
  * Defines options for extracting information from URLs.
  */
 export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
-  prompt: string;
+  prompt?: string;
   schema?: LLMSchema;
   systemPrompt?: string;
   allowExternalLinks?: boolean;
+  includeSubdomains?: boolean;
 }
 
 /**
@@ -280,6 +285,33 @@ export class FirecrawlError extends Error {
   }
 }
 
+/**
+ * Parameters for search operations.
+ * Defines options for searching and scraping search results.
+ */
+export interface SearchParams {
+  limit?: number;
+  tbs?: string;
+  filter?: string;
+  lang?: string;
+  country?: string;
+  location?: string;
+  origin?: string;
+  timeout?: number;
+  scrapeOptions?: ScrapeParams;
+}
+
+/**
+ * Response interface for search operations.
+ * Defines the structure of the response received after a search operation.
+ */
+export interface SearchResponse {
+  success: boolean;
+  data: FirecrawlDocument<undefined>[];
+  warning?: string;
+  error?: string;
+}
+
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -288,17 +320,23 @@ export default class FirecrawlApp {
   public apiKey: string;
   public apiUrl: string;
 
+  private isCloudService(url: string): boolean {
+    return url.includes('api.firecrawl.dev');
+  }
+
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
    */
   constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
-    if (typeof apiKey !== "string") {
+    const baseUrl = apiUrl || "https://api.firecrawl.dev";
+
+    if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
       throw new FirecrawlError("No API key provided", 401);
     }
 
-    this.apiKey = apiKey;
-    this.apiUrl = apiUrl || "https://api.firecrawl.dev";
+    this.apiKey = apiKey || '';
+    this.apiUrl = baseUrl;
   }
 
   /**
@@ -361,16 +399,80 @@ export default class FirecrawlApp {
   }
 
   /**
-   * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+   * Searches using the Firecrawl API and optionally scrapes the results.
    * @param query - The search query string.
-   * @param params - Additional parameters for the search.
-   * @returns Throws an error advising to use version 0 of the API.
+   * @param params - Optional parameters for the search request.
+   * @returns The response from the search operation.
    */
-  async search(
-    query: string,
-    params?: any
-  ): Promise<any> {
-    throw new FirecrawlError("Search is not supported in v1, please downgrade Firecrawl to 0.0.36.", 400);
+  async search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse> {
+    const headers: AxiosRequestHeaders = {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${this.apiKey}`,
+    } as AxiosRequestHeaders;
+
+    let jsonData: any = {
+      query,
+      limit: params?.limit ?? 5,
+      tbs: params?.tbs,
+      filter: params?.filter,
+      lang: params?.lang ?? "en",
+      country: params?.country ?? "us",
+      location: params?.location,
+      origin: params?.origin ?? "api",
+      timeout: params?.timeout ?? 60000,
+      scrapeOptions: params?.scrapeOptions ?? { formats: [] },
+    };
+
+    if (jsonData?.scrapeOptions?.extract?.schema) {
+      let schema = jsonData.scrapeOptions.extract.schema;
+
+      // Try parsing the schema as a Zod schema
+      try {
+        schema = zodToJsonSchema(schema);
+      } catch (error) {
+
+      }
+      jsonData = {
+        ...jsonData,
+        scrapeOptions: {
+          ...jsonData.scrapeOptions,
+          extract: {
+            ...jsonData.scrapeOptions.extract,
+            schema: schema,
+          },
+        },
+      };
+    }
+
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        this.apiUrl + `/v1/search`,
+        jsonData,
+        headers
+      );
+
+      if (response.status === 200) {
+        const responseData = response.data;
+        if (responseData.success) {
+          return {
+            success: true,
+            data: responseData.data as FirecrawlDocument<any>[],
+            warning: responseData.warning,
+          };
+        } else {
+          throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
+        }
+      } else {
+        this.handleError(response, "search");
+      }
+    } catch (error: any) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error.", data: [] };
   }
 
   /**
@@ -576,9 +678,10 @@ export default class FirecrawlApp {
     pollInterval: number = 2,
     idempotencyKey?: string,
     webhook?: CrawlParams["webhook"],
+    ignoreInvalidURLs?: boolean,
   ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData: any = { urls, ...params };
+    let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
 
@@ -621,10 +724,12 @@ export default class FirecrawlApp {
   async asyncBatchScrapeUrls(
     urls: string[],
     params?: ScrapeParams,
-    idempotencyKey?: string
+    idempotencyKey?: string,
+    webhook?: CrawlParams["webhook"],
+    ignoreInvalidURLs?: boolean,
   ): Promise<BatchScrapeResponse | ErrorResponse> {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData: any = { urls, ...(params ?? {}) };
+    let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -657,8 +762,10 @@ export default class FirecrawlApp {
     urls: string[],
     params?: ScrapeParams,
     idempotencyKey?: string,
+    webhook?: CrawlParams["webhook"],
+    ignoreInvalidURLs?: boolean,
   ) {
-    const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
+    const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
 
     if (crawl.success && crawl.id) {
       const id = crawl.id;
@@ -728,10 +835,6 @@ export default class FirecrawlApp {
   async extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse> {
     const headers = this.prepareHeaders();
 
-    if (!params?.prompt) {
-      throw new FirecrawlError("Prompt is required", 400);
-    }
-
     let jsonData: { urls: string[] } & ExtractParams<T> = { urls, ...params };
     let jsonSchema: any;
     try {
@@ -932,9 +1035,11 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
   private ws: WebSocket;
   public data: FirecrawlDocument<undefined>[];
   public status: CrawlStatusResponse["status"];
+  public id: string;
 
   constructor(id: string, app: FirecrawlApp) {
     super();
+    this.id = id;
     this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
     this.status = "scraping";
     this.data = [];
@@ -965,6 +1070,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
         detail: {
           status: this.status,
           data: this.data,
+          id: this.id,
         },
       }));
     } else if (msg.type === "error") {
@@ -974,6 +1080,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
          status: this.status,
          data: this.data,
          error: msg.error,
+          id: this.id,
        },
      }));
    } else if (msg.type === "catchup") {
@@ -981,12 +1088,18 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
       this.data.push(...(msg.data.data ?? []));
       for (const doc of this.data) {
         this.dispatchTypedEvent("document", new CustomEvent("document", {
-          detail: doc,
+          detail: {
+            ...doc,
+            id: this.id,
+          },
         }));
       }
     } else if (msg.type === "document") {
       this.dispatchTypedEvent("document", new CustomEvent("document", {
-        detail: msg.data,
+        detail: {
+          ...msg.data,
+          id: this.id,
+        },
       }));
     }
   }
@@ -996,14 +1109,21 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
       this.ws.close();
       return;
     }
-
-    const msg = JSON.parse(ev.data) as Message;
-    messageHandler(msg);
+    try {
+      const msg = JSON.parse(ev.data) as Message;
+      messageHandler(msg);
+    } catch (error) {
+      console.error("Error on message", error);
+    }
   }).bind(this);
 
   this.ws.onclose = ((ev: CloseEvent) => {
-    const msg = JSON.parse(ev.reason) as Message;
-    messageHandler(msg);
+    try {
+      const msg = JSON.parse(ev.reason) as Message;
+      messageHandler(msg);
+    } catch (error) {
+      console.error("Error on close", error);
+    }
   }).bind(this);
 
   this.ws.onerror = ((_: Event) => {
@@ -1013,6 +1133,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
       status: this.status,
       data: this.data,
       error: "WebSocket error",
+      id: this.id,
     },
   }));
 }).bind(this);
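
Finally, with the "Prompt is required" guard removed and prompt now optional in ExtractParams, extract() can be driven by a schema alone. A sketch with zod, assuming ExtractResponse exposes success and data as in earlier releases:

    import { z } from "zod";
    import FirecrawlApp from "firecrawl";

    async function demoExtract(app: FirecrawlApp): Promise<void> {
      const schema = z.object({
        company_mission: z.string(),
        is_open_source: z.boolean(),
      });
      // prompt?: string is now optional; a schema alone is accepted.
      const result = await app.extract(["https://firecrawl.dev"], {
        schema,
        includeSubdomains: true, // new ExtractParams field
      });
      if (result.success) {
        console.log(result.data);
      }
    }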