firecrawl 1.10.1 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -49,16 +49,20 @@ var FirecrawlError = class extends Error {
49
49
  var FirecrawlApp = class {
50
50
  apiKey;
51
51
  apiUrl;
52
+ isCloudService(url) {
53
+ return url.includes("api.firecrawl.dev");
54
+ }
52
55
  /**
53
56
  * Initializes a new instance of the FirecrawlApp class.
54
57
  * @param config - Configuration options for the FirecrawlApp instance.
55
58
  */
56
59
  constructor({ apiKey = null, apiUrl = null }) {
57
- if (typeof apiKey !== "string") {
60
+ const baseUrl = apiUrl || "https://api.firecrawl.dev";
61
+ if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
58
62
  throw new FirecrawlError("No API key provided", 401);
59
63
  }
60
- this.apiKey = apiKey;
61
- this.apiUrl = apiUrl || "https://api.firecrawl.dev";
64
+ this.apiKey = apiKey || "";
65
+ this.apiUrl = baseUrl;
62
66
  }
63
67
  /**
64
68
  * Scrapes a URL using the Firecrawl API.
@@ -113,13 +117,73 @@ var FirecrawlApp = class {
113
117
  return { success: false, error: "Internal server error." };
114
118
  }
115
119
  /**
116
- * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
120
+ * Searches using the Firecrawl API and optionally scrapes the results.
117
121
  * @param query - The search query string.
118
- * @param params - Additional parameters for the search.
119
- * @returns Throws an error advising to use version 0 of the API.
122
+ * @param params - Optional parameters for the search request.
123
+ * @returns The response from the search operation.
120
124
  */
121
125
  async search(query, params) {
122
- throw new FirecrawlError("Search is not supported in v1, please downgrade Firecrawl to 0.0.36.", 400);
126
+ const headers = {
127
+ "Content-Type": "application/json",
128
+ Authorization: `Bearer ${this.apiKey}`
129
+ };
130
+ let jsonData = {
131
+ query,
132
+ limit: params?.limit ?? 5,
133
+ tbs: params?.tbs,
134
+ filter: params?.filter,
135
+ lang: params?.lang ?? "en",
136
+ country: params?.country ?? "us",
137
+ location: params?.location,
138
+ origin: params?.origin ?? "api",
139
+ timeout: params?.timeout ?? 6e4,
140
+ scrapeOptions: params?.scrapeOptions ?? { formats: [] }
141
+ };
142
+ if (jsonData?.scrapeOptions?.extract?.schema) {
143
+ let schema = jsonData.scrapeOptions.extract.schema;
144
+ try {
145
+ schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
146
+ } catch (error) {
147
+ }
148
+ jsonData = {
149
+ ...jsonData,
150
+ scrapeOptions: {
151
+ ...jsonData.scrapeOptions,
152
+ extract: {
153
+ ...jsonData.scrapeOptions.extract,
154
+ schema
155
+ }
156
+ }
157
+ };
158
+ }
159
+ try {
160
+ const response = await this.postRequest(
161
+ this.apiUrl + `/v1/search`,
162
+ jsonData,
163
+ headers
164
+ );
165
+ if (response.status === 200) {
166
+ const responseData = response.data;
167
+ if (responseData.success) {
168
+ return {
169
+ success: true,
170
+ data: responseData.data,
171
+ warning: responseData.warning
172
+ };
173
+ } else {
174
+ throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
175
+ }
176
+ } else {
177
+ this.handleError(response, "search");
178
+ }
179
+ } catch (error) {
180
+ if (error.response?.data?.error) {
181
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
182
+ } else {
183
+ throw new FirecrawlError(error.message, 500);
184
+ }
185
+ }
186
+ return { success: false, error: "Internal server error.", data: [] };
123
187
  }
124
188
  /**
125
189
  * Initiates a crawl job for a URL using the Firecrawl API.
@@ -295,9 +359,9 @@ var FirecrawlApp = class {
295
359
  * @param webhook - Optional webhook for the batch scrape.
296
360
  * @returns The response from the crawl operation.
297
361
  */
298
- async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
362
+ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
299
363
  const headers = this.prepareHeaders(idempotencyKey);
300
- let jsonData = { urls, ...params };
364
+ let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
301
365
  if (jsonData?.extract?.schema) {
302
366
  let schema = jsonData.extract.schema;
303
367
  try {
@@ -333,9 +397,9 @@ var FirecrawlApp = class {
333
397
  }
334
398
  return { success: false, error: "Internal server error." };
335
399
  }
336
- async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
400
+ async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
337
401
  const headers = this.prepareHeaders(idempotencyKey);
338
- let jsonData = { urls, ...params ?? {} };
402
+ let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
339
403
  try {
340
404
  const response = await this.postRequest(
341
405
  this.apiUrl + `/v1/batch/scrape`,
@@ -363,8 +427,8 @@ var FirecrawlApp = class {
363
427
  * @param idempotencyKey - Optional idempotency key for the request.
364
428
  * @returns A CrawlWatcher instance to monitor the crawl job.
365
429
  */
366
- async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
367
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
430
+ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
431
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
368
432
  if (crawl.success && crawl.id) {
369
433
  const id = crawl.id;
370
434
  return new CrawlWatcher(id, this);
@@ -593,8 +657,10 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
593
657
  ws;
594
658
  data;
595
659
  status;
660
+ id;
596
661
  constructor(id, app) {
597
662
  super();
663
+ this.id = id;
598
664
  this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
599
665
  this.status = "scraping";
600
666
  this.data = [];
@@ -604,7 +670,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
604
670
  this.dispatchTypedEvent("done", new CustomEvent("done", {
605
671
  detail: {
606
672
  status: this.status,
607
- data: this.data
673
+ data: this.data,
674
+ id: this.id
608
675
  }
609
676
  }));
610
677
  } else if (msg.type === "error") {
@@ -613,7 +680,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
613
680
  detail: {
614
681
  status: this.status,
615
682
  data: this.data,
616
- error: msg.error
683
+ error: msg.error,
684
+ id: this.id
617
685
  }
618
686
  }));
619
687
  } else if (msg.type === "catchup") {
@@ -621,12 +689,18 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
621
689
  this.data.push(...msg.data.data ?? []);
622
690
  for (const doc of this.data) {
623
691
  this.dispatchTypedEvent("document", new CustomEvent("document", {
624
- detail: doc
692
+ detail: {
693
+ ...doc,
694
+ id: this.id
695
+ }
625
696
  }));
626
697
  }
627
698
  } else if (msg.type === "document") {
628
699
  this.dispatchTypedEvent("document", new CustomEvent("document", {
629
- detail: msg.data
700
+ detail: {
701
+ ...msg.data,
702
+ id: this.id
703
+ }
630
704
  }));
631
705
  }
632
706
  };
@@ -635,12 +709,20 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
635
709
  this.ws.close();
636
710
  return;
637
711
  }
638
- const msg = JSON.parse(ev.data);
639
- messageHandler(msg);
712
+ try {
713
+ const msg = JSON.parse(ev.data);
714
+ messageHandler(msg);
715
+ } catch (error) {
716
+ console.error("Error on message", error);
717
+ }
640
718
  }).bind(this);
641
719
  this.ws.onclose = ((ev) => {
642
- const msg = JSON.parse(ev.reason);
643
- messageHandler(msg);
720
+ try {
721
+ const msg = JSON.parse(ev.reason);
722
+ messageHandler(msg);
723
+ } catch (error) {
724
+ console.error("Error on close", error);
725
+ }
644
726
  }).bind(this);
645
727
  this.ws.onerror = ((_) => {
646
728
  this.status = "failed";
@@ -648,7 +730,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
648
730
  detail: {
649
731
  status: this.status,
650
732
  data: this.data,
651
- error: "WebSocket error"
733
+ error: "WebSocket error",
734
+ id: this.id
652
735
  }
653
736
  }));
654
737
  }).bind(this);
package/dist/index.d.cts CHANGED
@@ -64,6 +64,8 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
64
64
  screenshot?: string;
65
65
  metadata?: FirecrawlDocumentMetadata;
66
66
  actions: ActionsSchema;
67
+ title?: string;
68
+ description?: string;
67
69
  }
68
70
  /**
69
71
  * Parameters for scraping operations.
@@ -171,6 +173,7 @@ interface BatchScrapeResponse {
171
173
  url?: string;
172
174
  success: true;
173
175
  error?: string;
176
+ invalidURLs?: string[];
174
177
  }
175
178
  /**
176
179
  * Response interface for job status checks.
@@ -225,10 +228,11 @@ interface MapResponse {
225
228
  * Defines options for extracting information from URLs.
226
229
  */
227
230
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
228
- prompt: string;
231
+ prompt?: string;
229
232
  schema?: LLMSchema;
230
233
  systemPrompt?: string;
231
234
  allowExternalLinks?: boolean;
235
+ includeSubdomains?: boolean;
232
236
  }
233
237
  /**
234
238
  * Response interface for extracting information from URLs.
@@ -256,6 +260,31 @@ declare class FirecrawlError extends Error {
256
260
  statusCode: number;
257
261
  constructor(message: string, statusCode: number);
258
262
  }
263
+ /**
264
+ * Parameters for search operations.
265
+ * Defines options for searching and scraping search results.
266
+ */
267
+ interface SearchParams {
268
+ limit?: number;
269
+ tbs?: string;
270
+ filter?: string;
271
+ lang?: string;
272
+ country?: string;
273
+ location?: string;
274
+ origin?: string;
275
+ timeout?: number;
276
+ scrapeOptions?: ScrapeParams;
277
+ }
278
+ /**
279
+ * Response interface for search operations.
280
+ * Defines the structure of the response received after a search operation.
281
+ */
282
+ interface SearchResponse {
283
+ success: boolean;
284
+ data: FirecrawlDocument<undefined>[];
285
+ warning?: string;
286
+ error?: string;
287
+ }
259
288
  /**
260
289
  * Main class for interacting with the Firecrawl API.
261
290
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -263,6 +292,7 @@ declare class FirecrawlError extends Error {
263
292
  declare class FirecrawlApp {
264
293
  apiKey: string;
265
294
  apiUrl: string;
295
+ private isCloudService;
266
296
  /**
267
297
  * Initializes a new instance of the FirecrawlApp class.
268
298
  * @param config - Configuration options for the FirecrawlApp instance.
@@ -276,12 +306,12 @@ declare class FirecrawlApp {
276
306
  */
277
307
  scrapeUrl<T extends zt.ZodSchema, ActionsSchema extends (Action[] | undefined) = undefined>(url: string, params?: ScrapeParams<T, ActionsSchema>): Promise<ScrapeResponse<zt.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;
278
308
  /**
279
- * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
309
+ * Searches using the Firecrawl API and optionally scrapes the results.
280
310
  * @param query - The search query string.
281
- * @param params - Additional parameters for the search.
282
- * @returns Throws an error advising to use version 0 of the API.
311
+ * @param params - Optional parameters for the search request.
312
+ * @returns The response from the search operation.
283
313
  */
284
- search(query: string, params?: any): Promise<any>;
314
+ search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse>;
285
315
  /**
286
316
  * Initiates a crawl job for a URL using the Firecrawl API.
287
317
  * @param url - The URL to crawl.
@@ -329,8 +359,8 @@ declare class FirecrawlApp {
329
359
  * @param webhook - Optional webhook for the batch scrape.
330
360
  * @returns The response from the crawl operation.
331
361
  */
332
- batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
333
- asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
362
+ batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
363
+ asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
334
364
  /**
335
365
  * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
336
366
  * @param urls - The URL to scrape.
@@ -338,7 +368,7 @@ declare class FirecrawlApp {
338
368
  * @param idempotencyKey - Optional idempotency key for the request.
339
369
  * @returns A CrawlWatcher instance to monitor the crawl job.
340
370
  */
341
- batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
371
+ batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
342
372
  /**
343
373
  * Checks the status of a batch scrape job using the Firecrawl API.
344
374
  * @param id - The ID of the batch scrape operation.
@@ -414,8 +444,9 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
414
444
  private ws;
415
445
  data: FirecrawlDocument<undefined>[];
416
446
  status: CrawlStatusResponse["status"];
447
+ id: string;
417
448
  constructor(id: string, app: FirecrawlApp);
418
449
  close(): void;
419
450
  }
420
451
 
421
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
452
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.d.ts CHANGED
@@ -64,6 +64,8 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
64
64
  screenshot?: string;
65
65
  metadata?: FirecrawlDocumentMetadata;
66
66
  actions: ActionsSchema;
67
+ title?: string;
68
+ description?: string;
67
69
  }
68
70
  /**
69
71
  * Parameters for scraping operations.
@@ -171,6 +173,7 @@ interface BatchScrapeResponse {
171
173
  url?: string;
172
174
  success: true;
173
175
  error?: string;
176
+ invalidURLs?: string[];
174
177
  }
175
178
  /**
176
179
  * Response interface for job status checks.
@@ -225,10 +228,11 @@ interface MapResponse {
225
228
  * Defines options for extracting information from URLs.
226
229
  */
227
230
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
228
- prompt: string;
231
+ prompt?: string;
229
232
  schema?: LLMSchema;
230
233
  systemPrompt?: string;
231
234
  allowExternalLinks?: boolean;
235
+ includeSubdomains?: boolean;
232
236
  }
233
237
  /**
234
238
  * Response interface for extracting information from URLs.
@@ -256,6 +260,31 @@ declare class FirecrawlError extends Error {
256
260
  statusCode: number;
257
261
  constructor(message: string, statusCode: number);
258
262
  }
263
+ /**
264
+ * Parameters for search operations.
265
+ * Defines options for searching and scraping search results.
266
+ */
267
+ interface SearchParams {
268
+ limit?: number;
269
+ tbs?: string;
270
+ filter?: string;
271
+ lang?: string;
272
+ country?: string;
273
+ location?: string;
274
+ origin?: string;
275
+ timeout?: number;
276
+ scrapeOptions?: ScrapeParams;
277
+ }
278
+ /**
279
+ * Response interface for search operations.
280
+ * Defines the structure of the response received after a search operation.
281
+ */
282
+ interface SearchResponse {
283
+ success: boolean;
284
+ data: FirecrawlDocument<undefined>[];
285
+ warning?: string;
286
+ error?: string;
287
+ }
259
288
  /**
260
289
  * Main class for interacting with the Firecrawl API.
261
290
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -263,6 +292,7 @@ declare class FirecrawlError extends Error {
263
292
  declare class FirecrawlApp {
264
293
  apiKey: string;
265
294
  apiUrl: string;
295
+ private isCloudService;
266
296
  /**
267
297
  * Initializes a new instance of the FirecrawlApp class.
268
298
  * @param config - Configuration options for the FirecrawlApp instance.
@@ -276,12 +306,12 @@ declare class FirecrawlApp {
276
306
  */
277
307
  scrapeUrl<T extends zt.ZodSchema, ActionsSchema extends (Action[] | undefined) = undefined>(url: string, params?: ScrapeParams<T, ActionsSchema>): Promise<ScrapeResponse<zt.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;
278
308
  /**
279
- * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
309
+ * Searches using the Firecrawl API and optionally scrapes the results.
280
310
  * @param query - The search query string.
281
- * @param params - Additional parameters for the search.
282
- * @returns Throws an error advising to use version 0 of the API.
311
+ * @param params - Optional parameters for the search request.
312
+ * @returns The response from the search operation.
283
313
  */
284
- search(query: string, params?: any): Promise<any>;
314
+ search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse>;
285
315
  /**
286
316
  * Initiates a crawl job for a URL using the Firecrawl API.
287
317
  * @param url - The URL to crawl.
@@ -329,8 +359,8 @@ declare class FirecrawlApp {
329
359
  * @param webhook - Optional webhook for the batch scrape.
330
360
  * @returns The response from the crawl operation.
331
361
  */
332
- batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
333
- asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
362
+ batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
363
+ asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
334
364
  /**
335
365
  * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
336
366
  * @param urls - The URL to scrape.
@@ -338,7 +368,7 @@ declare class FirecrawlApp {
338
368
  * @param idempotencyKey - Optional idempotency key for the request.
339
369
  * @returns A CrawlWatcher instance to monitor the crawl job.
340
370
  */
341
- batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
371
+ batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
342
372
  /**
343
373
  * Checks the status of a batch scrape job using the Firecrawl API.
344
374
  * @param id - The ID of the batch scrape operation.
@@ -414,8 +444,9 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
414
444
  private ws;
415
445
  data: FirecrawlDocument<undefined>[];
416
446
  status: CrawlStatusResponse["status"];
447
+ id: string;
417
448
  constructor(id: string, app: FirecrawlApp);
418
449
  close(): void;
419
450
  }
420
451
 
421
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
452
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.js CHANGED
@@ -13,16 +13,20 @@ var FirecrawlError = class extends Error {
13
13
  var FirecrawlApp = class {
14
14
  apiKey;
15
15
  apiUrl;
16
+ isCloudService(url) {
17
+ return url.includes("api.firecrawl.dev");
18
+ }
16
19
  /**
17
20
  * Initializes a new instance of the FirecrawlApp class.
18
21
  * @param config - Configuration options for the FirecrawlApp instance.
19
22
  */
20
23
  constructor({ apiKey = null, apiUrl = null }) {
21
- if (typeof apiKey !== "string") {
24
+ const baseUrl = apiUrl || "https://api.firecrawl.dev";
25
+ if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
22
26
  throw new FirecrawlError("No API key provided", 401);
23
27
  }
24
- this.apiKey = apiKey;
25
- this.apiUrl = apiUrl || "https://api.firecrawl.dev";
28
+ this.apiKey = apiKey || "";
29
+ this.apiUrl = baseUrl;
26
30
  }
27
31
  /**
28
32
  * Scrapes a URL using the Firecrawl API.
@@ -77,13 +81,73 @@ var FirecrawlApp = class {
77
81
  return { success: false, error: "Internal server error." };
78
82
  }
79
83
  /**
80
- * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
84
+ * Searches using the Firecrawl API and optionally scrapes the results.
81
85
  * @param query - The search query string.
82
- * @param params - Additional parameters for the search.
83
- * @returns Throws an error advising to use version 0 of the API.
86
+ * @param params - Optional parameters for the search request.
87
+ * @returns The response from the search operation.
84
88
  */
85
89
  async search(query, params) {
86
- throw new FirecrawlError("Search is not supported in v1, please downgrade Firecrawl to 0.0.36.", 400);
90
+ const headers = {
91
+ "Content-Type": "application/json",
92
+ Authorization: `Bearer ${this.apiKey}`
93
+ };
94
+ let jsonData = {
95
+ query,
96
+ limit: params?.limit ?? 5,
97
+ tbs: params?.tbs,
98
+ filter: params?.filter,
99
+ lang: params?.lang ?? "en",
100
+ country: params?.country ?? "us",
101
+ location: params?.location,
102
+ origin: params?.origin ?? "api",
103
+ timeout: params?.timeout ?? 6e4,
104
+ scrapeOptions: params?.scrapeOptions ?? { formats: [] }
105
+ };
106
+ if (jsonData?.scrapeOptions?.extract?.schema) {
107
+ let schema = jsonData.scrapeOptions.extract.schema;
108
+ try {
109
+ schema = zodToJsonSchema(schema);
110
+ } catch (error) {
111
+ }
112
+ jsonData = {
113
+ ...jsonData,
114
+ scrapeOptions: {
115
+ ...jsonData.scrapeOptions,
116
+ extract: {
117
+ ...jsonData.scrapeOptions.extract,
118
+ schema
119
+ }
120
+ }
121
+ };
122
+ }
123
+ try {
124
+ const response = await this.postRequest(
125
+ this.apiUrl + `/v1/search`,
126
+ jsonData,
127
+ headers
128
+ );
129
+ if (response.status === 200) {
130
+ const responseData = response.data;
131
+ if (responseData.success) {
132
+ return {
133
+ success: true,
134
+ data: responseData.data,
135
+ warning: responseData.warning
136
+ };
137
+ } else {
138
+ throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
139
+ }
140
+ } else {
141
+ this.handleError(response, "search");
142
+ }
143
+ } catch (error) {
144
+ if (error.response?.data?.error) {
145
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
146
+ } else {
147
+ throw new FirecrawlError(error.message, 500);
148
+ }
149
+ }
150
+ return { success: false, error: "Internal server error.", data: [] };
87
151
  }
88
152
  /**
89
153
  * Initiates a crawl job for a URL using the Firecrawl API.
@@ -259,9 +323,9 @@ var FirecrawlApp = class {
259
323
  * @param webhook - Optional webhook for the batch scrape.
260
324
  * @returns The response from the crawl operation.
261
325
  */
262
- async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
326
+ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
263
327
  const headers = this.prepareHeaders(idempotencyKey);
264
- let jsonData = { urls, ...params };
328
+ let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
265
329
  if (jsonData?.extract?.schema) {
266
330
  let schema = jsonData.extract.schema;
267
331
  try {
@@ -297,9 +361,9 @@ var FirecrawlApp = class {
297
361
  }
298
362
  return { success: false, error: "Internal server error." };
299
363
  }
300
- async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
364
+ async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
301
365
  const headers = this.prepareHeaders(idempotencyKey);
302
- let jsonData = { urls, ...params ?? {} };
366
+ let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
303
367
  try {
304
368
  const response = await this.postRequest(
305
369
  this.apiUrl + `/v1/batch/scrape`,
@@ -327,8 +391,8 @@ var FirecrawlApp = class {
327
391
  * @param idempotencyKey - Optional idempotency key for the request.
328
392
  * @returns A CrawlWatcher instance to monitor the crawl job.
329
393
  */
330
- async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
331
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
394
+ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
395
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
332
396
  if (crawl.success && crawl.id) {
333
397
  const id = crawl.id;
334
398
  return new CrawlWatcher(id, this);
@@ -557,8 +621,10 @@ var CrawlWatcher = class extends TypedEventTarget {
557
621
  ws;
558
622
  data;
559
623
  status;
624
+ id;
560
625
  constructor(id, app) {
561
626
  super();
627
+ this.id = id;
562
628
  this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
563
629
  this.status = "scraping";
564
630
  this.data = [];
@@ -568,7 +634,8 @@ var CrawlWatcher = class extends TypedEventTarget {
568
634
  this.dispatchTypedEvent("done", new CustomEvent("done", {
569
635
  detail: {
570
636
  status: this.status,
571
- data: this.data
637
+ data: this.data,
638
+ id: this.id
572
639
  }
573
640
  }));
574
641
  } else if (msg.type === "error") {
@@ -577,7 +644,8 @@ var CrawlWatcher = class extends TypedEventTarget {
577
644
  detail: {
578
645
  status: this.status,
579
646
  data: this.data,
580
- error: msg.error
647
+ error: msg.error,
648
+ id: this.id
581
649
  }
582
650
  }));
583
651
  } else if (msg.type === "catchup") {
@@ -585,12 +653,18 @@ var CrawlWatcher = class extends TypedEventTarget {
585
653
  this.data.push(...msg.data.data ?? []);
586
654
  for (const doc of this.data) {
587
655
  this.dispatchTypedEvent("document", new CustomEvent("document", {
588
- detail: doc
656
+ detail: {
657
+ ...doc,
658
+ id: this.id
659
+ }
589
660
  }));
590
661
  }
591
662
  } else if (msg.type === "document") {
592
663
  this.dispatchTypedEvent("document", new CustomEvent("document", {
593
- detail: msg.data
664
+ detail: {
665
+ ...msg.data,
666
+ id: this.id
667
+ }
594
668
  }));
595
669
  }
596
670
  };
@@ -599,12 +673,20 @@ var CrawlWatcher = class extends TypedEventTarget {
599
673
  this.ws.close();
600
674
  return;
601
675
  }
602
- const msg = JSON.parse(ev.data);
603
- messageHandler(msg);
676
+ try {
677
+ const msg = JSON.parse(ev.data);
678
+ messageHandler(msg);
679
+ } catch (error) {
680
+ console.error("Error on message", error);
681
+ }
604
682
  }).bind(this);
605
683
  this.ws.onclose = ((ev) => {
606
- const msg = JSON.parse(ev.reason);
607
- messageHandler(msg);
684
+ try {
685
+ const msg = JSON.parse(ev.reason);
686
+ messageHandler(msg);
687
+ } catch (error) {
688
+ console.error("Error on close", error);
689
+ }
608
690
  }).bind(this);
609
691
  this.ws.onerror = ((_) => {
610
692
  this.status = "failed";
@@ -612,7 +694,8 @@ var CrawlWatcher = class extends TypedEventTarget {
612
694
  detail: {
613
695
  status: this.status,
614
696
  data: this.data,
615
- error: "WebSocket error"
697
+ error: "WebSocket error",
698
+ id: this.id
616
699
  }
617
700
  }));
618
701
  }).bind(this);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.10.1",
3
+ "version": "1.11.0",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -1,9 +1,9 @@
1
- import { describe, test, expect, jest } from '@jest/globals';
2
- import axios from 'axios';
3
- import FirecrawlApp from '../index';
1
+ import { describe, expect, jest, test } from '@jest/globals';
4
2
 
5
- import { readFile } from 'fs/promises';
3
+ import FirecrawlApp from '../index';
4
+ import axios from 'axios';
6
5
  import { join } from 'path';
6
+ import { readFile } from 'fs/promises';
7
7
 
8
8
  // Mock jest and set the type
9
9
  jest.mock('axios');
@@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
14
14
  return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
15
15
  }
16
16
 
17
+ const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
18
+
17
19
  describe('the firecrawl JS SDK', () => {
18
20
 
19
- test('Should require an API key to instantiate FirecrawlApp', async () => {
20
- const fn = () => {
21
- new FirecrawlApp({ apiKey: undefined });
22
- };
23
- expect(fn).toThrow('No API key provided');
21
+ test('Should require an API key only for cloud service', async () => {
22
+ if (API_URL.includes('api.firecrawl.dev')) {
23
+ // Should throw for cloud service
24
+ expect(() => {
25
+ new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
26
+ }).toThrow('No API key provided');
27
+ } else {
28
+ // Should not throw for self-hosted
29
+ expect(() => {
30
+ new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
31
+ }).not.toThrow();
32
+ }
24
33
  });
25
34
 
26
35
  test('Should return scraped data from a /scrape API call', async () => {
@@ -9,15 +9,28 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
9
9
  const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
10
10
 
11
11
  describe('FirecrawlApp E2E Tests', () => {
12
- test.concurrent('should throw error for no API key', async () => {
13
- expect(() => {
14
- new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
15
- }).toThrow("No API key provided");
12
+ test.concurrent('should throw error for no API key only for cloud service', async () => {
13
+ if (API_URL.includes('api.firecrawl.dev')) {
14
+ // Should throw for cloud service
15
+ expect(() => {
16
+ new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
17
+ }).toThrow("No API key provided");
18
+ } else {
19
+ // Should not throw for self-hosted
20
+ expect(() => {
21
+ new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
22
+ }).not.toThrow();
23
+ }
16
24
  });
17
25
 
18
26
  test.concurrent('should throw error for invalid API key on scrape', async () => {
19
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
20
- await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
27
+ if (API_URL.includes('api.firecrawl.dev')) {
28
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
29
+ await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
30
+ } else {
31
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
32
+ await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
33
+ }
21
34
  });
22
35
 
23
36
  test.concurrent('should throw error for blocklisted URL on scrape', async () => {
@@ -155,14 +168,13 @@ describe('FirecrawlApp E2E Tests', () => {
155
168
  }, 30000); // 30 seconds timeout
156
169
 
157
170
  test.concurrent('should throw error for invalid API key on crawl', async () => {
158
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
159
- await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
160
- });
161
-
162
- test.concurrent('should throw error for blocklisted URL on crawl', async () => {
163
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
164
- const blocklistedUrl = "https://twitter.com/fake-test";
165
- await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
171
+ if (API_URL.includes('api.firecrawl.dev')) {
172
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
173
+ await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
174
+ } else {
175
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
176
+ await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
177
+ }
166
178
  });
167
179
 
168
180
  test.concurrent('should return successful response for crawl and wait for completion', async () => {
@@ -337,8 +349,13 @@ describe('FirecrawlApp E2E Tests', () => {
337
349
  }, 60000); // 60 seconds timeout
338
350
 
339
351
  test.concurrent('should throw error for invalid API key on map', async () => {
340
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
341
- await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
352
+ if (API_URL.includes('api.firecrawl.dev')) {
353
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
354
+ await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
355
+ } else {
356
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
357
+ await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
358
+ }
342
359
  });
343
360
 
344
361
  test.concurrent('should throw error for blocklisted URL on map', async () => {
@@ -355,8 +372,7 @@ describe('FirecrawlApp E2E Tests', () => {
355
372
  }, 30000); // 30 seconds timeout
356
373
 
357
374
  test.concurrent('should return successful response for valid map', async () => {
358
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
359
- const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
375
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
360
376
  expect(response).not.toBeNull();
361
377
 
362
378
  expect(response.links?.length).toBeGreaterThan(0);
@@ -365,8 +381,45 @@ describe('FirecrawlApp E2E Tests', () => {
365
381
  expect(filteredLinks?.length).toBeGreaterThan(0);
366
382
  }, 30000); // 30 seconds timeout
367
383
 
368
- test('should throw NotImplementedError for search on v1', async () => {
384
+
385
+
386
+ test('should search with string query', async () => {
369
387
  const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
370
- await expect(app.search("test query")).rejects.toThrow("Search is not supported in v1");
388
+ const response = await app.search("firecrawl");
389
+ expect(response.success).toBe(true);
390
+ console.log(response.data);
391
+ expect(response.data?.length).toBeGreaterThan(0);
392
+ expect(response.data?.[0]?.markdown).toBeDefined();
393
+ expect(response.data?.[0]?.metadata).toBeDefined();
394
+ expect(response.data?.[0]?.metadata?.title).toBeDefined();
395
+ expect(response.data?.[0]?.metadata?.description).toBeDefined();
396
+ });
397
+
398
+ test('should search with params object', async () => {
399
+ const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
400
+ const response = await app.search("firecrawl", {
401
+ limit: 3,
402
+ lang: 'en',
403
+ country: 'us',
404
+ scrapeOptions: {
405
+ formats: ['markdown', 'html', 'links'],
406
+ onlyMainContent: true
407
+ }
408
+ });
409
+ expect(response.success).toBe(true);
410
+ expect(response.data.length).toBeLessThanOrEqual(3);
411
+ for (const doc of response.data) {
412
+ expect(doc.markdown).toBeDefined();
413
+ expect(doc.html).toBeDefined();
414
+ expect(doc.links).toBeDefined();
415
+ expect(doc.metadata).toBeDefined();
416
+ expect(doc.metadata?.title).toBeDefined();
417
+ expect(doc.metadata?.description).toBeDefined();
418
+ }
419
+ });
420
+
421
+ test('should handle invalid API key for search', async () => {
422
+ const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" });
423
+ await expect(app.search("test query")).rejects.toThrow("Request failed with status code 404");
371
424
  });
372
425
  });
package/src/index.ts CHANGED
@@ -68,6 +68,9 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
68
68
  screenshot?: string;
69
69
  metadata?: FirecrawlDocumentMetadata;
70
70
  actions: ActionsSchema;
71
+ // v1 search only
72
+ title?: string;
73
+ description?: string;
71
74
  }
72
75
 
73
76
  /**
@@ -183,6 +186,7 @@ export interface BatchScrapeResponse {
183
186
  url?: string;
184
187
  success: true;
185
188
  error?: string;
189
+ invalidURLs?: string[];
186
190
  }
187
191
 
188
192
  /**
@@ -242,10 +246,11 @@ export interface MapResponse {
242
246
  * Defines options for extracting information from URLs.
243
247
  */
244
248
  export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
245
- prompt: string;
249
+ prompt?: string;
246
250
  schema?: LLMSchema;
247
251
  systemPrompt?: string;
248
252
  allowExternalLinks?: boolean;
253
+ includeSubdomains?: boolean;
249
254
  }
250
255
 
251
256
  /**
@@ -280,6 +285,33 @@ export class FirecrawlError extends Error {
280
285
  }
281
286
  }
282
287
 
288
+ /**
289
+ * Parameters for search operations.
290
+ * Defines options for searching and scraping search results.
291
+ */
292
+ export interface SearchParams {
293
+ limit?: number;
294
+ tbs?: string;
295
+ filter?: string;
296
+ lang?: string;
297
+ country?: string;
298
+ location?: string;
299
+ origin?: string;
300
+ timeout?: number;
301
+ scrapeOptions?: ScrapeParams;
302
+ }
303
+
304
+ /**
305
+ * Response interface for search operations.
306
+ * Defines the structure of the response received after a search operation.
307
+ */
308
+ export interface SearchResponse {
309
+ success: boolean;
310
+ data: FirecrawlDocument<undefined>[];
311
+ warning?: string;
312
+ error?: string;
313
+ }
314
+
283
315
  /**
284
316
  * Main class for interacting with the Firecrawl API.
285
317
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -288,17 +320,23 @@ export default class FirecrawlApp {
288
320
  public apiKey: string;
289
321
  public apiUrl: string;
290
322
 
323
+ private isCloudService(url: string): boolean {
324
+ return url.includes('api.firecrawl.dev');
325
+ }
326
+
291
327
  /**
292
328
  * Initializes a new instance of the FirecrawlApp class.
293
329
  * @param config - Configuration options for the FirecrawlApp instance.
294
330
  */
295
331
  constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
296
- if (typeof apiKey !== "string") {
332
+ const baseUrl = apiUrl || "https://api.firecrawl.dev";
333
+
334
+ if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
297
335
  throw new FirecrawlError("No API key provided", 401);
298
336
  }
299
337
 
300
- this.apiKey = apiKey;
301
- this.apiUrl = apiUrl || "https://api.firecrawl.dev";
338
+ this.apiKey = apiKey || '';
339
+ this.apiUrl = baseUrl;
302
340
  }
303
341
 
304
342
  /**
@@ -361,16 +399,80 @@ export default class FirecrawlApp {
361
399
  }
362
400
 
363
401
  /**
364
- * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
402
+ * Searches using the Firecrawl API and optionally scrapes the results.
365
403
  * @param query - The search query string.
366
- * @param params - Additional parameters for the search.
367
- * @returns Throws an error advising to use version 0 of the API.
404
+ * @param params - Optional parameters for the search request.
405
+ * @returns The response from the search operation.
368
406
  */
369
- async search(
370
- query: string,
371
- params?: any
372
- ): Promise<any> {
373
- throw new FirecrawlError("Search is not supported in v1, please downgrade Firecrawl to 0.0.36.", 400);
407
+ async search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse> {
408
+ const headers: AxiosRequestHeaders = {
409
+ "Content-Type": "application/json",
410
+ Authorization: `Bearer ${this.apiKey}`,
411
+ } as AxiosRequestHeaders;
412
+
413
+ let jsonData: any = {
414
+ query,
415
+ limit: params?.limit ?? 5,
416
+ tbs: params?.tbs,
417
+ filter: params?.filter,
418
+ lang: params?.lang ?? "en",
419
+ country: params?.country ?? "us",
420
+ location: params?.location,
421
+ origin: params?.origin ?? "api",
422
+ timeout: params?.timeout ?? 60000,
423
+ scrapeOptions: params?.scrapeOptions ?? { formats: [] },
424
+ };
425
+
426
+ if (jsonData?.scrapeOptions?.extract?.schema) {
427
+ let schema = jsonData.scrapeOptions.extract.schema;
428
+
429
+ // Try parsing the schema as a Zod schema
430
+ try {
431
+ schema = zodToJsonSchema(schema);
432
+ } catch (error) {
433
+
434
+ }
435
+ jsonData = {
436
+ ...jsonData,
437
+ scrapeOptions: {
438
+ ...jsonData.scrapeOptions,
439
+ extract: {
440
+ ...jsonData.scrapeOptions.extract,
441
+ schema: schema,
442
+ },
443
+ },
444
+ };
445
+ }
446
+
447
+ try {
448
+ const response: AxiosResponse = await this.postRequest(
449
+ this.apiUrl + `/v1/search`,
450
+ jsonData,
451
+ headers
452
+ );
453
+
454
+ if (response.status === 200) {
455
+ const responseData = response.data;
456
+ if (responseData.success) {
457
+ return {
458
+ success: true,
459
+ data: responseData.data as FirecrawlDocument<any>[],
460
+ warning: responseData.warning,
461
+ };
462
+ } else {
463
+ throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
464
+ }
465
+ } else {
466
+ this.handleError(response, "search");
467
+ }
468
+ } catch (error: any) {
469
+ if (error.response?.data?.error) {
470
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
471
+ } else {
472
+ throw new FirecrawlError(error.message, 500);
473
+ }
474
+ }
475
+ return { success: false, error: "Internal server error.", data: [] };
374
476
  }
375
477
 
376
478
  /**
@@ -576,9 +678,10 @@ export default class FirecrawlApp {
576
678
  pollInterval: number = 2,
577
679
  idempotencyKey?: string,
578
680
  webhook?: CrawlParams["webhook"],
681
+ ignoreInvalidURLs?: boolean,
579
682
  ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
580
683
  const headers = this.prepareHeaders(idempotencyKey);
581
- let jsonData: any = { urls, ...params };
684
+ let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
582
685
  if (jsonData?.extract?.schema) {
583
686
  let schema = jsonData.extract.schema;
584
687
 
@@ -621,10 +724,12 @@ export default class FirecrawlApp {
621
724
  async asyncBatchScrapeUrls(
622
725
  urls: string[],
623
726
  params?: ScrapeParams,
624
- idempotencyKey?: string
727
+ idempotencyKey?: string,
728
+ webhook?: CrawlParams["webhook"],
729
+ ignoreInvalidURLs?: boolean,
625
730
  ): Promise<BatchScrapeResponse | ErrorResponse> {
626
731
  const headers = this.prepareHeaders(idempotencyKey);
627
- let jsonData: any = { urls, ...(params ?? {}) };
732
+ let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
628
733
  try {
629
734
  const response: AxiosResponse = await this.postRequest(
630
735
  this.apiUrl + `/v1/batch/scrape`,
@@ -657,8 +762,10 @@ export default class FirecrawlApp {
657
762
  urls: string[],
658
763
  params?: ScrapeParams,
659
764
  idempotencyKey?: string,
765
+ webhook?: CrawlParams["webhook"],
766
+ ignoreInvalidURLs?: boolean,
660
767
  ) {
661
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
768
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
662
769
 
663
770
  if (crawl.success && crawl.id) {
664
771
  const id = crawl.id;
@@ -932,9 +1039,11 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
932
1039
  private ws: WebSocket;
933
1040
  public data: FirecrawlDocument<undefined>[];
934
1041
  public status: CrawlStatusResponse["status"];
1042
+ public id: string;
935
1043
 
936
1044
  constructor(id: string, app: FirecrawlApp) {
937
1045
  super();
1046
+ this.id = id;
938
1047
  this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
939
1048
  this.status = "scraping";
940
1049
  this.data = [];
@@ -965,6 +1074,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
965
1074
  detail: {
966
1075
  status: this.status,
967
1076
  data: this.data,
1077
+ id: this.id,
968
1078
  },
969
1079
  }));
970
1080
  } else if (msg.type === "error") {
@@ -974,6 +1084,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
974
1084
  status: this.status,
975
1085
  data: this.data,
976
1086
  error: msg.error,
1087
+ id: this.id,
977
1088
  },
978
1089
  }));
979
1090
  } else if (msg.type === "catchup") {
@@ -981,12 +1092,18 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
981
1092
  this.data.push(...(msg.data.data ?? []));
982
1093
  for (const doc of this.data) {
983
1094
  this.dispatchTypedEvent("document", new CustomEvent("document", {
984
- detail: doc,
1095
+ detail: {
1096
+ ...doc,
1097
+ id: this.id,
1098
+ },
985
1099
  }));
986
1100
  }
987
1101
  } else if (msg.type === "document") {
988
1102
  this.dispatchTypedEvent("document", new CustomEvent("document", {
989
- detail: msg.data,
1103
+ detail: {
1104
+ ...msg.data,
1105
+ id: this.id,
1106
+ },
990
1107
  }));
991
1108
  }
992
1109
  }
@@ -996,14 +1113,21 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
996
1113
  this.ws.close();
997
1114
  return;
998
1115
  }
999
-
1000
- const msg = JSON.parse(ev.data) as Message;
1001
- messageHandler(msg);
1116
+ try {
1117
+ const msg = JSON.parse(ev.data) as Message;
1118
+ messageHandler(msg);
1119
+ } catch (error) {
1120
+ console.error("Error on message", error);
1121
+ }
1002
1122
  }).bind(this);
1003
1123
 
1004
1124
  this.ws.onclose = ((ev: CloseEvent) => {
1005
- const msg = JSON.parse(ev.reason) as Message;
1006
- messageHandler(msg);
1125
+ try {
1126
+ const msg = JSON.parse(ev.reason) as Message;
1127
+ messageHandler(msg);
1128
+ } catch (error) {
1129
+ console.error("Error on close", error);
1130
+ }
1007
1131
  }).bind(this);
1008
1132
 
1009
1133
  this.ws.onerror = ((_: Event) => {
@@ -1013,6 +1137,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
1013
1137
  status: this.status,
1014
1138
  data: this.data,
1015
1139
  error: "WebSocket error",
1140
+ id: this.id,
1016
1141
  },
1017
1142
  }));
1018
1143
  }).bind(this);