firecrawl 1.8.5 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -405,6 +405,51 @@ var FirecrawlApp = class {
405
405
  }
406
406
  return { success: false, error: "Internal server error." };
407
407
  }
408
+ /**
409
+ * Extracts information from URLs using the Firecrawl API.
410
+ * Currently in Beta. Expect breaking changes on future minor versions.
411
+ * @param urls - The URLs to extract information from.
412
+ * @param params - Additional parameters for the extract request.
413
+ * @returns The response from the extract operation.
414
+ */
415
+ async extract(urls, params) {
416
+ const headers = this.prepareHeaders();
417
+ if (!params?.prompt) {
418
+ throw new FirecrawlError("Prompt is required", 400);
419
+ }
420
+ let jsonData = { urls, ...params };
421
+ let jsonSchema;
422
+ try {
423
+ jsonSchema = params?.schema ? (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema) : void 0;
424
+ } catch (error) {
425
+ throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
426
+ }
427
+ try {
428
+ const response = await this.postRequest(
429
+ this.apiUrl + `/v1/extract`,
430
+ { ...jsonData, schema: jsonSchema },
431
+ headers
432
+ );
433
+ if (response.status === 200) {
434
+ const responseData = response.data;
435
+ if (responseData.success) {
436
+ return {
437
+ success: true,
438
+ data: responseData.data,
439
+ warning: responseData.warning,
440
+ error: responseData.error
441
+ };
442
+ } else {
443
+ throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
444
+ }
445
+ } else {
446
+ this.handleError(response, "extract");
447
+ }
448
+ } catch (error) {
449
+ throw new FirecrawlError(error.message, 500);
450
+ }
451
+ return { success: false, error: "Internal server error." };
452
+ }
408
453
  /**
409
454
  * Prepares the headers for an API request.
410
455
  * @param idempotencyKey - Optional key to ensure idempotency.
package/dist/index.d.cts CHANGED
@@ -219,6 +219,26 @@ interface MapResponse {
219
219
  links?: string[];
220
220
  error?: string;
221
221
  }
222
+ /**
223
+ * Parameters for extracting information from URLs.
224
+ * Defines options for extracting information from URLs.
225
+ */
226
+ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
227
+ prompt: string;
228
+ schema?: LLMSchema;
229
+ systemPrompt?: string;
230
+ allowExternalLinks?: boolean;
231
+ }
232
+ /**
233
+ * Response interface for extracting information from URLs.
234
+ * Defines the structure of the response received after extracting information from URLs.
235
+ */
236
+ interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
237
+ success: boolean;
238
+ data: LLMSchema;
239
+ error?: string;
240
+ warning?: string;
241
+ }
222
242
  /**
223
243
  * Error response interface.
224
244
  * Defines the structure of the response received when an error occurs.
@@ -325,6 +345,14 @@ declare class FirecrawlApp {
325
345
  * @returns The response containing the job status.
326
346
  */
327
347
  checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
348
+ /**
349
+ * Extracts information from URLs using the Firecrawl API.
350
+ * Currently in Beta. Expect breaking changes on future minor versions.
351
+ * @param urls - The URLs to extract information from.
352
+ * @param params - Additional parameters for the extract request.
353
+ * @returns The response from the extract operation.
354
+ */
355
+ extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
328
356
  /**
329
357
  * Prepares the headers for an API request.
330
358
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -389,4 +417,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
389
417
  close(): void;
390
418
  }
391
419
 
392
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
420
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
package/dist/index.d.ts CHANGED
@@ -219,6 +219,26 @@ interface MapResponse {
219
219
  links?: string[];
220
220
  error?: string;
221
221
  }
222
+ /**
223
+ * Parameters for extracting information from URLs.
224
+ * Defines options for extracting information from URLs.
225
+ */
226
+ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
227
+ prompt: string;
228
+ schema?: LLMSchema;
229
+ systemPrompt?: string;
230
+ allowExternalLinks?: boolean;
231
+ }
232
+ /**
233
+ * Response interface for extracting information from URLs.
234
+ * Defines the structure of the response received after extracting information from URLs.
235
+ */
236
+ interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
237
+ success: boolean;
238
+ data: LLMSchema;
239
+ error?: string;
240
+ warning?: string;
241
+ }
222
242
  /**
223
243
  * Error response interface.
224
244
  * Defines the structure of the response received when an error occurs.
@@ -325,6 +345,14 @@ declare class FirecrawlApp {
325
345
  * @returns The response containing the job status.
326
346
  */
327
347
  checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
348
+ /**
349
+ * Extracts information from URLs using the Firecrawl API.
350
+ * Currently in Beta. Expect breaking changes on future minor versions.
351
+ * @param urls - The URLs to extract information from.
352
+ * @param params - Additional parameters for the extract request.
353
+ * @returns The response from the extract operation.
354
+ */
355
+ extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
328
356
  /**
329
357
  * Prepares the headers for an API request.
330
358
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -389,4 +417,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
389
417
  close(): void;
390
418
  }
391
419
 
392
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
420
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
package/dist/index.js CHANGED
@@ -369,6 +369,51 @@ var FirecrawlApp = class {
369
369
  }
370
370
  return { success: false, error: "Internal server error." };
371
371
  }
372
+ /**
373
+ * Extracts information from URLs using the Firecrawl API.
374
+ * Currently in Beta. Expect breaking changes on future minor versions.
375
+ * @param urls - The URLs to extract information from.
376
+ * @param params - Additional parameters for the extract request.
377
+ * @returns The response from the extract operation.
378
+ */
379
+ async extract(urls, params) {
380
+ const headers = this.prepareHeaders();
381
+ if (!params?.prompt) {
382
+ throw new FirecrawlError("Prompt is required", 400);
383
+ }
384
+ let jsonData = { urls, ...params };
385
+ let jsonSchema;
386
+ try {
387
+ jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : void 0;
388
+ } catch (error) {
389
+ throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
390
+ }
391
+ try {
392
+ const response = await this.postRequest(
393
+ this.apiUrl + `/v1/extract`,
394
+ { ...jsonData, schema: jsonSchema },
395
+ headers
396
+ );
397
+ if (response.status === 200) {
398
+ const responseData = response.data;
399
+ if (responseData.success) {
400
+ return {
401
+ success: true,
402
+ data: responseData.data,
403
+ warning: responseData.warning,
404
+ error: responseData.error
405
+ };
406
+ } else {
407
+ throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
408
+ }
409
+ } else {
410
+ this.handleError(response, "extract");
411
+ }
412
+ } catch (error) {
413
+ throw new FirecrawlError(error.message, 500);
414
+ }
415
+ return { success: false, error: "Internal server error." };
416
+ }
372
417
  /**
373
418
  * Prepares the headers for an API request.
374
419
  * @param idempotencyKey - Optional key to ensure idempotency.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.8.5",
3
+ "version": "1.9.1",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -236,6 +236,28 @@ export interface MapResponse {
236
236
  error?: string;
237
237
  }
238
238
 
239
+ /**
240
+ * Parameters for extracting information from URLs.
241
+ * Defines options for extracting information from URLs.
242
+ */
243
+ export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
244
+ prompt: string;
245
+ schema?: LLMSchema;
246
+ systemPrompt?: string;
247
+ allowExternalLinks?: boolean;
248
+ }
249
+
250
+ /**
251
+ * Response interface for extracting information from URLs.
252
+ * Defines the structure of the response received after extracting information from URLs.
253
+ */
254
+ export interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
255
+ success: boolean;
256
+ data: LLMSchema;
257
+ error?: string;
258
+ warning?: string;
259
+ }
260
+
239
261
  /**
240
262
  * Error response interface.
241
263
  * Defines the structure of the response received when an error occurs.
@@ -245,7 +267,6 @@ export interface ErrorResponse {
245
267
  error: string;
246
268
  }
247
269
 
248
-
249
270
  /**
250
271
  * Custom error class for Firecrawl.
251
272
  * Extends the built-in Error class to include a status code.
@@ -679,6 +700,55 @@ export default class FirecrawlApp {
679
700
  return { success: false, error: "Internal server error." };
680
701
  }
681
702
 
703
+ /**
704
+ * Extracts information from URLs using the Firecrawl API.
705
+ * Currently in Beta. Expect breaking changes on future minor versions.
706
+ * @param urls - The URLs to extract information from.
707
+ * @param params - Additional parameters for the extract request.
708
+ * @returns The response from the extract operation.
709
+ */
710
+ async extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse> {
711
+ const headers = this.prepareHeaders();
712
+
713
+ if (!params?.prompt) {
714
+ throw new FirecrawlError("Prompt is required", 400);
715
+ }
716
+
717
+ let jsonData: { urls: string[] } & ExtractParams<T> = { urls, ...params };
718
+ let jsonSchema: any;
719
+ try {
720
+ jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : undefined;
721
+ } catch (error: any) {
722
+ throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
723
+ }
724
+
725
+ try {
726
+ const response: AxiosResponse = await this.postRequest(
727
+ this.apiUrl + `/v1/extract`,
728
+ { ...jsonData, schema: jsonSchema },
729
+ headers
730
+ );
731
+ if (response.status === 200) {
732
+ const responseData = response.data as ExtractResponse<T>;
733
+ if (responseData.success) {
734
+ return {
735
+ success: true,
736
+ data: responseData.data,
737
+ warning: responseData.warning,
738
+ error: responseData.error
739
+ };
740
+ } else {
741
+ throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
742
+ }
743
+ } else {
744
+ this.handleError(response, "extract");
745
+ }
746
+ } catch (error: any) {
747
+ throw new FirecrawlError(error.message, 500);
748
+ }
749
+ return { success: false, error: "Internal server error." };
750
+ }
751
+
682
752
  /**
683
753
  * Prepares the headers for an API request.
684
754
  * @param idempotencyKey - Optional key to ensure idempotency.