@mendable/firecrawl 4.13.0 → 4.13.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -8,7 +8,7 @@ var require_package = __commonJS({
8
8
  "package.json"(exports, module) {
9
9
  module.exports = {
10
10
  name: "@mendable/firecrawl-js",
11
- version: "4.13.0",
11
+ version: "4.13.2",
12
12
  description: "JavaScript SDK for Firecrawl API",
13
13
  main: "dist/index.js",
14
14
  types: "dist/index.d.ts",
@@ -71,7 +71,8 @@ var require_package = __commonJS({
71
71
  },
72
72
  pnpm: {
73
73
  overrides: {
74
- "@isaacs/brace-expansion@<=5.0.0": ">=5.0.1"
74
+ "@isaacs/brace-expansion@<=5.0.0": ">=5.0.1",
75
+ "minimatch@<10.2.1": ">=10.2.1"
75
76
  }
76
77
  }
77
78
  };
package/dist/index.cjs CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
35
35
  "package.json"(exports2, module2) {
36
36
  module2.exports = {
37
37
  name: "@mendable/firecrawl-js",
38
- version: "4.13.0",
38
+ version: "4.13.2",
39
39
  description: "JavaScript SDK for Firecrawl API",
40
40
  main: "dist/index.js",
41
41
  types: "dist/index.d.ts",
@@ -98,7 +98,8 @@ var require_package = __commonJS({
98
98
  },
99
99
  pnpm: {
100
100
  overrides: {
101
- "@isaacs/brace-expansion@<=5.0.0": ">=5.0.1"
101
+ "@isaacs/brace-expansion@<=5.0.0": ">=5.0.1",
102
+ "minimatch@<10.2.1": ">=10.2.1"
102
103
  }
103
104
  }
104
105
  };
@@ -539,12 +540,14 @@ function prepareCrawlPayload(request) {
539
540
  if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
540
541
  if (request.sitemap != null) data.sitemap = request.sitemap;
541
542
  if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
543
+ if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
542
544
  if (request.limit != null) data.limit = request.limit;
543
545
  if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
544
546
  if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
545
547
  if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
546
548
  if (request.delay != null) data.delay = request.delay;
547
549
  if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
550
+ if (request.regexOnFullURL != null) data.regexOnFullURL = request.regexOnFullURL;
548
551
  if (request.webhook != null) data.webhook = request.webhook;
549
552
  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
550
553
  if (request.scrapeOptions) {
@@ -1428,6 +1431,8 @@ var FirecrawlClient = class {
1428
1431
  * Start an extract job (async).
1429
1432
  * @param args Extraction request (urls, schema or prompt, flags).
1430
1433
  * @returns Job id or processing state.
1434
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1435
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1431
1436
  */
1432
1437
  async startExtract(args) {
1433
1438
  return startExtract(this.http, args);
@@ -1435,6 +1440,8 @@ var FirecrawlClient = class {
1435
1440
  /**
1436
1441
  * Get extract job status/data.
1437
1442
  * @param jobId Extract job id.
1443
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1444
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1438
1445
  */
1439
1446
  async getExtractStatus(jobId) {
1440
1447
  return getExtractStatus(this.http, jobId);
@@ -1443,6 +1450,8 @@ var FirecrawlClient = class {
1443
1450
  * Convenience waiter: start an extract and poll until it finishes.
1444
1451
  * @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
1445
1452
  * @returns Final extract response.
1453
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1454
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1446
1455
  */
1447
1456
  async extract(args) {
1448
1457
  return extract(this.http, args);
@@ -2122,10 +2131,11 @@ var FirecrawlApp = class {
2122
2131
  }
2123
2132
  /**
2124
2133
  * Extracts information from URLs using the Firecrawl API.
2125
- * Currently in Beta. Expect breaking changes on future minor versions.
2126
2134
  * @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
2127
2135
  * @param params - Additional parameters for the extract request.
2128
2136
  * @returns The response from the extract operation.
2137
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
2138
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
2129
2139
  */
2130
2140
  async extract(urls, params) {
2131
2141
  const headers = this.prepareHeaders();
@@ -2177,6 +2187,8 @@ var FirecrawlApp = class {
2177
2187
  * @param params - Additional parameters for the extract request.
2178
2188
  * @param idempotencyKey - Optional idempotency key for the request.
2179
2189
  * @returns The response from the extract operation.
2190
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
2191
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
2180
2192
  */
2181
2193
  async asyncExtract(urls, params, idempotencyKey) {
2182
2194
  const headers = this.prepareHeaders(idempotencyKey);
@@ -2202,6 +2214,8 @@ var FirecrawlApp = class {
2202
2214
  * Retrieves the status of an extract job.
2203
2215
  * @param jobId - The ID of the extract job.
2204
2216
  * @returns The status of the extract job.
2217
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
2218
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
2205
2219
  */
2206
2220
  async getExtractStatus(jobId) {
2207
2221
  try {
package/dist/index.d.cts CHANGED
@@ -382,6 +382,7 @@ interface CrawlOptions {
382
382
  maxDiscoveryDepth?: number | null;
383
383
  sitemap?: 'skip' | 'include' | 'only';
384
384
  ignoreQueryParameters?: boolean;
385
+ deduplicateSimilarURLs?: boolean;
385
386
  limit?: number | null;
386
387
  crawlEntireDomain?: boolean;
387
388
  allowExternalLinks?: boolean;
@@ -390,6 +391,7 @@ interface CrawlOptions {
390
391
  maxConcurrency?: number | null;
391
392
  webhook?: string | WebhookConfig | null;
392
393
  scrapeOptions?: ScrapeOptions | null;
394
+ regexOnFullURL?: boolean;
393
395
  zeroDataRetention?: boolean;
394
396
  integration?: string;
395
397
  }
@@ -629,6 +631,10 @@ declare function prepareExtractPayload(args: {
629
631
  integration?: string;
630
632
  agent?: AgentOptions$1;
631
633
  }): Record<string, unknown>;
634
+ /**
635
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
636
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
637
+ */
632
638
  declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
633
639
 
634
640
  declare function prepareAgentPayload(args: {
@@ -820,17 +826,23 @@ declare class FirecrawlClient {
820
826
  * Start an extract job (async).
821
827
  * @param args Extraction request (urls, schema or prompt, flags).
822
828
  * @returns Job id or processing state.
829
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
830
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
823
831
  */
824
832
  startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>;
825
833
  /**
826
834
  * Get extract job status/data.
827
835
  * @param jobId Extract job id.
836
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
837
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
828
838
  */
829
839
  getExtractStatus(jobId: string): Promise<ExtractResponse$1>;
830
840
  /**
831
841
  * Convenience waiter: start an extract and poll until it finishes.
832
842
  * @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
833
843
  * @returns Final extract response.
844
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
845
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
834
846
  */
835
847
  extract(args: Parameters<typeof startExtract>[1] & {
836
848
  pollInterval?: number;
@@ -1599,10 +1611,11 @@ declare class FirecrawlApp {
1599
1611
  checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
1600
1612
  /**
1601
1613
  * Extracts information from URLs using the Firecrawl API.
1602
- * Currently in Beta. Expect breaking changes on future minor versions.
1603
1614
  * @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
1604
1615
  * @param params - Additional parameters for the extract request.
1605
1616
  * @returns The response from the extract operation.
1617
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1618
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1606
1619
  */
1607
1620
  extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
1608
1621
  /**
@@ -1611,12 +1624,16 @@ declare class FirecrawlApp {
1611
1624
  * @param params - Additional parameters for the extract request.
1612
1625
  * @param idempotencyKey - Optional idempotency key for the request.
1613
1626
  * @returns The response from the extract operation.
1627
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1628
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1614
1629
  */
1615
1630
  asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
1616
1631
  /**
1617
1632
  * Retrieves the status of an extract job.
1618
1633
  * @param jobId - The ID of the extract job.
1619
1634
  * @returns The status of the extract job.
1635
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1636
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1620
1637
  */
1621
1638
  getExtractStatus(jobId: string): Promise<any>;
1622
1639
  /**
package/dist/index.d.ts CHANGED
@@ -382,6 +382,7 @@ interface CrawlOptions {
382
382
  maxDiscoveryDepth?: number | null;
383
383
  sitemap?: 'skip' | 'include' | 'only';
384
384
  ignoreQueryParameters?: boolean;
385
+ deduplicateSimilarURLs?: boolean;
385
386
  limit?: number | null;
386
387
  crawlEntireDomain?: boolean;
387
388
  allowExternalLinks?: boolean;
@@ -390,6 +391,7 @@ interface CrawlOptions {
390
391
  maxConcurrency?: number | null;
391
392
  webhook?: string | WebhookConfig | null;
392
393
  scrapeOptions?: ScrapeOptions | null;
394
+ regexOnFullURL?: boolean;
393
395
  zeroDataRetention?: boolean;
394
396
  integration?: string;
395
397
  }
@@ -629,6 +631,10 @@ declare function prepareExtractPayload(args: {
629
631
  integration?: string;
630
632
  agent?: AgentOptions$1;
631
633
  }): Record<string, unknown>;
634
+ /**
635
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
636
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
637
+ */
632
638
  declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
633
639
 
634
640
  declare function prepareAgentPayload(args: {
@@ -820,17 +826,23 @@ declare class FirecrawlClient {
820
826
  * Start an extract job (async).
821
827
  * @param args Extraction request (urls, schema or prompt, flags).
822
828
  * @returns Job id or processing state.
829
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
830
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
823
831
  */
824
832
  startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>;
825
833
  /**
826
834
  * Get extract job status/data.
827
835
  * @param jobId Extract job id.
836
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
837
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
828
838
  */
829
839
  getExtractStatus(jobId: string): Promise<ExtractResponse$1>;
830
840
  /**
831
841
  * Convenience waiter: start an extract and poll until it finishes.
832
842
  * @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
833
843
  * @returns Final extract response.
844
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
845
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
834
846
  */
835
847
  extract(args: Parameters<typeof startExtract>[1] & {
836
848
  pollInterval?: number;
@@ -1599,10 +1611,11 @@ declare class FirecrawlApp {
1599
1611
  checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
1600
1612
  /**
1601
1613
  * Extracts information from URLs using the Firecrawl API.
1602
- * Currently in Beta. Expect breaking changes on future minor versions.
1603
1614
  * @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
1604
1615
  * @param params - Additional parameters for the extract request.
1605
1616
  * @returns The response from the extract operation.
1617
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1618
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1606
1619
  */
1607
1620
  extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
1608
1621
  /**
@@ -1611,12 +1624,16 @@ declare class FirecrawlApp {
1611
1624
  * @param params - Additional parameters for the extract request.
1612
1625
  * @param idempotencyKey - Optional idempotency key for the request.
1613
1626
  * @returns The response from the extract operation.
1627
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1628
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1614
1629
  */
1615
1630
  asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
1616
1631
  /**
1617
1632
  * Retrieves the status of an extract job.
1618
1633
  * @param jobId - The ID of the extract job.
1619
1634
  * @returns The status of the extract job.
1635
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1636
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1620
1637
  */
1621
1638
  getExtractStatus(jobId: string): Promise<any>;
1622
1639
  /**
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-22A4MB4F.js";
3
+ } from "./chunk-YMCI3PLP.js";
4
4
 
5
5
  // src/v2/utils/httpClient.ts
6
6
  import axios from "axios";
@@ -423,12 +423,14 @@ function prepareCrawlPayload(request) {
423
423
  if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
424
424
  if (request.sitemap != null) data.sitemap = request.sitemap;
425
425
  if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
426
+ if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
426
427
  if (request.limit != null) data.limit = request.limit;
427
428
  if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
428
429
  if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
429
430
  if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
430
431
  if (request.delay != null) data.delay = request.delay;
431
432
  if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
433
+ if (request.regexOnFullURL != null) data.regexOnFullURL = request.regexOnFullURL;
432
434
  if (request.webhook != null) data.webhook = request.webhook;
433
435
  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
434
436
  if (request.scrapeOptions) {
@@ -1312,6 +1314,8 @@ var FirecrawlClient = class {
1312
1314
  * Start an extract job (async).
1313
1315
  * @param args Extraction request (urls, schema or prompt, flags).
1314
1316
  * @returns Job id or processing state.
1317
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1318
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1315
1319
  */
1316
1320
  async startExtract(args) {
1317
1321
  return startExtract(this.http, args);
@@ -1319,6 +1323,8 @@ var FirecrawlClient = class {
1319
1323
  /**
1320
1324
  * Get extract job status/data.
1321
1325
  * @param jobId Extract job id.
1326
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1327
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1322
1328
  */
1323
1329
  async getExtractStatus(jobId) {
1324
1330
  return getExtractStatus(this.http, jobId);
@@ -1327,6 +1333,8 @@ var FirecrawlClient = class {
1327
1333
  * Convenience waiter: start an extract and poll until it finishes.
1328
1334
  * @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
1329
1335
  * @returns Final extract response.
1336
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1337
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1330
1338
  */
1331
1339
  async extract(args) {
1332
1340
  return extract(this.http, args);
@@ -1465,7 +1473,7 @@ var FirecrawlApp = class {
1465
1473
  if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
1466
1474
  return process.env.npm_package_version;
1467
1475
  }
1468
- const packageJson = await import("./package-5SIMNZMX.js");
1476
+ const packageJson = await import("./package-HWPUIS3T.js");
1469
1477
  return packageJson.default.version;
1470
1478
  } catch (error) {
1471
1479
  const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
@@ -2006,10 +2014,11 @@ var FirecrawlApp = class {
2006
2014
  }
2007
2015
  /**
2008
2016
  * Extracts information from URLs using the Firecrawl API.
2009
- * Currently in Beta. Expect breaking changes on future minor versions.
2010
2017
  * @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
2011
2018
  * @param params - Additional parameters for the extract request.
2012
2019
  * @returns The response from the extract operation.
2020
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
2021
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
2013
2022
  */
2014
2023
  async extract(urls, params) {
2015
2024
  const headers = this.prepareHeaders();
@@ -2061,6 +2070,8 @@ var FirecrawlApp = class {
2061
2070
  * @param params - Additional parameters for the extract request.
2062
2071
  * @param idempotencyKey - Optional idempotency key for the request.
2063
2072
  * @returns The response from the extract operation.
2073
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
2074
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
2064
2075
  */
2065
2076
  async asyncExtract(urls, params, idempotencyKey) {
2066
2077
  const headers = this.prepareHeaders(idempotencyKey);
@@ -2086,6 +2097,8 @@ var FirecrawlApp = class {
2086
2097
  * Retrieves the status of an extract job.
2087
2098
  * @param jobId - The ID of the extract job.
2088
2099
  * @returns The status of the extract job.
2100
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
2101
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
2089
2102
  */
2090
2103
  async getExtractStatus(jobId) {
2091
2104
  try {
@@ -1,4 +1,4 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-22A4MB4F.js";
3
+ } from "./chunk-YMCI3PLP.js";
4
4
  export default require_package();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl",
3
- "version": "4.13.0",
3
+ "version": "4.13.2",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/v1/index.ts CHANGED
@@ -1278,10 +1278,11 @@ export default class FirecrawlApp {
1278
1278
 
1279
1279
  /**
1280
1280
  * Extracts information from URLs using the Firecrawl API.
1281
- * Currently in Beta. Expect breaking changes on future minor versions.
1282
1281
  * @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
1283
1282
  * @param params - Additional parameters for the extract request.
1284
1283
  * @returns The response from the extract operation.
1284
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1285
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1285
1286
  */
1286
1287
  async extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse> {
1287
1288
  const headers = this.prepareHeaders();
@@ -1337,6 +1338,8 @@ export default class FirecrawlApp {
1337
1338
  * @param params - Additional parameters for the extract request.
1338
1339
  * @param idempotencyKey - Optional idempotency key for the request.
1339
1340
  * @returns The response from the extract operation.
1341
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1342
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1340
1343
  */
1341
1344
  async asyncExtract(
1342
1345
  urls: string[],
@@ -1369,6 +1372,8 @@ export default class FirecrawlApp {
1369
1372
  * Retrieves the status of an extract job.
1370
1373
  * @param jobId - The ID of the extract job.
1371
1374
  * @returns The status of the extract job.
1375
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
1376
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
1372
1377
  */
1373
1378
  async getExtractStatus(jobId: string): Promise<any> {
1374
1379
  try {
package/src/v2/client.ts CHANGED
@@ -255,6 +255,8 @@ export class FirecrawlClient {
255
255
  * Start an extract job (async).
256
256
  * @param args Extraction request (urls, schema or prompt, flags).
257
257
  * @returns Job id or processing state.
258
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
259
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
258
260
  */
259
261
  async startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse> {
260
262
  return startExtract(this.http, args);
@@ -262,6 +264,8 @@ export class FirecrawlClient {
262
264
  /**
263
265
  * Get extract job status/data.
264
266
  * @param jobId Extract job id.
267
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
268
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
265
269
  */
266
270
  async getExtractStatus(jobId: string): Promise<ExtractResponse> {
267
271
  return getExtractStatus(this.http, jobId);
@@ -270,6 +274,8 @@ export class FirecrawlClient {
270
274
  * Convenience waiter: start an extract and poll until it finishes.
271
275
  * @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
272
276
  * @returns Final extract response.
277
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
278
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
273
279
  */
274
280
  async extract(args: Parameters<typeof startExtract>[1] & { pollInterval?: number; timeout?: number }): Promise<ExtractResponse> {
275
281
  return extractWaiter(this.http, args);
@@ -28,12 +28,14 @@ function prepareCrawlPayload(request: CrawlRequest): Record<string, unknown> {
28
28
  if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
29
29
  if (request.sitemap != null) data.sitemap = request.sitemap;
30
30
  if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
31
+ if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
31
32
  if (request.limit != null) data.limit = request.limit;
32
33
  if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
33
34
  if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
34
35
  if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
35
36
  if (request.delay != null) data.delay = request.delay;
36
37
  if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
38
+ if (request.regexOnFullURL != null) data.regexOnFullURL = request.regexOnFullURL;
37
39
  if (request.webhook != null) data.webhook = request.webhook;
38
40
  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
39
41
  if (request.scrapeOptions) {
@@ -38,6 +38,10 @@ function prepareExtractPayload(args: {
38
38
  return body;
39
39
  }
40
40
 
41
+ /**
42
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
43
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
44
+ */
41
45
  export async function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse> {
42
46
  const payload = prepareExtractPayload(args);
43
47
  try {
@@ -50,6 +54,10 @@ export async function startExtract(http: HttpClient, args: Parameters<typeof pre
50
54
  }
51
55
  }
52
56
 
57
+ /**
58
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
59
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
60
+ */
53
61
  export async function getExtractStatus(http: HttpClient, jobId: string): Promise<ExtractResponse> {
54
62
  try {
55
63
  const res = await http.get<ExtractResponse>(`/v2/extract/${jobId}`);
@@ -61,6 +69,10 @@ export async function getExtractStatus(http: HttpClient, jobId: string): Promise
61
69
  }
62
70
  }
63
71
 
72
+ /**
73
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
74
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
75
+ */
64
76
  export async function waitExtract(
65
77
  http: HttpClient,
66
78
  jobId: string,
@@ -76,6 +88,10 @@ export async function waitExtract(
76
88
  }
77
89
  }
78
90
 
91
+ /**
92
+ * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
93
+ * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
94
+ */
79
95
  export async function extract(
80
96
  http: HttpClient,
81
97
  args: Parameters<typeof prepareExtractPayload>[0] & { pollInterval?: number; timeout?: number }
package/src/v2/types.ts CHANGED
@@ -462,6 +462,7 @@ export interface CrawlOptions {
462
462
  maxDiscoveryDepth?: number | null;
463
463
  sitemap?: 'skip' | 'include' | 'only';
464
464
  ignoreQueryParameters?: boolean;
465
+ deduplicateSimilarURLs?: boolean;
465
466
  limit?: number | null;
466
467
  crawlEntireDomain?: boolean;
467
468
  allowExternalLinks?: boolean;
@@ -470,6 +471,7 @@ export interface CrawlOptions {
470
471
  maxConcurrency?: number | null;
471
472
  webhook?: string | WebhookConfig | null;
472
473
  scrapeOptions?: ScrapeOptions | null;
474
+ regexOnFullURL?: boolean;
473
475
  zeroDataRetention?: boolean;
474
476
  integration?: string;
475
477
  }