@mendable/firecrawl 4.13.0 → 4.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-22A4MB4F.js → chunk-YMCI3PLP.js} +3 -2
- package/dist/index.cjs +17 -3
- package/dist/index.d.cts +18 -1
- package/dist/index.d.ts +18 -1
- package/dist/index.js +16 -3
- package/dist/{package-5SIMNZMX.js → package-HWPUIS3T.js} +1 -1
- package/package.json +1 -1
- package/src/v1/index.ts +6 -1
- package/src/v2/client.ts +6 -0
- package/src/v2/methods/crawl.ts +2 -0
- package/src/v2/methods/extract.ts +16 -0
- package/src/v2/types.ts +2 -0
|
@@ -8,7 +8,7 @@ var require_package = __commonJS({
|
|
|
8
8
|
"package.json"(exports, module) {
|
|
9
9
|
module.exports = {
|
|
10
10
|
name: "@mendable/firecrawl-js",
|
|
11
|
-
version: "4.13.0",
|
|
11
|
+
version: "4.13.2",
|
|
12
12
|
description: "JavaScript SDK for Firecrawl API",
|
|
13
13
|
main: "dist/index.js",
|
|
14
14
|
types: "dist/index.d.ts",
|
|
@@ -71,7 +71,8 @@ var require_package = __commonJS({
|
|
|
71
71
|
},
|
|
72
72
|
pnpm: {
|
|
73
73
|
overrides: {
|
|
74
|
-
"@isaacs/brace-expansion@<=5.0.0": ">=5.0.1"
|
|
74
|
+
"@isaacs/brace-expansion@<=5.0.0": ">=5.0.1",
|
|
75
|
+
"minimatch@<10.2.1": ">=10.2.1"
|
|
75
76
|
}
|
|
76
77
|
}
|
|
77
78
|
};
|
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "4.13.0",
|
|
38
|
+
version: "4.13.2",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -98,7 +98,8 @@ var require_package = __commonJS({
|
|
|
98
98
|
},
|
|
99
99
|
pnpm: {
|
|
100
100
|
overrides: {
|
|
101
|
-
"@isaacs/brace-expansion@<=5.0.0": ">=5.0.1"
|
|
101
|
+
"@isaacs/brace-expansion@<=5.0.0": ">=5.0.1",
|
|
102
|
+
"minimatch@<10.2.1": ">=10.2.1"
|
|
102
103
|
}
|
|
103
104
|
}
|
|
104
105
|
};
|
|
@@ -539,12 +540,14 @@ function prepareCrawlPayload(request) {
|
|
|
539
540
|
if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
|
|
540
541
|
if (request.sitemap != null) data.sitemap = request.sitemap;
|
|
541
542
|
if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
|
|
543
|
+
if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
|
|
542
544
|
if (request.limit != null) data.limit = request.limit;
|
|
543
545
|
if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
|
|
544
546
|
if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
|
|
545
547
|
if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
|
|
546
548
|
if (request.delay != null) data.delay = request.delay;
|
|
547
549
|
if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
|
|
550
|
+
if (request.regexOnFullURL != null) data.regexOnFullURL = request.regexOnFullURL;
|
|
548
551
|
if (request.webhook != null) data.webhook = request.webhook;
|
|
549
552
|
if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
|
|
550
553
|
if (request.scrapeOptions) {
|
|
@@ -1428,6 +1431,8 @@ var FirecrawlClient = class {
|
|
|
1428
1431
|
* Start an extract job (async).
|
|
1429
1432
|
* @param args Extraction request (urls, schema or prompt, flags).
|
|
1430
1433
|
* @returns Job id or processing state.
|
|
1434
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1435
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1431
1436
|
*/
|
|
1432
1437
|
async startExtract(args) {
|
|
1433
1438
|
return startExtract(this.http, args);
|
|
@@ -1435,6 +1440,8 @@ var FirecrawlClient = class {
|
|
|
1435
1440
|
/**
|
|
1436
1441
|
* Get extract job status/data.
|
|
1437
1442
|
* @param jobId Extract job id.
|
|
1443
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1444
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1438
1445
|
*/
|
|
1439
1446
|
async getExtractStatus(jobId) {
|
|
1440
1447
|
return getExtractStatus(this.http, jobId);
|
|
@@ -1443,6 +1450,8 @@ var FirecrawlClient = class {
|
|
|
1443
1450
|
* Convenience waiter: start an extract and poll until it finishes.
|
|
1444
1451
|
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
|
|
1445
1452
|
* @returns Final extract response.
|
|
1453
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1454
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1446
1455
|
*/
|
|
1447
1456
|
async extract(args) {
|
|
1448
1457
|
return extract(this.http, args);
|
|
@@ -2122,10 +2131,11 @@ var FirecrawlApp = class {
|
|
|
2122
2131
|
}
|
|
2123
2132
|
/**
|
|
2124
2133
|
* Extracts information from URLs using the Firecrawl API.
|
|
2125
|
-
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
2126
2134
|
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
2127
2135
|
* @param params - Additional parameters for the extract request.
|
|
2128
2136
|
* @returns The response from the extract operation.
|
|
2137
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
2138
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
2129
2139
|
*/
|
|
2130
2140
|
async extract(urls, params) {
|
|
2131
2141
|
const headers = this.prepareHeaders();
|
|
@@ -2177,6 +2187,8 @@ var FirecrawlApp = class {
|
|
|
2177
2187
|
* @param params - Additional parameters for the extract request.
|
|
2178
2188
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
2179
2189
|
* @returns The response from the extract operation.
|
|
2190
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
2191
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
2180
2192
|
*/
|
|
2181
2193
|
async asyncExtract(urls, params, idempotencyKey) {
|
|
2182
2194
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
@@ -2202,6 +2214,8 @@ var FirecrawlApp = class {
|
|
|
2202
2214
|
* Retrieves the status of an extract job.
|
|
2203
2215
|
* @param jobId - The ID of the extract job.
|
|
2204
2216
|
* @returns The status of the extract job.
|
|
2217
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
2218
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
2205
2219
|
*/
|
|
2206
2220
|
async getExtractStatus(jobId) {
|
|
2207
2221
|
try {
|
package/dist/index.d.cts
CHANGED
|
@@ -382,6 +382,7 @@ interface CrawlOptions {
|
|
|
382
382
|
maxDiscoveryDepth?: number | null;
|
|
383
383
|
sitemap?: 'skip' | 'include' | 'only';
|
|
384
384
|
ignoreQueryParameters?: boolean;
|
|
385
|
+
deduplicateSimilarURLs?: boolean;
|
|
385
386
|
limit?: number | null;
|
|
386
387
|
crawlEntireDomain?: boolean;
|
|
387
388
|
allowExternalLinks?: boolean;
|
|
@@ -390,6 +391,7 @@ interface CrawlOptions {
|
|
|
390
391
|
maxConcurrency?: number | null;
|
|
391
392
|
webhook?: string | WebhookConfig | null;
|
|
392
393
|
scrapeOptions?: ScrapeOptions | null;
|
|
394
|
+
regexOnFullURL?: boolean;
|
|
393
395
|
zeroDataRetention?: boolean;
|
|
394
396
|
integration?: string;
|
|
395
397
|
}
|
|
@@ -629,6 +631,10 @@ declare function prepareExtractPayload(args: {
|
|
|
629
631
|
integration?: string;
|
|
630
632
|
agent?: AgentOptions$1;
|
|
631
633
|
}): Record<string, unknown>;
|
|
634
|
+
/**
|
|
635
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
636
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
637
|
+
*/
|
|
632
638
|
declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
|
|
633
639
|
|
|
634
640
|
declare function prepareAgentPayload(args: {
|
|
@@ -820,17 +826,23 @@ declare class FirecrawlClient {
|
|
|
820
826
|
* Start an extract job (async).
|
|
821
827
|
* @param args Extraction request (urls, schema or prompt, flags).
|
|
822
828
|
* @returns Job id or processing state.
|
|
829
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
830
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
823
831
|
*/
|
|
824
832
|
startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>;
|
|
825
833
|
/**
|
|
826
834
|
* Get extract job status/data.
|
|
827
835
|
* @param jobId Extract job id.
|
|
836
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
837
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
828
838
|
*/
|
|
829
839
|
getExtractStatus(jobId: string): Promise<ExtractResponse$1>;
|
|
830
840
|
/**
|
|
831
841
|
* Convenience waiter: start an extract and poll until it finishes.
|
|
832
842
|
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
|
|
833
843
|
* @returns Final extract response.
|
|
844
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
845
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
834
846
|
*/
|
|
835
847
|
extract(args: Parameters<typeof startExtract>[1] & {
|
|
836
848
|
pollInterval?: number;
|
|
@@ -1599,10 +1611,11 @@ declare class FirecrawlApp {
|
|
|
1599
1611
|
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
1600
1612
|
/**
|
|
1601
1613
|
* Extracts information from URLs using the Firecrawl API.
|
|
1602
|
-
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
1603
1614
|
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
1604
1615
|
* @param params - Additional parameters for the extract request.
|
|
1605
1616
|
* @returns The response from the extract operation.
|
|
1617
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1618
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1606
1619
|
*/
|
|
1607
1620
|
extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
1608
1621
|
/**
|
|
@@ -1611,12 +1624,16 @@ declare class FirecrawlApp {
|
|
|
1611
1624
|
* @param params - Additional parameters for the extract request.
|
|
1612
1625
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
1613
1626
|
* @returns The response from the extract operation.
|
|
1627
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1628
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1614
1629
|
*/
|
|
1615
1630
|
asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
|
|
1616
1631
|
/**
|
|
1617
1632
|
* Retrieves the status of an extract job.
|
|
1618
1633
|
* @param jobId - The ID of the extract job.
|
|
1619
1634
|
* @returns The status of the extract job.
|
|
1635
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1636
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1620
1637
|
*/
|
|
1621
1638
|
getExtractStatus(jobId: string): Promise<any>;
|
|
1622
1639
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -382,6 +382,7 @@ interface CrawlOptions {
|
|
|
382
382
|
maxDiscoveryDepth?: number | null;
|
|
383
383
|
sitemap?: 'skip' | 'include' | 'only';
|
|
384
384
|
ignoreQueryParameters?: boolean;
|
|
385
|
+
deduplicateSimilarURLs?: boolean;
|
|
385
386
|
limit?: number | null;
|
|
386
387
|
crawlEntireDomain?: boolean;
|
|
387
388
|
allowExternalLinks?: boolean;
|
|
@@ -390,6 +391,7 @@ interface CrawlOptions {
|
|
|
390
391
|
maxConcurrency?: number | null;
|
|
391
392
|
webhook?: string | WebhookConfig | null;
|
|
392
393
|
scrapeOptions?: ScrapeOptions | null;
|
|
394
|
+
regexOnFullURL?: boolean;
|
|
393
395
|
zeroDataRetention?: boolean;
|
|
394
396
|
integration?: string;
|
|
395
397
|
}
|
|
@@ -629,6 +631,10 @@ declare function prepareExtractPayload(args: {
|
|
|
629
631
|
integration?: string;
|
|
630
632
|
agent?: AgentOptions$1;
|
|
631
633
|
}): Record<string, unknown>;
|
|
634
|
+
/**
|
|
635
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
636
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
637
|
+
*/
|
|
632
638
|
declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
|
|
633
639
|
|
|
634
640
|
declare function prepareAgentPayload(args: {
|
|
@@ -820,17 +826,23 @@ declare class FirecrawlClient {
|
|
|
820
826
|
* Start an extract job (async).
|
|
821
827
|
* @param args Extraction request (urls, schema or prompt, flags).
|
|
822
828
|
* @returns Job id or processing state.
|
|
829
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
830
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
823
831
|
*/
|
|
824
832
|
startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>;
|
|
825
833
|
/**
|
|
826
834
|
* Get extract job status/data.
|
|
827
835
|
* @param jobId Extract job id.
|
|
836
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
837
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
828
838
|
*/
|
|
829
839
|
getExtractStatus(jobId: string): Promise<ExtractResponse$1>;
|
|
830
840
|
/**
|
|
831
841
|
* Convenience waiter: start an extract and poll until it finishes.
|
|
832
842
|
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
|
|
833
843
|
* @returns Final extract response.
|
|
844
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
845
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
834
846
|
*/
|
|
835
847
|
extract(args: Parameters<typeof startExtract>[1] & {
|
|
836
848
|
pollInterval?: number;
|
|
@@ -1599,10 +1611,11 @@ declare class FirecrawlApp {
|
|
|
1599
1611
|
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
1600
1612
|
/**
|
|
1601
1613
|
* Extracts information from URLs using the Firecrawl API.
|
|
1602
|
-
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
1603
1614
|
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
1604
1615
|
* @param params - Additional parameters for the extract request.
|
|
1605
1616
|
* @returns The response from the extract operation.
|
|
1617
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1618
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1606
1619
|
*/
|
|
1607
1620
|
extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
1608
1621
|
/**
|
|
@@ -1611,12 +1624,16 @@ declare class FirecrawlApp {
|
|
|
1611
1624
|
* @param params - Additional parameters for the extract request.
|
|
1612
1625
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
1613
1626
|
* @returns The response from the extract operation.
|
|
1627
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1628
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1614
1629
|
*/
|
|
1615
1630
|
asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
|
|
1616
1631
|
/**
|
|
1617
1632
|
* Retrieves the status of an extract job.
|
|
1618
1633
|
* @param jobId - The ID of the extract job.
|
|
1619
1634
|
* @returns The status of the extract job.
|
|
1635
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1636
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1620
1637
|
*/
|
|
1621
1638
|
getExtractStatus(jobId: string): Promise<any>;
|
|
1622
1639
|
/**
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-22A4MB4F.js";
|
|
3
|
+
} from "./chunk-YMCI3PLP.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -423,12 +423,14 @@ function prepareCrawlPayload(request) {
|
|
|
423
423
|
if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
|
|
424
424
|
if (request.sitemap != null) data.sitemap = request.sitemap;
|
|
425
425
|
if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
|
|
426
|
+
if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
|
|
426
427
|
if (request.limit != null) data.limit = request.limit;
|
|
427
428
|
if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
|
|
428
429
|
if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
|
|
429
430
|
if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
|
|
430
431
|
if (request.delay != null) data.delay = request.delay;
|
|
431
432
|
if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
|
|
433
|
+
if (request.regexOnFullURL != null) data.regexOnFullURL = request.regexOnFullURL;
|
|
432
434
|
if (request.webhook != null) data.webhook = request.webhook;
|
|
433
435
|
if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
|
|
434
436
|
if (request.scrapeOptions) {
|
|
@@ -1312,6 +1314,8 @@ var FirecrawlClient = class {
|
|
|
1312
1314
|
* Start an extract job (async).
|
|
1313
1315
|
* @param args Extraction request (urls, schema or prompt, flags).
|
|
1314
1316
|
* @returns Job id or processing state.
|
|
1317
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1318
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1315
1319
|
*/
|
|
1316
1320
|
async startExtract(args) {
|
|
1317
1321
|
return startExtract(this.http, args);
|
|
@@ -1319,6 +1323,8 @@ var FirecrawlClient = class {
|
|
|
1319
1323
|
/**
|
|
1320
1324
|
* Get extract job status/data.
|
|
1321
1325
|
* @param jobId Extract job id.
|
|
1326
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1327
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1322
1328
|
*/
|
|
1323
1329
|
async getExtractStatus(jobId) {
|
|
1324
1330
|
return getExtractStatus(this.http, jobId);
|
|
@@ -1327,6 +1333,8 @@ var FirecrawlClient = class {
|
|
|
1327
1333
|
* Convenience waiter: start an extract and poll until it finishes.
|
|
1328
1334
|
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
|
|
1329
1335
|
* @returns Final extract response.
|
|
1336
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1337
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1330
1338
|
*/
|
|
1331
1339
|
async extract(args) {
|
|
1332
1340
|
return extract(this.http, args);
|
|
@@ -1465,7 +1473,7 @@ var FirecrawlApp = class {
|
|
|
1465
1473
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
1466
1474
|
return process.env.npm_package_version;
|
|
1467
1475
|
}
|
|
1468
|
-
const packageJson = await import("./package-5SIMNZMX.js");
|
|
1476
|
+
const packageJson = await import("./package-HWPUIS3T.js");
|
|
1469
1477
|
return packageJson.default.version;
|
|
1470
1478
|
} catch (error) {
|
|
1471
1479
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
|
@@ -2006,10 +2014,11 @@ var FirecrawlApp = class {
|
|
|
2006
2014
|
}
|
|
2007
2015
|
/**
|
|
2008
2016
|
* Extracts information from URLs using the Firecrawl API.
|
|
2009
|
-
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
2010
2017
|
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
2011
2018
|
* @param params - Additional parameters for the extract request.
|
|
2012
2019
|
* @returns The response from the extract operation.
|
|
2020
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
2021
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
2013
2022
|
*/
|
|
2014
2023
|
async extract(urls, params) {
|
|
2015
2024
|
const headers = this.prepareHeaders();
|
|
@@ -2061,6 +2070,8 @@ var FirecrawlApp = class {
|
|
|
2061
2070
|
* @param params - Additional parameters for the extract request.
|
|
2062
2071
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
2063
2072
|
* @returns The response from the extract operation.
|
|
2073
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
2074
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
2064
2075
|
*/
|
|
2065
2076
|
async asyncExtract(urls, params, idempotencyKey) {
|
|
2066
2077
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
@@ -2086,6 +2097,8 @@ var FirecrawlApp = class {
|
|
|
2086
2097
|
* Retrieves the status of an extract job.
|
|
2087
2098
|
* @param jobId - The ID of the extract job.
|
|
2088
2099
|
* @returns The status of the extract job.
|
|
2100
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
2101
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
2089
2102
|
*/
|
|
2090
2103
|
async getExtractStatus(jobId) {
|
|
2091
2104
|
try {
|
package/package.json
CHANGED
package/src/v1/index.ts
CHANGED
|
@@ -1278,10 +1278,11 @@ export default class FirecrawlApp {
|
|
|
1278
1278
|
|
|
1279
1279
|
/**
|
|
1280
1280
|
* Extracts information from URLs using the Firecrawl API.
|
|
1281
|
-
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
1282
1281
|
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
1283
1282
|
* @param params - Additional parameters for the extract request.
|
|
1284
1283
|
* @returns The response from the extract operation.
|
|
1284
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1285
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1285
1286
|
*/
|
|
1286
1287
|
async extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse> {
|
|
1287
1288
|
const headers = this.prepareHeaders();
|
|
@@ -1337,6 +1338,8 @@ export default class FirecrawlApp {
|
|
|
1337
1338
|
* @param params - Additional parameters for the extract request.
|
|
1338
1339
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
1339
1340
|
* @returns The response from the extract operation.
|
|
1341
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1342
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1340
1343
|
*/
|
|
1341
1344
|
async asyncExtract(
|
|
1342
1345
|
urls: string[],
|
|
@@ -1369,6 +1372,8 @@ export default class FirecrawlApp {
|
|
|
1369
1372
|
* Retrieves the status of an extract job.
|
|
1370
1373
|
* @param jobId - The ID of the extract job.
|
|
1371
1374
|
* @returns The status of the extract job.
|
|
1375
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
1376
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
1372
1377
|
*/
|
|
1373
1378
|
async getExtractStatus(jobId: string): Promise<any> {
|
|
1374
1379
|
try {
|
package/src/v2/client.ts
CHANGED
|
@@ -255,6 +255,8 @@ export class FirecrawlClient {
|
|
|
255
255
|
* Start an extract job (async).
|
|
256
256
|
* @param args Extraction request (urls, schema or prompt, flags).
|
|
257
257
|
* @returns Job id or processing state.
|
|
258
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
259
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
258
260
|
*/
|
|
259
261
|
async startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse> {
|
|
260
262
|
return startExtract(this.http, args);
|
|
@@ -262,6 +264,8 @@ export class FirecrawlClient {
|
|
|
262
264
|
/**
|
|
263
265
|
* Get extract job status/data.
|
|
264
266
|
* @param jobId Extract job id.
|
|
267
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
268
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
265
269
|
*/
|
|
266
270
|
async getExtractStatus(jobId: string): Promise<ExtractResponse> {
|
|
267
271
|
return getExtractStatus(this.http, jobId);
|
|
@@ -270,6 +274,8 @@ export class FirecrawlClient {
|
|
|
270
274
|
* Convenience waiter: start an extract and poll until it finishes.
|
|
271
275
|
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
|
|
272
276
|
* @returns Final extract response.
|
|
277
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
278
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
273
279
|
*/
|
|
274
280
|
async extract(args: Parameters<typeof startExtract>[1] & { pollInterval?: number; timeout?: number }): Promise<ExtractResponse> {
|
|
275
281
|
return extractWaiter(this.http, args);
|
package/src/v2/methods/crawl.ts
CHANGED
|
@@ -28,12 +28,14 @@ function prepareCrawlPayload(request: CrawlRequest): Record<string, unknown> {
|
|
|
28
28
|
if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
|
|
29
29
|
if (request.sitemap != null) data.sitemap = request.sitemap;
|
|
30
30
|
if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
|
|
31
|
+
if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
|
|
31
32
|
if (request.limit != null) data.limit = request.limit;
|
|
32
33
|
if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
|
|
33
34
|
if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
|
|
34
35
|
if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
|
|
35
36
|
if (request.delay != null) data.delay = request.delay;
|
|
36
37
|
if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
|
|
38
|
+
if (request.regexOnFullURL != null) data.regexOnFullURL = request.regexOnFullURL;
|
|
37
39
|
if (request.webhook != null) data.webhook = request.webhook;
|
|
38
40
|
if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
|
|
39
41
|
if (request.scrapeOptions) {
|
|
package/src/v2/methods/extract.ts
CHANGED
|
@@ -38,6 +38,10 @@ function prepareExtractPayload(args: {
|
|
|
38
38
|
return body;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
/**
|
|
42
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
43
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
44
|
+
*/
|
|
41
45
|
export async function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse> {
|
|
42
46
|
const payload = prepareExtractPayload(args);
|
|
43
47
|
try {
|
|
@@ -50,6 +54,10 @@ export async function startExtract(http: HttpClient, args: Parameters<typeof pre
|
|
|
50
54
|
}
|
|
51
55
|
}
|
|
52
56
|
|
|
57
|
+
/**
|
|
58
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
59
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
60
|
+
*/
|
|
53
61
|
export async function getExtractStatus(http: HttpClient, jobId: string): Promise<ExtractResponse> {
|
|
54
62
|
try {
|
|
55
63
|
const res = await http.get<ExtractResponse>(`/v2/extract/${jobId}`);
|
|
@@ -61,6 +69,10 @@ export async function getExtractStatus(http: HttpClient, jobId: string): Promise
|
|
|
61
69
|
}
|
|
62
70
|
}
|
|
63
71
|
|
|
72
|
+
/**
|
|
73
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
74
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
75
|
+
*/
|
|
64
76
|
export async function waitExtract(
|
|
65
77
|
http: HttpClient,
|
|
66
78
|
jobId: string,
|
|
@@ -76,6 +88,10 @@ export async function waitExtract(
|
|
|
76
88
|
}
|
|
77
89
|
}
|
|
78
90
|
|
|
91
|
+
/**
|
|
92
|
+
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
|
|
93
|
+
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
|
|
94
|
+
*/
|
|
79
95
|
export async function extract(
|
|
80
96
|
http: HttpClient,
|
|
81
97
|
args: Parameters<typeof prepareExtractPayload>[0] & { pollInterval?: number; timeout?: number }
|
package/src/v2/types.ts
CHANGED
|
@@ -462,6 +462,7 @@ export interface CrawlOptions {
|
|
|
462
462
|
maxDiscoveryDepth?: number | null;
|
|
463
463
|
sitemap?: 'skip' | 'include' | 'only';
|
|
464
464
|
ignoreQueryParameters?: boolean;
|
|
465
|
+
deduplicateSimilarURLs?: boolean;
|
|
465
466
|
limit?: number | null;
|
|
466
467
|
crawlEntireDomain?: boolean;
|
|
467
468
|
allowExternalLinks?: boolean;
|
|
@@ -470,6 +471,7 @@ export interface CrawlOptions {
|
|
|
470
471
|
maxConcurrency?: number | null;
|
|
471
472
|
webhook?: string | WebhookConfig | null;
|
|
472
473
|
scrapeOptions?: ScrapeOptions | null;
|
|
474
|
+
regexOnFullURL?: boolean;
|
|
473
475
|
zeroDataRetention?: boolean;
|
|
474
476
|
integration?: string;
|
|
475
477
|
}
|