@mendable/firecrawl 4.3.1 → 4.3.3
This diff reflects the published contents of the two package versions as they appear in the public registry, and is provided for informational purposes only.
- package/dist/{chunk-VFP5PS5Z.js → chunk-7RR2L6KO.js} +1 -1
- package/dist/index.cjs +9 -4
- package/dist/index.d.cts +19 -8
- package/dist/index.d.ts +19 -8
- package/dist/index.js +10 -5
- package/dist/{package-ZQ5CJHCA.js → package-FLU7SABK.js} +1 -1
- package/package.json +1 -1
- package/src/v1/index.ts +10 -4
- package/src/v2/methods/batch.ts +2 -2
- package/src/v2/methods/crawl.ts +1 -0
- package/src/v2/methods/extract.ts +2 -0
- package/src/v2/methods/map.ts +5 -3
- package/src/v2/methods/search.ts +1 -0
- package/src/v2/types.ts +6 -1
package/dist/{chunk-VFP5PS5Z.js → chunk-7RR2L6KO.js}
RENAMED
@@ -8,7 +8,7 @@ var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.3.1",
+      version: "4.3.3",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
package/dist/index.cjs
CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
   "package.json"(exports2, module2) {
     module2.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.3.1",
+      version: "4.3.3",
      description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
@@ -334,6 +334,7 @@ function prepareSearchPayload(req) {
   if (req.location != null) payload.location = req.location;
   if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
   if (req.timeout != null) payload.timeout = req.timeout;
+  if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
   if (req.scrapeOptions) {
     ensureValidScrapeOptions(req.scrapeOptions);
     payload.scrapeOptions = req.scrapeOptions;
@@ -384,6 +385,8 @@ function prepareMapPayload(url, options) {
     if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
     if (options.limit != null) payload.limit = options.limit;
     if (options.timeout != null) payload.timeout = options.timeout;
+    if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
+    if (options.location != null) payload.location = options.location;
   }
   return payload;
 }
@@ -455,6 +458,7 @@ function prepareCrawlPayload(request) {
   if (request.delay != null) data.delay = request.delay;
   if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
   if (request.webhook != null) data.webhook = request.webhook;
+  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
   if (request.scrapeOptions) {
     ensureValidScrapeOptions(request.scrapeOptions);
     data.scrapeOptions = request.scrapeOptions;
@@ -581,8 +585,8 @@ async function startBatchScrape(http, urls, {
   ignoreInvalidURLs,
   maxConcurrency,
   zeroDataRetention,
-  integration,
-  idempotencyKey
+  idempotencyKey,
+  integration
 } = {}) {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
   const payload = { urls };
@@ -595,7 +599,7 @@ async function startBatchScrape(http, urls, {
   if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
   if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
   if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
-  if (integration != null) payload.integration = integration;
+  if (integration != null && integration.trim()) payload.integration = integration.trim();
   try {
     const headers = http.prepareHeaders(idempotencyKey);
     const res = await http.post("/v2/batch/scrape", payload, headers);
@@ -692,6 +696,7 @@ function prepareExtractPayload(args) {
   if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
   if (args.showSources != null) body.showSources = args.showSources;
   if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
+  if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
   if (args.scrapeOptions) {
     ensureValidScrapeOptions(args.scrapeOptions);
     body.scrapeOptions = args.scrapeOptions;
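The recurring change in this bundle is the guard around the new `integration` option: the value is trimmed before being sent, and whitespace-only strings are dropped instead of serialized. A minimal sketch of the pattern, using a hypothetical `applyIntegration` helper that is not part of the SDK:

// Hypothetical helper mirroring the guard added to prepareSearchPayload,
// prepareMapPayload, prepareCrawlPayload, startBatchScrape, and
// prepareExtractPayload in this release.
function applyIntegration(payload: Record<string, unknown>, integration?: string | null): void {
  // Skip null/undefined, and skip strings that are empty after trimming.
  if (integration != null && integration.trim()) payload.integration = integration.trim();
}

const a: Record<string, unknown> = {};
applyIntegration(a, "  my-app  ");
console.log(a); // { integration: "my-app" } — trimmed before sending

const b: Record<string, unknown> = {};
applyIntegration(b, "   ");
console.log(b); // {} — whitespace-only values are no longer sent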
package/dist/index.d.cts
CHANGED
@@ -41,7 +41,7 @@ interface AttributesFormat extends Format {
   }>;
 }
 type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat;
-interface LocationConfig {
+interface LocationConfig$1 {
   country?: string;
   languages?: string[];
 }
@@ -104,7 +104,7 @@ interface ScrapeOptions {
     maxPages?: number;
   }>;
   actions?: ActionOption[];
-  location?: LocationConfig;
+  location?: LocationConfig$1;
   skipTlsVerification?: boolean;
   removeBase64Images?: boolean;
   fastMode?: boolean;
@@ -113,6 +113,7 @@ interface ScrapeOptions {
   proxy?: "basic" | "stealth" | "auto" | string;
   maxAge?: number;
   storeInCache?: boolean;
+  integration?: string;
 }
 interface WebhookConfig {
   url: string;
@@ -207,6 +208,7 @@ interface SearchRequest {
   ignoreInvalidURLs?: boolean;
   timeout?: number;
   scrapeOptions?: ScrapeOptions;
+  integration?: string;
 }
 interface CrawlOptions {
   prompt?: string | null;
@@ -224,6 +226,7 @@ interface CrawlOptions {
   webhook?: string | WebhookConfig | null;
   scrapeOptions?: ScrapeOptions | null;
   zeroDataRetention?: boolean;
+  integration?: string;
 }
 interface CrawlResponse$1 {
   id: string;
@@ -245,8 +248,8 @@ interface BatchScrapeOptions {
   ignoreInvalidURLs?: boolean;
   maxConcurrency?: number;
   zeroDataRetention?: boolean;
-  integration?: string;
   idempotencyKey?: string;
+  integration?: string;
 }
 interface BatchScrapeResponse$1 {
   id: string;
@@ -271,6 +274,8 @@ interface MapOptions {
   includeSubdomains?: boolean;
   limit?: number;
   timeout?: number;
+  integration?: string;
+  location?: LocationConfig$1;
 }
 interface ExtractResponse$1 {
   success?: boolean;
@@ -393,6 +398,7 @@ declare function prepareExtractPayload(args: {
   showSources?: boolean;
   scrapeOptions?: ScrapeOptions;
   ignoreInvalidURLs?: boolean;
+  integration?: string;
 }): Record<string, unknown>;
 declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
 
@@ -687,6 +693,13 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
   title?: string;
   description?: string;
 }
+/**
+ * Location configuration for proxy location
+ */
+interface LocationConfig {
+  country?: string;
+  languages?: string[];
+}
 /**
  * Parameters for scraping operations.
  * Defines the options and configurations available for scraping web content.
@@ -699,10 +712,7 @@ interface CrawlScrapeOptions {
   onlyMainContent?: boolean;
   waitFor?: number;
   timeout?: number;
-  location?: {
-    country?: string;
-    languages?: string[];
-  };
+  location?: LocationConfig;
   mobile?: boolean;
   skipTlsVerification?: boolean;
   removeBase64Images?: boolean;
@@ -875,6 +885,7 @@ interface MapParams {
   limit?: number;
   timeout?: number;
   useIndex?: boolean;
+  location?: LocationConfig;
 }
 /**
  * Response interface for mapping operations.
@@ -1487,4 +1498,4 @@ declare class Firecrawl extends FirecrawlClient {
   get v1(): FirecrawlApp;
 }
 
-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
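The `$1` suffix in the declaration bundle is the usual collision rename: the flattened .d.cts now contains two interfaces named `LocationConfig` (the existing v2 one and the new v1 one), so the bundler renames the v2 declaration and aliases it back on export. A minimal sketch of the pattern, assuming nothing beyond what the diff shows:

// v2's interface, renamed inside the bundle to avoid the name clash:
interface LocationConfig$1 {
  country?: string;
  languages?: string[];
}

// v1's new interface keeps the plain name inside the bundle:
interface LocationConfig {
  country?: string;
  languages?: string[];
}

// The public surface is unchanged: consumers still import `LocationConfig`,
// which resolves to the v2 declaration via the export alias.
export { type LocationConfig$1 as LocationConfig };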
package/dist/index.d.ts
CHANGED
@@ -41,7 +41,7 @@ interface AttributesFormat extends Format {
   }>;
 }
 type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat;
-interface LocationConfig {
+interface LocationConfig$1 {
   country?: string;
   languages?: string[];
 }
@@ -104,7 +104,7 @@ interface ScrapeOptions {
     maxPages?: number;
   }>;
   actions?: ActionOption[];
-  location?: LocationConfig;
+  location?: LocationConfig$1;
   skipTlsVerification?: boolean;
   removeBase64Images?: boolean;
   fastMode?: boolean;
@@ -113,6 +113,7 @@ interface ScrapeOptions {
   proxy?: "basic" | "stealth" | "auto" | string;
   maxAge?: number;
   storeInCache?: boolean;
+  integration?: string;
 }
 interface WebhookConfig {
   url: string;
@@ -207,6 +208,7 @@ interface SearchRequest {
   ignoreInvalidURLs?: boolean;
   timeout?: number;
   scrapeOptions?: ScrapeOptions;
+  integration?: string;
 }
 interface CrawlOptions {
   prompt?: string | null;
@@ -224,6 +226,7 @@ interface CrawlOptions {
   webhook?: string | WebhookConfig | null;
   scrapeOptions?: ScrapeOptions | null;
   zeroDataRetention?: boolean;
+  integration?: string;
 }
 interface CrawlResponse$1 {
   id: string;
@@ -245,8 +248,8 @@ interface BatchScrapeOptions {
   ignoreInvalidURLs?: boolean;
   maxConcurrency?: number;
   zeroDataRetention?: boolean;
-  integration?: string;
   idempotencyKey?: string;
+  integration?: string;
 }
 interface BatchScrapeResponse$1 {
   id: string;
@@ -271,6 +274,8 @@ interface MapOptions {
   includeSubdomains?: boolean;
   limit?: number;
   timeout?: number;
+  integration?: string;
+  location?: LocationConfig$1;
 }
 interface ExtractResponse$1 {
   success?: boolean;
@@ -393,6 +398,7 @@ declare function prepareExtractPayload(args: {
   showSources?: boolean;
   scrapeOptions?: ScrapeOptions;
   ignoreInvalidURLs?: boolean;
+  integration?: string;
 }): Record<string, unknown>;
 declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
 
@@ -687,6 +693,13 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
   title?: string;
   description?: string;
 }
+/**
+ * Location configuration for proxy location
+ */
+interface LocationConfig {
+  country?: string;
+  languages?: string[];
+}
 /**
  * Parameters for scraping operations.
  * Defines the options and configurations available for scraping web content.
@@ -699,10 +712,7 @@ interface CrawlScrapeOptions {
   onlyMainContent?: boolean;
   waitFor?: number;
   timeout?: number;
-  location?: {
-    country?: string;
-    languages?: string[];
-  };
+  location?: LocationConfig;
   mobile?: boolean;
   skipTlsVerification?: boolean;
   removeBase64Images?: boolean;
@@ -875,6 +885,7 @@ interface MapParams {
   limit?: number;
   timeout?: number;
   useIndex?: boolean;
+  location?: LocationConfig;
 }
 /**
  * Response interface for mapping operations.
@@ -1487,4 +1498,4 @@ declare class Firecrawl extends FirecrawlClient {
   get v1(): FirecrawlApp;
 }
 
-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.js
CHANGED
@@ -1,6 +1,6 @@
 import {
   require_package
-} from "./chunk-VFP5PS5Z.js";
+} from "./chunk-7RR2L6KO.js";
 
 // src/v2/utils/httpClient.ts
 import axios from "axios";
@@ -218,6 +218,7 @@ function prepareSearchPayload(req) {
   if (req.location != null) payload.location = req.location;
   if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
   if (req.timeout != null) payload.timeout = req.timeout;
+  if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
   if (req.scrapeOptions) {
     ensureValidScrapeOptions(req.scrapeOptions);
     payload.scrapeOptions = req.scrapeOptions;
@@ -268,6 +269,8 @@ function prepareMapPayload(url, options) {
     if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
     if (options.limit != null) payload.limit = options.limit;
     if (options.timeout != null) payload.timeout = options.timeout;
+    if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
+    if (options.location != null) payload.location = options.location;
   }
   return payload;
 }
@@ -339,6 +342,7 @@ function prepareCrawlPayload(request) {
   if (request.delay != null) data.delay = request.delay;
   if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
   if (request.webhook != null) data.webhook = request.webhook;
+  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
   if (request.scrapeOptions) {
     ensureValidScrapeOptions(request.scrapeOptions);
     data.scrapeOptions = request.scrapeOptions;
@@ -465,8 +469,8 @@ async function startBatchScrape(http, urls, {
   ignoreInvalidURLs,
   maxConcurrency,
   zeroDataRetention,
-  integration,
-  idempotencyKey
+  idempotencyKey,
+  integration
 } = {}) {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
   const payload = { urls };
@@ -479,7 +483,7 @@ async function startBatchScrape(http, urls, {
   if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
   if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
   if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
-  if (integration != null) payload.integration = integration;
+  if (integration != null && integration.trim()) payload.integration = integration.trim();
   try {
     const headers = http.prepareHeaders(idempotencyKey);
     const res = await http.post("/v2/batch/scrape", payload, headers);
@@ -576,6 +580,7 @@ function prepareExtractPayload(args) {
   if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
   if (args.showSources != null) body.showSources = args.showSources;
   if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
+  if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
   if (args.scrapeOptions) {
     ensureValidScrapeOptions(args.scrapeOptions);
     body.scrapeOptions = args.scrapeOptions;
@@ -1059,7 +1064,7 @@ var FirecrawlApp = class {
       if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
         return process.env.npm_package_version;
      }
-      const packageJson = await import("./package-ZQ5CJHCA.js");
+      const packageJson = await import("./package-FLU7SABK.js");
       return packageJson.default.version;
     } catch (error) {
       const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
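The remaining change in this file is mechanical: the content-hashed chunk names rotate with the rebuild (chunk-VFP5PS5Z.js → chunk-7RR2L6KO.js, package-ZQ5CJHCA.js → package-FLU7SABK.js). The last hunk sits inside the SDK's version lookup, which roughly follows this shape — a sketch reconstructed from the visible context, not the verbatim method:

// Reconstructed sketch of the version lookup around the changed import.
// The hashed chunk name is emitted by the bundler, so it changes on rebuild.
async function getVersion(): Promise<string> {
  try {
    // Prefer the version npm injects into the environment when available.
    if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
      return process.env.npm_package_version;
    }
    // Fall back to the bundled package.json chunk (hashed filename).
    const packageJson = await import("./package-FLU7SABK.js");
    return packageJson.default.version;
  } catch (error) {
    // The real method also special-cases test runners (JEST_WORKER_ID);
    // this fallback value is an assumption.
    return "unknown";
  }
}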
package/package.json
CHANGED
package/src/v1/index.ts
CHANGED
@@ -102,6 +102,14 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
   description?: string;
 }
 
+/**
+ * Location configuration for proxy location
+ */
+export interface LocationConfig {
+  country?: string;
+  languages?: string[];
+}
+
 /**
  * Parameters for scraping operations.
  * Defines the options and configurations available for scraping web content.
@@ -114,10 +122,7 @@ export interface CrawlScrapeOptions {
   onlyMainContent?: boolean;
   waitFor?: number;
   timeout?: number;
-  location?: {
-    country?: string;
-    languages?: string[];
-  };
+  location?: LocationConfig;
   mobile?: boolean;
   skipTlsVerification?: boolean;
   removeBase64Images?: boolean;
@@ -300,6 +305,7 @@ export interface MapParams {
   limit?: number;
   timeout?: number;
   useIndex?: boolean;
+  location?: LocationConfig;
 }
 
 /**
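The v1 change is a pure type refactor: the inline object type on `location` is hoisted into the exported `LocationConfig` interface, which `MapParams` then reuses. Because the named interface is structurally identical to the old inline literal, existing call sites keep compiling; a small sketch:

// Structural typing makes the hoisted interface a drop-in replacement.
interface LocationConfig {
  country?: string;
  languages?: string[];
}

// Before: location?: { country?: string; languages?: string[] };
// After:  location?: LocationConfig;
const before: { country?: string; languages?: string[] } = { country: "US" };
const after: LocationConfig = before; // assignable in both directions

console.log(after.country); // "US"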
package/src/v2/methods/batch.ts
CHANGED
@@ -21,8 +21,8 @@ export async function startBatchScrape(
     ignoreInvalidURLs,
     maxConcurrency,
     zeroDataRetention,
-    integration,
     idempotencyKey,
+    integration,
   }: BatchScrapeOptions = {}
 ): Promise<BatchScrapeResponse> {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
@@ -36,7 +36,7 @@ export async function startBatchScrape(
   if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
   if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
   if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
-  if (integration != null) payload.integration = integration;
+  if (integration != null && integration.trim()) payload.integration = integration.trim();
 
   try {
     const headers = http.prepareHeaders(idempotencyKey);
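Note that the only change to the destructuring pattern itself is ordering; object destructuring binds by property name, not position, so swapping `integration` and `idempotencyKey` has no runtime effect. For example:

const opts = { idempotencyKey: "key-123", integration: "my-app" };

// Both orders produce identical bindings:
const { integration, idempotencyKey } = opts;
const { idempotencyKey: k2, integration: i2 } = opts;
console.log(integration === i2, idempotencyKey === k2); // true true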
package/src/v2/methods/crawl.ts
CHANGED
@@ -33,6 +33,7 @@ function prepareCrawlPayload(request: CrawlRequest): Record<string, unknown> {
   if (request.delay != null) data.delay = request.delay;
   if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
   if (request.webhook != null) data.webhook = request.webhook;
+  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
   if (request.scrapeOptions) {
     ensureValidScrapeOptions(request.scrapeOptions);
     data.scrapeOptions = request.scrapeOptions;
package/src/v2/methods/extract.ts
CHANGED
@@ -15,6 +15,7 @@ function prepareExtractPayload(args: {
   showSources?: boolean;
   scrapeOptions?: ScrapeOptions;
   ignoreInvalidURLs?: boolean;
+  integration?: string;
 }): Record<string, unknown> {
   const body: Record<string, unknown> = {};
   if (args.urls) body.urls = args.urls;
@@ -29,6 +30,7 @@ function prepareExtractPayload(args: {
   if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
   if (args.showSources != null) body.showSources = args.showSources;
   if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
+  if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
   if (args.scrapeOptions) {
     ensureValidScrapeOptions(args.scrapeOptions);
     body.scrapeOptions = args.scrapeOptions;
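The guards are not written identically everywhere: crawl and map check `x != null && x.trim()`, while search and extract use the shorter truthiness form `x && x.trim()`. For an optional string the two are interchangeable, since null, undefined, and "" are all falsy; a quick check:

const samples: Array<string | null | undefined> = [undefined, null, "", "   ", " my-app "];

for (const v of samples) {
  const strict = v != null && v.trim() ? v.trim() : undefined;
  const truthy = v && v.trim() ? v.trim() : undefined;
  console.log(JSON.stringify(v), strict === truthy, strict);
}
// Every row prints `true`; only " my-app " yields a value ("my-app").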
package/src/v2/methods/map.ts
CHANGED
@@ -1,4 +1,4 @@
-import { type MapData, type MapOptions, type
+import { type MapData, type MapOptions, type SearchResultWeb } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";
 
@@ -11,6 +11,8 @@ function prepareMapPayload(url: string, options?: MapOptions): Record<string, un
     if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
     if (options.limit != null) payload.limit = options.limit;
     if (options.timeout != null) payload.timeout = options.timeout;
+    if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
+    if (options.location != null) payload.location = options.location;
   }
   return payload;
 }
@@ -18,12 +20,12 @@ function prepareMapPayload(url: string, options?: MapOptions): Record<string, un
 export async function map(http: HttpClient, url: string, options?: MapOptions): Promise<MapData> {
   const payload = prepareMapPayload(url, options);
   try {
-    const res = await http.post<{ success: boolean; error?: string; links?: Array<string |
+    const res = await http.post<{ success: boolean; error?: string; links?: Array<string | SearchResultWeb> }>("/v2/map", payload);
     if (res.status !== 200 || !res.data?.success) {
       throwForBadResponse(res, "map");
     }
     const linksIn = res.data.links || [];
-    const links:
+    const links: SearchResultWeb[] = [];
     for (const item of linksIn) {
       if (typeof item === "string") links.push({ url: item });
       else if (item && typeof item === "object") links.push({ url: item.url, title: (item as any).title, description: (item as any).description });
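For context, the loop in this hunk normalizes a mixed response: the v2 map endpoint may return bare URL strings or link objects, and both are coerced to the `SearchResultWeb` shape. A self-contained sketch (the interface is redeclared here; in the SDK it comes from ../types):

interface SearchResultWeb {
  url: string;
  title?: string;
  description?: string;
}

// Mirrors the normalization in map(): strings become { url }, objects are
// narrowed to the three known fields.
function normalizeLinks(linksIn: Array<string | SearchResultWeb>): SearchResultWeb[] {
  const links: SearchResultWeb[] = [];
  for (const item of linksIn) {
    if (typeof item === "string") links.push({ url: item });
    else if (item && typeof item === "object") {
      links.push({ url: item.url, title: item.title, description: item.description });
    }
  }
  return links;
}

console.log(normalizeLinks(["https://a.example", { url: "https://b.example", title: "B" }]));
// [ { url: "https://a.example" }, { url: "https://b.example", title: "B", description: undefined } ]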
package/src/v2/methods/search.ts
CHANGED
@@ -17,6 +17,7 @@ function prepareSearchPayload(req: SearchRequest): Record<string, unknown> {
   if (req.location != null) payload.location = req.location;
   if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
   if (req.timeout != null) payload.timeout = req.timeout;
+  if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
   if (req.scrapeOptions) {
     ensureValidScrapeOptions(req.scrapeOptions as ScrapeOptions);
     payload.scrapeOptions = req.scrapeOptions;
package/src/v2/types.ts
CHANGED
@@ -144,6 +144,7 @@ export interface ScrapeOptions {
   proxy?: "basic" | "stealth" | "auto" | string;
   maxAge?: number;
   storeInCache?: boolean;
+  integration?: string;
 }
 
 export interface WebhookConfig {
@@ -247,6 +248,7 @@ export interface SearchRequest {
   ignoreInvalidURLs?: boolean;
   timeout?: number; // ms
   scrapeOptions?: ScrapeOptions;
+  integration?: string;
 }
 
 export interface CrawlOptions {
@@ -265,6 +267,7 @@ export interface CrawlOptions {
   webhook?: string | WebhookConfig | null;
   scrapeOptions?: ScrapeOptions | null;
   zeroDataRetention?: boolean;
+  integration?: string;
 }
 
 export interface CrawlResponse {
@@ -289,8 +292,8 @@ export interface BatchScrapeOptions {
   ignoreInvalidURLs?: boolean;
   maxConcurrency?: number;
   zeroDataRetention?: boolean;
-  integration?: string;
   idempotencyKey?: string;
+  integration?: string;
 }
 
 export interface BatchScrapeResponse {
@@ -319,6 +322,8 @@ export interface MapOptions {
   includeSubdomains?: boolean;
   limit?: number;
   timeout?: number;
+  integration?: string;
+  location?: LocationConfig;
 }
 
 export interface ExtractResponse {
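Taken together, the type additions mean a v2 map call can now carry both new options. A hedged usage sketch — the client shape and `map` method follow the SDK's v2 surface, but the values and the logged field are invented for illustration:

import Firecrawl from "@mendable/firecrawl";

const firecrawl = new Firecrawl({ apiKey: "fc-YOUR-KEY" });

// `integration` is trimmed and dropped if blank; `location` is forwarded
// verbatim to the /v2/map payload (both new in this release).
const result = await firecrawl.map("https://example.com", {
  limit: 50,
  integration: "  my-app  ", // sent as "my-app"
  location: { country: "DE", languages: ["de", "en"] },
});

console.log(result);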