@mendable/firecrawl 4.3.2 → 4.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-QYSKXMMY.js → chunk-TIJLLR5H.js} +1 -1
- package/dist/index.cjs +9 -4
- package/dist/index.d.cts +11 -2
- package/dist/index.d.ts +11 -2
- package/dist/index.js +10 -5
- package/dist/{package-F2DQUYTW.js → package-3EYW3PGP.js} +1 -1
- package/package.json +1 -1
- package/src/v2/methods/batch.ts +2 -2
- package/src/v2/methods/crawl.ts +1 -0
- package/src/v2/methods/extract.ts +5 -1
- package/src/v2/methods/map.ts +4 -3
- package/src/v2/methods/search.ts +1 -0
- package/src/v2/types.ts +9 -1
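Taken together, the hunks below thread a new optional integration string through the v2 payload builders (search, map, crawl, batch scrape, extract) and add an agent option for extract. Below is a minimal, type-only sketch of the new field, written against the option interfaces this release exports (MapOptions, CrawlOptions, BatchScrapeOptions); the "my-app" tag is a hypothetical value chosen for illustration, not anything defined by the SDK.

// Type-level sketch only; compiles against the declarations added in this diff.
// "my-app" is a hypothetical integration tag, not an SDK value.
import type { BatchScrapeOptions, CrawlOptions, MapOptions } from "@mendable/firecrawl";

const mapOptions: MapOptions = {
  limit: 100,
  integration: "my-app", // trimmed before sending; blank values are dropped
};

const crawlOptions: CrawlOptions = {
  zeroDataRetention: false,
  integration: "my-app",
};

const batchOptions: BatchScrapeOptions = {
  idempotencyKey: "batch-2024-01", // integration now sits after this field in the type
  integration: "my-app",
};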
package/dist/{chunk-QYSKXMMY.js → chunk-TIJLLR5H.js}  CHANGED

@@ -8,7 +8,7 @@ var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.3.2",
+      version: "4.3.4",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
package/dist/index.cjs  CHANGED

@@ -35,7 +35,7 @@ var require_package = __commonJS({
   "package.json"(exports2, module2) {
     module2.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.3.2",
+      version: "4.3.4",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",

@@ -334,6 +334,7 @@ function prepareSearchPayload(req) {
   if (req.location != null) payload.location = req.location;
   if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
   if (req.timeout != null) payload.timeout = req.timeout;
+  if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
   if (req.scrapeOptions) {
     ensureValidScrapeOptions(req.scrapeOptions);
     payload.scrapeOptions = req.scrapeOptions;

@@ -384,6 +385,7 @@ function prepareMapPayload(url, options) {
     if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
     if (options.limit != null) payload.limit = options.limit;
     if (options.timeout != null) payload.timeout = options.timeout;
+    if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
     if (options.location != null) payload.location = options.location;
   }
   return payload;

@@ -456,6 +458,7 @@ function prepareCrawlPayload(request) {
   if (request.delay != null) data.delay = request.delay;
   if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
   if (request.webhook != null) data.webhook = request.webhook;
+  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
   if (request.scrapeOptions) {
     ensureValidScrapeOptions(request.scrapeOptions);
     data.scrapeOptions = request.scrapeOptions;

@@ -582,8 +585,8 @@ async function startBatchScrape(http, urls, {
   ignoreInvalidURLs,
   maxConcurrency,
   zeroDataRetention,
-  integration,
-  idempotencyKey
+  idempotencyKey,
+  integration
 } = {}) {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
   const payload = { urls };

@@ -596,7 +599,7 @@ async function startBatchScrape(http, urls, {
   if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
   if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
   if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
-  if (integration != null) payload.integration = integration;
+  if (integration != null && integration.trim()) payload.integration = integration.trim();
   try {
     const headers = http.prepareHeaders(idempotencyKey);
     const res = await http.post("/v2/batch/scrape", payload, headers);

@@ -693,6 +696,8 @@ function prepareExtractPayload(args) {
   if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
   if (args.showSources != null) body.showSources = args.showSources;
   if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
+  if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
+  if (args.agent) body.agent = args.agent;
   if (args.scrapeOptions) {
     ensureValidScrapeOptions(args.scrapeOptions);
     body.scrapeOptions = args.scrapeOptions;
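The compiled builders above apply essentially the same guard everywhere: integration is trimmed and only set when the trimmed value is non-empty. A self-contained sketch of that guard (not the SDK's own function, just the same logic reproduced for illustration):

// Not an SDK export; a standalone illustration of the guard used in the hunks above.
function applyIntegration(payload: Record<string, unknown>, integration?: string): void {
  if (integration != null && integration.trim()) payload.integration = integration.trim();
}

const a: Record<string, unknown> = {};
applyIntegration(a, "  my-app  "); // a.integration === "my-app"

const b: Record<string, unknown> = {};
applyIntegration(b, "   ");        // whitespace-only input: b.integration stays unset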
package/dist/index.d.cts  CHANGED

@@ -113,6 +113,7 @@ interface ScrapeOptions {
   proxy?: "basic" | "stealth" | "auto" | string;
   maxAge?: number;
   storeInCache?: boolean;
+  integration?: string;
 }
 interface WebhookConfig {
   url: string;

@@ -207,6 +208,7 @@ interface SearchRequest {
   ignoreInvalidURLs?: boolean;
   timeout?: number;
   scrapeOptions?: ScrapeOptions;
+  integration?: string;
 }
 interface CrawlOptions {
   prompt?: string | null;

@@ -224,6 +226,7 @@ interface CrawlOptions {
   webhook?: string | WebhookConfig | null;
   scrapeOptions?: ScrapeOptions | null;
   zeroDataRetention?: boolean;
+  integration?: string;
 }
 interface CrawlResponse$1 {
   id: string;

@@ -245,8 +248,8 @@ interface BatchScrapeOptions {
   ignoreInvalidURLs?: boolean;
   maxConcurrency?: number;
   zeroDataRetention?: boolean;
-  integration?: string;
   idempotencyKey?: string;
+  integration?: string;
 }
 interface BatchScrapeResponse$1 {
   id: string;

@@ -271,6 +274,7 @@ interface MapOptions {
   includeSubdomains?: boolean;
   limit?: number;
   timeout?: number;
+  integration?: string;
   location?: LocationConfig$1;
 }
 interface ExtractResponse$1 {

@@ -283,6 +287,9 @@ interface ExtractResponse$1 {
   sources?: Record<string, unknown>;
   expiresAt?: string;
 }
+interface AgentOptions$1 {
+  model: "FIRE-1";
+}
 interface ConcurrencyCheck {
   concurrency: number;
   maxConcurrency: number;

@@ -394,6 +401,8 @@ declare function prepareExtractPayload(args: {
   showSources?: boolean;
   scrapeOptions?: ScrapeOptions;
   ignoreInvalidURLs?: boolean;
+  integration?: string;
+  agent?: AgentOptions$1;
 }): Record<string, unknown>;
 declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;

@@ -1493,4 +1502,4 @@ declare class Firecrawl extends FirecrawlClient {
   get v1(): FirecrawlApp;
 }

-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.d.ts  CHANGED

@@ -113,6 +113,7 @@ interface ScrapeOptions {
   proxy?: "basic" | "stealth" | "auto" | string;
   maxAge?: number;
   storeInCache?: boolean;
+  integration?: string;
 }
 interface WebhookConfig {
   url: string;

@@ -207,6 +208,7 @@ interface SearchRequest {
   ignoreInvalidURLs?: boolean;
   timeout?: number;
   scrapeOptions?: ScrapeOptions;
+  integration?: string;
 }
 interface CrawlOptions {
   prompt?: string | null;

@@ -224,6 +226,7 @@ interface CrawlOptions {
   webhook?: string | WebhookConfig | null;
   scrapeOptions?: ScrapeOptions | null;
   zeroDataRetention?: boolean;
+  integration?: string;
 }
 interface CrawlResponse$1 {
   id: string;

@@ -245,8 +248,8 @@ interface BatchScrapeOptions {
   ignoreInvalidURLs?: boolean;
   maxConcurrency?: number;
   zeroDataRetention?: boolean;
-  integration?: string;
   idempotencyKey?: string;
+  integration?: string;
 }
 interface BatchScrapeResponse$1 {
   id: string;

@@ -271,6 +274,7 @@ interface MapOptions {
   includeSubdomains?: boolean;
   limit?: number;
   timeout?: number;
+  integration?: string;
   location?: LocationConfig$1;
 }
 interface ExtractResponse$1 {

@@ -283,6 +287,9 @@ interface ExtractResponse$1 {
   sources?: Record<string, unknown>;
   expiresAt?: string;
 }
+interface AgentOptions$1 {
+  model: "FIRE-1";
+}
 interface ConcurrencyCheck {
   concurrency: number;
   maxConcurrency: number;

@@ -394,6 +401,8 @@ declare function prepareExtractPayload(args: {
   showSources?: boolean;
   scrapeOptions?: ScrapeOptions;
   ignoreInvalidURLs?: boolean;
+  integration?: string;
+  agent?: AgentOptions$1;
 }): Record<string, unknown>;
 declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;

@@ -1493,4 +1502,4 @@ declare class Firecrawl extends FirecrawlClient {
   get v1(): FirecrawlApp;
 }

-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.js  CHANGED

@@ -1,6 +1,6 @@
 import {
   require_package
-} from "./chunk-QYSKXMMY.js";
+} from "./chunk-TIJLLR5H.js";

 // src/v2/utils/httpClient.ts
 import axios from "axios";

@@ -218,6 +218,7 @@ function prepareSearchPayload(req) {
   if (req.location != null) payload.location = req.location;
   if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
   if (req.timeout != null) payload.timeout = req.timeout;
+  if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
   if (req.scrapeOptions) {
     ensureValidScrapeOptions(req.scrapeOptions);
     payload.scrapeOptions = req.scrapeOptions;

@@ -268,6 +269,7 @@ function prepareMapPayload(url, options) {
     if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
     if (options.limit != null) payload.limit = options.limit;
     if (options.timeout != null) payload.timeout = options.timeout;
+    if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
     if (options.location != null) payload.location = options.location;
   }
   return payload;

@@ -340,6 +342,7 @@ function prepareCrawlPayload(request) {
   if (request.delay != null) data.delay = request.delay;
   if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
   if (request.webhook != null) data.webhook = request.webhook;
+  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
   if (request.scrapeOptions) {
     ensureValidScrapeOptions(request.scrapeOptions);
     data.scrapeOptions = request.scrapeOptions;

@@ -466,8 +469,8 @@ async function startBatchScrape(http, urls, {
   ignoreInvalidURLs,
   maxConcurrency,
   zeroDataRetention,
-  integration,
-  idempotencyKey
+  idempotencyKey,
+  integration
 } = {}) {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
   const payload = { urls };

@@ -480,7 +483,7 @@ async function startBatchScrape(http, urls, {
   if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
   if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
   if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
-  if (integration != null) payload.integration = integration;
+  if (integration != null && integration.trim()) payload.integration = integration.trim();
   try {
     const headers = http.prepareHeaders(idempotencyKey);
     const res = await http.post("/v2/batch/scrape", payload, headers);

@@ -577,6 +580,8 @@ function prepareExtractPayload(args) {
   if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
   if (args.showSources != null) body.showSources = args.showSources;
   if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
+  if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
+  if (args.agent) body.agent = args.agent;
   if (args.scrapeOptions) {
     ensureValidScrapeOptions(args.scrapeOptions);
     body.scrapeOptions = args.scrapeOptions;

@@ -1060,7 +1065,7 @@ var FirecrawlApp = class {
     if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
       return process.env.npm_package_version;
     }
-    const packageJson = await import("./package-F2DQUYTW.js");
+    const packageJson = await import("./package-3EYW3PGP.js");
     return packageJson.default.version;
   } catch (error) {
     const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
package/package.json  CHANGED
package/src/v2/methods/batch.ts  CHANGED

@@ -21,8 +21,8 @@ export async function startBatchScrape(
     ignoreInvalidURLs,
     maxConcurrency,
     zeroDataRetention,
-    integration,
     idempotencyKey,
+    integration,
   }: BatchScrapeOptions = {}
 ): Promise<BatchScrapeResponse> {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");

@@ -36,7 +36,7 @@ export async function startBatchScrape(
   if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
   if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
   if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
-  if (integration != null) payload.integration = integration;
+  if (integration != null && integration.trim()) payload.integration = integration.trim();

   try {
     const headers = http.prepareHeaders(idempotencyKey);
package/src/v2/methods/crawl.ts  CHANGED

@@ -33,6 +33,7 @@ function prepareCrawlPayload(request: CrawlRequest): Record<string, unknown> {
   if (request.delay != null) data.delay = request.delay;
   if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
   if (request.webhook != null) data.webhook = request.webhook;
+  if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
   if (request.scrapeOptions) {
     ensureValidScrapeOptions(request.scrapeOptions);
     data.scrapeOptions = request.scrapeOptions;

package/src/v2/methods/extract.ts  CHANGED

@@ -1,4 +1,4 @@
-import { type ExtractResponse, type ScrapeOptions } from "../types";
+import { type ExtractResponse, type ScrapeOptions, type AgentOptions } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
 import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";

@@ -15,6 +15,8 @@ function prepareExtractPayload(args: {
   showSources?: boolean;
   scrapeOptions?: ScrapeOptions;
   ignoreInvalidURLs?: boolean;
+  integration?: string;
+  agent?: AgentOptions;
 }): Record<string, unknown> {
   const body: Record<string, unknown> = {};
   if (args.urls) body.urls = args.urls;

@@ -29,6 +31,8 @@ function prepareExtractPayload(args: {
   if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
   if (args.showSources != null) body.showSources = args.showSources;
   if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
+  if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
+  if (args.agent) body.agent = args.agent;
   if (args.scrapeOptions) {
     ensureValidScrapeOptions(args.scrapeOptions);
     body.scrapeOptions = args.scrapeOptions;
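The extract changes above introduce an agent option alongside integration. Below is a small sketch using the exported AgentOptions type; the client-facing extract call is not part of this diff, so its shape is assumed and shown only as a comment.

// AgentOptions is exported by this release; "FIRE-1" is the only model the type allows.
import type { AgentOptions } from "@mendable/firecrawl";

const agent: AgentOptions = { model: "FIRE-1" };

// Hypothetical call shape (the public extract signature is not shown in this diff):
// await firecrawl.extract({ urls: ["https://example.com"], agent, integration: "my-app" });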
package/src/v2/methods/map.ts  CHANGED

@@ -1,4 +1,4 @@
-import { type MapData, type MapOptions, type
+import { type MapData, type MapOptions, type SearchResultWeb } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";

@@ -11,6 +11,7 @@ function prepareMapPayload(url: string, options?: MapOptions): Record<string, un
   if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
   if (options.limit != null) payload.limit = options.limit;
   if (options.timeout != null) payload.timeout = options.timeout;
+  if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
   if (options.location != null) payload.location = options.location;
 }
 return payload;

@@ -19,12 +20,12 @@ function prepareMapPayload(url: string, options?: MapOptions): Record<string, un
 export async function map(http: HttpClient, url: string, options?: MapOptions): Promise<MapData> {
   const payload = prepareMapPayload(url, options);
   try {
-    const res = await http.post<{ success: boolean; error?: string; links?: Array<string |
+    const res = await http.post<{ success: boolean; error?: string; links?: Array<string | SearchResultWeb> }>("/v2/map", payload);
     if (res.status !== 200 || !res.data?.success) {
       throwForBadResponse(res, "map");
     }
     const linksIn = res.data.links || [];
-    const links:
+    const links: SearchResultWeb[] = [];
     for (const item of linksIn) {
       if (typeof item === "string") links.push({ url: item });
       else if (item && typeof item === "object") links.push({ url: item.url, title: (item as any).title, description: (item as any).description });
package/src/v2/methods/search.ts  CHANGED

@@ -17,6 +17,7 @@ function prepareSearchPayload(req: SearchRequest): Record<string, unknown> {
   if (req.location != null) payload.location = req.location;
   if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
   if (req.timeout != null) payload.timeout = req.timeout;
+  if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
   if (req.scrapeOptions) {
     ensureValidScrapeOptions(req.scrapeOptions as ScrapeOptions);
     payload.scrapeOptions = req.scrapeOptions;
package/src/v2/types.ts  CHANGED

@@ -144,6 +144,7 @@ export interface ScrapeOptions {
   proxy?: "basic" | "stealth" | "auto" | string;
   maxAge?: number;
   storeInCache?: boolean;
+  integration?: string;
 }

 export interface WebhookConfig {

@@ -247,6 +248,7 @@ export interface SearchRequest {
   ignoreInvalidURLs?: boolean;
   timeout?: number; // ms
   scrapeOptions?: ScrapeOptions;
+  integration?: string;
 }

 export interface CrawlOptions {

@@ -265,6 +267,7 @@ export interface CrawlOptions {
   webhook?: string | WebhookConfig | null;
   scrapeOptions?: ScrapeOptions | null;
   zeroDataRetention?: boolean;
+  integration?: string;
 }

 export interface CrawlResponse {

@@ -289,8 +292,8 @@ export interface BatchScrapeOptions {
   ignoreInvalidURLs?: boolean;
   maxConcurrency?: number;
   zeroDataRetention?: boolean;
-  integration?: string;
   idempotencyKey?: string;
+  integration?: string;
 }

 export interface BatchScrapeResponse {

@@ -319,6 +322,7 @@ export interface MapOptions {
   includeSubdomains?: boolean;
   limit?: number;
   timeout?: number;
+  integration?: string;
   location?: LocationConfig;
 }

@@ -333,6 +337,10 @@ export interface ExtractResponse {
   expiresAt?: string;
 }

+export interface AgentOptions {
+  model: "FIRE-1";
+}
+
 export interface ConcurrencyCheck {
   concurrency: number;
   maxConcurrency: number;