firecrawl 3.0.2 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +0 -0
- package/dist/{chunk-JFWW4BWA.js → chunk-OIZ6OKY4.js} +2 -2
- package/dist/index.cjs +9 -8
- package/dist/index.d.cts +34 -39
- package/dist/index.d.ts +34 -39
- package/dist/index.js +5 -4
- package/dist/{package-KYZ3HXR5.js → package-V5IPFKBE.js} +1 -1
- package/jest.config.js +0 -0
- package/package.json +1 -1
- package/src/v2/client.ts +11 -9
- package/src/v2/methods/batch.ts +3 -15
- package/src/v2/methods/crawl.ts +3 -19
- package/src/v2/types.ts +29 -0
- package/tsup.config.ts +0 -0
- package/dump.rdb +0 -0
package/LICENSE
CHANGED
File without changes

package/dist/{chunk-JFWW4BWA.js → chunk-OIZ6OKY4.js}
CHANGED

@@ -7,8 +7,8 @@ var __commonJS = (cb, mod) => function __require() {
   var require_package = __commonJS({
     "package.json"(exports, module) {
       module.exports = {
-        name: "firecrawl",
-        version: "3.0.2",
+        name: "@mendable/firecrawl-js",
+        version: "3.0.3",
         description: "JavaScript SDK for Firecrawl API",
         main: "dist/index.js",
         types: "dist/index.d.ts",
package/dist/index.cjs
CHANGED
@@ -34,8 +34,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
   var require_package = __commonJS({
     "package.json"(exports2, module2) {
       module2.exports = {
-        name: "firecrawl",
-        version: "3.0.2",
+        name: "@mendable/firecrawl-js",
+        version: "3.0.3",
         description: "JavaScript SDK for Firecrawl API",
         main: "dist/index.js",
         types: "dist/index.d.ts",

@@ -108,15 +108,15 @@ var require_package = __commonJS({
 });

 // src/index.ts
-var
-__export(
+var index_exports = {};
+__export(index_exports, {
   Firecrawl: () => Firecrawl,
   FirecrawlAppV1: () => FirecrawlApp,
   FirecrawlClient: () => FirecrawlClient,
   SdkError: () => SdkError,
-  default: () =>
+  default: () => index_default
 });
-module.exports = __toCommonJS(
+module.exports = __toCommonJS(index_exports);

 // src/v2/utils/httpClient.ts
 var import_axios = __toESM(require("axios"), 1);

@@ -826,6 +826,7 @@ var Watcher = class extends import_events.EventEmitter {
 };

 // src/v2/client.ts
+var zt = require("zod");
 var FirecrawlClient = class {
   http;
   /**

@@ -1015,7 +1016,7 @@ var FirecrawlClient = class {

 // src/v1/index.ts
 var import_axios3 = __toESM(require("axios"), 1);
-var
+var zt2 = require("zod");
 var import_zod_to_json_schema3 = require("zod-to-json-schema");

 // node_modules/typescript-event-target/dist/index.mjs

@@ -2354,7 +2355,7 @@ var Firecrawl = class extends FirecrawlClient {
     return this._v1;
   }
 };
-var
+var index_default = Firecrawl;
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
   Firecrawl,
package/dist/index.d.cts
CHANGED
@@ -1,5 +1,5 @@
 import * as zt from 'zod';
-import { ZodTypeAny
+import { ZodTypeAny } from 'zod';
 import { AxiosResponse, AxiosRequestHeaders } from 'axios';
 import { EventEmitter } from 'events';
 import { TypedEventTarget } from 'typescript-event-target';

@@ -160,6 +160,23 @@ interface SearchRequest {
     timeout?: number;
     scrapeOptions?: ScrapeOptions;
 }
+interface CrawlOptions {
+    prompt?: string | null;
+    excludePaths?: string[] | null;
+    includePaths?: string[] | null;
+    maxDiscoveryDepth?: number | null;
+    sitemap?: "skip" | "include";
+    ignoreQueryParameters?: boolean;
+    limit?: number | null;
+    crawlEntireDomain?: boolean;
+    allowExternalLinks?: boolean;
+    allowSubdomains?: boolean;
+    delay?: number | null;
+    maxConcurrency?: number | null;
+    webhook?: string | WebhookConfig | null;
+    scrapeOptions?: ScrapeOptions | null;
+    zeroDataRetention?: boolean;
+}
 interface CrawlResponse$1 {
     id: string;
     url: string;

@@ -173,6 +190,16 @@ interface CrawlJob {
     next?: string | null;
     data: Document[];
 }
+interface BatchScrapeOptions {
+    options?: ScrapeOptions;
+    webhook?: string | WebhookConfig;
+    appendToId?: string;
+    ignoreInvalidURLs?: boolean;
+    maxConcurrency?: number;
+    zeroDataRetention?: boolean;
+    integration?: string;
+    idempotencyKey?: string;
+}
 interface BatchScrapeResponse$1 {
     id: string;
     url: string;

@@ -274,38 +301,6 @@ declare class HttpClient {
     prepareHeaders(idempotencyKey?: string): Record<string, string>;
 }

-interface CrawlRequest {
-    url: string;
-    prompt?: string | null;
-    excludePaths?: string[] | null;
-    includePaths?: string[] | null;
-    maxDiscoveryDepth?: number | null;
-    sitemap?: "skip" | "include";
-    ignoreQueryParameters?: boolean;
-    limit?: number | null;
-    crawlEntireDomain?: boolean;
-    allowExternalLinks?: boolean;
-    allowSubdomains?: boolean;
-    delay?: number | null;
-    maxConcurrency?: number | null;
-    webhook?: string | WebhookConfig | null;
-    scrapeOptions?: ScrapeOptions | null;
-    zeroDataRetention?: boolean;
-}
-declare function startCrawl(http: HttpClient, request: CrawlRequest): Promise<CrawlResponse$1>;
-
-interface StartBatchOptions {
-    options?: ScrapeOptions;
-    webhook?: string | WebhookConfig;
-    appendToId?: string;
-    ignoreInvalidURLs?: boolean;
-    maxConcurrency?: number;
-    zeroDataRetention?: boolean;
-    integration?: string;
-    idempotencyKey?: string;
-}
-declare function startBatchScrape(http: HttpClient, urls: string[], { options, webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention, integration, idempotencyKey, }?: StartBatchOptions): Promise<BatchScrapeResponse$1>;
-
 declare function prepareExtractPayload(args: {
     urls?: string[];
     prompt?: string;

@@ -349,7 +344,7 @@ type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[] ? Ex
 }>["schema"] : never;
 type InferredJsonFromOptions<Opts> = Opts extends {
     formats?: infer Fmts;
-} ? ExtractJsonSchemaFromFormats<Fmts> extends ZodTypeAny ? infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown;
+} ? ExtractJsonSchemaFromFormats<Fmts> extends zt.ZodTypeAny ? zt.infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown;
 /**
  * Configuration for the v2 client transport.
  */

@@ -405,7 +400,7 @@ declare class FirecrawlClient {
      * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
      * @returns Job id and url.
      */
-    startCrawl(url: string, req?:
+    startCrawl(url: string, req?: CrawlOptions): Promise<CrawlResponse$1>;
     /**
      * Get the status and partial data of a crawl job.
      * @param jobId Crawl job id.

@@ -423,7 +418,7 @@ declare class FirecrawlClient {
      * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
      * @returns Final job snapshot.
      */
-    crawl(url: string, req?:
+    crawl(url: string, req?: CrawlOptions & {
        pollInterval?: number;
        timeout?: number;
    }): Promise<CrawlJob>;

@@ -448,7 +443,7 @@ declare class FirecrawlClient {
      * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
      * @returns Job id and url.
      */
-    startBatchScrape(urls: string[], opts?:
+    startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse$1>;
     /**
      * Get the status and partial data of a batch scrape job.
      * @param jobId Batch job id.

@@ -471,7 +466,7 @@ declare class FirecrawlClient {
      * @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
      * @returns Final job snapshot.
      */
-    batchScrape(urls: string[], opts?:
+    batchScrape(urls: string[], opts?: BatchScrapeOptions & {
        pollInterval?: number;
        timeout?: number;
    }): Promise<BatchScrapeJob>;

@@ -1331,4 +1326,4 @@ declare class Firecrawl extends FirecrawlClient {
     get v1(): FirecrawlApp;
 }

-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type BatchScrapeJob, type BatchScrapeResponse$1 as BatchScrapeResponse, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResult, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResult, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
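With CrawlOptions and BatchScrapeOptions now part of the public declaration file, downstream code can type its option objects before handing them to the client. A minimal consumer sketch; it assumes the package is installed under the registry name in the page title (firecrawl) and that the API key and URLs are placeholders, none of which is part of this diff:

// Hedged sketch of the newly exported option types; values are illustrative only.
import Firecrawl, { type CrawlOptions, type BatchScrapeOptions } from "firecrawl";

async function main(): Promise<void> {
  const client = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY ?? "" });

  // CrawlOptions is the shape previously hidden inside the internal CrawlRequest.
  const crawlOpts: CrawlOptions = {
    limit: 10,
    includePaths: ["/blog/*"],
    sitemap: "include",
  };
  const crawlJob = await client.crawl("https://example.com", { ...crawlOpts, pollInterval: 2 });
  console.log(crawlJob.data.length); // data holds the scraped Documents per the CrawlJob declaration

  // BatchScrapeOptions replaces the internal StartBatchOptions.
  const batchOpts: BatchScrapeOptions = { maxConcurrency: 2, ignoreInvalidURLs: true };
  const batchJob = await client.batchScrape(
    ["https://example.com/a", "https://example.com/b"],
    batchOpts,
  );
  console.log(batchJob);
}

main().catch(console.error);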
package/dist/index.d.ts
CHANGED
Identical to the package/dist/index.d.cts diff above; the .d.ts and .d.cts files ship the same declarations (including the new CrawlOptions and BatchScrapeOptions interfaces and the updated export list), duplicated for the ESM and CJS entry points.
package/dist/index.js
CHANGED
@@ -1,6 +1,6 @@
 import {
   require_package
-} from "./chunk-JFWW4BWA.js";
+} from "./chunk-OIZ6OKY4.js";

 // src/v2/utils/httpClient.ts
 import axios from "axios";

@@ -710,6 +710,7 @@ var Watcher = class extends EventEmitter {
 };

 // src/v2/client.ts
+import "zod";
 var FirecrawlClient = class {
   http;
   /**

@@ -931,7 +932,7 @@ var FirecrawlApp = class {
       if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
         return process.env.npm_package_version;
       }
-      const packageJson = await import("./package-KYZ3HXR5.js");
+      const packageJson = await import("./package-V5IPFKBE.js");
       return packageJson.default.version;
     } catch (error) {
       const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);

@@ -2238,11 +2239,11 @@ var Firecrawl = class extends FirecrawlClient {
     return this._v1;
   }
 };
-var
+var index_default = Firecrawl;
 export {
   Firecrawl,
   FirecrawlApp as FirecrawlAppV1,
   FirecrawlClient,
   SdkError,
-
+  index_default as default
 };
package/jest.config.js
CHANGED
File without changes
package/package.json
CHANGED
package/src/v2/client.ts
CHANGED
@@ -34,10 +34,12 @@ import type {
   BatchScrapeResponse,
   BatchScrapeJob,
   ExtractResponse,
+  CrawlOptions,
+  BatchScrapeOptions,
 } from "./types";
 import { Watcher } from "./watcher";
 import type { WatcherOptions } from "./watcher";
-import
+import * as zt from "zod";

 // Helper types to infer the `json` field from a Zod schema included in `formats`
 type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[]

@@ -45,8 +47,8 @@ type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[]
   : never;

 type InferredJsonFromOptions<Opts> = Opts extends { formats?: infer Fmts }
-  ? ExtractJsonSchemaFromFormats<Fmts> extends ZodTypeAny
-    ?
+  ? ExtractJsonSchemaFromFormats<Fmts> extends zt.ZodTypeAny
+    ? zt.infer<ExtractJsonSchemaFromFormats<Fmts>>
     : unknown
   : unknown;

@@ -136,8 +138,8 @@ export class FirecrawlClient {
    * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
    * @returns Job id and url.
    */
-  async startCrawl(url: string, req:
-    return startCrawl(this.http, { url, ...
+  async startCrawl(url: string, req: CrawlOptions = {}): Promise<CrawlResponse> {
+    return startCrawl(this.http, { url, ...req });
   }
   /**
    * Get the status and partial data of a crawl job.

@@ -160,8 +162,8 @@ export class FirecrawlClient {
    * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
    * @returns Final job snapshot.
    */
-  async crawl(url: string, req:
-    return crawlWaiter(this.http, { url, ...
+  async crawl(url: string, req: CrawlOptions & { pollInterval?: number; timeout?: number } = {}): Promise<CrawlJob> {
+    return crawlWaiter(this.http, { url, ...req }, req.pollInterval, req.timeout);
   }
   /**
    * Retrieve crawl errors and robots.txt blocks.

@@ -192,7 +194,7 @@ export class FirecrawlClient {
    * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
    * @returns Job id and url.
    */
-  async startBatchScrape(urls: string[], opts?:
+  async startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse> {
     return startBatchScrape(this.http, urls, opts);
   }
   /**

@@ -223,7 +225,7 @@ export class FirecrawlClient {
    * @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
    * @returns Final job snapshot.
    */
-  async batchScrape(urls: string[], opts?:
+  async batchScrape(urls: string[], opts?: BatchScrapeOptions & { pollInterval?: number; timeout?: number }): Promise<BatchScrapeJob> {
     return batchWaiter(this.http, urls, opts);
   }
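The InferredJsonFromOptions change above swaps the bare infer<...> reference, which does not resolve to zod's infer helper in this position, for the namespaced zt.infer<...>. The practical effect for consumers is typed json output from a Zod schema. A hedged sketch under the same installation assumption as the earlier example; the scrape method and the json-format object shape are not shown in this diff and are assumed from the JsonFormat/ScrapeOptions types it references:

// Assumed usage: passing a Zod schema via a json format entry. With zt.infer in
// place, the result's json field should be typed from the schema instead of unknown.
import { z } from "zod";
import Firecrawl from "firecrawl";

const ProductSchema = z.object({ name: z.string(), price: z.number() });

async function scrapeTyped(client: Firecrawl) {
  const doc = await client.scrape("https://example.com/item", {
    formats: [{ type: "json", schema: ProductSchema }],
  });
  // doc.json is expected to be inferred as { name: string; price: number }.
  return doc.json;
}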
package/src/v2/methods/batch.ts
CHANGED
@@ -3,24 +3,12 @@ import {
   type BatchScrapeResponse,
   type CrawlErrorsResponse,
   type Document,
-  type
-  type WebhookConfig,
+  type BatchScrapeOptions,
 } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
 import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";

-export interface StartBatchOptions {
-  options?: ScrapeOptions;
-  webhook?: string | WebhookConfig;
-  appendToId?: string;
-  ignoreInvalidURLs?: boolean;
-  maxConcurrency?: number;
-  zeroDataRetention?: boolean;
-  integration?: string;
-  idempotencyKey?: string;
-}
-
 export async function startBatchScrape(
   http: HttpClient,
   urls: string[],

@@ -33,7 +21,7 @@ export async function startBatchScrape(
     zeroDataRetention,
     integration,
     idempotencyKey,
-  }:
+  }: BatchScrapeOptions = {}
 ): Promise<BatchScrapeResponse> {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
   const payload: Record<string, unknown> = { urls };

@@ -117,7 +105,7 @@ export async function waitForBatchCompletion(http: HttpClient, jobId: string, po
 export async function batchScrape(
   http: HttpClient,
   urls: string[],
-  opts:
+  opts: BatchScrapeOptions & { pollInterval?: number; timeout?: number } = {}
 ): Promise<BatchScrapeJob> {
   const start = await startBatchScrape(http, urls, opts);
   return waitForBatchCompletion(http, start.id, opts.pollInterval ?? 2, opts.timeout);
package/src/v2/methods/crawl.ts
CHANGED
@@ -4,31 +4,15 @@ import {
   type CrawlJob,
   type CrawlResponse,
   type Document,
-  type
-  type WebhookConfig,
+  type CrawlOptions,
 } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
 import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";

-export
+export type CrawlRequest = CrawlOptions & {
   url: string;
-
-  excludePaths?: string[] | null;
-  includePaths?: string[] | null;
-  maxDiscoveryDepth?: number | null;
-  sitemap?: "skip" | "include";
-  ignoreQueryParameters?: boolean;
-  limit?: number | null;
-  crawlEntireDomain?: boolean;
-  allowExternalLinks?: boolean;
-  allowSubdomains?: boolean;
-  delay?: number | null;
-  maxConcurrency?: number | null;
-  webhook?: string | WebhookConfig | null;
-  scrapeOptions?: ScrapeOptions | null;
-  zeroDataRetention?: boolean;
-}
+};

 function prepareCrawlPayload(request: CrawlRequest): Record<string, unknown> {
   if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty");
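With CrawlRequest now defined as CrawlOptions plus the target url, the crawl starter can take a caller's options object and spread it directly, which is exactly what the client.ts change above does. A small illustrative value of the new shape (the URL and paths are placeholders):

// Illustrative only: the same fields as before, now contributed by CrawlOptions.
const request: CrawlRequest = {
  url: "https://example.com",
  limit: 5,
  excludePaths: ["/admin/*"],
};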
package/src/v2/types.ts
CHANGED
@@ -196,6 +196,24 @@ export interface SearchRequest {
   scrapeOptions?: ScrapeOptions;
 }

+export interface CrawlOptions {
+  prompt?: string | null;
+  excludePaths?: string[] | null;
+  includePaths?: string[] | null;
+  maxDiscoveryDepth?: number | null;
+  sitemap?: "skip" | "include";
+  ignoreQueryParameters?: boolean;
+  limit?: number | null;
+  crawlEntireDomain?: boolean;
+  allowExternalLinks?: boolean;
+  allowSubdomains?: boolean;
+  delay?: number | null;
+  maxConcurrency?: number | null;
+  webhook?: string | WebhookConfig | null;
+  scrapeOptions?: ScrapeOptions | null;
+  zeroDataRetention?: boolean;
+}
+
 export interface CrawlResponse {
   id: string;
   url: string;

@@ -211,6 +229,17 @@ export interface CrawlJob {
   data: Document[];
 }

+export interface BatchScrapeOptions {
+  options?: ScrapeOptions;
+  webhook?: string | WebhookConfig;
+  appendToId?: string;
+  ignoreInvalidURLs?: boolean;
+  maxConcurrency?: number;
+  zeroDataRetention?: boolean;
+  integration?: string;
+  idempotencyKey?: string;
+}
+
 export interface BatchScrapeResponse {
   id: string;
   url: string;
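Because these option interfaces now live in src/v2/types.ts and are re-exported from the package root, user code can build reusable, typed option helpers. A hedged sketch; the helper and its values are hypothetical and not part of the SDK, and the import name again assumes the registry package firecrawl:

// Hypothetical helper: a typed factory for crawl options reused across jobs.
import type { CrawlOptions } from "firecrawl";

export function blogCrawlOptions(limit = 25): CrawlOptions {
  return {
    limit,
    includePaths: ["/blog/*"],
    sitemap: "include",
    ignoreQueryParameters: true,
  };
}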
package/tsup.config.ts
CHANGED
File without changes
package/dump.rdb
DELETED
Binary file