firecrawl 3.0.2 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +0 -0
- package/dist/{chunk-JFWW4BWA.js → chunk-WNGXI3ZW.js} +2 -2
- package/dist/index.cjs +27 -26
- package/dist/index.d.cts +55 -44
- package/dist/index.d.ts +55 -44
- package/dist/index.js +23 -22
- package/dist/{package-KYZ3HXR5.js → package-KMFB7KZD.js} +1 -1
- package/jest.config.js +0 -0
- package/package.json +1 -1
- package/src/v2/client.ts +11 -9
- package/src/v2/methods/batch.ts +3 -15
- package/src/v2/methods/crawl.ts +3 -19
- package/src/v2/methods/search.ts +29 -29
- package/src/v2/types.ts +52 -5
- package/tsup.config.ts +0 -0
- package/dump.rdb +0 -0
package/LICENSE
CHANGED
File without changes

package/dist/{chunk-JFWW4BWA.js → chunk-WNGXI3ZW.js}
CHANGED

@@ -7,8 +7,8 @@ var __commonJS = (cb, mod) => function __require() {
 var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
-      name: "firecrawl",
-      version: "3.0.2",
+      name: "@mendable/firecrawl-js",
+      version: "3.1.0",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
package/dist/index.cjs
CHANGED
@@ -34,8 +34,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 var require_package = __commonJS({
   "package.json"(exports2, module2) {
     module2.exports = {
-      name: "firecrawl",
-      version: "3.0.2",
+      name: "@mendable/firecrawl-js",
+      version: "3.1.0",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",

@@ -108,15 +108,15 @@ var require_package = __commonJS({
 });

 // src/index.ts
-var
-__export(
+var index_exports = {};
+__export(index_exports, {
   Firecrawl: () => Firecrawl,
   FirecrawlAppV1: () => FirecrawlApp,
   FirecrawlClient: () => FirecrawlClient,
   SdkError: () => SdkError,
-  default: () =>
+  default: () => index_default
 });
-module.exports = __toCommonJS(
+module.exports = __toCommonJS(index_exports);

 // src/v2/utils/httpClient.ts
 var import_axios = __toESM(require("axios"), 1);

@@ -327,6 +327,21 @@ function prepareSearchPayload(req) {
   }
   return payload;
 }
+function transformArray(arr) {
+  const results = [];
+  for (const item of arr) {
+    if (item && typeof item === "object") {
+      if ("markdown" in item || "html" in item || "rawHtml" in item || "links" in item || "screenshot" in item || "changeTracking" in item || "summary" in item || "json" in item) {
+        results.push(item);
+      } else {
+        results.push(item);
+      }
+    } else {
+      results.push({ url: item });
+    }
+  }
+  return results;
+}
 async function search(http, request) {
   const payload = prepareSearchPayload(request);
   try {

@@ -336,24 +351,9 @@ async function search(http, request) {
     }
     const data = res.data.data || {};
     const out = {};
-
-
-
-      const results = [];
-      for (const item of arr) {
-        if (item && typeof item === "object") {
-          if ("markdown" in item || "html" in item || "rawHtml" in item || "links" in item || "screenshot" in item || "changeTracking" in item || "summary" in item || "json" in item) {
-            results.push(item);
-          } else {
-            results.push({ url: item.url, title: item.title, description: item.description });
-          }
-        } else if (typeof item === "string") {
-          results.push({ url: item });
-        }
-      }
-      out[key] = results;
-    }
-  }
+    if (data.web) out.web = transformArray(data.web);
+    if (data.news) out.news = transformArray(data.news);
+    if (data.images) out.images = transformArray(data.images);
     return out;
   } catch (err) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "search");

@@ -826,6 +826,7 @@ var Watcher = class extends import_events.EventEmitter {
 };

 // src/v2/client.ts
+var zt = require("zod");
 var FirecrawlClient = class {
   http;
   /**

@@ -1015,7 +1016,7 @@ var FirecrawlClient = class {

 // src/v1/index.ts
 var import_axios3 = __toESM(require("axios"), 1);
-var
+var zt2 = require("zod");
 var import_zod_to_json_schema3 = require("zod-to-json-schema");

 // node_modules/typescript-event-target/dist/index.mjs

@@ -2354,7 +2355,7 @@ var Firecrawl = class extends FirecrawlClient {
     return this._v1;
   }
 };
-var
+var index_default = Firecrawl;
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
   Firecrawl,
package/dist/index.d.cts
CHANGED
@@ -1,5 +1,5 @@
 import * as zt from 'zod';
-import { ZodTypeAny
+import { ZodTypeAny } from 'zod';
 import { AxiosResponse, AxiosRequestHeaders } from 'axios';
 import { EventEmitter } from 'events';
 import { TypedEventTarget } from 'typescript-event-target';

@@ -138,15 +138,31 @@ interface Document {
     warning?: string;
     changeTracking?: Record<string, unknown>;
 }
-interface
+interface SearchResultWeb {
     url: string;
     title?: string;
     description?: string;
 }
+interface SearchResultNews {
+    title?: string;
+    url?: string;
+    snippet?: string;
+    date?: string;
+    imageUrl?: string;
+    position?: number;
+}
+interface SearchResultImages {
+    title?: string;
+    imageUrl?: string;
+    imageWidth?: number;
+    imageHeight?: number;
+    url?: string;
+    position?: number;
+}
 interface SearchData {
-    web?: Array<
-    news?: Array<
-    images?: Array<
+    web?: Array<SearchResultWeb | Document>;
+    news?: Array<SearchResultNews | Document>;
+    images?: Array<SearchResultImages | Document>;
 }
 interface SearchRequest {
     query: string;

@@ -160,6 +176,23 @@ interface SearchRequest {
     timeout?: number;
     scrapeOptions?: ScrapeOptions;
 }
+interface CrawlOptions {
+    prompt?: string | null;
+    excludePaths?: string[] | null;
+    includePaths?: string[] | null;
+    maxDiscoveryDepth?: number | null;
+    sitemap?: "skip" | "include";
+    ignoreQueryParameters?: boolean;
+    limit?: number | null;
+    crawlEntireDomain?: boolean;
+    allowExternalLinks?: boolean;
+    allowSubdomains?: boolean;
+    delay?: number | null;
+    maxConcurrency?: number | null;
+    webhook?: string | WebhookConfig | null;
+    scrapeOptions?: ScrapeOptions | null;
+    zeroDataRetention?: boolean;
+}
 interface CrawlResponse$1 {
     id: string;
     url: string;

@@ -173,6 +206,16 @@ interface CrawlJob {
     next?: string | null;
     data: Document[];
 }
+interface BatchScrapeOptions {
+    options?: ScrapeOptions;
+    webhook?: string | WebhookConfig;
+    appendToId?: string;
+    ignoreInvalidURLs?: boolean;
+    maxConcurrency?: number;
+    zeroDataRetention?: boolean;
+    integration?: string;
+    idempotencyKey?: string;
+}
 interface BatchScrapeResponse$1 {
     id: string;
     url: string;

@@ -188,7 +231,7 @@ interface BatchScrapeJob {
     data: Document[];
 }
 interface MapData {
-    links:
+    links: SearchResultWeb[];
 }
 interface MapOptions {
     search?: string;

@@ -274,38 +317,6 @@ declare class HttpClient {
     prepareHeaders(idempotencyKey?: string): Record<string, string>;
 }

-interface CrawlRequest {
-    url: string;
-    prompt?: string | null;
-    excludePaths?: string[] | null;
-    includePaths?: string[] | null;
-    maxDiscoveryDepth?: number | null;
-    sitemap?: "skip" | "include";
-    ignoreQueryParameters?: boolean;
-    limit?: number | null;
-    crawlEntireDomain?: boolean;
-    allowExternalLinks?: boolean;
-    allowSubdomains?: boolean;
-    delay?: number | null;
-    maxConcurrency?: number | null;
-    webhook?: string | WebhookConfig | null;
-    scrapeOptions?: ScrapeOptions | null;
-    zeroDataRetention?: boolean;
-}
-declare function startCrawl(http: HttpClient, request: CrawlRequest): Promise<CrawlResponse$1>;
-
-interface StartBatchOptions {
-    options?: ScrapeOptions;
-    webhook?: string | WebhookConfig;
-    appendToId?: string;
-    ignoreInvalidURLs?: boolean;
-    maxConcurrency?: number;
-    zeroDataRetention?: boolean;
-    integration?: string;
-    idempotencyKey?: string;
-}
-declare function startBatchScrape(http: HttpClient, urls: string[], { options, webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention, integration, idempotencyKey, }?: StartBatchOptions): Promise<BatchScrapeResponse$1>;
-
 declare function prepareExtractPayload(args: {
     urls?: string[];
     prompt?: string;

@@ -349,7 +360,7 @@ type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[] ? Ex
 }>["schema"] : never;
 type InferredJsonFromOptions<Opts> = Opts extends {
     formats?: infer Fmts;
-} ? ExtractJsonSchemaFromFormats<Fmts> extends ZodTypeAny ? infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown;
+} ? ExtractJsonSchemaFromFormats<Fmts> extends zt.ZodTypeAny ? zt.infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown;
 /**
  * Configuration for the v2 client transport.
  */

@@ -405,7 +416,7 @@ declare class FirecrawlClient {
      * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
      * @returns Job id and url.
      */
-    startCrawl(url: string, req?:
+    startCrawl(url: string, req?: CrawlOptions): Promise<CrawlResponse$1>;
     /**
      * Get the status and partial data of a crawl job.
      * @param jobId Crawl job id.

@@ -423,7 +434,7 @@ declare class FirecrawlClient {
      * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
      * @returns Final job snapshot.
      */
-    crawl(url: string, req?:
+    crawl(url: string, req?: CrawlOptions & {
        pollInterval?: number;
        timeout?: number;
    }): Promise<CrawlJob>;

@@ -448,7 +459,7 @@ declare class FirecrawlClient {
      * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
      * @returns Job id and url.
      */
-    startBatchScrape(urls: string[], opts?:
+    startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse$1>;
     /**
      * Get the status and partial data of a batch scrape job.
      * @param jobId Batch job id.

@@ -471,7 +482,7 @@ declare class FirecrawlClient {
      * @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
      * @returns Final job snapshot.
      */
-    batchScrape(urls: string[], opts?:
+    batchScrape(urls: string[], opts?: BatchScrapeOptions & {
        pollInterval?: number;
        timeout?: number;
    }): Promise<BatchScrapeJob>;

@@ -1331,4 +1342,4 @@ declare class Firecrawl extends FirecrawlClient {
     get v1(): FirecrawlApp;
 }

-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type BatchScrapeJob, type BatchScrapeResponse$1 as BatchScrapeResponse, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
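The declaration changes above make the new option and result types part of the public type surface. As a minimal sketch, assuming the package is imported under the name it is diffed as here (firecrawl), a consumer can pull them in as type-only imports; the sample value is illustrative:

import type {
  CrawlOptions,
  BatchScrapeOptions,
  SearchData,
  SearchResultWeb,
  SearchResultNews,
  SearchResultImages,
} from "firecrawl";

// SearchData arrays may mix plain search hits with full Documents when scrapeOptions is used.
const sample: SearchData = {
  web: [{ url: "https://example.com", title: "Example Domain" }],
};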
package/dist/index.d.ts
CHANGED
Same changes as package/dist/index.d.cts above; the .d.ts and .d.cts declaration files carry identical content, so the diff is identical hunk for hunk.
package/dist/index.js
CHANGED
@@ -1,6 +1,6 @@
 import {
   require_package
-} from "./chunk-JFWW4BWA.js";
+} from "./chunk-WNGXI3ZW.js";

 // src/v2/utils/httpClient.ts
 import axios from "axios";

@@ -211,6 +211,21 @@ function prepareSearchPayload(req) {
   }
   return payload;
 }
+function transformArray(arr) {
+  const results = [];
+  for (const item of arr) {
+    if (item && typeof item === "object") {
+      if ("markdown" in item || "html" in item || "rawHtml" in item || "links" in item || "screenshot" in item || "changeTracking" in item || "summary" in item || "json" in item) {
+        results.push(item);
+      } else {
+        results.push(item);
+      }
+    } else {
+      results.push({ url: item });
+    }
+  }
+  return results;
+}
 async function search(http, request) {
   const payload = prepareSearchPayload(request);
   try {

@@ -220,24 +235,9 @@ async function search(http, request) {
     }
     const data = res.data.data || {};
     const out = {};
-
-
-
-      const results = [];
-      for (const item of arr) {
-        if (item && typeof item === "object") {
-          if ("markdown" in item || "html" in item || "rawHtml" in item || "links" in item || "screenshot" in item || "changeTracking" in item || "summary" in item || "json" in item) {
-            results.push(item);
-          } else {
-            results.push({ url: item.url, title: item.title, description: item.description });
-          }
-        } else if (typeof item === "string") {
-          results.push({ url: item });
-        }
-      }
-      out[key] = results;
-    }
-  }
+    if (data.web) out.web = transformArray(data.web);
+    if (data.news) out.news = transformArray(data.news);
+    if (data.images) out.images = transformArray(data.images);
     return out;
   } catch (err) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "search");

@@ -710,6 +710,7 @@ var Watcher = class extends EventEmitter {
 };

 // src/v2/client.ts
+import "zod";
 var FirecrawlClient = class {
   http;
   /**

@@ -931,7 +932,7 @@ var FirecrawlApp = class {
     if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
       return process.env.npm_package_version;
     }
-    const packageJson = await import("./package-KYZ3HXR5.js");
+    const packageJson = await import("./package-KMFB7KZD.js");
     return packageJson.default.version;
   } catch (error) {
     const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);

@@ -2238,11 +2239,11 @@ var Firecrawl = class extends FirecrawlClient {
     return this._v1;
   }
 };
-var
+var index_default = Firecrawl;
 export {
   Firecrawl,
   FirecrawlApp as FirecrawlAppV1,
   FirecrawlClient,
   SdkError,
-
+  index_default as default
 };
package/jest.config.js
CHANGED
File without changes
package/package.json
CHANGED
package/src/v2/client.ts
CHANGED
@@ -34,10 +34,12 @@ import type {
   BatchScrapeResponse,
   BatchScrapeJob,
   ExtractResponse,
+  CrawlOptions,
+  BatchScrapeOptions,
 } from "./types";
 import { Watcher } from "./watcher";
 import type { WatcherOptions } from "./watcher";
-import
+import * as zt from "zod";

 // Helper types to infer the `json` field from a Zod schema included in `formats`
 type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[]

@@ -45,8 +47,8 @@ type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[]
   : never;

 type InferredJsonFromOptions<Opts> = Opts extends { formats?: infer Fmts }
-  ? ExtractJsonSchemaFromFormats<Fmts> extends ZodTypeAny
-    ?
+  ? ExtractJsonSchemaFromFormats<Fmts> extends zt.ZodTypeAny
+    ? zt.infer<ExtractJsonSchemaFromFormats<Fmts>>
     : unknown
   : unknown;

@@ -136,8 +138,8 @@ export class FirecrawlClient {
    * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
    * @returns Job id and url.
    */
-  async startCrawl(url: string, req:
-    return startCrawl(this.http, { url, ...
+  async startCrawl(url: string, req: CrawlOptions = {}): Promise<CrawlResponse> {
+    return startCrawl(this.http, { url, ...req });
   }
   /**
    * Get the status and partial data of a crawl job.

@@ -160,8 +162,8 @@ export class FirecrawlClient {
    * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
    * @returns Final job snapshot.
    */
-  async crawl(url: string, req:
-    return crawlWaiter(this.http, { url, ...
+  async crawl(url: string, req: CrawlOptions & { pollInterval?: number; timeout?: number } = {}): Promise<CrawlJob> {
+    return crawlWaiter(this.http, { url, ...req }, req.pollInterval, req.timeout);
   }
   /**
    * Retrieve crawl errors and robots.txt blocks.

@@ -192,7 +194,7 @@ export class FirecrawlClient {
    * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
    * @returns Job id and url.
    */
-  async startBatchScrape(urls: string[], opts?:
+  async startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse> {
     return startBatchScrape(this.http, urls, opts);
   }
   /**

@@ -223,7 +225,7 @@ export class FirecrawlClient {
    * @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
    * @returns Final job snapshot.
    */
-  async batchScrape(urls: string[], opts?:
+  async batchScrape(urls: string[], opts?: BatchScrapeOptions & { pollInterval?: number; timeout?: number }): Promise<BatchScrapeJob> {
     return batchWaiter(this.http, urls, opts);
   }
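A minimal usage sketch of the reworked client signatures above. The URL, limits, and format values are illustrative, and the apiKey handling is an assumption rather than something shown in this diff:

import Firecrawl, { type CrawlOptions } from "firecrawl";

// Assumes FIRECRAWL_API_KEY is set in the environment.
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY ?? "" });

// CrawlOptions replaces the old inline request type; pollInterval/timeout are waiter-only extras.
const options: CrawlOptions & { pollInterval?: number; timeout?: number } = {
  limit: 10,
  includePaths: ["/blog/.*"],
  scrapeOptions: { formats: ["markdown"] },
  pollInterval: 2,
  timeout: 120,
};

async function run() {
  // startCrawl returns the job id immediately; crawl polls until the job finishes.
  const started = await firecrawl.startCrawl("https://example.com", options);
  console.log("crawl job:", started.id);

  const job = await firecrawl.crawl("https://example.com", options);
  console.log("documents:", job.data.length);
}

run().catch(console.error);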
package/src/v2/methods/batch.ts
CHANGED
@@ -3,24 +3,12 @@ import {
   type BatchScrapeResponse,
   type CrawlErrorsResponse,
   type Document,
-  type
-  type WebhookConfig,
+  type BatchScrapeOptions,
 } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
 import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";

-export interface StartBatchOptions {
-  options?: ScrapeOptions;
-  webhook?: string | WebhookConfig;
-  appendToId?: string;
-  ignoreInvalidURLs?: boolean;
-  maxConcurrency?: number;
-  zeroDataRetention?: boolean;
-  integration?: string;
-  idempotencyKey?: string;
-}
-
 export async function startBatchScrape(
   http: HttpClient,
   urls: string[],

@@ -33,7 +21,7 @@ export async function startBatchScrape(
     zeroDataRetention,
     integration,
     idempotencyKey,
-  }:
+  }: BatchScrapeOptions = {}
 ): Promise<BatchScrapeResponse> {
   if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
   const payload: Record<string, unknown> = { urls };

@@ -117,7 +105,7 @@ export async function waitForBatchCompletion(http: HttpClient, jobId: string, po
 export async function batchScrape(
   http: HttpClient,
   urls: string[],
-  opts:
+  opts: BatchScrapeOptions & { pollInterval?: number; timeout?: number } = {}
 ): Promise<BatchScrapeJob> {
   const start = await startBatchScrape(http, urls, opts);
   return waitForBatchCompletion(http, start.id, opts.pollInterval ?? 2, opts.timeout);
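The batch helpers now share the same BatchScrapeOptions shape end to end. A sketch of calling the waiter variant through the client, with illustrative values and the same assumed client construction as in the previous sketch:

import Firecrawl, { type BatchScrapeOptions } from "firecrawl";

const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY ?? "" });

// BatchScrapeOptions carries webhook, concurrency, and idempotency settings alongside the
// per-page scrape options; pollInterval/timeout remain waiter-only additions.
const opts: BatchScrapeOptions & { pollInterval?: number; timeout?: number } = {
  options: { formats: ["markdown"] },
  maxConcurrency: 2,
  ignoreInvalidURLs: true,
  pollInterval: 2,
};

async function run() {
  const job = await firecrawl.batchScrape(
    ["https://example.com/a", "https://example.com/b"],
    opts,
  );
  console.log(job.data.length, "pages scraped");
}

run().catch(console.error);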
package/src/v2/methods/crawl.ts
CHANGED
@@ -4,31 +4,15 @@ import {
   type CrawlJob,
   type CrawlResponse,
   type Document,
-  type
-  type WebhookConfig,
+  type CrawlOptions,
 } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
 import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";

-export
+export type CrawlRequest = CrawlOptions & {
   url: string;
-
-  excludePaths?: string[] | null;
-  includePaths?: string[] | null;
-  maxDiscoveryDepth?: number | null;
-  sitemap?: "skip" | "include";
-  ignoreQueryParameters?: boolean;
-  limit?: number | null;
-  crawlEntireDomain?: boolean;
-  allowExternalLinks?: boolean;
-  allowSubdomains?: boolean;
-  delay?: number | null;
-  maxConcurrency?: number | null;
-  webhook?: string | WebhookConfig | null;
-  scrapeOptions?: ScrapeOptions | null;
-  zeroDataRetention?: boolean;
-}
+};

 function prepareCrawlPayload(request: CrawlRequest): Record<string, unknown> {
   if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty");
package/src/v2/methods/search.ts
CHANGED
@@ -1,4 +1,4 @@
-import { type Document, type SearchData, type SearchRequest, type
+import { type Document, type SearchData, type SearchRequest, type SearchResultWeb, type ScrapeOptions, type SearchResultNews, type SearchResultImages } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
 import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";

@@ -23,6 +23,31 @@ function prepareSearchPayload(req: SearchRequest): Record<string, unknown> {
   return payload;
 }

+function transformArray<ResultType>(arr: any[]): Array<ResultType | Document> {
+  const results: Array<ResultType | Document> = [] as any;
+  for (const item of arr) {
+    if (item && typeof item === "object") {
+      if (
+        "markdown" in item ||
+        "html" in item ||
+        "rawHtml" in item ||
+        "links" in item ||
+        "screenshot" in item ||
+        "changeTracking" in item ||
+        "summary" in item ||
+        "json" in item
+      ) {
+        results.push(item as Document);
+      } else {
+        results.push(item as ResultType);
+      }
+    } else {
+      results.push({ url: item } as ResultType);
+    }
+  }
+  return results;
+}
+
 export async function search(http: HttpClient, request: SearchRequest): Promise<SearchData> {
   const payload = prepareSearchPayload(request);
   try {

@@ -32,34 +57,9 @@ export async function search(http: HttpClient, request: SearchRequest): Promise<
     }
     const data = (res.data.data || {}) as Record<string, any>;
     const out: SearchData = {};
-
-
-
-      const results: Array<SearchResult | Document> = [] as any;
-      for (const item of arr) {
-        if (item && typeof item === "object") {
-          // If scraped page fields present, treat as Document; otherwise SearchResult
-          if (
-            "markdown" in item ||
-            "html" in item ||
-            "rawHtml" in item ||
-            "links" in item ||
-            "screenshot" in item ||
-            "changeTracking" in item ||
-            "summary" in item ||
-            "json" in item
-          ) {
-            results.push(item as Document);
-          } else {
-            results.push({ url: item.url, title: item.title, description: item.description } as SearchResult);
-          }
-        } else if (typeof item === "string") {
-          results.push({ url: item } as SearchResult);
-        }
-      }
-      (out as any)[key] = results;
-    }
-  }
+    if (data.web) out.web = transformArray<SearchResultWeb>(data.web);
+    if (data.news) out.news = transformArray<SearchResultNews>(data.news);
+    if (data.images) out.images = transformArray<SearchResultImages>(data.images);
     return out;
   } catch (err: any) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "search");
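Because transformArray only treats an entry as a Document when it carries scraped-content fields, each results array in SearchData can mix lightweight hits with full Documents. A sketch of the equivalent narrowing on the consumer side; isDocument is a hypothetical helper, not part of the SDK:

import type {
  Document,
  SearchData,
  SearchResultWeb,
  SearchResultNews,
  SearchResultImages,
} from "firecrawl";

// Mirrors the key check used by transformArray: scraped pages expose content fields, bare hits do not.
function isDocument(
  item: SearchResultWeb | SearchResultNews | SearchResultImages | Document,
): item is Document {
  return (
    "markdown" in item ||
    "html" in item ||
    "rawHtml" in item ||
    "links" in item ||
    "screenshot" in item ||
    "changeTracking" in item ||
    "summary" in item ||
    "json" in item
  );
}

function logWebResults(data: SearchData): void {
  for (const item of data.web ?? []) {
    if (isDocument(item)) {
      console.log("scraped page:", (item.markdown ?? "").length, "chars of markdown");
    } else {
      console.log("search hit:", item.url, item.title ?? "");
    }
  }
}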
package/src/v2/types.ts
CHANGED
@@ -173,16 +173,34 @@ export interface Document {
   changeTracking?: Record<string, unknown>;
 }

-export interface
+export interface SearchResultWeb {
   url: string;
   title?: string;
   description?: string;
 }

+export interface SearchResultNews {
+  title?: string;
+  url?: string;
+  snippet?: string;
+  date?: string;
+  imageUrl?: string;
+  position?: number;
+}
+
+export interface SearchResultImages {
+  title?: string;
+  imageUrl?: string;
+  imageWidth?: number;
+  imageHeight?: number;
+  url?: string;
+  position?: number;
+}
+
 export interface SearchData {
-  web?: Array<
-  news?: Array<
-  images?: Array<
+  web?: Array<SearchResultWeb | Document>;
+  news?: Array<SearchResultNews | Document>;
+  images?: Array<SearchResultImages | Document>;
 }

 export interface SearchRequest {

@@ -196,6 +214,24 @@ export interface SearchRequest {
   scrapeOptions?: ScrapeOptions;
 }

+export interface CrawlOptions {
+  prompt?: string | null;
+  excludePaths?: string[] | null;
+  includePaths?: string[] | null;
+  maxDiscoveryDepth?: number | null;
+  sitemap?: "skip" | "include";
+  ignoreQueryParameters?: boolean;
+  limit?: number | null;
+  crawlEntireDomain?: boolean;
+  allowExternalLinks?: boolean;
+  allowSubdomains?: boolean;
+  delay?: number | null;
+  maxConcurrency?: number | null;
+  webhook?: string | WebhookConfig | null;
+  scrapeOptions?: ScrapeOptions | null;
+  zeroDataRetention?: boolean;
+}
+
 export interface CrawlResponse {
   id: string;
   url: string;

@@ -211,6 +247,17 @@ export interface CrawlJob {
   data: Document[];
 }

+export interface BatchScrapeOptions {
+  options?: ScrapeOptions;
+  webhook?: string | WebhookConfig;
+  appendToId?: string;
+  ignoreInvalidURLs?: boolean;
+  maxConcurrency?: number;
+  zeroDataRetention?: boolean;
+  integration?: string;
+  idempotencyKey?: string;
+}
+
 export interface BatchScrapeResponse {
   id: string;
   url: string;

@@ -228,7 +275,7 @@ export interface BatchScrapeJob {
 }

 export interface MapData {
-  links:
+  links: SearchResultWeb[];
 }

 export interface MapOptions {
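The news and image result shapes are new in this release, and all of their fields are optional. A small illustrative sketch of the shapes; the values are made up, not taken from an API response:

import type { SearchResultNews, SearchResultImages } from "firecrawl";

// News hits carry snippet/date metadata; image hits carry dimensions and a source url.
const newsHit: SearchResultNews = {
  title: "Example headline",
  url: "https://example.com/news/article",
  snippet: "Short preview text for the article.",
  date: "2025-01-01",
  position: 1,
};

const imageHit: SearchResultImages = {
  title: "Example image",
  imageUrl: "https://example.com/image.png",
  imageWidth: 512,
  imageHeight: 512,
  url: "https://example.com/page-with-image",
  position: 1,
};

console.log(newsHit.title, imageHit.imageUrl);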
package/tsup.config.ts
CHANGED
File without changes
package/dump.rdb
DELETED
Binary file