@mendable/firecrawl 1.20.1 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +28 -5
- package/dist/index.d.cts +36 -6
- package/dist/index.d.ts +36 -6
- package/dist/index.js +27 -4
- package/package.json +8 -6
- package/src/index.ts +59 -7
- package/tsup.config.ts +9 -0
package/dist/index.cjs
CHANGED
|
@@ -38,8 +38,15 @@ module.exports = __toCommonJS(src_exports);
|
|
|
38
38
|
var import_axios = __toESM(require("axios"), 1);
|
|
39
39
|
var zt = __toESM(require("zod"), 1);
|
|
40
40
|
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
41
|
-
var import_isows = require("isows");
|
|
42
|
-
var import_typescript_event_target = require("typescript-event-target");
|
|
41
|
+
|
|
42
|
+
// node_modules/typescript-event-target/dist/index.mjs
|
|
43
|
+
var e = class extends EventTarget {
|
|
44
|
+
dispatchTypedEvent(s, t) {
|
|
45
|
+
return super.dispatchEvent(t);
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
// src/index.ts
|
|
43
50
|
var FirecrawlError = class extends Error {
|
|
44
51
|
statusCode;
|
|
45
52
|
details;
|
|
@@ -419,6 +426,7 @@ var FirecrawlApp = class {
|
|
|
419
426
|
* @param pollInterval - Time in seconds for job status checks.
|
|
420
427
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
421
428
|
* @param webhook - Optional webhook for the batch scrape.
|
|
429
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
422
430
|
* @returns The response from the crawl operation.
|
|
423
431
|
*/
|
|
424
432
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
@@ -922,10 +930,25 @@ var FirecrawlApp = class {
|
|
|
922
930
|
*/
|
|
923
931
|
async asyncDeepResearch(query, params) {
|
|
924
932
|
const headers = this.prepareHeaders();
|
|
933
|
+
let jsonData = { query, ...params };
|
|
934
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
935
|
+
let schema = jsonData.jsonOptions.schema;
|
|
936
|
+
try {
|
|
937
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
938
|
+
} catch (error) {
|
|
939
|
+
}
|
|
940
|
+
jsonData = {
|
|
941
|
+
...jsonData,
|
|
942
|
+
jsonOptions: {
|
|
943
|
+
...jsonData.jsonOptions,
|
|
944
|
+
schema
|
|
945
|
+
}
|
|
946
|
+
};
|
|
947
|
+
}
|
|
925
948
|
try {
|
|
926
949
|
const response = await this.postRequest(
|
|
927
950
|
`${this.apiUrl}/v1/deep-research`,
|
|
928
|
-
{ query, ...params },
|
|
951
|
+
jsonData,
|
|
929
952
|
headers
|
|
930
953
|
);
|
|
931
954
|
if (response.status === 200) {
|
|
@@ -1176,7 +1199,7 @@ var FirecrawlApp = class {
|
|
|
1176
1199
|
return { success: false, error: "Internal server error." };
|
|
1177
1200
|
}
|
|
1178
1201
|
};
|
|
1179
|
-
var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget {
|
|
1202
|
+
var CrawlWatcher = class extends e {
|
|
1180
1203
|
ws;
|
|
1181
1204
|
data;
|
|
1182
1205
|
status;
|
|
@@ -1185,7 +1208,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
|
|
|
1185
1208
|
super();
|
|
1186
1209
|
this.id = id;
|
|
1187
1210
|
const wsUrl = app.apiUrl.replace(/^http/, "ws");
|
|
1188
|
-
this.ws = new import_isows.WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
|
|
1211
|
+
this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
|
|
1189
1212
|
this.status = "scraping";
|
|
1190
1213
|
this.data = [];
|
|
1191
1214
|
const messageHandler = (msg) => {
|
package/dist/index.d.cts
CHANGED
|
@@ -65,6 +65,11 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
65
65
|
screenshot?: string;
|
|
66
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
67
67
|
actions: ActionsSchema;
|
|
68
|
+
compare?: {
|
|
69
|
+
previousScrapeAt: string | null;
|
|
70
|
+
changeStatus: "new" | "same" | "changed" | "removed";
|
|
71
|
+
visibility: "visible" | "hidden";
|
|
72
|
+
};
|
|
68
73
|
title?: string;
|
|
69
74
|
description?: string;
|
|
70
75
|
}
|
|
@@ -73,7 +78,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
73
78
|
* Defines the options and configurations available for scraping web content.
|
|
74
79
|
*/
|
|
75
80
|
interface CrawlScrapeOptions {
|
|
76
|
-
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
81
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
|
|
77
82
|
headers?: Record<string, string>;
|
|
78
83
|
includeTags?: string[];
|
|
79
84
|
excludeTags?: string[];
|
|
@@ -132,6 +137,14 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
132
137
|
}
|
|
133
138
|
interface ActionsResult {
|
|
134
139
|
screenshots: string[];
|
|
140
|
+
scrapes: ({
|
|
141
|
+
url: string;
|
|
142
|
+
html: string;
|
|
143
|
+
})[];
|
|
144
|
+
javascriptReturns: {
|
|
145
|
+
type: string;
|
|
146
|
+
value: unknown;
|
|
147
|
+
}[];
|
|
135
148
|
}
|
|
136
149
|
/**
|
|
137
150
|
* Response interface for scraping operations.
|
|
@@ -326,7 +339,7 @@ interface CrawlErrorsResponse {
|
|
|
326
339
|
* Parameters for deep research operations.
|
|
327
340
|
* Defines options for conducting deep research on a query.
|
|
328
341
|
*/
|
|
329
|
-
interface DeepResearchParams {
|
|
342
|
+
interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
|
|
330
343
|
/**
|
|
331
344
|
* Maximum depth of research iterations (1-10)
|
|
332
345
|
* @default 7
|
|
@@ -343,9 +356,25 @@ interface DeepResearchParams {
|
|
|
343
356
|
*/
|
|
344
357
|
maxUrls?: number;
|
|
345
358
|
/**
|
|
346
|
-
* Experimental flag for streaming steps
|
|
359
|
+
* The prompt to use for the final analysis
|
|
360
|
+
*/
|
|
361
|
+
analysisPrompt?: string;
|
|
362
|
+
/**
|
|
363
|
+
* The system prompt to use for the research agent
|
|
364
|
+
*/
|
|
365
|
+
systemPrompt?: string;
|
|
366
|
+
/**
|
|
367
|
+
* The formats to use for the final analysis
|
|
368
|
+
*/
|
|
369
|
+
formats?: ("markdown" | "json")[];
|
|
370
|
+
/**
|
|
371
|
+
* The JSON options to use for the final analysis
|
|
347
372
|
*/
|
|
348
|
-
__experimental_streamSteps?: boolean;
|
|
373
|
+
jsonOptions?: {
|
|
374
|
+
prompt?: string;
|
|
375
|
+
schema?: LLMSchema;
|
|
376
|
+
systemPrompt?: string;
|
|
377
|
+
};
|
|
349
378
|
}
|
|
350
379
|
/**
|
|
351
380
|
* Response interface for deep research operations.
|
|
@@ -513,6 +542,7 @@ declare class FirecrawlApp {
|
|
|
513
542
|
* @param pollInterval - Time in seconds for job status checks.
|
|
514
543
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
515
544
|
* @param webhook - Optional webhook for the batch scrape.
|
|
545
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
516
546
|
* @returns The response from the crawl operation.
|
|
517
547
|
*/
|
|
518
548
|
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
@@ -614,7 +644,7 @@ declare class FirecrawlApp {
|
|
|
614
644
|
* @param onSource - Optional callback to receive source updates in real-time.
|
|
615
645
|
* @returns The final research results.
|
|
616
646
|
*/
|
|
617
|
-
deepResearch(query: string, params: DeepResearchParams, onActivity?: (activity: {
|
|
647
|
+
deepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>, onActivity?: (activity: {
|
|
618
648
|
type: string;
|
|
619
649
|
status: string;
|
|
620
650
|
message: string;
|
|
@@ -631,7 +661,7 @@ declare class FirecrawlApp {
|
|
|
631
661
|
* @param params - Parameters for the deep research operation.
|
|
632
662
|
* @returns The response containing the research job ID.
|
|
633
663
|
*/
|
|
634
|
-
asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse>;
|
|
664
|
+
asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse>;
|
|
635
665
|
/**
|
|
636
666
|
* Checks the status of a deep research operation.
|
|
637
667
|
* @param id - The ID of the deep research operation.
|
package/dist/index.d.ts
CHANGED
|
@@ -65,6 +65,11 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
65
65
|
screenshot?: string;
|
|
66
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
67
67
|
actions: ActionsSchema;
|
|
68
|
+
compare?: {
|
|
69
|
+
previousScrapeAt: string | null;
|
|
70
|
+
changeStatus: "new" | "same" | "changed" | "removed";
|
|
71
|
+
visibility: "visible" | "hidden";
|
|
72
|
+
};
|
|
68
73
|
title?: string;
|
|
69
74
|
description?: string;
|
|
70
75
|
}
|
|
@@ -73,7 +78,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
73
78
|
* Defines the options and configurations available for scraping web content.
|
|
74
79
|
*/
|
|
75
80
|
interface CrawlScrapeOptions {
|
|
76
|
-
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
81
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
|
|
77
82
|
headers?: Record<string, string>;
|
|
78
83
|
includeTags?: string[];
|
|
79
84
|
excludeTags?: string[];
|
|
@@ -132,6 +137,14 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
132
137
|
}
|
|
133
138
|
interface ActionsResult {
|
|
134
139
|
screenshots: string[];
|
|
140
|
+
scrapes: ({
|
|
141
|
+
url: string;
|
|
142
|
+
html: string;
|
|
143
|
+
})[];
|
|
144
|
+
javascriptReturns: {
|
|
145
|
+
type: string;
|
|
146
|
+
value: unknown;
|
|
147
|
+
}[];
|
|
135
148
|
}
|
|
136
149
|
/**
|
|
137
150
|
* Response interface for scraping operations.
|
|
@@ -326,7 +339,7 @@ interface CrawlErrorsResponse {
|
|
|
326
339
|
* Parameters for deep research operations.
|
|
327
340
|
* Defines options for conducting deep research on a query.
|
|
328
341
|
*/
|
|
329
|
-
interface DeepResearchParams {
|
|
342
|
+
interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
|
|
330
343
|
/**
|
|
331
344
|
* Maximum depth of research iterations (1-10)
|
|
332
345
|
* @default 7
|
|
@@ -343,9 +356,25 @@ interface DeepResearchParams {
|
|
|
343
356
|
*/
|
|
344
357
|
maxUrls?: number;
|
|
345
358
|
/**
|
|
346
|
-
* Experimental flag for streaming steps
|
|
359
|
+
* The prompt to use for the final analysis
|
|
360
|
+
*/
|
|
361
|
+
analysisPrompt?: string;
|
|
362
|
+
/**
|
|
363
|
+
* The system prompt to use for the research agent
|
|
364
|
+
*/
|
|
365
|
+
systemPrompt?: string;
|
|
366
|
+
/**
|
|
367
|
+
* The formats to use for the final analysis
|
|
368
|
+
*/
|
|
369
|
+
formats?: ("markdown" | "json")[];
|
|
370
|
+
/**
|
|
371
|
+
* The JSON options to use for the final analysis
|
|
347
372
|
*/
|
|
348
|
-
__experimental_streamSteps?: boolean;
|
|
373
|
+
jsonOptions?: {
|
|
374
|
+
prompt?: string;
|
|
375
|
+
schema?: LLMSchema;
|
|
376
|
+
systemPrompt?: string;
|
|
377
|
+
};
|
|
349
378
|
}
|
|
350
379
|
/**
|
|
351
380
|
* Response interface for deep research operations.
|
|
@@ -513,6 +542,7 @@ declare class FirecrawlApp {
|
|
|
513
542
|
* @param pollInterval - Time in seconds for job status checks.
|
|
514
543
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
515
544
|
* @param webhook - Optional webhook for the batch scrape.
|
|
545
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
516
546
|
* @returns The response from the crawl operation.
|
|
517
547
|
*/
|
|
518
548
|
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
@@ -614,7 +644,7 @@ declare class FirecrawlApp {
|
|
|
614
644
|
* @param onSource - Optional callback to receive source updates in real-time.
|
|
615
645
|
* @returns The final research results.
|
|
616
646
|
*/
|
|
617
|
-
deepResearch(query: string, params: DeepResearchParams, onActivity?: (activity: {
|
|
647
|
+
deepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>, onActivity?: (activity: {
|
|
618
648
|
type: string;
|
|
619
649
|
status: string;
|
|
620
650
|
message: string;
|
|
@@ -631,7 +661,7 @@ declare class FirecrawlApp {
|
|
|
631
661
|
* @param params - Parameters for the deep research operation.
|
|
632
662
|
* @returns The response containing the research job ID.
|
|
633
663
|
*/
|
|
634
|
-
asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse>;
|
|
664
|
+
asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse>;
|
|
635
665
|
/**
|
|
636
666
|
* Checks the status of a deep research operation.
|
|
637
667
|
* @param id - The ID of the deep research operation.
|
package/dist/index.js
CHANGED
|
@@ -2,8 +2,15 @@
|
|
|
2
2
|
import axios, { AxiosError } from "axios";
|
|
3
3
|
import * as zt from "zod";
|
|
4
4
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
5
|
-
import { WebSocket } from "isows";
|
|
6
|
-
import { TypedEventTarget } from "typescript-event-target";
|
|
5
|
+
|
|
6
|
+
// node_modules/typescript-event-target/dist/index.mjs
|
|
7
|
+
var e = class extends EventTarget {
|
|
8
|
+
dispatchTypedEvent(s, t) {
|
|
9
|
+
return super.dispatchEvent(t);
|
|
10
|
+
}
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
// src/index.ts
|
|
7
14
|
var FirecrawlError = class extends Error {
|
|
8
15
|
statusCode;
|
|
9
16
|
details;
|
|
@@ -383,6 +390,7 @@ var FirecrawlApp = class {
|
|
|
383
390
|
* @param pollInterval - Time in seconds for job status checks.
|
|
384
391
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
385
392
|
* @param webhook - Optional webhook for the batch scrape.
|
|
393
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
386
394
|
* @returns The response from the crawl operation.
|
|
387
395
|
*/
|
|
388
396
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
@@ -886,10 +894,25 @@ var FirecrawlApp = class {
|
|
|
886
894
|
*/
|
|
887
895
|
async asyncDeepResearch(query, params) {
|
|
888
896
|
const headers = this.prepareHeaders();
|
|
897
|
+
let jsonData = { query, ...params };
|
|
898
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
899
|
+
let schema = jsonData.jsonOptions.schema;
|
|
900
|
+
try {
|
|
901
|
+
schema = zodToJsonSchema(schema);
|
|
902
|
+
} catch (error) {
|
|
903
|
+
}
|
|
904
|
+
jsonData = {
|
|
905
|
+
...jsonData,
|
|
906
|
+
jsonOptions: {
|
|
907
|
+
...jsonData.jsonOptions,
|
|
908
|
+
schema
|
|
909
|
+
}
|
|
910
|
+
};
|
|
911
|
+
}
|
|
889
912
|
try {
|
|
890
913
|
const response = await this.postRequest(
|
|
891
914
|
`${this.apiUrl}/v1/deep-research`,
|
|
892
|
-
{ query, ...params },
|
|
915
|
+
jsonData,
|
|
893
916
|
headers
|
|
894
917
|
);
|
|
895
918
|
if (response.status === 200) {
|
|
@@ -1140,7 +1163,7 @@ var FirecrawlApp = class {
|
|
|
1140
1163
|
return { success: false, error: "Internal server error." };
|
|
1141
1164
|
}
|
|
1142
1165
|
};
|
|
1143
|
-
var CrawlWatcher = class extends TypedEventTarget {
|
|
1166
|
+
var CrawlWatcher = class extends e {
|
|
1144
1167
|
ws;
|
|
1145
1168
|
data;
|
|
1146
1169
|
status;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl",
|
|
3
|
-
"version": "1.20.1",
|
|
3
|
+
"version": "1.22.0",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -26,7 +26,6 @@
|
|
|
26
26
|
"license": "MIT",
|
|
27
27
|
"dependencies": {
|
|
28
28
|
"axios": "^1.6.8",
|
|
29
|
-
"isows": "^1.0.4",
|
|
30
29
|
"typescript-event-target": "^1.1.1",
|
|
31
30
|
"zod": "^3.23.8",
|
|
32
31
|
"zod-to-json-schema": "^3.23.0"
|
|
@@ -36,8 +35,6 @@
|
|
|
36
35
|
},
|
|
37
36
|
"homepage": "https://github.com/mendableai/firecrawl#readme",
|
|
38
37
|
"devDependencies": {
|
|
39
|
-
"uuid": "^9.0.1",
|
|
40
|
-
"dotenv": "^16.4.5",
|
|
41
38
|
"@jest/globals": "^29.7.0",
|
|
42
39
|
"@types/axios": "^0.14.0",
|
|
43
40
|
"@types/dotenv": "^8.2.0",
|
|
@@ -45,10 +42,12 @@
|
|
|
45
42
|
"@types/mocha": "^10.0.6",
|
|
46
43
|
"@types/node": "^20.12.12",
|
|
47
44
|
"@types/uuid": "^9.0.8",
|
|
45
|
+
"dotenv": "^16.4.5",
|
|
48
46
|
"jest": "^29.7.0",
|
|
49
47
|
"ts-jest": "^29.2.2",
|
|
50
48
|
"tsup": "^8.2.4",
|
|
51
|
-
"typescript": "^5.4.5"
|
|
49
|
+
"typescript": "^5.4.5",
|
|
50
|
+
"uuid": "^9.0.1"
|
|
52
51
|
},
|
|
53
52
|
"keywords": [
|
|
54
53
|
"firecrawl",
|
|
@@ -58,5 +57,8 @@
|
|
|
58
57
|
"scraper",
|
|
59
58
|
"api",
|
|
60
59
|
"sdk"
|
|
61
|
-
]
|
|
60
|
+
],
|
|
61
|
+
"engines": {
|
|
62
|
+
"node": ">=22.0.0"
|
|
63
|
+
}
|
|
62
64
|
}
|
package/src/index.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios";
|
|
2
2
|
import * as zt from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
|
-
import { WebSocket } from "isows";
|
|
5
4
|
import { TypedEventTarget } from "typescript-event-target";
|
|
6
5
|
|
|
7
6
|
/**
|
|
@@ -69,6 +68,11 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
69
68
|
screenshot?: string;
|
|
70
69
|
metadata?: FirecrawlDocumentMetadata;
|
|
71
70
|
actions: ActionsSchema;
|
|
71
|
+
compare?: {
|
|
72
|
+
previousScrapeAt: string | null;
|
|
73
|
+
changeStatus: "new" | "same" | "changed" | "removed";
|
|
74
|
+
visibility: "visible" | "hidden";
|
|
75
|
+
};
|
|
72
76
|
// v1 search only
|
|
73
77
|
title?: string;
|
|
74
78
|
description?: string;
|
|
@@ -79,7 +83,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
79
83
|
* Defines the options and configurations available for scraping web content.
|
|
80
84
|
*/
|
|
81
85
|
export interface CrawlScrapeOptions {
|
|
82
|
-
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
86
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
|
|
83
87
|
headers?: Record<string, string>;
|
|
84
88
|
includeTags?: string[];
|
|
85
89
|
excludeTags?: string[];
|
|
@@ -141,6 +145,14 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|
|
141
145
|
|
|
142
146
|
export interface ActionsResult {
|
|
143
147
|
screenshots: string[];
|
|
148
|
+
scrapes: ({
|
|
149
|
+
url: string;
|
|
150
|
+
html: string;
|
|
151
|
+
})[];
|
|
152
|
+
javascriptReturns: {
|
|
153
|
+
type: string;
|
|
154
|
+
value: unknown
|
|
155
|
+
}[];
|
|
144
156
|
}
|
|
145
157
|
|
|
146
158
|
/**
|
|
@@ -356,7 +368,7 @@ export interface CrawlErrorsResponse {
|
|
|
356
368
|
* Parameters for deep research operations.
|
|
357
369
|
* Defines options for conducting deep research on a query.
|
|
358
370
|
*/
|
|
359
|
-
export interface DeepResearchParams {
|
|
371
|
+
export interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
|
|
360
372
|
/**
|
|
361
373
|
* Maximum depth of research iterations (1-10)
|
|
362
374
|
* @default 7
|
|
@@ -373,9 +385,29 @@ export interface DeepResearchParams {
|
|
|
373
385
|
*/
|
|
374
386
|
maxUrls?: number;
|
|
375
387
|
/**
|
|
388
|
+
* The prompt to use for the final analysis
|
|
389
|
+
*/
|
|
390
|
+
analysisPrompt?: string;
|
|
391
|
+
/**
|
|
392
|
+
* The system prompt to use for the research agent
|
|
393
|
+
*/
|
|
394
|
+
systemPrompt?: string;
|
|
395
|
+
/**
|
|
396
|
+
* The formats to use for the final analysis
|
|
397
|
+
*/
|
|
398
|
+
formats?: ("markdown" | "json")[];
|
|
399
|
+
/**
|
|
400
|
+
* The JSON options to use for the final analysis
|
|
401
|
+
*/
|
|
402
|
+
jsonOptions?:{
|
|
403
|
+
prompt?: string;
|
|
404
|
+
schema?: LLMSchema;
|
|
405
|
+
systemPrompt?: string;
|
|
406
|
+
};
|
|
407
|
+
/**
|
|
376
408
|
* Experimental flag for streaming steps
|
|
377
409
|
*/
|
|
378
|
-
__experimental_streamSteps?: boolean;
|
|
410
|
+
// __experimental_streamSteps?: boolean;
|
|
379
411
|
}
|
|
380
412
|
|
|
381
413
|
/**
|
|
@@ -894,6 +926,7 @@ export default class FirecrawlApp {
|
|
|
894
926
|
* @param pollInterval - Time in seconds for job status checks.
|
|
895
927
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
896
928
|
* @param webhook - Optional webhook for the batch scrape.
|
|
929
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
897
930
|
* @returns The response from the crawl operation.
|
|
898
931
|
*/
|
|
899
932
|
async batchScrapeUrls(
|
|
@@ -1416,7 +1449,7 @@ export default class FirecrawlApp {
|
|
|
1416
1449
|
*/
|
|
1417
1450
|
async deepResearch(
|
|
1418
1451
|
query: string,
|
|
1419
|
-
params: DeepResearchParams,
|
|
1452
|
+
params: DeepResearchParams<zt.ZodSchema>,
|
|
1420
1453
|
onActivity?: (activity: {
|
|
1421
1454
|
type: string;
|
|
1422
1455
|
status: string;
|
|
@@ -1501,12 +1534,31 @@ export default class FirecrawlApp {
|
|
|
1501
1534
|
* @param params - Parameters for the deep research operation.
|
|
1502
1535
|
* @returns The response containing the research job ID.
|
|
1503
1536
|
*/
|
|
1504
|
-
async asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1537
|
+
async asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1505
1538
|
const headers = this.prepareHeaders();
|
|
1539
|
+
let jsonData: any = { query, ...params };
|
|
1540
|
+
|
|
1541
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
1542
|
+
let schema = jsonData.jsonOptions.schema;
|
|
1543
|
+
// Try parsing the schema as a Zod schema
|
|
1544
|
+
try {
|
|
1545
|
+
schema = zodToJsonSchema(schema);
|
|
1546
|
+
} catch (error) {
|
|
1547
|
+
|
|
1548
|
+
}
|
|
1549
|
+
jsonData = {
|
|
1550
|
+
...jsonData,
|
|
1551
|
+
jsonOptions: {
|
|
1552
|
+
...jsonData.jsonOptions,
|
|
1553
|
+
schema: schema,
|
|
1554
|
+
},
|
|
1555
|
+
};
|
|
1556
|
+
}
|
|
1557
|
+
|
|
1506
1558
|
try {
|
|
1507
1559
|
const response: AxiosResponse = await this.postRequest(
|
|
1508
1560
|
`${this.apiUrl}/v1/deep-research`,
|
|
1509
|
-
{ query, ...params },
|
|
1561
|
+
jsonData,
|
|
1510
1562
|
headers
|
|
1511
1563
|
);
|
|
1512
1564
|
|
package/tsup.config.ts
CHANGED
|
@@ -6,4 +6,13 @@ export default defineConfig({
|
|
|
6
6
|
dts: true,
|
|
7
7
|
outDir: "dist",
|
|
8
8
|
clean: true,
|
|
9
|
+
platform: "node",
|
|
10
|
+
target: "node22",
|
|
11
|
+
noExternal: ["typescript-event-target"],
|
|
12
|
+
esbuildOptions(options) {
|
|
13
|
+
options.define = {
|
|
14
|
+
...options.define,
|
|
15
|
+
"process.env.NODE_ENV": JSON.stringify(process.env.NODE_ENV || "production"),
|
|
16
|
+
};
|
|
17
|
+
},
|
|
9
18
|
});
|