@mendable/firecrawl 1.20.1 → 1.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +17 -1
- package/dist/index.d.cts +36 -6
- package/dist/index.d.ts +36 -6
- package/dist/index.js +17 -1
- package/package.json +1 -1
- package/src/index.ts +59 -6
package/dist/index.cjs
CHANGED
|
@@ -419,6 +419,7 @@ var FirecrawlApp = class {
|
|
|
419
419
|
* @param pollInterval - Time in seconds for job status checks.
|
|
420
420
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
421
421
|
* @param webhook - Optional webhook for the batch scrape.
|
|
422
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
422
423
|
* @returns The response from the crawl operation.
|
|
423
424
|
*/
|
|
424
425
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
@@ -922,10 +923,25 @@ var FirecrawlApp = class {
|
|
|
922
923
|
*/
|
|
923
924
|
async asyncDeepResearch(query, params) {
|
|
924
925
|
const headers = this.prepareHeaders();
|
|
926
|
+
let jsonData = { query, ...params };
|
|
927
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
928
|
+
let schema = jsonData.jsonOptions.schema;
|
|
929
|
+
try {
|
|
930
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
931
|
+
} catch (error) {
|
|
932
|
+
}
|
|
933
|
+
jsonData = {
|
|
934
|
+
...jsonData,
|
|
935
|
+
jsonOptions: {
|
|
936
|
+
...jsonData.jsonOptions,
|
|
937
|
+
schema
|
|
938
|
+
}
|
|
939
|
+
};
|
|
940
|
+
}
|
|
925
941
|
try {
|
|
926
942
|
const response = await this.postRequest(
|
|
927
943
|
`${this.apiUrl}/v1/deep-research`,
|
|
928
|
-
|
|
944
|
+
jsonData,
|
|
929
945
|
headers
|
|
930
946
|
);
|
|
931
947
|
if (response.status === 200) {
|
package/dist/index.d.cts
CHANGED
|
@@ -65,6 +65,11 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
65
65
|
screenshot?: string;
|
|
66
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
67
67
|
actions: ActionsSchema;
|
|
68
|
+
compare?: {
|
|
69
|
+
previousScrapeAt: string | null;
|
|
70
|
+
changeStatus: "new" | "same" | "changed" | "removed";
|
|
71
|
+
visibility: "visible" | "hidden";
|
|
72
|
+
};
|
|
68
73
|
title?: string;
|
|
69
74
|
description?: string;
|
|
70
75
|
}
|
|
@@ -73,7 +78,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
73
78
|
* Defines the options and configurations available for scraping web content.
|
|
74
79
|
*/
|
|
75
80
|
interface CrawlScrapeOptions {
|
|
76
|
-
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
81
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
|
|
77
82
|
headers?: Record<string, string>;
|
|
78
83
|
includeTags?: string[];
|
|
79
84
|
excludeTags?: string[];
|
|
@@ -132,6 +137,14 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
132
137
|
}
|
|
133
138
|
interface ActionsResult {
|
|
134
139
|
screenshots: string[];
|
|
140
|
+
scrapes: ({
|
|
141
|
+
url: string;
|
|
142
|
+
html: string;
|
|
143
|
+
})[];
|
|
144
|
+
javascriptReturns: {
|
|
145
|
+
type: string;
|
|
146
|
+
value: unknown;
|
|
147
|
+
}[];
|
|
135
148
|
}
|
|
136
149
|
/**
|
|
137
150
|
* Response interface for scraping operations.
|
|
@@ -326,7 +339,7 @@ interface CrawlErrorsResponse {
|
|
|
326
339
|
* Parameters for deep research operations.
|
|
327
340
|
* Defines options for conducting deep research on a query.
|
|
328
341
|
*/
|
|
329
|
-
interface DeepResearchParams {
|
|
342
|
+
interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
|
|
330
343
|
/**
|
|
331
344
|
* Maximum depth of research iterations (1-10)
|
|
332
345
|
* @default 7
|
|
@@ -343,9 +356,25 @@ interface DeepResearchParams {
|
|
|
343
356
|
*/
|
|
344
357
|
maxUrls?: number;
|
|
345
358
|
/**
|
|
346
|
-
*
|
|
359
|
+
* The prompt to use for the final analysis
|
|
360
|
+
*/
|
|
361
|
+
analysisPrompt?: string;
|
|
362
|
+
/**
|
|
363
|
+
* The system prompt to use for the research agent
|
|
364
|
+
*/
|
|
365
|
+
systemPrompt?: string;
|
|
366
|
+
/**
|
|
367
|
+
* The formats to use for the final analysis
|
|
368
|
+
*/
|
|
369
|
+
formats?: ("markdown" | "json")[];
|
|
370
|
+
/**
|
|
371
|
+
* The JSON options to use for the final analysis
|
|
347
372
|
*/
|
|
348
|
-
|
|
373
|
+
jsonOptions?: {
|
|
374
|
+
prompt?: string;
|
|
375
|
+
schema?: LLMSchema;
|
|
376
|
+
systemPrompt?: string;
|
|
377
|
+
};
|
|
349
378
|
}
|
|
350
379
|
/**
|
|
351
380
|
* Response interface for deep research operations.
|
|
@@ -513,6 +542,7 @@ declare class FirecrawlApp {
|
|
|
513
542
|
* @param pollInterval - Time in seconds for job status checks.
|
|
514
543
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
515
544
|
* @param webhook - Optional webhook for the batch scrape.
|
|
545
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
516
546
|
* @returns The response from the crawl operation.
|
|
517
547
|
*/
|
|
518
548
|
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
@@ -614,7 +644,7 @@ declare class FirecrawlApp {
|
|
|
614
644
|
* @param onSource - Optional callback to receive source updates in real-time.
|
|
615
645
|
* @returns The final research results.
|
|
616
646
|
*/
|
|
617
|
-
deepResearch(query: string, params: DeepResearchParams
|
|
647
|
+
deepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>, onActivity?: (activity: {
|
|
618
648
|
type: string;
|
|
619
649
|
status: string;
|
|
620
650
|
message: string;
|
|
@@ -631,7 +661,7 @@ declare class FirecrawlApp {
|
|
|
631
661
|
* @param params - Parameters for the deep research operation.
|
|
632
662
|
* @returns The response containing the research job ID.
|
|
633
663
|
*/
|
|
634
|
-
asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse>;
|
|
664
|
+
asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse>;
|
|
635
665
|
/**
|
|
636
666
|
* Checks the status of a deep research operation.
|
|
637
667
|
* @param id - The ID of the deep research operation.
|
package/dist/index.d.ts
CHANGED
|
@@ -65,6 +65,11 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
65
65
|
screenshot?: string;
|
|
66
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
67
67
|
actions: ActionsSchema;
|
|
68
|
+
compare?: {
|
|
69
|
+
previousScrapeAt: string | null;
|
|
70
|
+
changeStatus: "new" | "same" | "changed" | "removed";
|
|
71
|
+
visibility: "visible" | "hidden";
|
|
72
|
+
};
|
|
68
73
|
title?: string;
|
|
69
74
|
description?: string;
|
|
70
75
|
}
|
|
@@ -73,7 +78,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
73
78
|
* Defines the options and configurations available for scraping web content.
|
|
74
79
|
*/
|
|
75
80
|
interface CrawlScrapeOptions {
|
|
76
|
-
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
81
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
|
|
77
82
|
headers?: Record<string, string>;
|
|
78
83
|
includeTags?: string[];
|
|
79
84
|
excludeTags?: string[];
|
|
@@ -132,6 +137,14 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
132
137
|
}
|
|
133
138
|
interface ActionsResult {
|
|
134
139
|
screenshots: string[];
|
|
140
|
+
scrapes: ({
|
|
141
|
+
url: string;
|
|
142
|
+
html: string;
|
|
143
|
+
})[];
|
|
144
|
+
javascriptReturns: {
|
|
145
|
+
type: string;
|
|
146
|
+
value: unknown;
|
|
147
|
+
}[];
|
|
135
148
|
}
|
|
136
149
|
/**
|
|
137
150
|
* Response interface for scraping operations.
|
|
@@ -326,7 +339,7 @@ interface CrawlErrorsResponse {
|
|
|
326
339
|
* Parameters for deep research operations.
|
|
327
340
|
* Defines options for conducting deep research on a query.
|
|
328
341
|
*/
|
|
329
|
-
interface DeepResearchParams {
|
|
342
|
+
interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
|
|
330
343
|
/**
|
|
331
344
|
* Maximum depth of research iterations (1-10)
|
|
332
345
|
* @default 7
|
|
@@ -343,9 +356,25 @@ interface DeepResearchParams {
|
|
|
343
356
|
*/
|
|
344
357
|
maxUrls?: number;
|
|
345
358
|
/**
|
|
346
|
-
*
|
|
359
|
+
* The prompt to use for the final analysis
|
|
360
|
+
*/
|
|
361
|
+
analysisPrompt?: string;
|
|
362
|
+
/**
|
|
363
|
+
* The system prompt to use for the research agent
|
|
364
|
+
*/
|
|
365
|
+
systemPrompt?: string;
|
|
366
|
+
/**
|
|
367
|
+
* The formats to use for the final analysis
|
|
368
|
+
*/
|
|
369
|
+
formats?: ("markdown" | "json")[];
|
|
370
|
+
/**
|
|
371
|
+
* The JSON options to use for the final analysis
|
|
347
372
|
*/
|
|
348
|
-
|
|
373
|
+
jsonOptions?: {
|
|
374
|
+
prompt?: string;
|
|
375
|
+
schema?: LLMSchema;
|
|
376
|
+
systemPrompt?: string;
|
|
377
|
+
};
|
|
349
378
|
}
|
|
350
379
|
/**
|
|
351
380
|
* Response interface for deep research operations.
|
|
@@ -513,6 +542,7 @@ declare class FirecrawlApp {
|
|
|
513
542
|
* @param pollInterval - Time in seconds for job status checks.
|
|
514
543
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
515
544
|
* @param webhook - Optional webhook for the batch scrape.
|
|
545
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
516
546
|
* @returns The response from the crawl operation.
|
|
517
547
|
*/
|
|
518
548
|
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
@@ -614,7 +644,7 @@ declare class FirecrawlApp {
|
|
|
614
644
|
* @param onSource - Optional callback to receive source updates in real-time.
|
|
615
645
|
* @returns The final research results.
|
|
616
646
|
*/
|
|
617
|
-
deepResearch(query: string, params: DeepResearchParams
|
|
647
|
+
deepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>, onActivity?: (activity: {
|
|
618
648
|
type: string;
|
|
619
649
|
status: string;
|
|
620
650
|
message: string;
|
|
@@ -631,7 +661,7 @@ declare class FirecrawlApp {
|
|
|
631
661
|
* @param params - Parameters for the deep research operation.
|
|
632
662
|
* @returns The response containing the research job ID.
|
|
633
663
|
*/
|
|
634
|
-
asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse>;
|
|
664
|
+
asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse>;
|
|
635
665
|
/**
|
|
636
666
|
* Checks the status of a deep research operation.
|
|
637
667
|
* @param id - The ID of the deep research operation.
|
package/dist/index.js
CHANGED
|
@@ -383,6 +383,7 @@ var FirecrawlApp = class {
|
|
|
383
383
|
* @param pollInterval - Time in seconds for job status checks.
|
|
384
384
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
385
385
|
* @param webhook - Optional webhook for the batch scrape.
|
|
386
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
386
387
|
* @returns The response from the crawl operation.
|
|
387
388
|
*/
|
|
388
389
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
@@ -886,10 +887,25 @@ var FirecrawlApp = class {
|
|
|
886
887
|
*/
|
|
887
888
|
async asyncDeepResearch(query, params) {
|
|
888
889
|
const headers = this.prepareHeaders();
|
|
890
|
+
let jsonData = { query, ...params };
|
|
891
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
892
|
+
let schema = jsonData.jsonOptions.schema;
|
|
893
|
+
try {
|
|
894
|
+
schema = zodToJsonSchema(schema);
|
|
895
|
+
} catch (error) {
|
|
896
|
+
}
|
|
897
|
+
jsonData = {
|
|
898
|
+
...jsonData,
|
|
899
|
+
jsonOptions: {
|
|
900
|
+
...jsonData.jsonOptions,
|
|
901
|
+
schema
|
|
902
|
+
}
|
|
903
|
+
};
|
|
904
|
+
}
|
|
889
905
|
try {
|
|
890
906
|
const response = await this.postRequest(
|
|
891
907
|
`${this.apiUrl}/v1/deep-research`,
|
|
892
|
-
|
|
908
|
+
jsonData,
|
|
893
909
|
headers
|
|
894
910
|
);
|
|
895
911
|
if (response.status === 200) {
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -69,6 +69,11 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
69
69
|
screenshot?: string;
|
|
70
70
|
metadata?: FirecrawlDocumentMetadata;
|
|
71
71
|
actions: ActionsSchema;
|
|
72
|
+
compare?: {
|
|
73
|
+
previousScrapeAt: string | null;
|
|
74
|
+
changeStatus: "new" | "same" | "changed" | "removed";
|
|
75
|
+
visibility: "visible" | "hidden";
|
|
76
|
+
};
|
|
72
77
|
// v1 search only
|
|
73
78
|
title?: string;
|
|
74
79
|
description?: string;
|
|
@@ -79,7 +84,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
79
84
|
* Defines the options and configurations available for scraping web content.
|
|
80
85
|
*/
|
|
81
86
|
export interface CrawlScrapeOptions {
|
|
82
|
-
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
87
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
|
|
83
88
|
headers?: Record<string, string>;
|
|
84
89
|
includeTags?: string[];
|
|
85
90
|
excludeTags?: string[];
|
|
@@ -141,6 +146,14 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|
|
141
146
|
|
|
142
147
|
export interface ActionsResult {
|
|
143
148
|
screenshots: string[];
|
|
149
|
+
scrapes: ({
|
|
150
|
+
url: string;
|
|
151
|
+
html: string;
|
|
152
|
+
})[];
|
|
153
|
+
javascriptReturns: {
|
|
154
|
+
type: string;
|
|
155
|
+
value: unknown
|
|
156
|
+
}[];
|
|
144
157
|
}
|
|
145
158
|
|
|
146
159
|
/**
|
|
@@ -356,7 +369,7 @@ export interface CrawlErrorsResponse {
|
|
|
356
369
|
* Parameters for deep research operations.
|
|
357
370
|
* Defines options for conducting deep research on a query.
|
|
358
371
|
*/
|
|
359
|
-
export interface DeepResearchParams {
|
|
372
|
+
export interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
|
|
360
373
|
/**
|
|
361
374
|
* Maximum depth of research iterations (1-10)
|
|
362
375
|
* @default 7
|
|
@@ -373,9 +386,29 @@ export interface DeepResearchParams {
|
|
|
373
386
|
*/
|
|
374
387
|
maxUrls?: number;
|
|
375
388
|
/**
|
|
389
|
+
* The prompt to use for the final analysis
|
|
390
|
+
*/
|
|
391
|
+
analysisPrompt?: string;
|
|
392
|
+
/**
|
|
393
|
+
* The system prompt to use for the research agent
|
|
394
|
+
*/
|
|
395
|
+
systemPrompt?: string;
|
|
396
|
+
/**
|
|
397
|
+
* The formats to use for the final analysis
|
|
398
|
+
*/
|
|
399
|
+
formats?: ("markdown" | "json")[];
|
|
400
|
+
/**
|
|
401
|
+
* The JSON options to use for the final analysis
|
|
402
|
+
*/
|
|
403
|
+
jsonOptions?:{
|
|
404
|
+
prompt?: string;
|
|
405
|
+
schema?: LLMSchema;
|
|
406
|
+
systemPrompt?: string;
|
|
407
|
+
};
|
|
408
|
+
/**
|
|
376
409
|
* Experimental flag for streaming steps
|
|
377
410
|
*/
|
|
378
|
-
__experimental_streamSteps?: boolean;
|
|
411
|
+
// __experimental_streamSteps?: boolean;
|
|
379
412
|
}
|
|
380
413
|
|
|
381
414
|
/**
|
|
@@ -894,6 +927,7 @@ export default class FirecrawlApp {
|
|
|
894
927
|
* @param pollInterval - Time in seconds for job status checks.
|
|
895
928
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
896
929
|
* @param webhook - Optional webhook for the batch scrape.
|
|
930
|
+
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
897
931
|
* @returns The response from the crawl operation.
|
|
898
932
|
*/
|
|
899
933
|
async batchScrapeUrls(
|
|
@@ -1416,7 +1450,7 @@ export default class FirecrawlApp {
|
|
|
1416
1450
|
*/
|
|
1417
1451
|
async deepResearch(
|
|
1418
1452
|
query: string,
|
|
1419
|
-
params: DeepResearchParams
|
|
1453
|
+
params: DeepResearchParams<zt.ZodSchema>,
|
|
1420
1454
|
onActivity?: (activity: {
|
|
1421
1455
|
type: string;
|
|
1422
1456
|
status: string;
|
|
@@ -1501,12 +1535,31 @@ export default class FirecrawlApp {
|
|
|
1501
1535
|
* @param params - Parameters for the deep research operation.
|
|
1502
1536
|
* @returns The response containing the research job ID.
|
|
1503
1537
|
*/
|
|
1504
|
-
async asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1538
|
+
async asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1505
1539
|
const headers = this.prepareHeaders();
|
|
1540
|
+
let jsonData: any = { query, ...params };
|
|
1541
|
+
|
|
1542
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
1543
|
+
let schema = jsonData.jsonOptions.schema;
|
|
1544
|
+
// Try parsing the schema as a Zod schema
|
|
1545
|
+
try {
|
|
1546
|
+
schema = zodToJsonSchema(schema);
|
|
1547
|
+
} catch (error) {
|
|
1548
|
+
|
|
1549
|
+
}
|
|
1550
|
+
jsonData = {
|
|
1551
|
+
...jsonData,
|
|
1552
|
+
jsonOptions: {
|
|
1553
|
+
...jsonData.jsonOptions,
|
|
1554
|
+
schema: schema,
|
|
1555
|
+
},
|
|
1556
|
+
};
|
|
1557
|
+
}
|
|
1558
|
+
|
|
1506
1559
|
try {
|
|
1507
1560
|
const response: AxiosResponse = await this.postRequest(
|
|
1508
1561
|
`${this.apiUrl}/v1/deep-research`,
|
|
1509
|
-
|
|
1562
|
+
jsonData,
|
|
1510
1563
|
headers
|
|
1511
1564
|
);
|
|
1512
1565
|
|