@mendable/firecrawl 1.19.1 → 1.20.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +8 -3
- package/dist/index.d.cts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +8 -3
- package/package.json +1 -1
- package/src/index.ts +12 -5
package/dist/index.cjs
CHANGED
|
@@ -607,7 +607,7 @@ var FirecrawlApp = class {
|
|
|
607
607
|
/**
|
|
608
608
|
* Extracts information from URLs using the Firecrawl API.
|
|
609
609
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
610
|
-
* @param
|
|
610
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
611
611
|
* @param params - Additional parameters for the extract request.
|
|
612
612
|
* @returns The response from the extract operation.
|
|
613
613
|
*/
|
|
@@ -788,12 +788,14 @@ var FirecrawlApp = class {
|
|
|
788
788
|
*/
|
|
789
789
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
790
790
|
try {
|
|
791
|
+
let failedTries = 0;
|
|
791
792
|
while (true) {
|
|
792
793
|
let statusResponse = await this.getRequest(
|
|
793
794
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
794
795
|
headers
|
|
795
796
|
);
|
|
796
797
|
if (statusResponse.status === 200) {
|
|
798
|
+
failedTries = 0;
|
|
797
799
|
let statusData = statusResponse.data;
|
|
798
800
|
if (statusData.status === "completed") {
|
|
799
801
|
if ("data" in statusData) {
|
|
@@ -823,7 +825,10 @@ var FirecrawlApp = class {
|
|
|
823
825
|
);
|
|
824
826
|
}
|
|
825
827
|
} else {
|
|
826
|
-
|
|
828
|
+
failedTries++;
|
|
829
|
+
if (failedTries >= 3) {
|
|
830
|
+
this.handleError(statusResponse, "check crawl status");
|
|
831
|
+
}
|
|
827
832
|
}
|
|
828
833
|
}
|
|
829
834
|
} catch (error) {
|
|
@@ -836,7 +841,7 @@ var FirecrawlApp = class {
|
|
|
836
841
|
* @param {string} action - The action being performed when the error occurred.
|
|
837
842
|
*/
|
|
838
843
|
handleError(response, action) {
|
|
839
|
-
if ([400, 402, 408, 409, 500].includes(response.status)) {
|
|
844
|
+
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
840
845
|
const errorMessage = response.data.error || "Unknown error occurred";
|
|
841
846
|
const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
|
|
842
847
|
throw new FirecrawlError(
|
package/dist/index.d.cts
CHANGED
|
@@ -97,6 +97,7 @@ type Action = {
|
|
|
97
97
|
} | {
|
|
98
98
|
type: "click";
|
|
99
99
|
selector: string;
|
|
100
|
+
all?: boolean;
|
|
100
101
|
} | {
|
|
101
102
|
type: "screenshot";
|
|
102
103
|
fullPage?: boolean;
|
|
@@ -149,6 +150,7 @@ interface CrawlParams {
|
|
|
149
150
|
includePaths?: string[];
|
|
150
151
|
excludePaths?: string[];
|
|
151
152
|
maxDepth?: number;
|
|
153
|
+
maxDiscoveryDepth?: number;
|
|
152
154
|
limit?: number;
|
|
153
155
|
allowBackwardLinks?: boolean;
|
|
154
156
|
allowExternalLinks?: boolean;
|
|
@@ -542,11 +544,11 @@ declare class FirecrawlApp {
|
|
|
542
544
|
/**
|
|
543
545
|
* Extracts information from URLs using the Firecrawl API.
|
|
544
546
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
545
|
-
* @param
|
|
547
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
546
548
|
* @param params - Additional parameters for the extract request.
|
|
547
549
|
* @returns The response from the extract operation.
|
|
548
550
|
*/
|
|
549
|
-
extract<T extends zt.ZodSchema = any>(urls
|
|
551
|
+
extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
550
552
|
/**
|
|
551
553
|
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
552
554
|
* @param url - The URL to extract data from.
|
package/dist/index.d.ts
CHANGED
|
@@ -97,6 +97,7 @@ type Action = {
|
|
|
97
97
|
} | {
|
|
98
98
|
type: "click";
|
|
99
99
|
selector: string;
|
|
100
|
+
all?: boolean;
|
|
100
101
|
} | {
|
|
101
102
|
type: "screenshot";
|
|
102
103
|
fullPage?: boolean;
|
|
@@ -149,6 +150,7 @@ interface CrawlParams {
|
|
|
149
150
|
includePaths?: string[];
|
|
150
151
|
excludePaths?: string[];
|
|
151
152
|
maxDepth?: number;
|
|
153
|
+
maxDiscoveryDepth?: number;
|
|
152
154
|
limit?: number;
|
|
153
155
|
allowBackwardLinks?: boolean;
|
|
154
156
|
allowExternalLinks?: boolean;
|
|
@@ -542,11 +544,11 @@ declare class FirecrawlApp {
|
|
|
542
544
|
/**
|
|
543
545
|
* Extracts information from URLs using the Firecrawl API.
|
|
544
546
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
545
|
-
* @param
|
|
547
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
546
548
|
* @param params - Additional parameters for the extract request.
|
|
547
549
|
* @returns The response from the extract operation.
|
|
548
550
|
*/
|
|
549
|
-
extract<T extends zt.ZodSchema = any>(urls
|
|
551
|
+
extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
550
552
|
/**
|
|
551
553
|
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
552
554
|
* @param url - The URL to extract data from.
|
package/dist/index.js
CHANGED
|
@@ -571,7 +571,7 @@ var FirecrawlApp = class {
|
|
|
571
571
|
/**
|
|
572
572
|
* Extracts information from URLs using the Firecrawl API.
|
|
573
573
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
574
|
-
* @param
|
|
574
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
575
575
|
* @param params - Additional parameters for the extract request.
|
|
576
576
|
* @returns The response from the extract operation.
|
|
577
577
|
*/
|
|
@@ -752,12 +752,14 @@ var FirecrawlApp = class {
|
|
|
752
752
|
*/
|
|
753
753
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
754
754
|
try {
|
|
755
|
+
let failedTries = 0;
|
|
755
756
|
while (true) {
|
|
756
757
|
let statusResponse = await this.getRequest(
|
|
757
758
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
758
759
|
headers
|
|
759
760
|
);
|
|
760
761
|
if (statusResponse.status === 200) {
|
|
762
|
+
failedTries = 0;
|
|
761
763
|
let statusData = statusResponse.data;
|
|
762
764
|
if (statusData.status === "completed") {
|
|
763
765
|
if ("data" in statusData) {
|
|
@@ -787,7 +789,10 @@ var FirecrawlApp = class {
|
|
|
787
789
|
);
|
|
788
790
|
}
|
|
789
791
|
} else {
|
|
790
|
-
|
|
792
|
+
failedTries++;
|
|
793
|
+
if (failedTries >= 3) {
|
|
794
|
+
this.handleError(statusResponse, "check crawl status");
|
|
795
|
+
}
|
|
791
796
|
}
|
|
792
797
|
}
|
|
793
798
|
} catch (error) {
|
|
@@ -800,7 +805,7 @@ var FirecrawlApp = class {
|
|
|
800
805
|
* @param {string} action - The action being performed when the error occurred.
|
|
801
806
|
*/
|
|
802
807
|
handleError(response, action) {
|
|
803
|
-
if ([400, 402, 408, 409, 500].includes(response.status)) {
|
|
808
|
+
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
804
809
|
const errorMessage = response.data.error || "Unknown error occurred";
|
|
805
810
|
const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
|
|
806
811
|
throw new FirecrawlError(
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -104,6 +104,7 @@ export type Action = {
|
|
|
104
104
|
} | {
|
|
105
105
|
type: "click",
|
|
106
106
|
selector: string,
|
|
107
|
+
all?: boolean,
|
|
107
108
|
} | {
|
|
108
109
|
type: "screenshot",
|
|
109
110
|
fullPage?: boolean,
|
|
@@ -160,6 +161,7 @@ export interface CrawlParams {
|
|
|
160
161
|
includePaths?: string[];
|
|
161
162
|
excludePaths?: string[];
|
|
162
163
|
maxDepth?: number;
|
|
164
|
+
maxDiscoveryDepth?: number;
|
|
163
165
|
limit?: number;
|
|
164
166
|
allowBackwardLinks?: boolean;
|
|
165
167
|
allowExternalLinks?: boolean;
|
|
@@ -1118,14 +1120,14 @@ export default class FirecrawlApp {
|
|
|
1118
1120
|
/**
|
|
1119
1121
|
* Extracts information from URLs using the Firecrawl API.
|
|
1120
1122
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
1121
|
-
* @param
|
|
1123
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
1122
1124
|
* @param params - Additional parameters for the extract request.
|
|
1123
1125
|
* @returns The response from the extract operation.
|
|
1124
1126
|
*/
|
|
1125
|
-
async extract<T extends zt.ZodSchema = any>(urls
|
|
1127
|
+
async extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse> {
|
|
1126
1128
|
const headers = this.prepareHeaders();
|
|
1127
1129
|
|
|
1128
|
-
let jsonData: { urls
|
|
1130
|
+
let jsonData: { urls?: string[] } & ExtractParams<T> = { urls: urls, ...params };
|
|
1129
1131
|
let jsonSchema: any;
|
|
1130
1132
|
try {
|
|
1131
1133
|
if (!params?.schema) {
|
|
@@ -1331,12 +1333,14 @@ export default class FirecrawlApp {
|
|
|
1331
1333
|
checkInterval: number
|
|
1332
1334
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
1333
1335
|
try {
|
|
1336
|
+
let failedTries = 0;
|
|
1334
1337
|
while (true) {
|
|
1335
1338
|
let statusResponse: AxiosResponse = await this.getRequest(
|
|
1336
1339
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
1337
1340
|
headers
|
|
1338
1341
|
);
|
|
1339
1342
|
if (statusResponse.status === 200) {
|
|
1343
|
+
failedTries = 0;
|
|
1340
1344
|
let statusData = statusResponse.data;
|
|
1341
1345
|
if (statusData.status === "completed") {
|
|
1342
1346
|
if ("data" in statusData) {
|
|
@@ -1368,7 +1372,10 @@ export default class FirecrawlApp {
|
|
|
1368
1372
|
);
|
|
1369
1373
|
}
|
|
1370
1374
|
} else {
|
|
1371
|
-
|
|
1375
|
+
failedTries++;
|
|
1376
|
+
if (failedTries >= 3) {
|
|
1377
|
+
this.handleError(statusResponse, "check crawl status");
|
|
1378
|
+
}
|
|
1372
1379
|
}
|
|
1373
1380
|
}
|
|
1374
1381
|
} catch (error: any) {
|
|
@@ -1382,7 +1389,7 @@ export default class FirecrawlApp {
|
|
|
1382
1389
|
* @param {string} action - The action being performed when the error occurred.
|
|
1383
1390
|
*/
|
|
1384
1391
|
handleError(response: AxiosResponse, action: string): void {
|
|
1385
|
-
if ([400, 402, 408, 409, 500].includes(response.status)) {
|
|
1392
|
+
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
1386
1393
|
const errorMessage: string =
|
|
1387
1394
|
response.data.error || "Unknown error occurred";
|
|
1388
1395
|
const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : '';
|