firecrawl 1.19.1 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +7 -2
- package/dist/index.d.cts +3 -2
- package/dist/index.d.ts +3 -2
- package/dist/index.js +7 -2
- package/dump.rdb +0 -0
- package/package.json +1 -1
- package/src/index.ts +10 -4
package/dist/index.cjs
CHANGED
|
@@ -607,7 +607,7 @@ var FirecrawlApp = class {
|
|
|
607
607
|
/**
|
|
608
608
|
* Extracts information from URLs using the Firecrawl API.
|
|
609
609
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
610
|
-
* @param
|
|
610
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
611
611
|
* @param params - Additional parameters for the extract request.
|
|
612
612
|
* @returns The response from the extract operation.
|
|
613
613
|
*/
|
|
@@ -788,12 +788,14 @@ var FirecrawlApp = class {
|
|
|
788
788
|
*/
|
|
789
789
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
790
790
|
try {
|
|
791
|
+
let failedTries = 0;
|
|
791
792
|
while (true) {
|
|
792
793
|
let statusResponse = await this.getRequest(
|
|
793
794
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
794
795
|
headers
|
|
795
796
|
);
|
|
796
797
|
if (statusResponse.status === 200) {
|
|
798
|
+
failedTries = 0;
|
|
797
799
|
let statusData = statusResponse.data;
|
|
798
800
|
if (statusData.status === "completed") {
|
|
799
801
|
if ("data" in statusData) {
|
|
@@ -823,7 +825,10 @@ var FirecrawlApp = class {
|
|
|
823
825
|
);
|
|
824
826
|
}
|
|
825
827
|
} else {
|
|
826
|
-
|
|
828
|
+
failedTries++;
|
|
829
|
+
if (failedTries >= 3) {
|
|
830
|
+
this.handleError(statusResponse, "check crawl status");
|
|
831
|
+
}
|
|
827
832
|
}
|
|
828
833
|
}
|
|
829
834
|
} catch (error) {
|
package/dist/index.d.cts
CHANGED
|
@@ -149,6 +149,7 @@ interface CrawlParams {
|
|
|
149
149
|
includePaths?: string[];
|
|
150
150
|
excludePaths?: string[];
|
|
151
151
|
maxDepth?: number;
|
|
152
|
+
maxDiscoveryDepth?: number;
|
|
152
153
|
limit?: number;
|
|
153
154
|
allowBackwardLinks?: boolean;
|
|
154
155
|
allowExternalLinks?: boolean;
|
|
@@ -542,11 +543,11 @@ declare class FirecrawlApp {
|
|
|
542
543
|
/**
|
|
543
544
|
* Extracts information from URLs using the Firecrawl API.
|
|
544
545
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
545
|
-
* @param
|
|
546
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
546
547
|
* @param params - Additional parameters for the extract request.
|
|
547
548
|
* @returns The response from the extract operation.
|
|
548
549
|
*/
|
|
549
|
-
extract<T extends zt.ZodSchema = any>(urls
|
|
550
|
+
extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
550
551
|
/**
|
|
551
552
|
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
552
553
|
* @param url - The URL to extract data from.
|
package/dist/index.d.ts
CHANGED
|
@@ -149,6 +149,7 @@ interface CrawlParams {
|
|
|
149
149
|
includePaths?: string[];
|
|
150
150
|
excludePaths?: string[];
|
|
151
151
|
maxDepth?: number;
|
|
152
|
+
maxDiscoveryDepth?: number;
|
|
152
153
|
limit?: number;
|
|
153
154
|
allowBackwardLinks?: boolean;
|
|
154
155
|
allowExternalLinks?: boolean;
|
|
@@ -542,11 +543,11 @@ declare class FirecrawlApp {
|
|
|
542
543
|
/**
|
|
543
544
|
* Extracts information from URLs using the Firecrawl API.
|
|
544
545
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
545
|
-
* @param
|
|
546
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
546
547
|
* @param params - Additional parameters for the extract request.
|
|
547
548
|
* @returns The response from the extract operation.
|
|
548
549
|
*/
|
|
549
|
-
extract<T extends zt.ZodSchema = any>(urls
|
|
550
|
+
extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
550
551
|
/**
|
|
551
552
|
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
552
553
|
* @param url - The URL to extract data from.
|
package/dist/index.js
CHANGED
|
@@ -571,7 +571,7 @@ var FirecrawlApp = class {
|
|
|
571
571
|
/**
|
|
572
572
|
* Extracts information from URLs using the Firecrawl API.
|
|
573
573
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
574
|
-
* @param
|
|
574
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
575
575
|
* @param params - Additional parameters for the extract request.
|
|
576
576
|
* @returns The response from the extract operation.
|
|
577
577
|
*/
|
|
@@ -752,12 +752,14 @@ var FirecrawlApp = class {
|
|
|
752
752
|
*/
|
|
753
753
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
754
754
|
try {
|
|
755
|
+
let failedTries = 0;
|
|
755
756
|
while (true) {
|
|
756
757
|
let statusResponse = await this.getRequest(
|
|
757
758
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
758
759
|
headers
|
|
759
760
|
);
|
|
760
761
|
if (statusResponse.status === 200) {
|
|
762
|
+
failedTries = 0;
|
|
761
763
|
let statusData = statusResponse.data;
|
|
762
764
|
if (statusData.status === "completed") {
|
|
763
765
|
if ("data" in statusData) {
|
|
@@ -787,7 +789,10 @@ var FirecrawlApp = class {
|
|
|
787
789
|
);
|
|
788
790
|
}
|
|
789
791
|
} else {
|
|
790
|
-
|
|
792
|
+
failedTries++;
|
|
793
|
+
if (failedTries >= 3) {
|
|
794
|
+
this.handleError(statusResponse, "check crawl status");
|
|
795
|
+
}
|
|
791
796
|
}
|
|
792
797
|
}
|
|
793
798
|
} catch (error) {
|
package/dump.rdb
ADDED
|
Binary file
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -160,6 +160,7 @@ export interface CrawlParams {
|
|
|
160
160
|
includePaths?: string[];
|
|
161
161
|
excludePaths?: string[];
|
|
162
162
|
maxDepth?: number;
|
|
163
|
+
maxDiscoveryDepth?: number;
|
|
163
164
|
limit?: number;
|
|
164
165
|
allowBackwardLinks?: boolean;
|
|
165
166
|
allowExternalLinks?: boolean;
|
|
@@ -1118,14 +1119,14 @@ export default class FirecrawlApp {
|
|
|
1118
1119
|
/**
|
|
1119
1120
|
* Extracts information from URLs using the Firecrawl API.
|
|
1120
1121
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
1121
|
-
* @param
|
|
1122
|
+
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
|
|
1122
1123
|
* @param params - Additional parameters for the extract request.
|
|
1123
1124
|
* @returns The response from the extract operation.
|
|
1124
1125
|
*/
|
|
1125
|
-
async extract<T extends zt.ZodSchema = any>(urls
|
|
1126
|
+
async extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse> {
|
|
1126
1127
|
const headers = this.prepareHeaders();
|
|
1127
1128
|
|
|
1128
|
-
let jsonData: { urls
|
|
1129
|
+
let jsonData: { urls?: string[] } & ExtractParams<T> = { urls: urls, ...params };
|
|
1129
1130
|
let jsonSchema: any;
|
|
1130
1131
|
try {
|
|
1131
1132
|
if (!params?.schema) {
|
|
@@ -1331,12 +1332,14 @@ export default class FirecrawlApp {
|
|
|
1331
1332
|
checkInterval: number
|
|
1332
1333
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
1333
1334
|
try {
|
|
1335
|
+
let failedTries = 0;
|
|
1334
1336
|
while (true) {
|
|
1335
1337
|
let statusResponse: AxiosResponse = await this.getRequest(
|
|
1336
1338
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
1337
1339
|
headers
|
|
1338
1340
|
);
|
|
1339
1341
|
if (statusResponse.status === 200) {
|
|
1342
|
+
failedTries = 0;
|
|
1340
1343
|
let statusData = statusResponse.data;
|
|
1341
1344
|
if (statusData.status === "completed") {
|
|
1342
1345
|
if ("data" in statusData) {
|
|
@@ -1368,7 +1371,10 @@ export default class FirecrawlApp {
|
|
|
1368
1371
|
);
|
|
1369
1372
|
}
|
|
1370
1373
|
} else {
|
|
1371
|
-
|
|
1374
|
+
failedTries++;
|
|
1375
|
+
if (failedTries >= 3) {
|
|
1376
|
+
this.handleError(statusResponse, "check crawl status");
|
|
1377
|
+
}
|
|
1372
1378
|
}
|
|
1373
1379
|
}
|
|
1374
1380
|
} catch (error: any) {
|