@mendable/firecrawl 1.19.0 → 1.19.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/dist/index.cjs
CHANGED
|
@@ -618,7 +618,7 @@ var FirecrawlApp = class {
|
|
|
618
618
|
try {
|
|
619
619
|
if (!params?.schema) {
|
|
620
620
|
jsonSchema = void 0;
|
|
621
|
-
} else if (params.schema
|
|
621
|
+
} else if (typeof params.schema === "object" && params.schema !== null && Object.getPrototypeOf(params.schema)?.constructor?.name?.startsWith("Zod")) {
|
|
622
622
|
jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
|
|
623
623
|
} else {
|
|
624
624
|
jsonSchema = params.schema;
|
|
@@ -788,12 +788,14 @@ var FirecrawlApp = class {
|
|
|
788
788
|
*/
|
|
789
789
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
790
790
|
try {
|
|
791
|
+
let failedTries = 0;
|
|
791
792
|
while (true) {
|
|
792
793
|
let statusResponse = await this.getRequest(
|
|
793
794
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
794
795
|
headers
|
|
795
796
|
);
|
|
796
797
|
if (statusResponse.status === 200) {
|
|
798
|
+
failedTries = 0;
|
|
797
799
|
let statusData = statusResponse.data;
|
|
798
800
|
if (statusData.status === "completed") {
|
|
799
801
|
if ("data" in statusData) {
|
|
@@ -823,7 +825,10 @@ var FirecrawlApp = class {
|
|
|
823
825
|
);
|
|
824
826
|
}
|
|
825
827
|
} else {
|
|
826
|
-
|
|
828
|
+
failedTries++;
|
|
829
|
+
if (failedTries >= 3) {
|
|
830
|
+
this.handleError(statusResponse, "check crawl status");
|
|
831
|
+
}
|
|
827
832
|
}
|
|
828
833
|
}
|
|
829
834
|
} catch (error) {
|
package/dist/index.d.cts
CHANGED
|
@@ -149,6 +149,7 @@ interface CrawlParams {
|
|
|
149
149
|
includePaths?: string[];
|
|
150
150
|
excludePaths?: string[];
|
|
151
151
|
maxDepth?: number;
|
|
152
|
+
maxDiscoveryDepth?: number;
|
|
152
153
|
limit?: number;
|
|
153
154
|
allowBackwardLinks?: boolean;
|
|
154
155
|
allowExternalLinks?: boolean;
|
|
@@ -162,6 +163,7 @@ interface CrawlParams {
|
|
|
162
163
|
};
|
|
163
164
|
deduplicateSimilarURLs?: boolean;
|
|
164
165
|
ignoreQueryParameters?: boolean;
|
|
166
|
+
regexOnFullURL?: boolean;
|
|
165
167
|
}
|
|
166
168
|
/**
|
|
167
169
|
* Response interface for crawling operations.
|
package/dist/index.d.ts
CHANGED
|
@@ -149,6 +149,7 @@ interface CrawlParams {
|
|
|
149
149
|
includePaths?: string[];
|
|
150
150
|
excludePaths?: string[];
|
|
151
151
|
maxDepth?: number;
|
|
152
|
+
maxDiscoveryDepth?: number;
|
|
152
153
|
limit?: number;
|
|
153
154
|
allowBackwardLinks?: boolean;
|
|
154
155
|
allowExternalLinks?: boolean;
|
|
@@ -162,6 +163,7 @@ interface CrawlParams {
|
|
|
162
163
|
};
|
|
163
164
|
deduplicateSimilarURLs?: boolean;
|
|
164
165
|
ignoreQueryParameters?: boolean;
|
|
166
|
+
regexOnFullURL?: boolean;
|
|
165
167
|
}
|
|
166
168
|
/**
|
|
167
169
|
* Response interface for crawling operations.
|
package/dist/index.js
CHANGED
|
@@ -582,7 +582,7 @@ var FirecrawlApp = class {
|
|
|
582
582
|
try {
|
|
583
583
|
if (!params?.schema) {
|
|
584
584
|
jsonSchema = void 0;
|
|
585
|
-
} else if (params.schema
|
|
585
|
+
} else if (typeof params.schema === "object" && params.schema !== null && Object.getPrototypeOf(params.schema)?.constructor?.name?.startsWith("Zod")) {
|
|
586
586
|
jsonSchema = zodToJsonSchema(params.schema);
|
|
587
587
|
} else {
|
|
588
588
|
jsonSchema = params.schema;
|
|
@@ -752,12 +752,14 @@ var FirecrawlApp = class {
|
|
|
752
752
|
*/
|
|
753
753
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
754
754
|
try {
|
|
755
|
+
let failedTries = 0;
|
|
755
756
|
while (true) {
|
|
756
757
|
let statusResponse = await this.getRequest(
|
|
757
758
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
758
759
|
headers
|
|
759
760
|
);
|
|
760
761
|
if (statusResponse.status === 200) {
|
|
762
|
+
failedTries = 0;
|
|
761
763
|
let statusData = statusResponse.data;
|
|
762
764
|
if (statusData.status === "completed") {
|
|
763
765
|
if ("data" in statusData) {
|
|
@@ -787,7 +789,10 @@ var FirecrawlApp = class {
|
|
|
787
789
|
);
|
|
788
790
|
}
|
|
789
791
|
} else {
|
|
790
|
-
|
|
792
|
+
failedTries++;
|
|
793
|
+
if (failedTries >= 3) {
|
|
794
|
+
this.handleError(statusResponse, "check crawl status");
|
|
795
|
+
}
|
|
791
796
|
}
|
|
792
797
|
}
|
|
793
798
|
} catch (error) {
|
package/package.json
CHANGED
|
@@ -55,7 +55,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => {
|
|
|
55
55
|
"should return successful response with valid preview token",
|
|
56
56
|
async () => {
|
|
57
57
|
const app = new FirecrawlApp<"v0">({
|
|
58
|
-
apiKey:
|
|
58
|
+
apiKey: process.env.PREVIEW_TOKEN,
|
|
59
59
|
apiUrl: API_URL,
|
|
60
60
|
version: "v0",
|
|
61
61
|
});
|
|
@@ -40,7 +40,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
40
40
|
});
|
|
41
41
|
|
|
42
42
|
test.concurrent('should return successful response with valid preview token', async () => {
|
|
43
|
-
const app = new FirecrawlApp({ apiKey:
|
|
43
|
+
const app = new FirecrawlApp({ apiKey: process.env.PREVIEW_TOKEN, apiUrl: API_URL });
|
|
44
44
|
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
45
45
|
if (!response.success) {
|
|
46
46
|
throw new Error(response.error);
|
|
@@ -365,7 +365,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
365
365
|
});
|
|
366
366
|
|
|
367
367
|
test.concurrent('should return successful response with valid preview token', async () => {
|
|
368
|
-
const app = new FirecrawlApp({ apiKey:
|
|
368
|
+
const app = new FirecrawlApp({ apiKey: process.env.PREVIEW_TOKEN, apiUrl: API_URL });
|
|
369
369
|
const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
370
370
|
expect(response).not.toBeNull();
|
|
371
371
|
expect(response.links?.length).toBeGreaterThan(0);
|
package/src/index.ts
CHANGED
|
@@ -160,6 +160,7 @@ export interface CrawlParams {
|
|
|
160
160
|
includePaths?: string[];
|
|
161
161
|
excludePaths?: string[];
|
|
162
162
|
maxDepth?: number;
|
|
163
|
+
maxDiscoveryDepth?: number;
|
|
163
164
|
limit?: number;
|
|
164
165
|
allowBackwardLinks?: boolean;
|
|
165
166
|
allowExternalLinks?: boolean;
|
|
@@ -173,6 +174,7 @@ export interface CrawlParams {
|
|
|
173
174
|
};
|
|
174
175
|
deduplicateSimilarURLs?: boolean;
|
|
175
176
|
ignoreQueryParameters?: boolean;
|
|
177
|
+
regexOnFullURL?: boolean;
|
|
176
178
|
}
|
|
177
179
|
|
|
178
180
|
/**
|
|
@@ -1129,15 +1131,14 @@ export default class FirecrawlApp {
|
|
|
1129
1131
|
try {
|
|
1130
1132
|
if (!params?.schema) {
|
|
1131
1133
|
jsonSchema = undefined;
|
|
1132
|
-
} else if (params.schema
|
|
1133
|
-
jsonSchema = zodToJsonSchema(params.schema);
|
|
1134
|
+
} else if (typeof params.schema === "object" && params.schema !== null && Object.getPrototypeOf(params.schema)?.constructor?.name?.startsWith("Zod")) {
|
|
1135
|
+
jsonSchema = zodToJsonSchema(params.schema as zt.ZodType);
|
|
1134
1136
|
} else {
|
|
1135
1137
|
jsonSchema = params.schema;
|
|
1136
1138
|
}
|
|
1137
1139
|
} catch (error: any) {
|
|
1138
1140
|
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
1139
1141
|
}
|
|
1140
|
-
|
|
1141
1142
|
|
|
1142
1143
|
try {
|
|
1143
1144
|
const response: AxiosResponse = await this.postRequest(
|
|
@@ -1331,12 +1332,14 @@ export default class FirecrawlApp {
|
|
|
1331
1332
|
checkInterval: number
|
|
1332
1333
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
1333
1334
|
try {
|
|
1335
|
+
let failedTries = 0;
|
|
1334
1336
|
while (true) {
|
|
1335
1337
|
let statusResponse: AxiosResponse = await this.getRequest(
|
|
1336
1338
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
1337
1339
|
headers
|
|
1338
1340
|
);
|
|
1339
1341
|
if (statusResponse.status === 200) {
|
|
1342
|
+
failedTries = 0;
|
|
1340
1343
|
let statusData = statusResponse.data;
|
|
1341
1344
|
if (statusData.status === "completed") {
|
|
1342
1345
|
if ("data" in statusData) {
|
|
@@ -1368,7 +1371,10 @@ export default class FirecrawlApp {
|
|
|
1368
1371
|
);
|
|
1369
1372
|
}
|
|
1370
1373
|
} else {
|
|
1371
|
-
|
|
1374
|
+
failedTries++;
|
|
1375
|
+
if (failedTries >= 3) {
|
|
1376
|
+
this.handleError(statusResponse, "check crawl status");
|
|
1377
|
+
}
|
|
1372
1378
|
}
|
|
1373
1379
|
}
|
|
1374
1380
|
} catch (error: any) {
|