@mendable/firecrawl-js 0.0.21 → 0.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/build/index.js +23 -19
- package/package.json +8 -3
- package/src/__tests__/e2e_withAuth/index.test.ts +155 -0
- package/src/index.ts +29 -18
- package/types/index.d.ts +8 -4
package/.env.example
ADDED
package/build/index.js
CHANGED
|
@@ -19,6 +19,7 @@ export default class FirecrawlApp {
|
|
|
19
19
|
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
20
20
|
*/
|
|
21
21
|
constructor({ apiKey = null }) {
|
|
22
|
+
this.apiUrl = "https://api.firecrawl.dev";
|
|
22
23
|
this.apiKey = apiKey || "";
|
|
23
24
|
if (!this.apiKey) {
|
|
24
25
|
throw new Error("No API key provided");
|
|
@@ -47,7 +48,7 @@ export default class FirecrawlApp {
|
|
|
47
48
|
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
|
48
49
|
}
|
|
49
50
|
try {
|
|
50
|
-
const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers });
|
|
51
|
+
const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
|
51
52
|
if (response.status === 200) {
|
|
52
53
|
const responseData = response.data;
|
|
53
54
|
if (responseData.success) {
|
|
@@ -84,7 +85,7 @@ export default class FirecrawlApp {
|
|
|
84
85
|
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
85
86
|
}
|
|
86
87
|
try {
|
|
87
|
-
const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers });
|
|
88
|
+
const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
88
89
|
if (response.status === 200) {
|
|
89
90
|
const responseData = response.data;
|
|
90
91
|
if (responseData.success) {
|
|
@@ -109,22 +110,23 @@ export default class FirecrawlApp {
|
|
|
109
110
|
* @param {string} url - The URL to crawl.
|
|
110
111
|
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
111
112
|
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
112
|
-
* @param {number}
|
|
113
|
+
* @param {number} pollInterval - Time in seconds for job status checks.
|
|
114
|
+
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
113
115
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
114
116
|
*/
|
|
115
117
|
crawlUrl(url_1) {
|
|
116
|
-
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true,
|
|
117
|
-
const headers = this.prepareHeaders();
|
|
118
|
+
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
119
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
118
120
|
let jsonData = { url };
|
|
119
121
|
if (params) {
|
|
120
122
|
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
121
123
|
}
|
|
122
124
|
try {
|
|
123
|
-
const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers);
|
|
125
|
+
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
|
124
126
|
if (response.status === 200) {
|
|
125
127
|
const jobId = response.data.jobId;
|
|
126
128
|
if (waitUntilDone) {
|
|
127
|
-
return this.monitorJobStatus(jobId, headers,
|
|
129
|
+
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
128
130
|
}
|
|
129
131
|
else {
|
|
130
132
|
return { success: true, jobId };
|
|
@@ -150,9 +152,14 @@ export default class FirecrawlApp {
|
|
|
150
152
|
return __awaiter(this, void 0, void 0, function* () {
|
|
151
153
|
const headers = this.prepareHeaders();
|
|
152
154
|
try {
|
|
153
|
-
const response = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
|
155
|
+
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
154
156
|
if (response.status === 200) {
|
|
155
|
-
return
|
|
157
|
+
return {
|
|
158
|
+
success: true,
|
|
159
|
+
status: response.data.status,
|
|
160
|
+
data: response.data.data,
|
|
161
|
+
partial_data: !response.data.data ? response.data.partial_data : undefined,
|
|
162
|
+
};
|
|
156
163
|
}
|
|
157
164
|
else {
|
|
158
165
|
this.handleError(response, "check crawl status");
|
|
@@ -172,11 +179,8 @@ export default class FirecrawlApp {
|
|
|
172
179
|
* Prepares the headers for an API request.
|
|
173
180
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
174
181
|
*/
|
|
175
|
-
prepareHeaders() {
|
|
176
|
-
return {
|
|
177
|
-
"Content-Type": "application/json",
|
|
178
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
179
|
-
};
|
|
182
|
+
prepareHeaders(idempotencyKey) {
|
|
183
|
+
return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
|
|
180
184
|
}
|
|
181
185
|
/**
|
|
182
186
|
* Sends a POST request to the specified URL.
|
|
@@ -204,10 +208,10 @@ export default class FirecrawlApp {
|
|
|
204
208
|
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
205
209
|
* @returns {Promise<any>} The final job status or data.
|
|
206
210
|
*/
|
|
207
|
-
monitorJobStatus(jobId, headers,
|
|
211
|
+
monitorJobStatus(jobId, headers, checkInterval) {
|
|
208
212
|
return __awaiter(this, void 0, void 0, function* () {
|
|
209
213
|
while (true) {
|
|
210
|
-
const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
|
214
|
+
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
211
215
|
if (statusResponse.status === 200) {
|
|
212
216
|
const statusData = statusResponse.data;
|
|
213
217
|
if (statusData.status === "completed") {
|
|
@@ -219,10 +223,10 @@ export default class FirecrawlApp {
|
|
|
219
223
|
}
|
|
220
224
|
}
|
|
221
225
|
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
|
222
|
-
if (
|
|
223
|
-
|
|
226
|
+
if (checkInterval < 2) {
|
|
227
|
+
checkInterval = 2;
|
|
224
228
|
}
|
|
225
|
-
yield new Promise((resolve) => setTimeout(resolve,
|
|
229
|
+
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
226
230
|
}
|
|
227
231
|
else {
|
|
228
232
|
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "0.0.21",
|
|
3
|
+
"version": "0.0.24",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"types": "types/index.d.ts",
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
"build": "tsc",
|
|
10
10
|
"publish": "npm run build && npm publish --access public",
|
|
11
11
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
12
|
-
"test": "jest src/**/*.test.ts"
|
|
12
|
+
"test": "jest src/__tests__/**/*.test.ts"
|
|
13
13
|
},
|
|
14
14
|
"repository": {
|
|
15
15
|
"type": "git",
|
|
@@ -19,6 +19,8 @@
|
|
|
19
19
|
"license": "MIT",
|
|
20
20
|
"dependencies": {
|
|
21
21
|
"axios": "^1.6.8",
|
|
22
|
+
"dotenv": "^16.4.5",
|
|
23
|
+
"uuid": "^9.0.1",
|
|
22
24
|
"zod": "^3.23.8",
|
|
23
25
|
"zod-to-json-schema": "^3.23.0"
|
|
24
26
|
},
|
|
@@ -29,7 +31,10 @@
|
|
|
29
31
|
"devDependencies": {
|
|
30
32
|
"@jest/globals": "^29.7.0",
|
|
31
33
|
"@types/axios": "^0.14.0",
|
|
32
|
-
"@types/
|
|
34
|
+
"@types/dotenv": "^8.2.0",
|
|
35
|
+
"@types/jest": "^29.5.12",
|
|
36
|
+
"@types/node": "^20.12.12",
|
|
37
|
+
"@types/uuid": "^9.0.8",
|
|
33
38
|
"jest": "^29.7.0",
|
|
34
39
|
"ts-jest": "^29.1.2",
|
|
35
40
|
"typescript": "^5.4.5"
|
package/src/__tests__/e2e_withAuth/index.test.ts
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import FirecrawlApp from '../../index';
|
|
2
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
3
|
+
import dotenv from 'dotenv';
|
|
4
|
+
|
|
5
|
+
dotenv.config();
|
|
6
|
+
|
|
7
|
+
const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
8
|
+
const API_URL = process.env.API_URL;
|
|
9
|
+
|
|
10
|
+
describe('FirecrawlApp E2E Tests', () => {
|
|
11
|
+
test('should throw error for no API key', () => {
|
|
12
|
+
expect(() => {
|
|
13
|
+
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
|
14
|
+
}).toThrow("No API key provided");
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
test('should throw error for invalid API key on scrape', async () => {
|
|
18
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
19
|
+
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
test('should throw error for blocklisted URL on scrape', async () => {
|
|
23
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
24
|
+
const blocklistedUrl = "https://facebook.com/fake-test";
|
|
25
|
+
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
test('should return successful response with valid preview token', async () => {
|
|
29
|
+
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
|
30
|
+
const response = await app.scrapeUrl('https://firecrawl.dev');
|
|
31
|
+
expect(response).not.toBeNull();
|
|
32
|
+
expect(response.data.content).toContain("🔥 Firecrawl");
|
|
33
|
+
}, 30000); // 30 seconds timeout
|
|
34
|
+
|
|
35
|
+
test('should return successful response for valid scrape', async () => {
|
|
36
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
37
|
+
const response = await app.scrapeUrl('https://firecrawl.dev');
|
|
38
|
+
expect(response).not.toBeNull();
|
|
39
|
+
expect(response.data.content).toContain("🔥 Firecrawl");
|
|
40
|
+
expect(response.data).toHaveProperty('markdown');
|
|
41
|
+
expect(response.data).toHaveProperty('metadata');
|
|
42
|
+
expect(response.data).not.toHaveProperty('html');
|
|
43
|
+
}, 30000); // 30 seconds timeout
|
|
44
|
+
|
|
45
|
+
test('should return successful response with valid API key and include HTML', async () => {
|
|
46
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
47
|
+
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
|
|
48
|
+
expect(response).not.toBeNull();
|
|
49
|
+
expect(response.data.content).toContain("🔥 Firecrawl");
|
|
50
|
+
expect(response.data.markdown).toContain("🔥 Firecrawl");
|
|
51
|
+
expect(response.data.html).toContain("<h1");
|
|
52
|
+
}, 30000); // 30 seconds timeout
|
|
53
|
+
|
|
54
|
+
test('should return successful response for valid scrape with PDF file', async () => {
|
|
55
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
56
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
|
57
|
+
expect(response).not.toBeNull();
|
|
58
|
+
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
59
|
+
}, 30000); // 30 seconds timeout
|
|
60
|
+
|
|
61
|
+
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
62
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
63
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
|
64
|
+
expect(response).not.toBeNull();
|
|
65
|
+
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
66
|
+
}, 30000); // 30 seconds timeout
|
|
67
|
+
|
|
68
|
+
test('should throw error for invalid API key on crawl', async () => {
|
|
69
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
70
|
+
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
test('should throw error for blocklisted URL on crawl', async () => {
|
|
74
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
75
|
+
const blocklistedUrl = "https://twitter.com/fake-test";
|
|
76
|
+
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test('should return successful response for crawl and wait for completion', async () => {
|
|
80
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
81
|
+
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
|
82
|
+
expect(response).not.toBeNull();
|
|
83
|
+
expect(response[0].content).toContain("🔥 Firecrawl");
|
|
84
|
+
}, 60000); // 60 seconds timeout
|
|
85
|
+
|
|
86
|
+
test('should handle idempotency key for crawl', async () => {
|
|
87
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
88
|
+
const uniqueIdempotencyKey = uuidv4();
|
|
89
|
+
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
|
90
|
+
expect(response).not.toBeNull();
|
|
91
|
+
expect(response.jobId).toBeDefined();
|
|
92
|
+
|
|
93
|
+
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
test('should check crawl status', async () => {
|
|
97
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
98
|
+
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
|
99
|
+
expect(response).not.toBeNull();
|
|
100
|
+
expect(response.jobId).toBeDefined();
|
|
101
|
+
|
|
102
|
+
let statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
103
|
+
const maxChecks = 15;
|
|
104
|
+
let checks = 0;
|
|
105
|
+
|
|
106
|
+
while (statusResponse.status === 'active' && checks < maxChecks) {
|
|
107
|
+
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
108
|
+
expect(statusResponse.partial_data).not.toBeNull();
|
|
109
|
+
statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
110
|
+
checks++;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
expect(statusResponse).not.toBeNull();
|
|
114
|
+
expect(statusResponse.status).toBe('completed');
|
|
115
|
+
expect(statusResponse.data.length).toBeGreaterThan(0);
|
|
116
|
+
}, 35000); // 35 seconds timeout
|
|
117
|
+
|
|
118
|
+
test('should return successful response for search', async () => {
|
|
119
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
120
|
+
const response = await app.search("test query");
|
|
121
|
+
expect(response).not.toBeNull();
|
|
122
|
+
expect(response.data[0].content).toBeDefined();
|
|
123
|
+
expect(response.data.length).toBeGreaterThan(2);
|
|
124
|
+
}, 30000); // 30 seconds timeout
|
|
125
|
+
|
|
126
|
+
test('should throw error for invalid API key on search', async () => {
|
|
127
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
128
|
+
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
test('should perform LLM extraction', async () => {
|
|
132
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
133
|
+
const response = await app.scrapeUrl("https://mendable.ai", {
|
|
134
|
+
extractorOptions: {
|
|
135
|
+
mode: 'llm-extraction',
|
|
136
|
+
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
|
137
|
+
extractionSchema: {
|
|
138
|
+
type: 'object',
|
|
139
|
+
properties: {
|
|
140
|
+
company_mission: { type: 'string' },
|
|
141
|
+
supports_sso: { type: 'boolean' },
|
|
142
|
+
is_open_source: { type: 'boolean' }
|
|
143
|
+
},
|
|
144
|
+
required: ['company_mission', 'supports_sso', 'is_open_source']
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
});
|
|
148
|
+
expect(response).not.toBeNull();
|
|
149
|
+
expect(response.data.llm_extraction).toBeDefined();
|
|
150
|
+
const llmExtraction = response.data.llm_extraction;
|
|
151
|
+
expect(llmExtraction.company_mission).toBeDefined();
|
|
152
|
+
expect(typeof llmExtraction.supports_sso).toBe('boolean');
|
|
153
|
+
expect(typeof llmExtraction.is_open_source).toBe('boolean');
|
|
154
|
+
}, 30000); // 30 seconds timeout
|
|
155
|
+
});
|
package/src/index.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
|
|
|
6
6
|
*/
|
|
7
7
|
export interface FirecrawlAppConfig {
|
|
8
8
|
apiKey?: string | null;
|
|
9
|
+
apiUrl?: string | null;
|
|
9
10
|
}
|
|
10
11
|
|
|
11
12
|
/**
|
|
@@ -55,6 +56,7 @@ export interface JobStatusResponse {
|
|
|
55
56
|
status: string;
|
|
56
57
|
jobId?: string;
|
|
57
58
|
data?: any;
|
|
59
|
+
partial_data?: any,
|
|
58
60
|
error?: string;
|
|
59
61
|
}
|
|
60
62
|
|
|
@@ -63,6 +65,7 @@ export interface JobStatusResponse {
|
|
|
63
65
|
*/
|
|
64
66
|
export default class FirecrawlApp {
|
|
65
67
|
private apiKey: string;
|
|
68
|
+
private apiUrl: string = "https://api.firecrawl.dev";
|
|
66
69
|
|
|
67
70
|
/**
|
|
68
71
|
* Initializes a new instance of the FirecrawlApp class.
|
|
@@ -107,7 +110,7 @@ export default class FirecrawlApp {
|
|
|
107
110
|
}
|
|
108
111
|
try {
|
|
109
112
|
const response: AxiosResponse = await axios.post(
|
|
110
|
-
"https://api.firecrawl.dev/v0/scrape",
|
|
113
|
+
this.apiUrl + "/v0/scrape",
|
|
111
114
|
jsonData,
|
|
112
115
|
{ headers },
|
|
113
116
|
);
|
|
@@ -147,7 +150,7 @@ export default class FirecrawlApp {
|
|
|
147
150
|
}
|
|
148
151
|
try {
|
|
149
152
|
const response: AxiosResponse = await axios.post(
|
|
150
|
-
"https://api.firecrawl.dev/v0/search",
|
|
153
|
+
this.apiUrl + "/v0/search",
|
|
151
154
|
jsonData,
|
|
152
155
|
{ headers }
|
|
153
156
|
);
|
|
@@ -172,30 +175,32 @@ export default class FirecrawlApp {
|
|
|
172
175
|
* @param {string} url - The URL to crawl.
|
|
173
176
|
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
174
177
|
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
175
|
-
* @param {number}
|
|
178
|
+
* @param {number} pollInterval - Time in seconds for job status checks.
|
|
179
|
+
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
176
180
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
177
181
|
*/
|
|
178
182
|
async crawlUrl(
|
|
179
183
|
url: string,
|
|
180
184
|
params: Params | null = null,
|
|
181
185
|
waitUntilDone: boolean = true,
|
|
182
|
-
|
|
186
|
+
pollInterval: number = 2,
|
|
187
|
+
idempotencyKey?: string
|
|
183
188
|
): Promise<CrawlResponse | any> {
|
|
184
|
-
const headers = this.prepareHeaders();
|
|
189
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
185
190
|
let jsonData: Params = { url };
|
|
186
191
|
if (params) {
|
|
187
192
|
jsonData = { ...jsonData, ...params };
|
|
188
193
|
}
|
|
189
194
|
try {
|
|
190
195
|
const response: AxiosResponse = await this.postRequest(
|
|
191
|
-
"https://api.firecrawl.dev/v0/crawl",
|
|
196
|
+
this.apiUrl + "/v0/crawl",
|
|
192
197
|
jsonData,
|
|
193
198
|
headers
|
|
194
199
|
);
|
|
195
200
|
if (response.status === 200) {
|
|
196
201
|
const jobId: string = response.data.jobId;
|
|
197
202
|
if (waitUntilDone) {
|
|
198
|
-
return this.monitorJobStatus(jobId, headers,
|
|
203
|
+
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
199
204
|
} else {
|
|
200
205
|
return { success: true, jobId };
|
|
201
206
|
}
|
|
@@ -218,11 +223,16 @@ export default class FirecrawlApp {
|
|
|
218
223
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
219
224
|
try {
|
|
220
225
|
const response: AxiosResponse = await this.getRequest(
|
|
221
|
-
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
226
|
+
this.apiUrl + `/v0/crawl/status/${jobId}`,
|
|
222
227
|
headers
|
|
223
228
|
);
|
|
224
229
|
if (response.status === 200) {
|
|
225
|
-
return
|
|
230
|
+
return {
|
|
231
|
+
success: true,
|
|
232
|
+
status: response.data.status,
|
|
233
|
+
data: response.data.data,
|
|
234
|
+
partial_data: !response.data.data ? response.data.partial_data : undefined,
|
|
235
|
+
};
|
|
226
236
|
} else {
|
|
227
237
|
this.handleError(response, "check crawl status");
|
|
228
238
|
}
|
|
@@ -240,11 +250,12 @@ export default class FirecrawlApp {
|
|
|
240
250
|
* Prepares the headers for an API request.
|
|
241
251
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
242
252
|
*/
|
|
243
|
-
prepareHeaders(): AxiosRequestHeaders {
|
|
253
|
+
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
|
|
244
254
|
return {
|
|
245
|
-
|
|
246
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
247
|
-
|
|
255
|
+
'Content-Type': 'application/json',
|
|
256
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
257
|
+
...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
|
|
258
|
+
} as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
|
|
248
259
|
}
|
|
249
260
|
|
|
250
261
|
/**
|
|
@@ -285,11 +296,11 @@ export default class FirecrawlApp {
|
|
|
285
296
|
async monitorJobStatus(
|
|
286
297
|
jobId: string,
|
|
287
298
|
headers: AxiosRequestHeaders,
|
|
288
|
-
|
|
299
|
+
checkInterval: number
|
|
289
300
|
): Promise<any> {
|
|
290
301
|
while (true) {
|
|
291
302
|
const statusResponse: AxiosResponse = await this.getRequest(
|
|
292
|
-
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
303
|
+
this.apiUrl + `/v0/crawl/status/${jobId}`,
|
|
293
304
|
headers
|
|
294
305
|
);
|
|
295
306
|
if (statusResponse.status === 200) {
|
|
@@ -303,10 +314,10 @@ export default class FirecrawlApp {
|
|
|
303
314
|
} else if (
|
|
304
315
|
["active", "paused", "pending", "queued"].includes(statusData.status)
|
|
305
316
|
) {
|
|
306
|
-
if (
|
|
307
|
-
|
|
317
|
+
if (checkInterval < 2) {
|
|
318
|
+
checkInterval = 2;
|
|
308
319
|
}
|
|
309
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
320
|
+
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
310
321
|
} else {
|
|
311
322
|
throw new Error(
|
|
312
323
|
`Crawl job failed or was stopped. Status: ${statusData.status}`
|
package/types/index.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { z } from "zod";
|
|
|
5
5
|
*/
|
|
6
6
|
export interface FirecrawlAppConfig {
|
|
7
7
|
apiKey?: string | null;
|
|
8
|
+
apiUrl?: string | null;
|
|
8
9
|
}
|
|
9
10
|
/**
|
|
10
11
|
* Generic parameter interface.
|
|
@@ -50,6 +51,7 @@ export interface JobStatusResponse {
|
|
|
50
51
|
status: string;
|
|
51
52
|
jobId?: string;
|
|
52
53
|
data?: any;
|
|
54
|
+
partial_data?: any;
|
|
53
55
|
error?: string;
|
|
54
56
|
}
|
|
55
57
|
/**
|
|
@@ -57,6 +59,7 @@ export interface JobStatusResponse {
|
|
|
57
59
|
*/
|
|
58
60
|
export default class FirecrawlApp {
|
|
59
61
|
private apiKey;
|
|
62
|
+
private apiUrl;
|
|
60
63
|
/**
|
|
61
64
|
* Initializes a new instance of the FirecrawlApp class.
|
|
62
65
|
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
@@ -81,10 +84,11 @@ export default class FirecrawlApp {
|
|
|
81
84
|
* @param {string} url - The URL to crawl.
|
|
82
85
|
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
83
86
|
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
84
|
-
* @param {number}
|
|
87
|
+
* @param {number} pollInterval - Time in seconds for job status checks.
|
|
88
|
+
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
85
89
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
86
90
|
*/
|
|
87
|
-
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean,
|
|
91
|
+
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
|
|
88
92
|
/**
|
|
89
93
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
90
94
|
* @param {string} jobId - The job ID of the crawl operation.
|
|
@@ -95,7 +99,7 @@ export default class FirecrawlApp {
|
|
|
95
99
|
* Prepares the headers for an API request.
|
|
96
100
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
97
101
|
*/
|
|
98
|
-
prepareHeaders(): AxiosRequestHeaders;
|
|
102
|
+
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
|
99
103
|
/**
|
|
100
104
|
* Sends a POST request to the specified URL.
|
|
101
105
|
* @param {string} url - The URL to send the request to.
|
|
@@ -118,7 +122,7 @@ export default class FirecrawlApp {
|
|
|
118
122
|
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
119
123
|
* @returns {Promise<any>} The final job status or data.
|
|
120
124
|
*/
|
|
121
|
-
monitorJobStatus(jobId: string, headers: AxiosRequestHeaders,
|
|
125
|
+
monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<any>;
|
|
122
126
|
/**
|
|
123
127
|
* Handles errors from API responses.
|
|
124
128
|
* @param {AxiosResponse} response - The response from the API.
|