@mendable/firecrawl-js 0.0.25 → 0.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/build/index.js +6 -4
- package/package.json +2 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +43 -42
- package/src/__tests__/index.test.ts +1 -1
- package/src/index.ts +85 -24
- package/types/index.d.ts +70 -13
package/README.md
CHANGED
|
@@ -176,6 +176,11 @@ async function checkStatusExample(jobId) {
|
|
|
176
176
|
checkStatusExample('your_job_id_here');
|
|
177
177
|
```
|
|
178
178
|
|
|
179
|
+
## Running Locally
|
|
180
|
+
To use the SDK when running Firecrawl locally, you can change the initial Firecrawl app instance to:
|
|
181
|
+
```js
|
|
182
|
+
const app = new FirecrawlApp({ apiKey: "YOUR_API_KEY", apiUrl: "http://localhost:3002" });
|
|
183
|
+
```
|
|
179
184
|
|
|
180
185
|
## Error Handling
|
|
181
186
|
|
package/build/index.js
CHANGED
|
@@ -18,9 +18,9 @@ export default class FirecrawlApp {
|
|
|
18
18
|
* Initializes a new instance of the FirecrawlApp class.
|
|
19
19
|
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
20
20
|
*/
|
|
21
|
-
constructor({ apiKey = null }) {
|
|
22
|
-
this.apiUrl = "https://api.firecrawl.dev";
|
|
21
|
+
constructor({ apiKey = null, apiUrl = null }) {
|
|
23
22
|
this.apiKey = apiKey || "";
|
|
23
|
+
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
24
24
|
if (!this.apiKey) {
|
|
25
25
|
throw new Error("No API key provided");
|
|
26
26
|
}
|
|
@@ -158,7 +158,9 @@ export default class FirecrawlApp {
|
|
|
158
158
|
success: true,
|
|
159
159
|
status: response.data.status,
|
|
160
160
|
data: response.data.data,
|
|
161
|
-
partial_data: !response.data.data
|
|
161
|
+
partial_data: !response.data.data
|
|
162
|
+
? response.data.partial_data
|
|
163
|
+
: undefined,
|
|
162
164
|
};
|
|
163
165
|
}
|
|
164
166
|
else {
|
|
@@ -180,7 +182,7 @@ export default class FirecrawlApp {
|
|
|
180
182
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
181
183
|
*/
|
|
182
184
|
prepareHeaders(idempotencyKey) {
|
|
183
|
-
return Object.assign({
|
|
185
|
+
return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
|
|
184
186
|
}
|
|
185
187
|
/**
|
|
186
188
|
* Sends a POST request to the specified URL.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "0.0.25",
|
|
3
|
+
"version": "0.0.28",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"types": "types/index.d.ts",
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"@types/axios": "^0.14.0",
|
|
34
34
|
"@types/dotenv": "^8.2.0",
|
|
35
35
|
"@types/jest": "^29.5.12",
|
|
36
|
+
"@types/mocha": "^10.0.6",
|
|
36
37
|
"@types/node": "^20.12.12",
|
|
37
38
|
"@types/uuid": "^9.0.8",
|
|
38
39
|
"jest": "^29.7.0",
|
|
@@ -2,100 +2,101 @@ import FirecrawlApp from '../../index';
|
|
|
2
2
|
import { v4 as uuidv4 } from 'uuid';
|
|
3
3
|
import dotenv from 'dotenv';
|
|
4
4
|
|
|
5
|
+
|
|
5
6
|
dotenv.config();
|
|
6
7
|
|
|
7
8
|
const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
8
|
-
const API_URL =
|
|
9
|
+
const API_URL = "http://127.0.0.1:3002";
|
|
9
10
|
|
|
10
11
|
describe('FirecrawlApp E2E Tests', () => {
|
|
11
|
-
test('should throw error for no API key', () => {
|
|
12
|
+
test.concurrent('should throw error for no API key', () => {
|
|
12
13
|
expect(() => {
|
|
13
14
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
|
14
15
|
}).toThrow("No API key provided");
|
|
15
16
|
});
|
|
16
17
|
|
|
17
|
-
test('should throw error for invalid API key on scrape', async () => {
|
|
18
|
+
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
|
18
19
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
19
|
-
await expect(invalidApp.scrapeUrl('https://
|
|
20
|
+
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
20
21
|
});
|
|
21
22
|
|
|
22
|
-
test('should throw error for blocklisted URL on scrape', async () => {
|
|
23
|
+
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
|
23
24
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
24
25
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
|
25
26
|
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
26
27
|
});
|
|
27
28
|
|
|
28
|
-
test('should return successful response with valid preview token', async () => {
|
|
29
|
+
test.concurrent('should return successful response with valid preview token', async () => {
|
|
29
30
|
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
|
30
|
-
const response = await app.scrapeUrl('https://
|
|
31
|
+
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
31
32
|
expect(response).not.toBeNull();
|
|
32
|
-
expect(response.data
|
|
33
|
+
expect(response.data?.content).toContain("_Roast_");
|
|
33
34
|
}, 30000); // 30 seconds timeout
|
|
34
35
|
|
|
35
|
-
test('should return successful response for valid scrape', async () => {
|
|
36
|
+
test.concurrent('should return successful response for valid scrape', async () => {
|
|
36
37
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
37
|
-
const response = await app.scrapeUrl('https://
|
|
38
|
+
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
38
39
|
expect(response).not.toBeNull();
|
|
39
|
-
expect(response.data
|
|
40
|
+
expect(response.data?.content).toContain("_Roast_");
|
|
40
41
|
expect(response.data).toHaveProperty('markdown');
|
|
41
42
|
expect(response.data).toHaveProperty('metadata');
|
|
42
43
|
expect(response.data).not.toHaveProperty('html');
|
|
43
44
|
}, 30000); // 30 seconds timeout
|
|
44
45
|
|
|
45
|
-
test('should return successful response with valid API key and include HTML', async () => {
|
|
46
|
+
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
|
46
47
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
47
|
-
const response = await app.scrapeUrl('https://
|
|
48
|
+
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
|
|
48
49
|
expect(response).not.toBeNull();
|
|
49
|
-
expect(response.data
|
|
50
|
-
expect(response.data
|
|
51
|
-
expect(response.data
|
|
50
|
+
expect(response.data?.content).toContain("_Roast_");
|
|
51
|
+
expect(response.data?.markdown).toContain("_Roast_");
|
|
52
|
+
expect(response.data?.html).toContain("<h1");
|
|
52
53
|
}, 30000); // 30 seconds timeout
|
|
53
54
|
|
|
54
|
-
test('should return successful response for valid scrape with PDF file', async () => {
|
|
55
|
+
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
|
55
56
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
56
57
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
|
57
58
|
expect(response).not.toBeNull();
|
|
58
|
-
expect(response.data
|
|
59
|
+
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
59
60
|
}, 30000); // 30 seconds timeout
|
|
60
61
|
|
|
61
|
-
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
62
|
+
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
62
63
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
63
64
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
|
64
65
|
expect(response).not.toBeNull();
|
|
65
|
-
expect(response.data
|
|
66
|
+
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
66
67
|
}, 30000); // 30 seconds timeout
|
|
67
68
|
|
|
68
|
-
test('should throw error for invalid API key on crawl', async () => {
|
|
69
|
+
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
69
70
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
70
|
-
await expect(invalidApp.crawlUrl('https://
|
|
71
|
+
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
71
72
|
});
|
|
72
73
|
|
|
73
|
-
test('should throw error for blocklisted URL on crawl', async () => {
|
|
74
|
+
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
|
74
75
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
75
76
|
const blocklistedUrl = "https://twitter.com/fake-test";
|
|
76
77
|
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
77
78
|
});
|
|
78
79
|
|
|
79
|
-
test('should return successful response for crawl and wait for completion', async () => {
|
|
80
|
+
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
80
81
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
81
|
-
const response = await app.crawlUrl('https://
|
|
82
|
+
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
|
82
83
|
expect(response).not.toBeNull();
|
|
83
|
-
expect(response[0].content).toContain("
|
|
84
|
+
expect(response[0].content).toContain("_Roast_");
|
|
84
85
|
}, 60000); // 60 seconds timeout
|
|
85
86
|
|
|
86
|
-
test('should handle idempotency key for crawl', async () => {
|
|
87
|
+
test.concurrent('should handle idempotency key for crawl', async () => {
|
|
87
88
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
88
89
|
const uniqueIdempotencyKey = uuidv4();
|
|
89
|
-
const response = await app.crawlUrl('https://
|
|
90
|
+
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
|
90
91
|
expect(response).not.toBeNull();
|
|
91
92
|
expect(response.jobId).toBeDefined();
|
|
92
93
|
|
|
93
|
-
await expect(app.crawlUrl('https://
|
|
94
|
+
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
|
94
95
|
});
|
|
95
96
|
|
|
96
|
-
test('should check crawl status', async () => {
|
|
97
|
+
test.concurrent('should check crawl status', async () => {
|
|
97
98
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
98
|
-
const response = await app.crawlUrl('https://
|
|
99
|
+
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
|
99
100
|
expect(response).not.toBeNull();
|
|
100
101
|
expect(response.jobId).toBeDefined();
|
|
101
102
|
|
|
@@ -112,23 +113,23 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
112
113
|
|
|
113
114
|
expect(statusResponse).not.toBeNull();
|
|
114
115
|
expect(statusResponse.status).toBe('completed');
|
|
115
|
-
expect(statusResponse
|
|
116
|
+
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
|
116
117
|
}, 35000); // 35 seconds timeout
|
|
117
118
|
|
|
118
|
-
test('should return successful response for search', async () => {
|
|
119
|
+
test.concurrent('should return successful response for search', async () => {
|
|
119
120
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
120
121
|
const response = await app.search("test query");
|
|
121
122
|
expect(response).not.toBeNull();
|
|
122
|
-
expect(response
|
|
123
|
-
expect(response
|
|
123
|
+
expect(response?.data?.[0]?.content).toBeDefined();
|
|
124
|
+
expect(response?.data?.length).toBeGreaterThan(2);
|
|
124
125
|
}, 30000); // 30 seconds timeout
|
|
125
126
|
|
|
126
|
-
test('should throw error for invalid API key on search', async () => {
|
|
127
|
+
test.concurrent('should throw error for invalid API key on search', async () => {
|
|
127
128
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
128
129
|
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
|
129
130
|
});
|
|
130
131
|
|
|
131
|
-
test('should perform LLM extraction', async () => {
|
|
132
|
+
test.concurrent('should perform LLM extraction', async () => {
|
|
132
133
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
133
134
|
const response = await app.scrapeUrl("https://mendable.ai", {
|
|
134
135
|
extractorOptions: {
|
|
@@ -146,10 +147,10 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
146
147
|
}
|
|
147
148
|
});
|
|
148
149
|
expect(response).not.toBeNull();
|
|
149
|
-
expect(response.data
|
|
150
|
-
const llmExtraction = response.data
|
|
151
|
-
expect(llmExtraction
|
|
152
|
-
expect(typeof llmExtraction
|
|
153
|
-
expect(typeof llmExtraction
|
|
150
|
+
expect(response.data?.llm_extraction).toBeDefined();
|
|
151
|
+
const llmExtraction = response.data?.llm_extraction;
|
|
152
|
+
expect(llmExtraction?.company_mission).toBeDefined();
|
|
153
|
+
expect(typeof llmExtraction?.supports_sso).toBe('boolean');
|
|
154
|
+
expect(typeof llmExtraction?.is_open_source).toBe('boolean');
|
|
154
155
|
}, 30000); // 30 seconds timeout
|
|
155
156
|
});
|
|
@@ -43,6 +43,6 @@ describe('the firecrawl JS SDK', () => {
|
|
|
43
43
|
expect.objectContaining({ headers: expect.objectContaining({'Authorization': `Bearer ${apiKey}`}) }),
|
|
44
44
|
)
|
|
45
45
|
expect(scrapedData.success).toBe(true);
|
|
46
|
-
expect(scrapedData
|
|
46
|
+
expect(scrapedData?.data?.metadata.title).toEqual('Mendable');
|
|
47
47
|
});
|
|
48
48
|
})
|
package/src/index.ts
CHANGED
|
@@ -10,15 +10,63 @@ export interface FirecrawlAppConfig {
|
|
|
10
10
|
}
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
|
-
*
|
|
13
|
+
* Metadata for a Firecrawl document.
|
|
14
14
|
*/
|
|
15
|
-
export interface
|
|
15
|
+
export interface FirecrawlDocumentMetadata {
|
|
16
|
+
title?: string;
|
|
17
|
+
description?: string;
|
|
18
|
+
language?: string;
|
|
19
|
+
keywords?: string;
|
|
20
|
+
robots?: string;
|
|
21
|
+
ogTitle?: string;
|
|
22
|
+
ogDescription?: string;
|
|
23
|
+
ogUrl?: string;
|
|
24
|
+
ogImage?: string;
|
|
25
|
+
ogAudio?: string;
|
|
26
|
+
ogDeterminer?: string;
|
|
27
|
+
ogLocale?: string;
|
|
28
|
+
ogLocaleAlternate?: string[];
|
|
29
|
+
ogSiteName?: string;
|
|
30
|
+
ogVideo?: string;
|
|
31
|
+
dctermsCreated?: string;
|
|
32
|
+
dcDateCreated?: string;
|
|
33
|
+
dcDate?: string;
|
|
34
|
+
dctermsType?: string;
|
|
35
|
+
dcType?: string;
|
|
36
|
+
dctermsAudience?: string;
|
|
37
|
+
dctermsSubject?: string;
|
|
38
|
+
dcSubject?: string;
|
|
39
|
+
dcDescription?: string;
|
|
40
|
+
dctermsKeywords?: string;
|
|
41
|
+
modifiedTime?: string;
|
|
42
|
+
publishedTime?: string;
|
|
43
|
+
articleTag?: string;
|
|
44
|
+
articleSection?: string;
|
|
45
|
+
sourceURL?: string;
|
|
46
|
+
pageStatusCode?: number;
|
|
47
|
+
pageError?: string;
|
|
16
48
|
[key: string]: any;
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Document interface for Firecrawl.
|
|
53
|
+
*/
|
|
54
|
+
export interface FirecrawlDocument {
|
|
55
|
+
id?: string;
|
|
56
|
+
url?: string;
|
|
57
|
+
content: string;
|
|
58
|
+
markdown?: string;
|
|
59
|
+
html?: string;
|
|
60
|
+
llm_extraction?: Record<string, any>;
|
|
61
|
+
createdAt?: Date;
|
|
62
|
+
updatedAt?: Date;
|
|
63
|
+
type?: string;
|
|
64
|
+
metadata: FirecrawlDocumentMetadata;
|
|
65
|
+
childrenLinks?: string[];
|
|
66
|
+
provider?: string;
|
|
67
|
+
warning?: string;
|
|
68
|
+
|
|
69
|
+
index?: number;
|
|
22
70
|
}
|
|
23
71
|
|
|
24
72
|
/**
|
|
@@ -26,16 +74,15 @@ export interface Params {
|
|
|
26
74
|
*/
|
|
27
75
|
export interface ScrapeResponse {
|
|
28
76
|
success: boolean;
|
|
29
|
-
data?:
|
|
77
|
+
data?: FirecrawlDocument;
|
|
30
78
|
error?: string;
|
|
31
79
|
}
|
|
32
|
-
|
|
33
80
|
/**
|
|
34
81
|
* Response interface for searching operations.
|
|
35
82
|
*/
|
|
36
83
|
export interface SearchResponse {
|
|
37
84
|
success: boolean;
|
|
38
|
-
data?:
|
|
85
|
+
data?: FirecrawlDocument[];
|
|
39
86
|
error?: string;
|
|
40
87
|
}
|
|
41
88
|
/**
|
|
@@ -44,10 +91,9 @@ export interface SearchResponse {
|
|
|
44
91
|
export interface CrawlResponse {
|
|
45
92
|
success: boolean;
|
|
46
93
|
jobId?: string;
|
|
47
|
-
data?:
|
|
94
|
+
data?: FirecrawlDocument[];
|
|
48
95
|
error?: string;
|
|
49
96
|
}
|
|
50
|
-
|
|
51
97
|
/**
|
|
52
98
|
* Response interface for job status checks.
|
|
53
99
|
*/
|
|
@@ -55,24 +101,35 @@ export interface JobStatusResponse {
|
|
|
55
101
|
success: boolean;
|
|
56
102
|
status: string;
|
|
57
103
|
jobId?: string;
|
|
58
|
-
data?:
|
|
59
|
-
partial_data?:
|
|
104
|
+
data?: FirecrawlDocument[];
|
|
105
|
+
partial_data?: FirecrawlDocument[];
|
|
60
106
|
error?: string;
|
|
61
107
|
}
|
|
62
|
-
|
|
108
|
+
/**
|
|
109
|
+
* Generic parameter interface.
|
|
110
|
+
*/
|
|
111
|
+
export interface Params {
|
|
112
|
+
[key: string]: any;
|
|
113
|
+
extractorOptions?: {
|
|
114
|
+
extractionSchema: z.ZodSchema | any;
|
|
115
|
+
mode?: "llm-extraction";
|
|
116
|
+
extractionPrompt?: string;
|
|
117
|
+
};
|
|
118
|
+
}
|
|
63
119
|
/**
|
|
64
120
|
* Main class for interacting with the Firecrawl API.
|
|
65
121
|
*/
|
|
66
122
|
export default class FirecrawlApp {
|
|
67
123
|
private apiKey: string;
|
|
68
|
-
private apiUrl: string
|
|
124
|
+
private apiUrl: string;
|
|
69
125
|
|
|
70
126
|
/**
|
|
71
127
|
* Initializes a new instance of the FirecrawlApp class.
|
|
72
128
|
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
73
129
|
*/
|
|
74
|
-
constructor({ apiKey = null }: FirecrawlAppConfig) {
|
|
130
|
+
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
|
75
131
|
this.apiKey = apiKey || "";
|
|
132
|
+
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
76
133
|
if (!this.apiKey) {
|
|
77
134
|
throw new Error("No API key provided");
|
|
78
135
|
}
|
|
@@ -112,7 +169,7 @@ export default class FirecrawlApp {
|
|
|
112
169
|
const response: AxiosResponse = await axios.post(
|
|
113
170
|
this.apiUrl + "/v0/scrape",
|
|
114
171
|
jsonData,
|
|
115
|
-
{ headers }
|
|
172
|
+
{ headers }
|
|
116
173
|
);
|
|
117
174
|
if (response.status === 200) {
|
|
118
175
|
const responseData = response.data;
|
|
@@ -231,7 +288,9 @@ export default class FirecrawlApp {
|
|
|
231
288
|
success: true,
|
|
232
289
|
status: response.data.status,
|
|
233
290
|
data: response.data.data,
|
|
234
|
-
partial_data: !response.data.data
|
|
291
|
+
partial_data: !response.data.data
|
|
292
|
+
? response.data.partial_data
|
|
293
|
+
: undefined,
|
|
235
294
|
};
|
|
236
295
|
} else {
|
|
237
296
|
this.handleError(response, "check crawl status");
|
|
@@ -252,10 +311,10 @@ export default class FirecrawlApp {
|
|
|
252
311
|
*/
|
|
253
312
|
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
|
|
254
313
|
return {
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
...(idempotencyKey ? {
|
|
258
|
-
} as AxiosRequestHeaders & {
|
|
314
|
+
"Content-Type": "application/json",
|
|
315
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
316
|
+
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
|
317
|
+
} as AxiosRequestHeaders & { "x-idempotency-key"?: string };
|
|
259
318
|
}
|
|
260
319
|
|
|
261
320
|
/**
|
|
@@ -317,7 +376,9 @@ export default class FirecrawlApp {
|
|
|
317
376
|
if (checkInterval < 2) {
|
|
318
377
|
checkInterval = 2;
|
|
319
378
|
}
|
|
320
|
-
await new Promise((resolve) =>
|
|
379
|
+
await new Promise((resolve) =>
|
|
380
|
+
setTimeout(resolve, checkInterval * 1000)
|
|
381
|
+
); // Wait for the specified timeout before checking again
|
|
321
382
|
} else {
|
|
322
383
|
throw new Error(
|
|
323
384
|
`Crawl job failed or was stopped. Status: ${statusData.status}`
|
package/types/index.d.ts
CHANGED
|
@@ -8,22 +8,68 @@ export interface FirecrawlAppConfig {
|
|
|
8
8
|
apiUrl?: string | null;
|
|
9
9
|
}
|
|
10
10
|
/**
|
|
11
|
-
*
|
|
11
|
+
* Metadata for a Firecrawl document.
|
|
12
12
|
*/
|
|
13
|
-
export interface
|
|
13
|
+
export interface FirecrawlDocumentMetadata {
|
|
14
|
+
title?: string;
|
|
15
|
+
description?: string;
|
|
16
|
+
language?: string;
|
|
17
|
+
keywords?: string;
|
|
18
|
+
robots?: string;
|
|
19
|
+
ogTitle?: string;
|
|
20
|
+
ogDescription?: string;
|
|
21
|
+
ogUrl?: string;
|
|
22
|
+
ogImage?: string;
|
|
23
|
+
ogAudio?: string;
|
|
24
|
+
ogDeterminer?: string;
|
|
25
|
+
ogLocale?: string;
|
|
26
|
+
ogLocaleAlternate?: string[];
|
|
27
|
+
ogSiteName?: string;
|
|
28
|
+
ogVideo?: string;
|
|
29
|
+
dctermsCreated?: string;
|
|
30
|
+
dcDateCreated?: string;
|
|
31
|
+
dcDate?: string;
|
|
32
|
+
dctermsType?: string;
|
|
33
|
+
dcType?: string;
|
|
34
|
+
dctermsAudience?: string;
|
|
35
|
+
dctermsSubject?: string;
|
|
36
|
+
dcSubject?: string;
|
|
37
|
+
dcDescription?: string;
|
|
38
|
+
dctermsKeywords?: string;
|
|
39
|
+
modifiedTime?: string;
|
|
40
|
+
publishedTime?: string;
|
|
41
|
+
articleTag?: string;
|
|
42
|
+
articleSection?: string;
|
|
43
|
+
sourceURL?: string;
|
|
44
|
+
pageStatusCode?: number;
|
|
45
|
+
pageError?: string;
|
|
14
46
|
[key: string]: any;
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Document interface for Firecrawl.
|
|
50
|
+
*/
|
|
51
|
+
export interface FirecrawlDocument {
|
|
52
|
+
id?: string;
|
|
53
|
+
url?: string;
|
|
54
|
+
content: string;
|
|
55
|
+
markdown?: string;
|
|
56
|
+
html?: string;
|
|
57
|
+
llm_extraction?: Record<string, any>;
|
|
58
|
+
createdAt?: Date;
|
|
59
|
+
updatedAt?: Date;
|
|
60
|
+
type?: string;
|
|
61
|
+
metadata: FirecrawlDocumentMetadata;
|
|
62
|
+
childrenLinks?: string[];
|
|
63
|
+
provider?: string;
|
|
64
|
+
warning?: string;
|
|
65
|
+
index?: number;
|
|
20
66
|
}
|
|
21
67
|
/**
|
|
22
68
|
* Response interface for scraping operations.
|
|
23
69
|
*/
|
|
24
70
|
export interface ScrapeResponse {
|
|
25
71
|
success: boolean;
|
|
26
|
-
data?:
|
|
72
|
+
data?: FirecrawlDocument;
|
|
27
73
|
error?: string;
|
|
28
74
|
}
|
|
29
75
|
/**
|
|
@@ -31,7 +77,7 @@ export interface ScrapeResponse {
|
|
|
31
77
|
*/
|
|
32
78
|
export interface SearchResponse {
|
|
33
79
|
success: boolean;
|
|
34
|
-
data?:
|
|
80
|
+
data?: FirecrawlDocument[];
|
|
35
81
|
error?: string;
|
|
36
82
|
}
|
|
37
83
|
/**
|
|
@@ -40,7 +86,7 @@ export interface SearchResponse {
|
|
|
40
86
|
export interface CrawlResponse {
|
|
41
87
|
success: boolean;
|
|
42
88
|
jobId?: string;
|
|
43
|
-
data?:
|
|
89
|
+
data?: FirecrawlDocument[];
|
|
44
90
|
error?: string;
|
|
45
91
|
}
|
|
46
92
|
/**
|
|
@@ -50,10 +96,21 @@ export interface JobStatusResponse {
|
|
|
50
96
|
success: boolean;
|
|
51
97
|
status: string;
|
|
52
98
|
jobId?: string;
|
|
53
|
-
data?:
|
|
54
|
-
partial_data?:
|
|
99
|
+
data?: FirecrawlDocument[];
|
|
100
|
+
partial_data?: FirecrawlDocument[];
|
|
55
101
|
error?: string;
|
|
56
102
|
}
|
|
103
|
+
/**
|
|
104
|
+
* Generic parameter interface.
|
|
105
|
+
*/
|
|
106
|
+
export interface Params {
|
|
107
|
+
[key: string]: any;
|
|
108
|
+
extractorOptions?: {
|
|
109
|
+
extractionSchema: z.ZodSchema | any;
|
|
110
|
+
mode?: "llm-extraction";
|
|
111
|
+
extractionPrompt?: string;
|
|
112
|
+
};
|
|
113
|
+
}
|
|
57
114
|
/**
|
|
58
115
|
* Main class for interacting with the Firecrawl API.
|
|
59
116
|
*/
|
|
@@ -64,7 +121,7 @@ export default class FirecrawlApp {
|
|
|
64
121
|
* Initializes a new instance of the FirecrawlApp class.
|
|
65
122
|
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
66
123
|
*/
|
|
67
|
-
constructor({ apiKey }: FirecrawlAppConfig);
|
|
124
|
+
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
|
|
68
125
|
/**
|
|
69
126
|
* Scrapes a URL using the Firecrawl API.
|
|
70
127
|
* @param {string} url - The URL to scrape.
|