@mendable/firecrawl-js 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/build/index.js +60 -2
- package/package.json +4 -4
- package/src/index.ts +101 -7
package/README.md
CHANGED
|
@@ -19,7 +19,7 @@ npm install @mendable/firecrawl-js
|
|
|
19
19
|
Here's an example of how to use the SDK with error handling:
|
|
20
20
|
|
|
21
21
|
```js
|
|
22
|
-
import
|
|
22
|
+
import FirecrawlApp from '@mendable/firecrawl-js';
|
|
23
23
|
|
|
24
24
|
async function main() {
|
|
25
25
|
try {
|
|
@@ -33,15 +33,18 @@ Here's an example of how to use the SDK with error handling:
|
|
|
33
33
|
|
|
34
34
|
// Crawl a website
|
|
35
35
|
const crawlUrl = 'https://mendable.ai';
|
|
36
|
-
const
|
|
36
|
+
const params = {
|
|
37
37
|
crawlerOptions: {
|
|
38
38
|
excludes: ['blog/'],
|
|
39
39
|
includes: [], // leave empty for all pages
|
|
40
40
|
limit: 1000,
|
|
41
|
+
},
|
|
42
|
+
pageOptions: {
|
|
43
|
+
onlyMainContent: true
|
|
41
44
|
}
|
|
42
45
|
};
|
|
43
46
|
|
|
44
|
-
const crawlResult = await app.crawlUrl(crawlUrl,
|
|
47
|
+
const crawlResult = await app.crawlUrl(crawlUrl, params);
|
|
45
48
|
console.log(crawlResult);
|
|
46
49
|
|
|
47
50
|
} catch (error) {
|
|
@@ -83,18 +86,21 @@ To crawl a website with error handling, use the `crawlUrl` method. It takes the
|
|
|
83
86
|
async function crawlExample() {
|
|
84
87
|
try {
|
|
85
88
|
const crawlUrl = 'https://example.com';
|
|
86
|
-
const
|
|
89
|
+
const params = {
|
|
87
90
|
crawlerOptions: {
|
|
88
91
|
excludes: ['blog/'],
|
|
89
92
|
includes: [], // leave empty for all pages
|
|
90
93
|
limit: 1000,
|
|
94
|
+
},
|
|
95
|
+
pageOptions: {
|
|
96
|
+
onlyMainContent: true
|
|
91
97
|
}
|
|
92
98
|
};
|
|
93
99
|
const waitUntilDone = true;
|
|
94
100
|
const timeout = 5;
|
|
95
101
|
const crawlResult = await app.crawlUrl(
|
|
96
102
|
crawlUrl,
|
|
97
|
-
|
|
103
|
+
params,
|
|
98
104
|
waitUntilDone,
|
|
99
105
|
timeout
|
|
100
106
|
);
|
package/build/index.js
CHANGED
|
@@ -10,13 +10,26 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
import axios from 'axios';
|
|
11
11
|
import dotenv from 'dotenv';
|
|
12
12
|
dotenv.config();
|
|
13
|
+
/**
|
|
14
|
+
* Main class for interacting with the Firecrawl API.
|
|
15
|
+
*/
|
|
13
16
|
export default class FirecrawlApp {
|
|
17
|
+
/**
|
|
18
|
+
* Initializes a new instance of the FirecrawlApp class.
|
|
19
|
+
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
20
|
+
*/
|
|
14
21
|
constructor({ apiKey = null }) {
|
|
15
22
|
this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
|
|
16
23
|
if (!this.apiKey) {
|
|
17
24
|
throw new Error('No API key provided');
|
|
18
25
|
}
|
|
19
26
|
}
|
|
27
|
+
/**
|
|
28
|
+
* Scrapes a URL using the Firecrawl API.
|
|
29
|
+
* @param {string} url - The URL to scrape.
|
|
30
|
+
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
31
|
+
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
32
|
+
*/
|
|
20
33
|
scrapeUrl(url_1) {
|
|
21
34
|
return __awaiter(this, arguments, void 0, function* (url, params = null) {
|
|
22
35
|
const headers = {
|
|
@@ -32,7 +45,7 @@ export default class FirecrawlApp {
|
|
|
32
45
|
if (response.status === 200) {
|
|
33
46
|
const responseData = response.data;
|
|
34
47
|
if (responseData.success) {
|
|
35
|
-
return responseData
|
|
48
|
+
return responseData;
|
|
36
49
|
}
|
|
37
50
|
else {
|
|
38
51
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
@@ -45,8 +58,17 @@ export default class FirecrawlApp {
|
|
|
45
58
|
catch (error) {
|
|
46
59
|
throw new Error(error.message);
|
|
47
60
|
}
|
|
61
|
+
return { success: false, error: 'Internal server error.' };
|
|
48
62
|
});
|
|
49
63
|
}
|
|
64
|
+
/**
|
|
65
|
+
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
66
|
+
* @param {string} url - The URL to crawl.
|
|
67
|
+
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
68
|
+
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
69
|
+
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
70
|
+
* @returns {Promise<CrawlResponse>} The response from the crawl operation.
|
|
71
|
+
*/
|
|
50
72
|
crawlUrl(url_1) {
|
|
51
73
|
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
|
|
52
74
|
const headers = this.prepareHeaders();
|
|
@@ -62,7 +84,7 @@ export default class FirecrawlApp {
|
|
|
62
84
|
return this.monitorJobStatus(jobId, headers, timeout);
|
|
63
85
|
}
|
|
64
86
|
else {
|
|
65
|
-
return { jobId };
|
|
87
|
+
return { success: true, jobId };
|
|
66
88
|
}
|
|
67
89
|
}
|
|
68
90
|
else {
|
|
@@ -73,8 +95,14 @@ export default class FirecrawlApp {
|
|
|
73
95
|
console.log(error);
|
|
74
96
|
throw new Error(error.message);
|
|
75
97
|
}
|
|
98
|
+
return { success: false, error: 'Internal server error.' };
|
|
76
99
|
});
|
|
77
100
|
}
|
|
101
|
+
/**
|
|
102
|
+
* Checks the status of a crawl job using the Firecrawl API.
|
|
103
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
104
|
+
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
105
|
+
*/
|
|
78
106
|
checkCrawlStatus(jobId) {
|
|
79
107
|
return __awaiter(this, void 0, void 0, function* () {
|
|
80
108
|
const headers = this.prepareHeaders();
|
|
@@ -90,20 +118,45 @@ export default class FirecrawlApp {
|
|
|
90
118
|
catch (error) {
|
|
91
119
|
throw new Error(error.message);
|
|
92
120
|
}
|
|
121
|
+
return { success: false, status: 'unknown', error: 'Internal server error.' };
|
|
93
122
|
});
|
|
94
123
|
}
|
|
124
|
+
/**
|
|
125
|
+
* Prepares the headers for an API request.
|
|
126
|
+
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
127
|
+
*/
|
|
95
128
|
prepareHeaders() {
|
|
96
129
|
return {
|
|
97
130
|
'Content-Type': 'application/json',
|
|
98
131
|
'Authorization': `Bearer ${this.apiKey}`,
|
|
99
132
|
};
|
|
100
133
|
}
|
|
134
|
+
/**
|
|
135
|
+
* Sends a POST request to the specified URL.
|
|
136
|
+
* @param {string} url - The URL to send the request to.
|
|
137
|
+
* @param {Params} data - The data to send in the request.
|
|
138
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
139
|
+
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
140
|
+
*/
|
|
101
141
|
postRequest(url, data, headers) {
|
|
102
142
|
return axios.post(url, data, { headers });
|
|
103
143
|
}
|
|
144
|
+
/**
|
|
145
|
+
* Sends a GET request to the specified URL.
|
|
146
|
+
* @param {string} url - The URL to send the request to.
|
|
147
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
148
|
+
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
149
|
+
*/
|
|
104
150
|
getRequest(url, headers) {
|
|
105
151
|
return axios.get(url, { headers });
|
|
106
152
|
}
|
|
153
|
+
/**
|
|
154
|
+
* Monitors the status of a crawl job until completion or failure.
|
|
155
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
156
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
157
|
+
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
158
|
+
* @returns {Promise<any>} The final job status or data.
|
|
159
|
+
*/
|
|
107
160
|
monitorJobStatus(jobId, headers, timeout) {
|
|
108
161
|
return __awaiter(this, void 0, void 0, function* () {
|
|
109
162
|
while (true) {
|
|
@@ -134,6 +187,11 @@ export default class FirecrawlApp {
|
|
|
134
187
|
}
|
|
135
188
|
});
|
|
136
189
|
}
|
|
190
|
+
/**
|
|
191
|
+
* Handles errors from API responses.
|
|
192
|
+
* @param {AxiosResponse} response - The response from the API.
|
|
193
|
+
* @param {string} action - The action being performed when the error occurred.
|
|
194
|
+
*/
|
|
137
195
|
handleError(response, action) {
|
|
138
196
|
if ([402, 409, 500].includes(response.status)) {
|
|
139
197
|
const errorMessage = response.data.error || 'Unknown error occurred';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.10",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
},
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
12
|
-
"url": "git+https://github.com/mendableai/firecrawl
|
|
12
|
+
"url": "git+https://github.com/mendableai/firecrawl.git"
|
|
13
13
|
},
|
|
14
14
|
"author": "Mendable.ai",
|
|
15
15
|
"license": "MIT",
|
|
@@ -18,9 +18,9 @@
|
|
|
18
18
|
"dotenv": "^16.4.5"
|
|
19
19
|
},
|
|
20
20
|
"bugs": {
|
|
21
|
-
"url": "https://github.com/mendableai/firecrawl
|
|
21
|
+
"url": "https://github.com/mendableai/firecrawl/issues"
|
|
22
22
|
},
|
|
23
|
-
"homepage": "https://github.com/mendableai/firecrawl
|
|
23
|
+
"homepage": "https://github.com/mendableai/firecrawl#readme",
|
|
24
24
|
"devDependencies": {
|
|
25
25
|
"@types/axios": "^0.14.0",
|
|
26
26
|
"@types/dotenv": "^8.2.0",
|
package/src/index.ts
CHANGED
|
@@ -2,17 +2,60 @@ import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
2
2
|
import dotenv from 'dotenv';
|
|
3
3
|
dotenv.config();
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
/**
|
|
6
|
+
* Configuration interface for FirecrawlApp.
|
|
7
|
+
*/
|
|
8
|
+
export interface FirecrawlAppConfig {
|
|
6
9
|
apiKey?: string | null;
|
|
7
10
|
}
|
|
8
11
|
|
|
9
|
-
|
|
12
|
+
/**
|
|
13
|
+
* Generic parameter interface.
|
|
14
|
+
*/
|
|
15
|
+
export interface Params {
|
|
10
16
|
[key: string]: any;
|
|
11
17
|
}
|
|
12
18
|
|
|
19
|
+
/**
|
|
20
|
+
* Response interface for scraping operations.
|
|
21
|
+
*/
|
|
22
|
+
export interface ScrapeResponse {
|
|
23
|
+
success: boolean;
|
|
24
|
+
data?: any;
|
|
25
|
+
error?: string;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Response interface for crawling operations.
|
|
30
|
+
*/
|
|
31
|
+
export interface CrawlResponse {
|
|
32
|
+
success: boolean;
|
|
33
|
+
jobId?: string;
|
|
34
|
+
data?: any;
|
|
35
|
+
error?: string;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Response interface for job status checks.
|
|
40
|
+
*/
|
|
41
|
+
export interface JobStatusResponse {
|
|
42
|
+
success: boolean;
|
|
43
|
+
status: string;
|
|
44
|
+
jobId?: string;
|
|
45
|
+
data?: any;
|
|
46
|
+
error?: string;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Main class for interacting with the Firecrawl API.
|
|
51
|
+
*/
|
|
13
52
|
export default class FirecrawlApp {
|
|
14
53
|
private apiKey: string;
|
|
15
54
|
|
|
55
|
+
/**
|
|
56
|
+
* Initializes a new instance of the FirecrawlApp class.
|
|
57
|
+
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
58
|
+
*/
|
|
16
59
|
constructor({ apiKey = null }: FirecrawlAppConfig) {
|
|
17
60
|
this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
|
|
18
61
|
if (!this.apiKey) {
|
|
@@ -20,7 +63,13 @@ export default class FirecrawlApp {
|
|
|
20
63
|
}
|
|
21
64
|
}
|
|
22
65
|
|
|
23
|
-
|
|
66
|
+
/**
|
|
67
|
+
* Scrapes a URL using the Firecrawl API.
|
|
68
|
+
* @param {string} url - The URL to scrape.
|
|
69
|
+
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
70
|
+
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
71
|
+
*/
|
|
72
|
+
async scrapeUrl(url: string, params: Params | null = null): Promise<ScrapeResponse> {
|
|
24
73
|
const headers: AxiosRequestHeaders = {
|
|
25
74
|
'Content-Type': 'application/json',
|
|
26
75
|
'Authorization': `Bearer ${this.apiKey}`,
|
|
@@ -34,7 +83,7 @@ export default class FirecrawlApp {
|
|
|
34
83
|
if (response.status === 200) {
|
|
35
84
|
const responseData = response.data;
|
|
36
85
|
if (responseData.success) {
|
|
37
|
-
return responseData
|
|
86
|
+
return responseData;
|
|
38
87
|
} else {
|
|
39
88
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
40
89
|
}
|
|
@@ -44,9 +93,18 @@ export default class FirecrawlApp {
|
|
|
44
93
|
} catch (error: any) {
|
|
45
94
|
throw new Error(error.message);
|
|
46
95
|
}
|
|
96
|
+
return { success: false, error: 'Internal server error.' };
|
|
47
97
|
}
|
|
48
98
|
|
|
49
|
-
|
|
99
|
+
/**
|
|
100
|
+
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
101
|
+
* @param {string} url - The URL to crawl.
|
|
102
|
+
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
103
|
+
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
104
|
+
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
105
|
+
* @returns {Promise<CrawlResponse>} The response from the crawl operation.
|
|
106
|
+
*/
|
|
107
|
+
async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<CrawlResponse> {
|
|
50
108
|
const headers = this.prepareHeaders();
|
|
51
109
|
let jsonData: Params = { url };
|
|
52
110
|
if (params) {
|
|
@@ -59,7 +117,7 @@ export default class FirecrawlApp {
|
|
|
59
117
|
if (waitUntilDone) {
|
|
60
118
|
return this.monitorJobStatus(jobId, headers, timeout);
|
|
61
119
|
} else {
|
|
62
|
-
return { jobId };
|
|
120
|
+
return { success: true, jobId };
|
|
63
121
|
}
|
|
64
122
|
} else {
|
|
65
123
|
this.handleError(response, 'start crawl job');
|
|
@@ -68,9 +126,15 @@ export default class FirecrawlApp {
|
|
|
68
126
|
console.log(error)
|
|
69
127
|
throw new Error(error.message);
|
|
70
128
|
}
|
|
129
|
+
return { success: false, error: 'Internal server error.' };
|
|
71
130
|
}
|
|
72
131
|
|
|
73
|
-
|
|
132
|
+
/**
|
|
133
|
+
* Checks the status of a crawl job using the Firecrawl API.
|
|
134
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
135
|
+
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
136
|
+
*/
|
|
137
|
+
async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
|
|
74
138
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
75
139
|
try {
|
|
76
140
|
const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
|
@@ -82,8 +146,13 @@ export default class FirecrawlApp {
|
|
|
82
146
|
} catch (error: any) {
|
|
83
147
|
throw new Error(error.message);
|
|
84
148
|
}
|
|
149
|
+
return { success: false, status: 'unknown', error: 'Internal server error.' };
|
|
85
150
|
}
|
|
86
151
|
|
|
152
|
+
/**
|
|
153
|
+
* Prepares the headers for an API request.
|
|
154
|
+
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
155
|
+
*/
|
|
87
156
|
prepareHeaders(): AxiosRequestHeaders {
|
|
88
157
|
return {
|
|
89
158
|
'Content-Type': 'application/json',
|
|
@@ -91,14 +160,34 @@ export default class FirecrawlApp {
|
|
|
91
160
|
} as AxiosRequestHeaders;
|
|
92
161
|
}
|
|
93
162
|
|
|
163
|
+
/**
|
|
164
|
+
* Sends a POST request to the specified URL.
|
|
165
|
+
* @param {string} url - The URL to send the request to.
|
|
166
|
+
* @param {Params} data - The data to send in the request.
|
|
167
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
168
|
+
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
169
|
+
*/
|
|
94
170
|
postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
|
95
171
|
return axios.post(url, data, { headers });
|
|
96
172
|
}
|
|
97
173
|
|
|
174
|
+
/**
|
|
175
|
+
* Sends a GET request to the specified URL.
|
|
176
|
+
* @param {string} url - The URL to send the request to.
|
|
177
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
178
|
+
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
179
|
+
*/
|
|
98
180
|
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
|
99
181
|
return axios.get(url, { headers });
|
|
100
182
|
}
|
|
101
183
|
|
|
184
|
+
/**
|
|
185
|
+
* Monitors the status of a crawl job until completion or failure.
|
|
186
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
187
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
188
|
+
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
189
|
+
* @returns {Promise<any>} The final job status or data.
|
|
190
|
+
*/
|
|
102
191
|
async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any> {
|
|
103
192
|
while (true) {
|
|
104
193
|
const statusResponse: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
|
@@ -124,6 +213,11 @@ export default class FirecrawlApp {
|
|
|
124
213
|
}
|
|
125
214
|
}
|
|
126
215
|
|
|
216
|
+
/**
|
|
217
|
+
* Handles errors from API responses.
|
|
218
|
+
* @param {AxiosResponse} response - The response from the API.
|
|
219
|
+
* @param {string} action - The action being performed when the error occurred.
|
|
220
|
+
*/
|
|
127
221
|
handleError(response: AxiosResponse, action: string): void {
|
|
128
222
|
if ([402, 409, 500].includes(response.status)) {
|
|
129
223
|
const errorMessage: string = response.data.error || 'Unknown error occurred';
|