@mendable/firecrawl-js 0.0.21 → 0.0.23

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/.env.example ADDED
@@ -0,0 +1,3 @@
1
+ API_URL=http://localhost:3002
2
+ TEST_API_KEY=fc-YOUR_API_KEY
3
+
package/build/index.js CHANGED
@@ -19,6 +19,7 @@ export default class FirecrawlApp {
19
19
  * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
20
20
  */
21
21
  constructor({ apiKey = null }) {
22
+ this.apiUrl = "https://api.firecrawl.dev";
22
23
  this.apiKey = apiKey || "";
23
24
  if (!this.apiKey) {
24
25
  throw new Error("No API key provided");
@@ -47,7 +48,7 @@ export default class FirecrawlApp {
47
48
  jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
48
49
  }
49
50
  try {
50
- const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers });
51
+ const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
51
52
  if (response.status === 200) {
52
53
  const responseData = response.data;
53
54
  if (responseData.success) {
@@ -84,7 +85,7 @@ export default class FirecrawlApp {
84
85
  jsonData = Object.assign(Object.assign({}, jsonData), params);
85
86
  }
86
87
  try {
87
- const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers });
88
+ const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
88
89
  if (response.status === 200) {
89
90
  const responseData = response.data;
90
91
  if (responseData.success) {
@@ -110,17 +111,18 @@ export default class FirecrawlApp {
110
111
  * @param {Params | null} params - Additional parameters for the crawl request.
111
112
  * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
112
113
  * @param {number} timeout - Timeout in seconds for job status checks.
114
+ * @param {string} idempotencyKey - Optional idempotency key for the request.
113
115
  * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
114
116
  */
115
117
  crawlUrl(url_1) {
116
- return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
117
- const headers = this.prepareHeaders();
118
+ return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
119
+ const headers = this.prepareHeaders(idempotencyKey);
118
120
  let jsonData = { url };
119
121
  if (params) {
120
122
  jsonData = Object.assign(Object.assign({}, jsonData), params);
121
123
  }
122
124
  try {
123
- const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers);
125
+ const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
124
126
  if (response.status === 200) {
125
127
  const jobId = response.data.jobId;
126
128
  if (waitUntilDone) {
@@ -150,9 +152,14 @@ export default class FirecrawlApp {
150
152
  return __awaiter(this, void 0, void 0, function* () {
151
153
  const headers = this.prepareHeaders();
152
154
  try {
153
- const response = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
155
+ const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
154
156
  if (response.status === 200) {
155
- return response.data;
157
+ return {
158
+ success: true,
159
+ status: response.data.status,
160
+ data: response.data.data,
161
+ partial_data: !response.data.data ? response.data.partial_data : undefined,
162
+ };
156
163
  }
157
164
  else {
158
165
  this.handleError(response, "check crawl status");
@@ -172,11 +179,8 @@ export default class FirecrawlApp {
172
179
  * Prepares the headers for an API request.
173
180
  * @returns {AxiosRequestHeaders} The prepared headers.
174
181
  */
175
- prepareHeaders() {
176
- return {
177
- "Content-Type": "application/json",
178
- Authorization: `Bearer ${this.apiKey}`,
179
- };
182
+ prepareHeaders(idempotencyKey) {
183
+ return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
180
184
  }
181
185
  /**
182
186
  * Sends a POST request to the specified URL.
@@ -207,7 +211,7 @@ export default class FirecrawlApp {
207
211
  monitorJobStatus(jobId, headers, timeout) {
208
212
  return __awaiter(this, void 0, void 0, function* () {
209
213
  while (true) {
210
- const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
214
+ const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
211
215
  if (statusResponse.status === 200) {
212
216
  const statusData = statusResponse.data;
213
217
  if (statusData.status === "completed") {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl-js",
3
- "version": "0.0.21",
3
+ "version": "0.0.23",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "build/index.js",
6
6
  "types": "types/index.d.ts",
@@ -9,7 +9,7 @@
9
9
  "build": "tsc",
10
10
  "publish": "npm run build && npm publish --access public",
11
11
  "publish-beta": "npm run build && npm publish --access public --tag beta",
12
- "test": "jest src/**/*.test.ts"
12
+ "test": "jest src/__tests__/**/*.test.ts"
13
13
  },
14
14
  "repository": {
15
15
  "type": "git",
@@ -19,6 +19,8 @@
19
19
  "license": "MIT",
20
20
  "dependencies": {
21
21
  "axios": "^1.6.8",
22
+ "dotenv": "^16.4.5",
23
+ "uuid": "^9.0.1",
22
24
  "zod": "^3.23.8",
23
25
  "zod-to-json-schema": "^3.23.0"
24
26
  },
@@ -29,7 +31,10 @@
29
31
  "devDependencies": {
30
32
  "@jest/globals": "^29.7.0",
31
33
  "@types/axios": "^0.14.0",
32
- "@types/node": "^20.12.7",
34
+ "@types/dotenv": "^8.2.0",
35
+ "@types/jest": "^29.5.12",
36
+ "@types/node": "^20.12.12",
37
+ "@types/uuid": "^9.0.8",
33
38
  "jest": "^29.7.0",
34
39
  "ts-jest": "^29.1.2",
35
40
  "typescript": "^5.4.5"
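package/src/__tests__/<dir>/<name>.test.ts ADDED  [file header missing from this rendering; exact path not shown — location inferred from the `../../index` import below and the `jest src/__tests__/**/*.test.ts` script in package.json]
@@ -0,0 +1,155 @@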
1
+ import FirecrawlApp from '../../index';
2
+ import { v4 as uuidv4 } from 'uuid';
3
+ import dotenv from 'dotenv';
4
+
5
+ dotenv.config();
6
+
7
+ const TEST_API_KEY = process.env.TEST_API_KEY;
8
+ const API_URL = process.env.API_URL;
9
+
10
+ describe('FirecrawlApp E2E Tests', () => {
11
+ test('should throw error for no API key', () => {
12
+ expect(() => {
13
+ new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
14
+ }).toThrow("No API key provided");
15
+ });
16
+
17
+ test('should throw error for invalid API key on scrape', async () => {
18
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
19
+ await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
20
+ });
21
+
22
+ test('should throw error for blocklisted URL on scrape', async () => {
23
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
24
+ const blocklistedUrl = "https://facebook.com/fake-test";
25
+ await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
26
+ });
27
+
28
+ test('should return successful response with valid preview token', async () => {
29
+ const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
30
+ const response = await app.scrapeUrl('https://firecrawl.dev');
31
+ expect(response).not.toBeNull();
32
+ expect(response.data.content).toContain("🔥 Firecrawl");
33
+ }, 30000); // 30 seconds timeout
34
+
35
+ test('should return successful response for valid scrape', async () => {
36
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
37
+ const response = await app.scrapeUrl('https://firecrawl.dev');
38
+ expect(response).not.toBeNull();
39
+ expect(response.data.content).toContain("🔥 Firecrawl");
40
+ expect(response.data).toHaveProperty('markdown');
41
+ expect(response.data).toHaveProperty('metadata');
42
+ expect(response.data).not.toHaveProperty('html');
43
+ }, 30000); // 30 seconds timeout
44
+
45
+ test('should return successful response with valid API key and include HTML', async () => {
46
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
47
+ const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
48
+ expect(response).not.toBeNull();
49
+ expect(response.data.content).toContain("🔥 Firecrawl");
50
+ expect(response.data.markdown).toContain("🔥 Firecrawl");
51
+ expect(response.data.html).toContain("<h1");
52
+ }, 30000); // 30 seconds timeout
53
+
54
+ test('should return successful response for valid scrape with PDF file', async () => {
55
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
56
+ const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
57
+ expect(response).not.toBeNull();
58
+ expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
59
+ }, 30000); // 30 seconds timeout
60
+
61
+ test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
62
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
63
+ const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
64
+ expect(response).not.toBeNull();
65
+ expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
66
+ }, 30000); // 30 seconds timeout
67
+
68
+ test('should throw error for invalid API key on crawl', async () => {
69
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
70
+ await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
71
+ });
72
+
73
+ test('should throw error for blocklisted URL on crawl', async () => {
74
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
75
+ const blocklistedUrl = "https://twitter.com/fake-test";
76
+ await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
77
+ });
78
+
79
+ test('should return successful response for crawl and wait for completion', async () => {
80
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
81
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
82
+ expect(response).not.toBeNull();
83
+ expect(response[0].content).toContain("🔥 Firecrawl");
84
+ }, 60000); // 60 seconds timeout
85
+
86
+ test('should handle idempotency key for crawl', async () => {
87
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
88
+ const uniqueIdempotencyKey = uuidv4();
89
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
90
+ expect(response).not.toBeNull();
91
+ expect(response.jobId).toBeDefined();
92
+
93
+ await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
94
+ });
95
+
96
+ test('should check crawl status', async () => {
97
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
98
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
99
+ expect(response).not.toBeNull();
100
+ expect(response.jobId).toBeDefined();
101
+
102
+ let statusResponse = await app.checkCrawlStatus(response.jobId);
103
+ const maxChecks = 15;
104
+ let checks = 0;
105
+
106
+ while (statusResponse.status === 'active' && checks < maxChecks) {
107
+ await new Promise(resolve => setTimeout(resolve, 1000));
108
+ expect(statusResponse.partial_data).not.toBeNull();
109
+ statusResponse = await app.checkCrawlStatus(response.jobId);
110
+ checks++;
111
+ }
112
+
113
+ expect(statusResponse).not.toBeNull();
114
+ expect(statusResponse.status).toBe('completed');
115
+ expect(statusResponse.data.length).toBeGreaterThan(0);
116
+ }, 35000); // 35 seconds timeout
117
+
118
+ test('should return successful response for search', async () => {
119
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
120
+ const response = await app.search("test query");
121
+ expect(response).not.toBeNull();
122
+ expect(response.data[0].content).toBeDefined();
123
+ expect(response.data.length).toBeGreaterThan(2);
124
+ }, 30000); // 30 seconds timeout
125
+
126
+ test('should throw error for invalid API key on search', async () => {
127
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
128
+ await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
129
+ });
130
+
131
+ test('should perform LLM extraction', async () => {
132
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
133
+ const response = await app.scrapeUrl("https://mendable.ai", {
134
+ extractorOptions: {
135
+ mode: 'llm-extraction',
136
+ extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
137
+ extractionSchema: {
138
+ type: 'object',
139
+ properties: {
140
+ company_mission: { type: 'string' },
141
+ supports_sso: { type: 'boolean' },
142
+ is_open_source: { type: 'boolean' }
143
+ },
144
+ required: ['company_mission', 'supports_sso', 'is_open_source']
145
+ }
146
+ }
147
+ });
148
+ expect(response).not.toBeNull();
149
+ expect(response.data.llm_extraction).toBeDefined();
150
+ const llmExtraction = response.data.llm_extraction;
151
+ expect(llmExtraction.company_mission).toBeDefined();
152
+ expect(typeof llmExtraction.supports_sso).toBe('boolean');
153
+ expect(typeof llmExtraction.is_open_source).toBe('boolean');
154
+ }, 30000); // 30 seconds timeout
155
+ });
package/src/index.ts CHANGED
@@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
6
6
  */
7
7
  export interface FirecrawlAppConfig {
8
8
  apiKey?: string | null;
9
+ apiUrl?: string | null;
9
10
  }
10
11
 
11
12
  /**
@@ -55,6 +56,7 @@ export interface JobStatusResponse {
55
56
  status: string;
56
57
  jobId?: string;
57
58
  data?: any;
59
+ partial_data?: any,
58
60
  error?: string;
59
61
  }
60
62
 
@@ -63,6 +65,7 @@ export interface JobStatusResponse {
63
65
  */
64
66
  export default class FirecrawlApp {
65
67
  private apiKey: string;
68
+ private apiUrl: string = "https://api.firecrawl.dev";
66
69
 
67
70
  /**
68
71
  * Initializes a new instance of the FirecrawlApp class.
@@ -107,7 +110,7 @@ export default class FirecrawlApp {
107
110
  }
108
111
  try {
109
112
  const response: AxiosResponse = await axios.post(
110
- "https://api.firecrawl.dev/v0/scrape",
113
+ this.apiUrl + "/v0/scrape",
111
114
  jsonData,
112
115
  { headers },
113
116
  );
@@ -147,7 +150,7 @@ export default class FirecrawlApp {
147
150
  }
148
151
  try {
149
152
  const response: AxiosResponse = await axios.post(
150
- "https://api.firecrawl.dev/v0/search",
153
+ this.apiUrl + "/v0/search",
151
154
  jsonData,
152
155
  { headers }
153
156
  );
@@ -173,22 +176,24 @@ export default class FirecrawlApp {
173
176
  * @param {Params | null} params - Additional parameters for the crawl request.
174
177
  * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
175
178
  * @param {number} timeout - Timeout in seconds for job status checks.
179
+ * @param {string} idempotencyKey - Optional idempotency key for the request.
176
180
  * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
177
181
  */
178
182
  async crawlUrl(
179
183
  url: string,
180
184
  params: Params | null = null,
181
185
  waitUntilDone: boolean = true,
182
- timeout: number = 2
186
+ timeout: number = 2,
187
+ idempotencyKey?: string
183
188
  ): Promise<CrawlResponse | any> {
184
- const headers = this.prepareHeaders();
189
+ const headers = this.prepareHeaders(idempotencyKey);
185
190
  let jsonData: Params = { url };
186
191
  if (params) {
187
192
  jsonData = { ...jsonData, ...params };
188
193
  }
189
194
  try {
190
195
  const response: AxiosResponse = await this.postRequest(
191
- "https://api.firecrawl.dev/v0/crawl",
196
+ this.apiUrl + "/v0/crawl",
192
197
  jsonData,
193
198
  headers
194
199
  );
@@ -218,11 +223,16 @@ export default class FirecrawlApp {
218
223
  const headers: AxiosRequestHeaders = this.prepareHeaders();
219
224
  try {
220
225
  const response: AxiosResponse = await this.getRequest(
221
- `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
226
+ this.apiUrl + `/v0/crawl/status/${jobId}`,
222
227
  headers
223
228
  );
224
229
  if (response.status === 200) {
225
- return response.data;
230
+ return {
231
+ success: true,
232
+ status: response.data.status,
233
+ data: response.data.data,
234
+ partial_data: !response.data.data ? response.data.partial_data : undefined,
235
+ };
226
236
  } else {
227
237
  this.handleError(response, "check crawl status");
228
238
  }
@@ -240,11 +250,12 @@ export default class FirecrawlApp {
240
250
  * Prepares the headers for an API request.
241
251
  * @returns {AxiosRequestHeaders} The prepared headers.
242
252
  */
243
- prepareHeaders(): AxiosRequestHeaders {
253
+ prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
244
254
  return {
245
- "Content-Type": "application/json",
246
- Authorization: `Bearer ${this.apiKey}`,
247
- } as AxiosRequestHeaders;
255
+ 'Content-Type': 'application/json',
256
+ 'Authorization': `Bearer ${this.apiKey}`,
257
+ ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
258
+ } as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
248
259
  }
249
260
 
250
261
  /**
@@ -289,7 +300,7 @@ export default class FirecrawlApp {
289
300
  ): Promise<any> {
290
301
  while (true) {
291
302
  const statusResponse: AxiosResponse = await this.getRequest(
292
- `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
303
+ this.apiUrl + `/v0/crawl/status/${jobId}`,
293
304
  headers
294
305
  );
295
306
  if (statusResponse.status === 200) {
package/types/index.d.ts CHANGED
@@ -5,6 +5,7 @@ import { z } from "zod";
5
5
  */
6
6
  export interface FirecrawlAppConfig {
7
7
  apiKey?: string | null;
8
+ apiUrl?: string | null;
8
9
  }
9
10
  /**
10
11
  * Generic parameter interface.
@@ -50,6 +51,7 @@ export interface JobStatusResponse {
50
51
  status: string;
51
52
  jobId?: string;
52
53
  data?: any;
54
+ partial_data?: any;
53
55
  error?: string;
54
56
  }
55
57
  /**
@@ -57,6 +59,7 @@ export interface JobStatusResponse {
57
59
  */
58
60
  export default class FirecrawlApp {
59
61
  private apiKey;
62
+ private apiUrl;
60
63
  /**
61
64
  * Initializes a new instance of the FirecrawlApp class.
62
65
  * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
@@ -82,9 +85,10 @@ export default class FirecrawlApp {
82
85
  * @param {Params | null} params - Additional parameters for the crawl request.
83
86
  * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
84
87
  * @param {number} timeout - Timeout in seconds for job status checks.
88
+ * @param {string} idempotencyKey - Optional idempotency key for the request.
85
89
  * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
86
90
  */
87
- crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise<CrawlResponse | any>;
91
+ crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
88
92
  /**
89
93
  * Checks the status of a crawl job using the Firecrawl API.
90
94
  * @param {string} jobId - The job ID of the crawl operation.
@@ -95,7 +99,7 @@ export default class FirecrawlApp {
95
99
  * Prepares the headers for an API request.
96
100
  * @returns {AxiosRequestHeaders} The prepared headers.
97
101
  */
98
- prepareHeaders(): AxiosRequestHeaders;
102
+ prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
99
103
  /**
100
104
  * Sends a POST request to the specified URL.
101
105
  * @param {string} url - The URL to send the request to.