@mendable/firecrawl-js 0.0.26 → 0.0.29-beta.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -176,6 +176,11 @@ async function checkStatusExample(jobId) {
176
176
  checkStatusExample('your_job_id_here');
177
177
  ```
178
178
 
179
+ ## Running Locally
180
+ To use the SDK when running Firecrawl locally, you can change the initial Firecrawl app instance to:
181
+ ```js
182
+ const app = new FirecrawlApp({ apiKey: "YOUR_API_KEY", apiUrl: "http://localhost:3002" });
183
+ ```
179
184
 
180
185
  ## Error Handling
181
186
 
package/build/index.js CHANGED
@@ -1,12 +1,3 @@
1
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
- return new (P || (P = Promise))(function (resolve, reject) {
4
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
- step((generator = generator.apply(thisArg, _arguments || [])).next());
8
- });
9
- };
10
1
  import axios from "axios";
11
2
  import { z } from "zod";
12
3
  import { zodToJsonSchema } from "zod-to-json-schema";
@@ -18,9 +9,9 @@ export default class FirecrawlApp {
18
9
  * Initializes a new instance of the FirecrawlApp class.
19
10
  * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
20
11
  */
21
- constructor({ apiKey = null }) {
22
- this.apiUrl = "https://api.firecrawl.dev";
12
+ constructor({ apiKey = null, apiUrl = null }) {
23
13
  this.apiKey = apiKey || "";
14
+ this.apiUrl = apiUrl || "https://api.firecrawl.dev";
24
15
  if (!this.apiKey) {
25
16
  throw new Error("No API key provided");
26
17
  }
@@ -31,42 +22,46 @@ export default class FirecrawlApp {
31
22
  * @param {Params | null} params - Additional parameters for the scrape request.
32
23
  * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
33
24
  */
34
- scrapeUrl(url_1) {
35
- return __awaiter(this, arguments, void 0, function* (url, params = null) {
36
- var _a;
37
- const headers = {
38
- "Content-Type": "application/json",
39
- Authorization: `Bearer ${this.apiKey}`,
40
- };
41
- let jsonData = Object.assign({ url }, params);
42
- if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
43
- let schema = params.extractorOptions.extractionSchema;
44
- // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
45
- if (schema instanceof z.ZodSchema) {
46
- schema = zodToJsonSchema(schema);
47
- }
48
- jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
25
+ async scrapeUrl(url, params = null) {
26
+ const headers = {
27
+ "Content-Type": "application/json",
28
+ Authorization: `Bearer ${this.apiKey}`,
29
+ };
30
+ let jsonData = { url, ...params };
31
+ if (params?.extractorOptions?.extractionSchema) {
32
+ let schema = params.extractorOptions.extractionSchema;
33
+ // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
34
+ if (schema instanceof z.ZodSchema) {
35
+ schema = zodToJsonSchema(schema);
49
36
  }
50
- try {
51
- const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
52
- if (response.status === 200) {
53
- const responseData = response.data;
54
- if (responseData.success) {
55
- return responseData;
56
- }
57
- else {
58
- throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
59
- }
37
+ jsonData = {
38
+ ...jsonData,
39
+ extractorOptions: {
40
+ ...params.extractorOptions,
41
+ extractionSchema: schema,
42
+ mode: params.extractorOptions.mode || "llm-extraction",
43
+ },
44
+ };
45
+ }
46
+ try {
47
+ const response = await axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
48
+ if (response.status === 200) {
49
+ const responseData = response.data;
50
+ if (responseData.success) {
51
+ return responseData;
60
52
  }
61
53
  else {
62
- this.handleError(response, "scrape URL");
54
+ throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
63
55
  }
64
56
  }
65
- catch (error) {
66
- throw new Error(error.message);
57
+ else {
58
+ this.handleError(response, "scrape URL");
67
59
  }
68
- return { success: false, error: "Internal server error." };
69
- });
60
+ }
61
+ catch (error) {
62
+ throw new Error(error.message);
63
+ }
64
+ return { success: false, error: "Internal server error." };
70
65
  }
71
66
  /**
72
67
  * Searches for a query using the Firecrawl API.
@@ -74,36 +69,34 @@ export default class FirecrawlApp {
74
69
  * @param {Params | null} params - Additional parameters for the search request.
75
70
  * @returns {Promise<SearchResponse>} The response from the search operation.
76
71
  */
77
- search(query_1) {
78
- return __awaiter(this, arguments, void 0, function* (query, params = null) {
79
- const headers = {
80
- "Content-Type": "application/json",
81
- Authorization: `Bearer ${this.apiKey}`,
82
- };
83
- let jsonData = { query };
84
- if (params) {
85
- jsonData = Object.assign(Object.assign({}, jsonData), params);
86
- }
87
- try {
88
- const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
89
- if (response.status === 200) {
90
- const responseData = response.data;
91
- if (responseData.success) {
92
- return responseData;
93
- }
94
- else {
95
- throw new Error(`Failed to search. Error: ${responseData.error}`);
96
- }
72
+ async search(query, params = null) {
73
+ const headers = {
74
+ "Content-Type": "application/json",
75
+ Authorization: `Bearer ${this.apiKey}`,
76
+ };
77
+ let jsonData = { query };
78
+ if (params) {
79
+ jsonData = { ...jsonData, ...params };
80
+ }
81
+ try {
82
+ const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
83
+ if (response.status === 200) {
84
+ const responseData = response.data;
85
+ if (responseData.success) {
86
+ return responseData;
97
87
  }
98
88
  else {
99
- this.handleError(response, "search");
89
+ throw new Error(`Failed to search. Error: ${responseData.error}`);
100
90
  }
101
91
  }
102
- catch (error) {
103
- throw new Error(error.message);
92
+ else {
93
+ this.handleError(response, "search");
104
94
  }
105
- return { success: false, error: "Internal server error." };
106
- });
95
+ }
96
+ catch (error) {
97
+ throw new Error(error.message);
98
+ }
99
+ return { success: false, error: "Internal server error." };
107
100
  }
108
101
  /**
109
102
  * Initiates a crawl job for a URL using the Firecrawl API.
@@ -114,73 +107,75 @@ export default class FirecrawlApp {
114
107
  * @param {string} idempotencyKey - Optional idempotency key for the request.
115
108
  * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
116
109
  */
117
- crawlUrl(url_1) {
118
- return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
119
- const headers = this.prepareHeaders(idempotencyKey);
120
- let jsonData = { url };
121
- if (params) {
122
- jsonData = Object.assign(Object.assign({}, jsonData), params);
123
- }
124
- try {
125
- const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
126
- if (response.status === 200) {
127
- const jobId = response.data.jobId;
128
- if (waitUntilDone) {
129
- return this.monitorJobStatus(jobId, headers, pollInterval);
130
- }
131
- else {
132
- return { success: true, jobId };
133
- }
110
+ async crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
111
+ const headers = this.prepareHeaders(idempotencyKey);
112
+ let jsonData = { url };
113
+ if (params) {
114
+ jsonData = { ...jsonData, ...params };
115
+ }
116
+ try {
117
+ const response = await this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
118
+ if (response.status === 200) {
119
+ const jobId = response.data.jobId;
120
+ if (waitUntilDone) {
121
+ return this.monitorJobStatus(jobId, headers, pollInterval);
134
122
  }
135
123
  else {
136
- this.handleError(response, "start crawl job");
124
+ return { success: true, jobId };
137
125
  }
138
126
  }
139
- catch (error) {
140
- console.log(error);
141
- throw new Error(error.message);
127
+ else {
128
+ this.handleError(response, "start crawl job");
142
129
  }
143
- return { success: false, error: "Internal server error." };
144
- });
130
+ }
131
+ catch (error) {
132
+ console.log(error);
133
+ throw new Error(error.message);
134
+ }
135
+ return { success: false, error: "Internal server error." };
145
136
  }
146
137
  /**
147
138
  * Checks the status of a crawl job using the Firecrawl API.
148
139
  * @param {string} jobId - The job ID of the crawl operation.
149
140
  * @returns {Promise<JobStatusResponse>} The response containing the job status.
150
141
  */
151
- checkCrawlStatus(jobId) {
152
- return __awaiter(this, void 0, void 0, function* () {
153
- const headers = this.prepareHeaders();
154
- try {
155
- const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
156
- if (response.status === 200) {
157
- return {
158
- success: true,
159
- status: response.data.status,
160
- data: response.data.data,
161
- partial_data: !response.data.data ? response.data.partial_data : undefined,
162
- };
163
- }
164
- else {
165
- this.handleError(response, "check crawl status");
166
- }
142
+ async checkCrawlStatus(jobId) {
143
+ const headers = this.prepareHeaders();
144
+ try {
145
+ const response = await this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
146
+ if (response.status === 200) {
147
+ return {
148
+ success: true,
149
+ status: response.data.status,
150
+ data: response.data.data,
151
+ partial_data: !response.data.data
152
+ ? response.data.partial_data
153
+ : undefined,
154
+ };
167
155
  }
168
- catch (error) {
169
- throw new Error(error.message);
156
+ else {
157
+ this.handleError(response, "check crawl status");
170
158
  }
171
- return {
172
- success: false,
173
- status: "unknown",
174
- error: "Internal server error.",
175
- };
176
- });
159
+ }
160
+ catch (error) {
161
+ throw new Error(error.message);
162
+ }
163
+ return {
164
+ success: false,
165
+ status: "unknown",
166
+ error: "Internal server error.",
167
+ };
177
168
  }
178
169
  /**
179
170
  * Prepares the headers for an API request.
180
171
  * @returns {AxiosRequestHeaders} The prepared headers.
181
172
  */
182
173
  prepareHeaders(idempotencyKey) {
183
- return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
174
+ return {
175
+ "Content-Type": "application/json",
176
+ Authorization: `Bearer ${this.apiKey}`,
177
+ ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
178
+ };
184
179
  }
185
180
  /**
186
181
  * Sends a POST request to the specified URL.
@@ -208,35 +203,33 @@ export default class FirecrawlApp {
208
203
  * @param {number} timeout - Timeout in seconds for job status checks.
209
204
  * @returns {Promise<any>} The final job status or data.
210
205
  */
211
- monitorJobStatus(jobId, headers, checkInterval) {
212
- return __awaiter(this, void 0, void 0, function* () {
213
- while (true) {
214
- const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
215
- if (statusResponse.status === 200) {
216
- const statusData = statusResponse.data;
217
- if (statusData.status === "completed") {
218
- if ("data" in statusData) {
219
- return statusData.data;
220
- }
221
- else {
222
- throw new Error("Crawl job completed but no data was returned");
223
- }
224
- }
225
- else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
226
- if (checkInterval < 2) {
227
- checkInterval = 2;
228
- }
229
- yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
206
+ async monitorJobStatus(jobId, headers, checkInterval) {
207
+ while (true) {
208
+ const statusResponse = await this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
209
+ if (statusResponse.status === 200) {
210
+ const statusData = statusResponse.data;
211
+ if (statusData.status === "completed") {
212
+ if ("data" in statusData) {
213
+ return statusData.data;
230
214
  }
231
215
  else {
232
- throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
216
+ throw new Error("Crawl job completed but no data was returned");
233
217
  }
234
218
  }
219
+ else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
220
+ if (checkInterval < 2) {
221
+ checkInterval = 2;
222
+ }
223
+ await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
224
+ }
235
225
  else {
236
- this.handleError(statusResponse, "check crawl status");
226
+ throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
237
227
  }
238
228
  }
239
- });
229
+ else {
230
+ this.handleError(statusResponse, "check crawl status");
231
+ }
232
+ }
240
233
  }
241
234
  /**
242
235
  * Handles errors from API responses.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl-js",
3
- "version": "0.0.26",
3
+ "version": "0.0.29-beta.1",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "build/index.js",
6
6
  "types": "types/index.d.ts",
@@ -33,6 +33,7 @@
33
33
  "@types/axios": "^0.14.0",
34
34
  "@types/dotenv": "^8.2.0",
35
35
  "@types/jest": "^29.5.12",
36
+ "@types/mocha": "^10.0.6",
36
37
  "@types/node": "^20.12.12",
37
38
  "@types/uuid": "^9.0.8",
38
39
  "jest": "^29.7.0",
@@ -2,6 +2,7 @@ import FirecrawlApp from '../../index';
2
2
  import { v4 as uuidv4 } from 'uuid';
3
3
  import dotenv from 'dotenv';
4
4
 
5
+
5
6
  dotenv.config();
6
7
 
7
8
  const TEST_API_KEY = process.env.TEST_API_KEY;
@@ -29,14 +30,14 @@ describe('FirecrawlApp E2E Tests', () => {
29
30
  const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
30
31
  const response = await app.scrapeUrl('https://roastmywebsite.ai');
31
32
  expect(response).not.toBeNull();
32
- expect(response.data.content).toContain("_Roast_");
33
+ expect(response.data?.content).toContain("_Roast_");
33
34
  }, 30000); // 30 seconds timeout
34
35
 
35
36
  test.concurrent('should return successful response for valid scrape', async () => {
36
37
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
37
38
  const response = await app.scrapeUrl('https://roastmywebsite.ai');
38
39
  expect(response).not.toBeNull();
39
- expect(response.data.content).toContain("_Roast_");
40
+ expect(response.data?.content).toContain("_Roast_");
40
41
  expect(response.data).toHaveProperty('markdown');
41
42
  expect(response.data).toHaveProperty('metadata');
42
43
  expect(response.data).not.toHaveProperty('html');
@@ -46,23 +47,23 @@ describe('FirecrawlApp E2E Tests', () => {
46
47
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
47
48
  const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
48
49
  expect(response).not.toBeNull();
49
- expect(response.data.content).toContain("_Roast_");
50
- expect(response.data.markdown).toContain("_Roast_");
51
- expect(response.data.html).toContain("<h1");
50
+ expect(response.data?.content).toContain("_Roast_");
51
+ expect(response.data?.markdown).toContain("_Roast_");
52
+ expect(response.data?.html).toContain("<h1");
52
53
  }, 30000); // 30 seconds timeout
53
54
 
54
55
  test.concurrent('should return successful response for valid scrape with PDF file', async () => {
55
56
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
56
57
  const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
57
58
  expect(response).not.toBeNull();
58
- expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
59
+ expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
59
60
  }, 30000); // 30 seconds timeout
60
61
 
61
62
  test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
62
63
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
63
64
  const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
64
65
  expect(response).not.toBeNull();
65
- expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
66
+ expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
66
67
  }, 30000); // 30 seconds timeout
67
68
 
68
69
  test.concurrent('should throw error for invalid API key on crawl', async () => {
@@ -112,15 +113,15 @@ describe('FirecrawlApp E2E Tests', () => {
112
113
 
113
114
  expect(statusResponse).not.toBeNull();
114
115
  expect(statusResponse.status).toBe('completed');
115
- expect(statusResponse.data.length).toBeGreaterThan(0);
116
+ expect(statusResponse?.data?.length).toBeGreaterThan(0);
116
117
  }, 35000); // 35 seconds timeout
117
118
 
118
119
  test.concurrent('should return successful response for search', async () => {
119
120
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
120
121
  const response = await app.search("test query");
121
122
  expect(response).not.toBeNull();
122
- expect(response.data[0].content).toBeDefined();
123
- expect(response.data.length).toBeGreaterThan(2);
123
+ expect(response?.data?.[0]?.content).toBeDefined();
124
+ expect(response?.data?.length).toBeGreaterThan(2);
124
125
  }, 30000); // 30 seconds timeout
125
126
 
126
127
  test.concurrent('should throw error for invalid API key on search', async () => {
@@ -146,10 +147,10 @@ describe('FirecrawlApp E2E Tests', () => {
146
147
  }
147
148
  });
148
149
  expect(response).not.toBeNull();
149
- expect(response.data.llm_extraction).toBeDefined();
150
- const llmExtraction = response.data.llm_extraction;
151
- expect(llmExtraction.company_mission).toBeDefined();
152
- expect(typeof llmExtraction.supports_sso).toBe('boolean');
153
- expect(typeof llmExtraction.is_open_source).toBe('boolean');
150
+ expect(response.data?.llm_extraction).toBeDefined();
151
+ const llmExtraction = response.data?.llm_extraction;
152
+ expect(llmExtraction?.company_mission).toBeDefined();
153
+ expect(typeof llmExtraction?.supports_sso).toBe('boolean');
154
+ expect(typeof llmExtraction?.is_open_source).toBe('boolean');
154
155
  }, 30000); // 30 seconds timeout
155
156
  });
@@ -43,6 +43,6 @@ describe('the firecrawl JS SDK', () => {
43
43
  expect.objectContaining({ headers: expect.objectContaining({'Authorization': `Bearer ${apiKey}`}) }),
44
44
  )
45
45
  expect(scrapedData.success).toBe(true);
46
- expect(scrapedData.data.metadata.title).toEqual('Mendable');
46
+ expect(scrapedData?.data?.metadata.title).toEqual('Mendable');
47
47
  });
48
48
  })
package/src/index.ts CHANGED
@@ -10,15 +10,63 @@ export interface FirecrawlAppConfig {
10
10
  }
11
11
 
12
12
  /**
13
- * Generic parameter interface.
13
+ * Metadata for a Firecrawl document.
14
14
  */
15
- export interface Params {
15
+ export interface FirecrawlDocumentMetadata {
16
+ title?: string;
17
+ description?: string;
18
+ language?: string;
19
+ keywords?: string;
20
+ robots?: string;
21
+ ogTitle?: string;
22
+ ogDescription?: string;
23
+ ogUrl?: string;
24
+ ogImage?: string;
25
+ ogAudio?: string;
26
+ ogDeterminer?: string;
27
+ ogLocale?: string;
28
+ ogLocaleAlternate?: string[];
29
+ ogSiteName?: string;
30
+ ogVideo?: string;
31
+ dctermsCreated?: string;
32
+ dcDateCreated?: string;
33
+ dcDate?: string;
34
+ dctermsType?: string;
35
+ dcType?: string;
36
+ dctermsAudience?: string;
37
+ dctermsSubject?: string;
38
+ dcSubject?: string;
39
+ dcDescription?: string;
40
+ dctermsKeywords?: string;
41
+ modifiedTime?: string;
42
+ publishedTime?: string;
43
+ articleTag?: string;
44
+ articleSection?: string;
45
+ sourceURL?: string;
46
+ pageStatusCode?: number;
47
+ pageError?: string;
16
48
  [key: string]: any;
17
- extractorOptions?: {
18
- extractionSchema: z.ZodSchema | any;
19
- mode?: "llm-extraction";
20
- extractionPrompt?: string;
21
- };
49
+ }
50
+
51
+ /**
52
+ * Document interface for Firecrawl.
53
+ */
54
+ export interface FirecrawlDocument {
55
+ id?: string;
56
+ url?: string;
57
+ content: string;
58
+ markdown?: string;
59
+ html?: string;
60
+ llm_extraction?: Record<string, any>;
61
+ createdAt?: Date;
62
+ updatedAt?: Date;
63
+ type?: string;
64
+ metadata: FirecrawlDocumentMetadata;
65
+ childrenLinks?: string[];
66
+ provider?: string;
67
+ warning?: string;
68
+
69
+ index?: number;
22
70
  }
23
71
 
24
72
  /**
@@ -26,16 +74,15 @@ export interface Params {
26
74
  */
27
75
  export interface ScrapeResponse {
28
76
  success: boolean;
29
- data?: any;
77
+ data?: FirecrawlDocument;
30
78
  error?: string;
31
79
  }
32
-
33
80
  /**
34
81
  * Response interface for searching operations.
35
82
  */
36
83
  export interface SearchResponse {
37
84
  success: boolean;
38
- data?: any;
85
+ data?: FirecrawlDocument[];
39
86
  error?: string;
40
87
  }
41
88
  /**
@@ -44,10 +91,9 @@ export interface SearchResponse {
44
91
  export interface CrawlResponse {
45
92
  success: boolean;
46
93
  jobId?: string;
47
- data?: any;
94
+ data?: FirecrawlDocument[];
48
95
  error?: string;
49
96
  }
50
-
51
97
  /**
52
98
  * Response interface for job status checks.
53
99
  */
@@ -55,24 +101,35 @@ export interface JobStatusResponse {
55
101
  success: boolean;
56
102
  status: string;
57
103
  jobId?: string;
58
- data?: any;
59
- partial_data?: any,
104
+ data?: FirecrawlDocument[];
105
+ partial_data?: FirecrawlDocument[];
60
106
  error?: string;
61
107
  }
62
-
108
+ /**
109
+ * Generic parameter interface.
110
+ */
111
+ export interface Params {
112
+ [key: string]: any;
113
+ extractorOptions?: {
114
+ extractionSchema: z.ZodSchema | any;
115
+ mode?: "llm-extraction";
116
+ extractionPrompt?: string;
117
+ };
118
+ }
63
119
  /**
64
120
  * Main class for interacting with the Firecrawl API.
65
121
  */
66
122
  export default class FirecrawlApp {
67
123
  private apiKey: string;
68
- private apiUrl: string = "https://api.firecrawl.dev";
124
+ private apiUrl: string;
69
125
 
70
126
  /**
71
127
  * Initializes a new instance of the FirecrawlApp class.
72
128
  * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
73
129
  */
74
- constructor({ apiKey = null }: FirecrawlAppConfig) {
130
+ constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
75
131
  this.apiKey = apiKey || "";
132
+ this.apiUrl = apiUrl || "https://api.firecrawl.dev";
76
133
  if (!this.apiKey) {
77
134
  throw new Error("No API key provided");
78
135
  }
@@ -112,7 +169,7 @@ export default class FirecrawlApp {
112
169
  const response: AxiosResponse = await axios.post(
113
170
  this.apiUrl + "/v0/scrape",
114
171
  jsonData,
115
- { headers },
172
+ { headers }
116
173
  );
117
174
  if (response.status === 200) {
118
175
  const responseData = response.data;
@@ -231,7 +288,9 @@ export default class FirecrawlApp {
231
288
  success: true,
232
289
  status: response.data.status,
233
290
  data: response.data.data,
234
- partial_data: !response.data.data ? response.data.partial_data : undefined,
291
+ partial_data: !response.data.data
292
+ ? response.data.partial_data
293
+ : undefined,
235
294
  };
236
295
  } else {
237
296
  this.handleError(response, "check crawl status");
@@ -252,10 +311,10 @@ export default class FirecrawlApp {
252
311
  */
253
312
  prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
254
313
  return {
255
- 'Content-Type': 'application/json',
256
- 'Authorization': `Bearer ${this.apiKey}`,
257
- ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
258
- } as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
314
+ "Content-Type": "application/json",
315
+ Authorization: `Bearer ${this.apiKey}`,
316
+ ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
317
+ } as AxiosRequestHeaders & { "x-idempotency-key"?: string };
259
318
  }
260
319
 
261
320
  /**
@@ -317,7 +376,9 @@ export default class FirecrawlApp {
317
376
  if (checkInterval < 2) {
318
377
  checkInterval = 2;
319
378
  }
320
- await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
379
+ await new Promise((resolve) =>
380
+ setTimeout(resolve, checkInterval * 1000)
381
+ ); // Wait for the specified timeout before checking again
321
382
  } else {
322
383
  throw new Error(
323
384
  `Crawl job failed or was stopped. Status: ${statusData.status}`
package/tsconfig.json CHANGED
@@ -11,7 +11,6 @@
11
11
  // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
12
12
 
13
13
  /* Language and Environment */
14
- "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
15
14
  // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
16
15
  // "jsx": "preserve", /* Specify what JSX code is generated. */
17
16
  // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
@@ -25,9 +24,16 @@
25
24
  // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
26
25
 
27
26
  /* Modules */
28
- "module": "NodeNext", /* Specify what module code is generated. */
29
27
  "rootDir": "./src", /* Specify the root folder within your source files. */
30
- "moduleResolution": "nodenext", /* Specify how TypeScript looks up a file from a given module specifier. */
28
+
29
+ "target": "ES2021",
30
+ "lib": [
31
+ "ES2021",
32
+ "ES2022.Object",
33
+ "DOM"
34
+ ],
35
+ "module": "NodeNext",
36
+ "moduleResolution": "nodenext",/* Specify how TypeScript looks up a file from a given module specifier. */
31
37
  // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
32
38
  // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
33
39
  // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
package/types/index.d.ts CHANGED
@@ -8,22 +8,68 @@ export interface FirecrawlAppConfig {
8
8
  apiUrl?: string | null;
9
9
  }
10
10
  /**
11
- * Generic parameter interface.
11
+ * Metadata for a Firecrawl document.
12
12
  */
13
- export interface Params {
13
+ export interface FirecrawlDocumentMetadata {
14
+ title?: string;
15
+ description?: string;
16
+ language?: string;
17
+ keywords?: string;
18
+ robots?: string;
19
+ ogTitle?: string;
20
+ ogDescription?: string;
21
+ ogUrl?: string;
22
+ ogImage?: string;
23
+ ogAudio?: string;
24
+ ogDeterminer?: string;
25
+ ogLocale?: string;
26
+ ogLocaleAlternate?: string[];
27
+ ogSiteName?: string;
28
+ ogVideo?: string;
29
+ dctermsCreated?: string;
30
+ dcDateCreated?: string;
31
+ dcDate?: string;
32
+ dctermsType?: string;
33
+ dcType?: string;
34
+ dctermsAudience?: string;
35
+ dctermsSubject?: string;
36
+ dcSubject?: string;
37
+ dcDescription?: string;
38
+ dctermsKeywords?: string;
39
+ modifiedTime?: string;
40
+ publishedTime?: string;
41
+ articleTag?: string;
42
+ articleSection?: string;
43
+ sourceURL?: string;
44
+ pageStatusCode?: number;
45
+ pageError?: string;
14
46
  [key: string]: any;
15
- extractorOptions?: {
16
- extractionSchema: z.ZodSchema | any;
17
- mode?: "llm-extraction";
18
- extractionPrompt?: string;
19
- };
47
+ }
48
+ /**
49
+ * Document interface for Firecrawl.
50
+ */
51
+ export interface FirecrawlDocument {
52
+ id?: string;
53
+ url?: string;
54
+ content: string;
55
+ markdown?: string;
56
+ html?: string;
57
+ llm_extraction?: Record<string, any>;
58
+ createdAt?: Date;
59
+ updatedAt?: Date;
60
+ type?: string;
61
+ metadata: FirecrawlDocumentMetadata;
62
+ childrenLinks?: string[];
63
+ provider?: string;
64
+ warning?: string;
65
+ index?: number;
20
66
  }
21
67
  /**
22
68
  * Response interface for scraping operations.
23
69
  */
24
70
  export interface ScrapeResponse {
25
71
  success: boolean;
26
- data?: any;
72
+ data?: FirecrawlDocument;
27
73
  error?: string;
28
74
  }
29
75
  /**
@@ -31,7 +77,7 @@ export interface ScrapeResponse {
31
77
  */
32
78
  export interface SearchResponse {
33
79
  success: boolean;
34
- data?: any;
80
+ data?: FirecrawlDocument[];
35
81
  error?: string;
36
82
  }
37
83
  /**
@@ -40,7 +86,7 @@ export interface SearchResponse {
40
86
  export interface CrawlResponse {
41
87
  success: boolean;
42
88
  jobId?: string;
43
- data?: any;
89
+ data?: FirecrawlDocument[];
44
90
  error?: string;
45
91
  }
46
92
  /**
@@ -50,10 +96,21 @@ export interface JobStatusResponse {
50
96
  success: boolean;
51
97
  status: string;
52
98
  jobId?: string;
53
- data?: any;
54
- partial_data?: any;
99
+ data?: FirecrawlDocument[];
100
+ partial_data?: FirecrawlDocument[];
55
101
  error?: string;
56
102
  }
103
+ /**
104
+ * Generic parameter interface.
105
+ */
106
+ export interface Params {
107
+ [key: string]: any;
108
+ extractorOptions?: {
109
+ extractionSchema: z.ZodSchema | any;
110
+ mode?: "llm-extraction";
111
+ extractionPrompt?: string;
112
+ };
113
+ }
57
114
  /**
58
115
  * Main class for interacting with the Firecrawl API.
59
116
  */
@@ -64,7 +121,7 @@ export default class FirecrawlApp {
64
121
  * Initializes a new instance of the FirecrawlApp class.
65
122
  * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
66
123
  */
67
- constructor({ apiKey }: FirecrawlAppConfig);
124
+ constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
68
125
  /**
69
126
  * Scrapes a URL using the Firecrawl API.
70
127
  * @param {string} url - The URL to scrape.