@mendable/firecrawl-js 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -33,15 +33,18 @@ Here's an example of how to use the SDK with error handling:
33
33
 
34
34
  // Crawl a website
35
35
  const crawlUrl = 'https://mendable.ai';
36
- const crawlParams = {
36
+ const params = {
37
37
  crawlerOptions: {
38
38
  excludes: ['blog/'],
39
39
  includes: [], // leave empty for all pages
40
40
  limit: 1000,
41
+ },
42
+ pageOptions: {
43
+ onlyMainContent: true
41
44
  }
42
45
  };
43
46
 
44
- const crawlResult = await app.crawlUrl(crawlUrl, crawlParams);
47
+ const crawlResult = await app.crawlUrl(crawlUrl, params);
45
48
  console.log(crawlResult);
46
49
 
47
50
  } catch (error) {
@@ -83,18 +86,21 @@ To crawl a website with error handling, use the `crawlUrl` method. It takes the
83
86
  async function crawlExample() {
84
87
  try {
85
88
  const crawlUrl = 'https://example.com';
86
- const crawlParams = {
89
+ const params = {
87
90
  crawlerOptions: {
88
91
  excludes: ['blog/'],
89
92
  includes: [], // leave empty for all pages
90
93
  limit: 1000,
94
+ },
95
+ pageOptions: {
96
+ onlyMainContent: true
91
97
  }
92
98
  };
93
99
  const waitUntilDone = true;
94
100
  const timeout = 5;
95
101
  const crawlResult = await app.crawlUrl(
96
102
  crawlUrl,
97
- crawlParams,
103
+ params,
98
104
  waitUntilDone,
99
105
  timeout
100
106
  );
package/build/index.js CHANGED
@@ -10,13 +10,26 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
10
10
  import axios from 'axios';
11
11
  import dotenv from 'dotenv';
12
12
  dotenv.config();
13
+ /**
14
+ * Main class for interacting with the Firecrawl API.
15
+ */
13
16
  export default class FirecrawlApp {
17
+ /**
18
+ * Initializes a new instance of the FirecrawlApp class.
19
+ * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
20
+ */
14
21
  constructor({ apiKey = null }) {
15
22
  this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
16
23
  if (!this.apiKey) {
17
24
  throw new Error('No API key provided');
18
25
  }
19
26
  }
27
+ /**
28
+ * Scrapes a URL using the Firecrawl API.
29
+ * @param {string} url - The URL to scrape.
30
+ * @param {Params | null} params - Additional parameters for the scrape request.
31
+ * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
32
+ */
20
33
  scrapeUrl(url_1) {
21
34
  return __awaiter(this, arguments, void 0, function* (url, params = null) {
22
35
  const headers = {
@@ -32,7 +45,7 @@ export default class FirecrawlApp {
32
45
  if (response.status === 200) {
33
46
  const responseData = response.data;
34
47
  if (responseData.success) {
35
- return responseData.data;
48
+ return responseData;
36
49
  }
37
50
  else {
38
51
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
@@ -45,8 +58,17 @@ export default class FirecrawlApp {
45
58
  catch (error) {
46
59
  throw new Error(error.message);
47
60
  }
61
+ return { success: false, error: 'Internal server error.' };
48
62
  });
49
63
  }
64
+ /**
65
+ * Initiates a crawl job for a URL using the Firecrawl API.
66
+ * @param {string} url - The URL to crawl.
67
+ * @param {Params | null} params - Additional parameters for the crawl request.
68
+ * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
69
+ * @param {number} timeout - Timeout in seconds for job status checks.
70
+ * @returns {Promise<CrawlResponse>} The response from the crawl operation.
71
+ */
50
72
  crawlUrl(url_1) {
51
73
  return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
52
74
  const headers = this.prepareHeaders();
@@ -62,7 +84,7 @@ export default class FirecrawlApp {
62
84
  return this.monitorJobStatus(jobId, headers, timeout);
63
85
  }
64
86
  else {
65
- return { jobId };
87
+ return { success: true, jobId };
66
88
  }
67
89
  }
68
90
  else {
@@ -73,8 +95,14 @@ export default class FirecrawlApp {
73
95
  console.log(error);
74
96
  throw new Error(error.message);
75
97
  }
98
+ return { success: false, error: 'Internal server error.' };
76
99
  });
77
100
  }
101
+ /**
102
+ * Checks the status of a crawl job using the Firecrawl API.
103
+ * @param {string} jobId - The job ID of the crawl operation.
104
+ * @returns {Promise<JobStatusResponse>} The response containing the job status.
105
+ */
78
106
  checkCrawlStatus(jobId) {
79
107
  return __awaiter(this, void 0, void 0, function* () {
80
108
  const headers = this.prepareHeaders();
@@ -90,20 +118,45 @@ export default class FirecrawlApp {
90
118
  catch (error) {
91
119
  throw new Error(error.message);
92
120
  }
121
+ return { success: false, status: 'unknown', error: 'Internal server error.' };
93
122
  });
94
123
  }
124
+ /**
125
+ * Prepares the headers for an API request.
126
+ * @returns {AxiosRequestHeaders} The prepared headers.
127
+ */
95
128
  prepareHeaders() {
96
129
  return {
97
130
  'Content-Type': 'application/json',
98
131
  'Authorization': `Bearer ${this.apiKey}`,
99
132
  };
100
133
  }
134
+ /**
135
+ * Sends a POST request to the specified URL.
136
+ * @param {string} url - The URL to send the request to.
137
+ * @param {Params} data - The data to send in the request.
138
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
139
+ * @returns {Promise<AxiosResponse>} The response from the POST request.
140
+ */
101
141
  postRequest(url, data, headers) {
102
142
  return axios.post(url, data, { headers });
103
143
  }
144
+ /**
145
+ * Sends a GET request to the specified URL.
146
+ * @param {string} url - The URL to send the request to.
147
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
148
+ * @returns {Promise<AxiosResponse>} The response from the GET request.
149
+ */
104
150
  getRequest(url, headers) {
105
151
  return axios.get(url, { headers });
106
152
  }
153
+ /**
154
+ * Monitors the status of a crawl job until completion or failure.
155
+ * @param {string} jobId - The job ID of the crawl operation.
156
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
157
+ * @param {number} timeout - Timeout in seconds for job status checks.
158
+ * @returns {Promise<any>} The final job status or data.
159
+ */
107
160
  monitorJobStatus(jobId, headers, timeout) {
108
161
  return __awaiter(this, void 0, void 0, function* () {
109
162
  while (true) {
@@ -134,6 +187,11 @@ export default class FirecrawlApp {
134
187
  }
135
188
  });
136
189
  }
190
+ /**
191
+ * Handles errors from API responses.
192
+ * @param {AxiosResponse} response - The response from the API.
193
+ * @param {string} action - The action being performed when the error occurred.
194
+ */
137
195
  handleError(response, action) {
138
196
  if ([402, 409, 500].includes(response.status)) {
139
197
  const errorMessage = response.data.error || 'Unknown error occurred';
package/package.json CHANGED
@@ -1,15 +1,16 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl-js",
3
- "version": "0.0.9",
3
+ "version": "0.0.11",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "build/index.js",
6
+ "types": "types/index.d.ts",
6
7
  "type": "module",
7
8
  "scripts": {
8
9
  "test": "echo \"Error: no test specified\" && exit 1"
9
10
  },
10
11
  "repository": {
11
12
  "type": "git",
12
- "url": "git+https://github.com/mendableai/firecrawl-js.git"
13
+ "url": "git+https://github.com/mendableai/firecrawl.git"
13
14
  },
14
15
  "author": "Mendable.ai",
15
16
  "license": "MIT",
@@ -18,9 +19,9 @@
18
19
  "dotenv": "^16.4.5"
19
20
  },
20
21
  "bugs": {
21
- "url": "https://github.com/mendableai/firecrawl-js/issues"
22
+ "url": "https://github.com/mendableai/firecrawl/issues"
22
23
  },
23
- "homepage": "https://github.com/mendableai/firecrawl-js#readme",
24
+ "homepage": "https://github.com/mendableai/firecrawl#readme",
24
25
  "devDependencies": {
25
26
  "@types/axios": "^0.14.0",
26
27
  "@types/dotenv": "^8.2.0",
package/src/index.ts CHANGED
@@ -2,17 +2,60 @@ import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios';
2
2
  import dotenv from 'dotenv';
3
3
  dotenv.config();
4
4
 
5
- interface FirecrawlAppConfig {
5
+ /**
6
+ * Configuration interface for FirecrawlApp.
7
+ */
8
+ export interface FirecrawlAppConfig {
6
9
  apiKey?: string | null;
7
10
  }
8
11
 
9
- interface Params {
12
+ /**
13
+ * Generic parameter interface.
14
+ */
15
+ export interface Params {
10
16
  [key: string]: any;
11
17
  }
12
18
 
19
+ /**
20
+ * Response interface for scraping operations.
21
+ */
22
+ export interface ScrapeResponse {
23
+ success: boolean;
24
+ data?: any;
25
+ error?: string;
26
+ }
27
+
28
+ /**
29
+ * Response interface for crawling operations.
30
+ */
31
+ export interface CrawlResponse {
32
+ success: boolean;
33
+ jobId?: string;
34
+ data?: any;
35
+ error?: string;
36
+ }
37
+
38
+ /**
39
+ * Response interface for job status checks.
40
+ */
41
+ export interface JobStatusResponse {
42
+ success: boolean;
43
+ status: string;
44
+ jobId?: string;
45
+ data?: any;
46
+ error?: string;
47
+ }
48
+
49
+ /**
50
+ * Main class for interacting with the Firecrawl API.
51
+ */
13
52
  export default class FirecrawlApp {
14
53
  private apiKey: string;
15
54
 
55
+ /**
56
+ * Initializes a new instance of the FirecrawlApp class.
57
+ * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
58
+ */
16
59
  constructor({ apiKey = null }: FirecrawlAppConfig) {
17
60
  this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
18
61
  if (!this.apiKey) {
@@ -20,7 +63,13 @@ export default class FirecrawlApp {
20
63
  }
21
64
  }
22
65
 
23
- async scrapeUrl(url: string, params: Params | null = null): Promise<any> {
66
+ /**
67
+ * Scrapes a URL using the Firecrawl API.
68
+ * @param {string} url - The URL to scrape.
69
+ * @param {Params | null} params - Additional parameters for the scrape request.
70
+ * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
71
+ */
72
+ async scrapeUrl(url: string, params: Params | null = null): Promise<ScrapeResponse> {
24
73
  const headers: AxiosRequestHeaders = {
25
74
  'Content-Type': 'application/json',
26
75
  'Authorization': `Bearer ${this.apiKey}`,
@@ -34,7 +83,7 @@ export default class FirecrawlApp {
34
83
  if (response.status === 200) {
35
84
  const responseData = response.data;
36
85
  if (responseData.success) {
37
- return responseData.data;
86
+ return responseData;
38
87
  } else {
39
88
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
40
89
  }
@@ -44,9 +93,18 @@ export default class FirecrawlApp {
44
93
  } catch (error: any) {
45
94
  throw new Error(error.message);
46
95
  }
96
+ return { success: false, error: 'Internal server error.' };
47
97
  }
48
98
 
49
- async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<any> {
99
+ /**
100
+ * Initiates a crawl job for a URL using the Firecrawl API.
101
+ * @param {string} url - The URL to crawl.
102
+ * @param {Params | null} params - Additional parameters for the crawl request.
103
+ * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
104
+ * @param {number} timeout - Timeout in seconds for job status checks.
105
+ * @returns {Promise<CrawlResponse>} The response from the crawl operation.
106
+ */
107
+ async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<CrawlResponse> {
50
108
  const headers = this.prepareHeaders();
51
109
  let jsonData: Params = { url };
52
110
  if (params) {
@@ -59,7 +117,7 @@ export default class FirecrawlApp {
59
117
  if (waitUntilDone) {
60
118
  return this.monitorJobStatus(jobId, headers, timeout);
61
119
  } else {
62
- return { jobId };
120
+ return { success: true, jobId };
63
121
  }
64
122
  } else {
65
123
  this.handleError(response, 'start crawl job');
@@ -68,9 +126,15 @@ export default class FirecrawlApp {
68
126
  console.log(error)
69
127
  throw new Error(error.message);
70
128
  }
129
+ return { success: false, error: 'Internal server error.' };
71
130
  }
72
131
 
73
- async checkCrawlStatus(jobId: string): Promise<any> {
132
+ /**
133
+ * Checks the status of a crawl job using the Firecrawl API.
134
+ * @param {string} jobId - The job ID of the crawl operation.
135
+ * @returns {Promise<JobStatusResponse>} The response containing the job status.
136
+ */
137
+ async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
74
138
  const headers: AxiosRequestHeaders = this.prepareHeaders();
75
139
  try {
76
140
  const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
@@ -82,8 +146,13 @@ export default class FirecrawlApp {
82
146
  } catch (error: any) {
83
147
  throw new Error(error.message);
84
148
  }
149
+ return { success: false, status: 'unknown', error: 'Internal server error.' };
85
150
  }
86
151
 
152
+ /**
153
+ * Prepares the headers for an API request.
154
+ * @returns {AxiosRequestHeaders} The prepared headers.
155
+ */
87
156
  prepareHeaders(): AxiosRequestHeaders {
88
157
  return {
89
158
  'Content-Type': 'application/json',
@@ -91,14 +160,34 @@ export default class FirecrawlApp {
91
160
  } as AxiosRequestHeaders;
92
161
  }
93
162
 
163
+ /**
164
+ * Sends a POST request to the specified URL.
165
+ * @param {string} url - The URL to send the request to.
166
+ * @param {Params} data - The data to send in the request.
167
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
168
+ * @returns {Promise<AxiosResponse>} The response from the POST request.
169
+ */
94
170
  postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
95
171
  return axios.post(url, data, { headers });
96
172
  }
97
173
 
174
+ /**
175
+ * Sends a GET request to the specified URL.
176
+ * @param {string} url - The URL to send the request to.
177
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
178
+ * @returns {Promise<AxiosResponse>} The response from the GET request.
179
+ */
98
180
  getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
99
181
  return axios.get(url, { headers });
100
182
  }
101
183
 
184
+ /**
185
+ * Monitors the status of a crawl job until completion or failure.
186
+ * @param {string} jobId - The job ID of the crawl operation.
187
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
188
+ * @param {number} timeout - Timeout in seconds for job status checks.
189
+ * @returns {Promise<any>} The final job status or data.
190
+ */
102
191
  async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any> {
103
192
  while (true) {
104
193
  const statusResponse: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
@@ -124,6 +213,11 @@ export default class FirecrawlApp {
124
213
  }
125
214
  }
126
215
 
216
+ /**
217
+ * Handles errors from API responses.
218
+ * @param {AxiosResponse} response - The response from the API.
219
+ * @param {string} action - The action being performed when the error occurred.
220
+ */
127
221
  handleError(response: AxiosResponse, action: string): void {
128
222
  if ([402, 409, 500].includes(response.status)) {
129
223
  const errorMessage: string = response.data.error || 'Unknown error occurred';
package/tsconfig.json CHANGED
@@ -49,7 +49,7 @@
49
49
  // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
50
50
 
51
51
  /* Emit */
52
- // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
52
+ "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
53
53
  // "declarationMap": true, /* Create sourcemaps for d.ts files. */
54
54
  // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
55
55
  // "sourceMap": true, /* Create source map files for emitted JavaScript files. */
@@ -70,7 +70,7 @@
70
70
  // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
71
71
  // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
72
72
  // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
73
- // "declarationDir": "./", /* Specify the output directory for generated declaration files. */
73
+ "declarationDir": "./types", /* Specify the output directory for generated declaration files. */
74
74
  // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
75
75
 
76
76
  /* Interop Constraints */
@@ -105,5 +105,7 @@
105
105
  /* Completeness */
106
106
  // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
107
107
  "skipLibCheck": true /* Skip type checking all .d.ts files. */
108
- }
108
+ },
109
+ "include": ["src/**/*"],
110
+ "exclude": ["node_modules", "dist", "**/__tests__/*"]
109
111
  }
package/types/index.d.ts ADDED
@@ -0,0 +1,107 @@
1
+ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
2
+ /**
3
+ * Configuration interface for FirecrawlApp.
4
+ */
5
+ export interface FirecrawlAppConfig {
6
+ apiKey?: string | null;
7
+ }
8
+ /**
9
+ * Generic parameter interface.
10
+ */
11
+ export interface Params {
12
+ [key: string]: any;
13
+ }
14
+ /**
15
+ * Response interface for scraping operations.
16
+ */
17
+ export interface ScrapeResponse {
18
+ success: boolean;
19
+ data?: any;
20
+ error?: string;
21
+ }
22
+ /**
23
+ * Response interface for crawling operations.
24
+ */
25
+ export interface CrawlResponse {
26
+ success: boolean;
27
+ jobId?: string;
28
+ data?: any;
29
+ error?: string;
30
+ }
31
+ /**
32
+ * Response interface for job status checks.
33
+ */
34
+ export interface JobStatusResponse {
35
+ success: boolean;
36
+ status: string;
37
+ jobId?: string;
38
+ data?: any;
39
+ error?: string;
40
+ }
41
+ /**
42
+ * Main class for interacting with the Firecrawl API.
43
+ */
44
+ export default class FirecrawlApp {
45
+ private apiKey;
46
+ /**
47
+ * Initializes a new instance of the FirecrawlApp class.
48
+ * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
49
+ */
50
+ constructor({ apiKey }: FirecrawlAppConfig);
51
+ /**
52
+ * Scrapes a URL using the Firecrawl API.
53
+ * @param {string} url - The URL to scrape.
54
+ * @param {Params | null} params - Additional parameters for the scrape request.
55
+ * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
56
+ */
57
+ scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
58
+ /**
59
+ * Initiates a crawl job for a URL using the Firecrawl API.
60
+ * @param {string} url - The URL to crawl.
61
+ * @param {Params | null} params - Additional parameters for the crawl request.
62
+ * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
63
+ * @param {number} timeout - Timeout in seconds for job status checks.
64
+ * @returns {Promise<CrawlResponse>} The response from the crawl operation.
65
+ */
66
+ crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise<CrawlResponse>;
67
+ /**
68
+ * Checks the status of a crawl job using the Firecrawl API.
69
+ * @param {string} jobId - The job ID of the crawl operation.
70
+ * @returns {Promise<JobStatusResponse>} The response containing the job status.
71
+ */
72
+ checkCrawlStatus(jobId: string): Promise<JobStatusResponse>;
73
+ /**
74
+ * Prepares the headers for an API request.
75
+ * @returns {AxiosRequestHeaders} The prepared headers.
76
+ */
77
+ prepareHeaders(): AxiosRequestHeaders;
78
+ /**
79
+ * Sends a POST request to the specified URL.
80
+ * @param {string} url - The URL to send the request to.
81
+ * @param {Params} data - The data to send in the request.
82
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
83
+ * @returns {Promise<AxiosResponse>} The response from the POST request.
84
+ */
85
+ postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
86
+ /**
87
+ * Sends a GET request to the specified URL.
88
+ * @param {string} url - The URL to send the request to.
89
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
90
+ * @returns {Promise<AxiosResponse>} The response from the GET request.
91
+ */
92
+ getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
93
+ /**
94
+ * Monitors the status of a crawl job until completion or failure.
95
+ * @param {string} jobId - The job ID of the crawl operation.
96
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
97
+ * @param {number} timeout - Timeout in seconds for job status checks.
98
+ * @returns {Promise<any>} The final job status or data.
99
+ */
100
+ monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any>;
101
+ /**
102
+ * Handles errors from API responses.
103
+ * @param {AxiosResponse} response - The response from the API.
104
+ * @param {string} action - The action being performed when the error occurred.
105
+ */
106
+ handleError(response: AxiosResponse, action: string): void;
107
+ }