@mendable/firecrawl-js 0.0.33-beta.2 → 0.0.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js ADDED
@@ -0,0 +1,265 @@
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // Reuses an existing this.__awaiter if present; otherwise drives a generator function as a Promise (presumably the compiler-emitted async/await helper — confirm against the build config).
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // Wraps non-P values in a resolved P so .then() can always be called.
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // Resolves with the generator's return value, or waits on the yielded value and resumes.
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import axios from "axios";
11
+ import { z } from "zod";
12
+ import { zodToJsonSchema } from "zod-to-json-schema";
13
+ /**
14
+ * Main class for interacting with the Firecrawl API.
15
+ */
16
+ export default class FirecrawlApp {
17
+ /**
18
+ * Initializes a new instance of the FirecrawlApp class. Throws an Error ("No API key provided") when apiKey is missing or empty.
19
+ * @param {FirecrawlAppConfig} config - Configuration options: apiKey (required) and apiUrl (falls back to "https://api.firecrawl.dev" when omitted or empty).
20
+ */
21
+ constructor({ apiKey = null, apiUrl = null }) {
22
+ this.apiKey = apiKey || "";
23
+ this.apiUrl = apiUrl || "https://api.firecrawl.dev";
24
+ if (!this.apiKey) {
25
+ throw new Error("No API key provided");
26
+ }
27
+ }
28
+ /**
29
+ * Scrapes a URL by POSTing to `${apiUrl}/v0/scrape` with a Bearer-authenticated JSON body.
30
+ * @param {string} url - The URL to scrape.
31
+ * @param {Params | null} params - Additional parameters merged into the request body. If extractorOptions.extractionSchema is a Zod schema it is converted to JSON Schema, and extractorOptions.mode defaults to "llm-extraction".
32
+ * @returns {Promise<ScrapeResponse>} The response body when the status is 200 and the body reports success. Every other outcome rejects with an Error carrying only the failure message.
33
+ */
34
+ scrapeUrl(url, params = null) {
35
+ var _a;
36
+ return __awaiter(this, void 0, void 0, function* () {
37
+ const headers = {
38
+ "Content-Type": "application/json",
39
+ Authorization: `Bearer ${this.apiKey}`,
40
+ };
41
+ let jsonData = Object.assign({ url }, params); // params is applied after url, so its keys take precedence.
42
+ if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
43
+ let schema = params.extractorOptions.extractionSchema;
44
+ // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
45
+ if (schema instanceof z.ZodSchema) {
46
+ schema = zodToJsonSchema(schema);
47
+ }
48
+ jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
49
+ }
50
+ try {
51
+ const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
52
+ if (response.status === 200) {
53
+ const responseData = response.data;
54
+ if (responseData.success) {
55
+ return responseData;
56
+ }
57
+ else {
58
+ throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
59
+ }
60
+ }
61
+ else {
62
+ this.handleError(response, "scrape URL");
63
+ }
64
+ }
65
+ catch (error) {
66
+ throw new Error(error.message); // Re-wraps every failure, including the errors thrown above, keeping only the message.
67
+ }
68
+ return { success: false, error: "Internal server error." }; // Fallback; not reached while handleError throws on every path.
69
+ });
70
+ }
71
+ /**
72
+ * Searches for a query by POSTing to `${apiUrl}/v0/search` with a Bearer-authenticated JSON body.
73
+ * @param {string} query - The query to search for.
74
+ * @param {Params | null} params - Additional parameters merged into the request body alongside the query.
75
+ * @returns {Promise<SearchResponse>} The response body when the status is 200 and the body reports success. Every other outcome rejects with an Error carrying only the failure message.
76
+ */
77
+ search(query, params = null) {
78
+ return __awaiter(this, void 0, void 0, function* () {
79
+ const headers = {
80
+ "Content-Type": "application/json",
81
+ Authorization: `Bearer ${this.apiKey}`,
82
+ };
83
+ let jsonData = { query };
84
+ if (params) {
85
+ jsonData = Object.assign(Object.assign({}, jsonData), params);
86
+ }
87
+ try {
88
+ const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
89
+ if (response.status === 200) {
90
+ const responseData = response.data;
91
+ if (responseData.success) {
92
+ return responseData;
93
+ }
94
+ else {
95
+ throw new Error(`Failed to search. Error: ${responseData.error}`);
96
+ }
97
+ }
98
+ else {
99
+ this.handleError(response, "search");
100
+ }
101
+ }
102
+ catch (error) {
103
+ throw new Error(error.message); // Re-wraps every failure, including the errors thrown above, keeping only the message.
104
+ }
105
+ return { success: false, error: "Internal server error." }; // Fallback; not reached while handleError throws on every path.
106
+ });
107
+ }
108
+ /**
109
+ * Initiates a crawl job by POSTing to `${apiUrl}/v0/crawl` and, by default, polls until the job finishes.
110
+ * @param {string} url - The URL to crawl.
111
+ * @param {Params | null} params - Additional parameters merged into the request body alongside the url.
112
+ * @param {boolean} waitUntilDone - When true (the default) the job is polled via monitorJobStatus; when false the call returns as soon as the job is created.
113
+ * @param {number} pollInterval - Seconds between job status checks while waiting (default 2; monitorJobStatus raises values below 2 to 2).
114
+ * @param {string} idempotencyKey - Optional key sent as the "x-idempotency-key" header.
115
+ * @returns {Promise<CrawlResponse | any>} With waitUntilDone, whatever monitorJobStatus resolves to (the job's data); otherwise `{ success: true, jobId }`. Failures starting the job reject with an Error.
116
+ */
117
+ crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
118
+ return __awaiter(this, void 0, void 0, function* () {
119
+ const headers = this.prepareHeaders(idempotencyKey);
120
+ let jsonData = { url };
121
+ if (params) {
122
+ jsonData = Object.assign(Object.assign({}, jsonData), params);
123
+ }
124
+ try {
125
+ const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
126
+ if (response.status === 200) {
127
+ const jobId = response.data.jobId;
128
+ if (waitUntilDone) {
129
+ return this.monitorJobStatus(jobId, headers, pollInterval); // NOTE(review): returned without yield, so a rejection from monitorJobStatus is not handled by the catch below.
130
+ }
131
+ else {
132
+ return { success: true, jobId };
133
+ }
134
+ }
135
+ else {
136
+ this.handleError(response, "start crawl job");
137
+ }
138
+ }
139
+ catch (error) {
140
+ console.log(error); // NOTE(review): logs the raw error before rethrowing; the other methods in this class do not log.
141
+ throw new Error(error.message); // Keeps only the message of the original error.
142
+ }
143
+ return { success: false, error: "Internal server error." }; // Fallback; not reached while handleError throws on every path.
144
+ });
145
+ }
146
+ /**
147
+ * Checks the status of a crawl job with a single GET to `${apiUrl}/v0/crawl/status/${jobId}`.
148
+ * @param {string} jobId - The job ID of the crawl operation.
149
+ * @returns {Promise<JobStatusResponse>} On a 200 response, the status fields copied from the response body; partial_data is only populated when data is falsy. Every other outcome rejects with an Error carrying only the failure message.
150
+ */
151
+ checkCrawlStatus(jobId) {
152
+ return __awaiter(this, void 0, void 0, function* () {
153
+ const headers = this.prepareHeaders();
154
+ try {
155
+ const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
156
+ if (response.status === 200) {
157
+ return {
158
+ success: true,
159
+ status: response.data.status,
160
+ current: response.data.current,
161
+ current_url: response.data.current_url,
162
+ current_step: response.data.current_step,
163
+ total: response.data.total,
164
+ data: response.data.data,
165
+ partial_data: !response.data.data
166
+ ? response.data.partial_data
167
+ : undefined,
168
+ };
169
+ }
170
+ else {
171
+ this.handleError(response, "check crawl status");
172
+ }
173
+ }
174
+ catch (error) {
175
+ throw new Error(error.message); // Re-wraps every failure, including the error thrown by handleError, keeping only the message.
176
+ }
177
+ return { // Fallback; not reached while handleError throws on every path.
178
+ success: false,
179
+ status: "unknown",
180
+ current: 0,
181
+ current_url: "",
182
+ current_step: "",
183
+ total: 0,
184
+ error: "Internal server error.",
185
+ };
186
+ });
187
+ }
188
+ /**
189
+ * Prepares the headers for an API request: JSON content type, Bearer authorization built from this.apiKey, and an "x-idempotency-key" header when idempotencyKey is truthy.
190
+ * @returns {AxiosRequestHeaders} The prepared headers.
191
+ */
192
+ prepareHeaders(idempotencyKey) {
193
+ return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
194
+ }
195
+ /**
196
+ * Sends a POST request to the specified URL by passing the arguments straight to axios.post.
197
+ * @param {string} url - The URL to send the request to.
198
+ * @param {Params} data - The data to send as the request body.
199
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
200
+ * @returns {Promise<AxiosResponse>} The promise returned by axios.post.
201
+ */
202
+ postRequest(url, data, headers) {
203
+ return axios.post(url, data, { headers });
204
+ }
205
+ /**
206
+ * Sends a GET request to the specified URL by passing the arguments straight to axios.get.
207
+ * @param {string} url - The URL to send the request to.
208
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
209
+ * @returns {Promise<AxiosResponse>} The promise returned by axios.get.
210
+ */
211
+ getRequest(url, headers) {
212
+ return axios.get(url, { headers });
213
+ }
214
+ /**
215
+ * Polls the status of a crawl job until it completes or fails. There is no overall timeout: polling continues for as long as the status is "active", "paused", "pending" or "queued".
216
+ * @param {string} jobId - The job ID of the crawl operation.
217
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
218
+ * @param {number} checkInterval - Seconds to wait between status checks; values below 2 are raised to 2.
219
+ * @returns {Promise<any>} The `data` field of the status response once the status is "completed". Rejects if a completed job has no data, if the status is any other value, or if a status check does not return 200.
220
+ */
221
+ monitorJobStatus(jobId, headers, checkInterval) {
222
+ return __awaiter(this, void 0, void 0, function* () {
223
+ while (true) {
224
+ const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
225
+ if (statusResponse.status === 200) {
226
+ const statusData = statusResponse.data;
227
+ if (statusData.status === "completed") {
228
+ if ("data" in statusData) {
229
+ return statusData.data;
230
+ }
231
+ else {
232
+ throw new Error("Crawl job completed but no data was returned");
233
+ }
234
+ }
235
+ else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
236
+ if (checkInterval < 2) {
237
+ checkInterval = 2;
238
+ }
239
+ yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait checkInterval seconds before polling again
240
+ }
241
+ else {
242
+ throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
243
+ }
244
+ }
245
+ else {
246
+ this.handleError(statusResponse, "check crawl status");
247
+ }
248
+ }
249
+ });
250
+ }
251
+ /**
252
+ * Handles errors from API responses by always throwing an Error; it never returns normally. For status 402, 408, 409 or 500 the message includes response.data.error (or "Unknown error occurred"); for any other status it reports an unexpected error with the status code.
253
+ * @param {AxiosResponse} response - The response from the API.
254
+ * @param {string} action - The action being performed when the error occurred; interpolated into the error message.
255
+ */
256
+ handleError(response, action) {
257
+ if ([402, 408, 409, 500].includes(response.status)) {
258
+ const errorMessage = response.data.error || "Unknown error occurred";
259
+ throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
260
+ }
261
+ else {
262
+ throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
263
+ }
264
+ }
265
+ }
package/package.json CHANGED
@@ -1,16 +1,12 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl-js",
3
- "version": "0.0.33-beta.2",
3
+ "version": "0.0.34",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
- "main": "build/cjs/index",
5
+ "main": "build/index.js",
6
6
  "types": "types/index.d.ts",
7
7
  "type": "module",
8
- "exports": {
9
- "require": "./build/cjs/index.js",
10
- "import": "./build/esm/index.js"
11
- },
12
8
  "scripts": {
13
- "build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",
9
+ "build": "tsc",
14
10
  "build-and-publish": "npm run build && npm publish --access public",
15
11
  "publish-beta": "npm run build && npm publish --access public --tag beta",
16
12
  "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/**/*.test.ts"
@@ -0,0 +1,193 @@
1
+ import { AxiosResponse, AxiosRequestHeaders } from "axios";
2
+ import { z } from "zod";
3
+ /**
4
+ * Configuration interface for FirecrawlApp.
5
+ */
6
+ export interface FirecrawlAppConfig {
7
+ apiKey?: string | null;
8
+ apiUrl?: string | null;
9
+ }
10
+ /**
11
+ * Metadata for a Firecrawl document.
12
+ */
13
+ export interface FirecrawlDocumentMetadata {
14
+ title?: string;
15
+ description?: string;
16
+ language?: string;
17
+ keywords?: string;
18
+ robots?: string;
19
+ ogTitle?: string;
20
+ ogDescription?: string;
21
+ ogUrl?: string;
22
+ ogImage?: string;
23
+ ogAudio?: string;
24
+ ogDeterminer?: string;
25
+ ogLocale?: string;
26
+ ogLocaleAlternate?: string[];
27
+ ogSiteName?: string;
28
+ ogVideo?: string;
29
+ dctermsCreated?: string;
30
+ dcDateCreated?: string;
31
+ dcDate?: string;
32
+ dctermsType?: string;
33
+ dcType?: string;
34
+ dctermsAudience?: string;
35
+ dctermsSubject?: string;
36
+ dcSubject?: string;
37
+ dcDescription?: string;
38
+ dctermsKeywords?: string;
39
+ modifiedTime?: string;
40
+ publishedTime?: string;
41
+ articleTag?: string;
42
+ articleSection?: string;
43
+ sourceURL?: string;
44
+ pageStatusCode?: number;
45
+ pageError?: string;
46
+ [key: string]: any;
47
+ }
48
+ /**
49
+ * Document interface for Firecrawl.
50
+ */
51
+ export interface FirecrawlDocument {
52
+ id?: string;
53
+ url?: string;
54
+ content: string;
55
+ markdown?: string;
56
+ html?: string;
57
+ llm_extraction?: Record<string, any>;
58
+ createdAt?: Date;
59
+ updatedAt?: Date;
60
+ type?: string;
61
+ metadata: FirecrawlDocumentMetadata;
62
+ childrenLinks?: string[];
63
+ provider?: string;
64
+ warning?: string;
65
+ index?: number;
66
+ }
67
+ /**
68
+ * Response interface for scraping operations.
69
+ */
70
+ export interface ScrapeResponse {
71
+ success: boolean;
72
+ data?: FirecrawlDocument;
73
+ error?: string;
74
+ }
75
+ /**
76
+ * Response interface for searching operations.
77
+ */
78
+ export interface SearchResponse {
79
+ success: boolean;
80
+ data?: FirecrawlDocument[];
81
+ error?: string;
82
+ }
83
+ /**
84
+ * Response interface for crawling operations.
85
+ */
86
+ export interface CrawlResponse {
87
+ success: boolean;
88
+ jobId?: string;
89
+ data?: FirecrawlDocument[];
90
+ error?: string;
91
+ }
92
+ /**
93
+ * Response interface for job status checks.
94
+ */
95
+ export interface JobStatusResponse {
96
+ success: boolean;
97
+ status: string;
98
+ current?: number;
99
+ current_url?: string;
100
+ current_step?: string;
101
+ total?: number;
102
+ jobId?: string;
103
+ data?: FirecrawlDocument[];
104
+ partial_data?: FirecrawlDocument[];
105
+ error?: string;
106
+ }
107
+ /**
108
+ * Generic parameter interface.
109
+ */
110
+ export interface Params {
111
+ [key: string]: any;
112
+ extractorOptions?: {
113
+ extractionSchema: z.ZodSchema | any;
114
+ mode?: "llm-extraction";
115
+ extractionPrompt?: string;
116
+ };
117
+ }
118
+ /**
119
+ * Main class for interacting with the Firecrawl API.
120
+ */
121
+ export default class FirecrawlApp {
122
+ private apiKey;
123
+ private apiUrl;
124
+ /**
125
+ * Initializes a new instance of the FirecrawlApp class.
126
+ * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
127
+ */
128
+ constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
129
+ /**
130
+ * Scrapes a URL using the Firecrawl API.
131
+ * @param {string} url - The URL to scrape.
132
+ * @param {Params | null} params - Additional parameters for the scrape request.
133
+ * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
134
+ */
135
+ scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
136
+ /**
137
+ * Searches for a query using the Firecrawl API.
138
+ * @param {string} query - The query to search for.
139
+ * @param {Params | null} params - Additional parameters for the search request.
140
+ * @returns {Promise<SearchResponse>} The response from the search operation.
141
+ */
142
+ search(query: string, params?: Params | null): Promise<SearchResponse>;
143
+ /**
144
+ * Initiates a crawl job for a URL using the Firecrawl API and, by default, polls until the job finishes.
145
+ * @param {string} url - The URL to crawl.
146
+ * @param {Params | null} params - Additional parameters for the crawl request.
147
+ * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete (the implementation defaults to true).
148
+ * @param {number} pollInterval - Seconds between job status checks while waiting (the implementation defaults to 2 and raises smaller values to 2).
149
+ * @param {string} idempotencyKey - Optional idempotency key, sent as the "x-idempotency-key" header.
150
+ * @returns {Promise<CrawlResponse | any>} When waiting, the crawl job's data; otherwise an object with `success: true` and the `jobId`.
151
+ */
152
+ crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
153
+ /**
154
+ * Checks the status of a crawl job using the Firecrawl API.
155
+ * @param {string} jobId - The job ID of the crawl operation.
156
+ * @returns {Promise<JobStatusResponse>} The response containing the job status.
157
+ */
158
+ checkCrawlStatus(jobId: string): Promise<JobStatusResponse>;
159
+ /**
160
+ * Prepares the headers for an API request; when idempotencyKey is provided it is added as the "x-idempotency-key" header.
161
+ * @returns {AxiosRequestHeaders} The prepared headers.
162
+ */
163
+ prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
164
+ /**
165
+ * Sends a POST request to the specified URL.
166
+ * @param {string} url - The URL to send the request to.
167
+ * @param {Params} data - The data to send in the request.
168
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
169
+ * @returns {Promise<AxiosResponse>} The response from the POST request.
170
+ */
171
+ postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
172
+ /**
173
+ * Sends a GET request to the specified URL.
174
+ * @param {string} url - The URL to send the request to.
175
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
176
+ * @returns {Promise<AxiosResponse>} The response from the GET request.
177
+ */
178
+ getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
179
+ /**
180
+ * Polls the status of a crawl job until completion or failure. No overall timeout is applied.
181
+ * @param {string} jobId - The job ID of the crawl operation.
182
+ * @param {AxiosRequestHeaders} headers - The headers for the request.
183
+ * @param {number} checkInterval - Seconds to wait between job status checks (the implementation raises values below 2 to 2).
184
+ * @returns {Promise<any>} The completed job's data; rejects if the job fails, is stopped, or completes without data.
185
+ */
186
+ monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<any>;
187
+ /**
188
+ * Handles errors from API responses. Declared as returning void, but the implementation in build/index.js throws an Error on every path.
189
+ * @param {AxiosResponse} response - The response from the API.
190
+ * @param {string} action - The action being performed when the error occurred; used in the error message.
191
+ */
192
+ handleError(response: AxiosResponse, action: string): void;
193
+ }