@mendable/firecrawl-js 0.0.30 → 0.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/cjs/index.js +271 -0
- package/build/cjs/package.json +1 -0
- package/build/{index.js → esm/index.js} +8 -0
- package/build/esm/package.json +1 -0
- package/jest.config.js +16 -0
- package/package.json +9 -5
- package/src/__tests__/e2e_withAuth/index.test.ts +6 -2
- package/src/index.ts +12 -0
- package/build_and_publish.sh +0 -34
- package/jest.config.cjs +0 -5
- package/types/index.d.ts +0 -189
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
const axios_1 = __importDefault(require("axios"));
|
|
16
|
+
const zod_1 = require("zod");
|
|
17
|
+
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
|
18
|
+
/**
|
|
19
|
+
* Main class for interacting with the Firecrawl API.
|
|
20
|
+
*/
|
|
21
|
+
class FirecrawlApp {
|
|
22
|
+
/**
|
|
23
|
+
* Initializes a new instance of the FirecrawlApp class.
|
|
24
|
+
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
25
|
+
*/
|
|
26
|
+
constructor({ apiKey = null, apiUrl = null }) {
|
|
27
|
+
this.apiKey = apiKey || "";
|
|
28
|
+
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
29
|
+
if (!this.apiKey) {
|
|
30
|
+
throw new Error("No API key provided");
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Scrapes a URL using the Firecrawl API.
|
|
35
|
+
* @param {string} url - The URL to scrape.
|
|
36
|
+
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
37
|
+
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
38
|
+
*/
|
|
39
|
+
scrapeUrl(url, params = null) {
|
|
40
|
+
var _a;
|
|
41
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
42
|
+
const headers = {
|
|
43
|
+
"Content-Type": "application/json",
|
|
44
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
45
|
+
};
|
|
46
|
+
let jsonData = Object.assign({ url }, params);
|
|
47
|
+
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
|
|
48
|
+
let schema = params.extractorOptions.extractionSchema;
|
|
49
|
+
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
50
|
+
if (schema instanceof zod_1.z.ZodSchema) {
|
|
51
|
+
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
52
|
+
}
|
|
53
|
+
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
|
54
|
+
}
|
|
55
|
+
try {
|
|
56
|
+
const response = yield axios_1.default.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
|
57
|
+
if (response.status === 200) {
|
|
58
|
+
const responseData = response.data;
|
|
59
|
+
if (responseData.success) {
|
|
60
|
+
return responseData;
|
|
61
|
+
}
|
|
62
|
+
else {
|
|
63
|
+
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
this.handleError(response, "scrape URL");
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
catch (error) {
|
|
71
|
+
throw new Error(error.message);
|
|
72
|
+
}
|
|
73
|
+
return { success: false, error: "Internal server error." };
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Searches for a query using the Firecrawl API.
|
|
78
|
+
* @param {string} query - The query to search for.
|
|
79
|
+
* @param {Params | null} params - Additional parameters for the search request.
|
|
80
|
+
* @returns {Promise<SearchResponse>} The response from the search operation.
|
|
81
|
+
*/
|
|
82
|
+
search(query, params = null) {
|
|
83
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
84
|
+
const headers = {
|
|
85
|
+
"Content-Type": "application/json",
|
|
86
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
87
|
+
};
|
|
88
|
+
let jsonData = { query };
|
|
89
|
+
if (params) {
|
|
90
|
+
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
91
|
+
}
|
|
92
|
+
try {
|
|
93
|
+
const response = yield axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
94
|
+
if (response.status === 200) {
|
|
95
|
+
const responseData = response.data;
|
|
96
|
+
if (responseData.success) {
|
|
97
|
+
return responseData;
|
|
98
|
+
}
|
|
99
|
+
else {
|
|
100
|
+
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
this.handleError(response, "search");
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
catch (error) {
|
|
108
|
+
throw new Error(error.message);
|
|
109
|
+
}
|
|
110
|
+
return { success: false, error: "Internal server error." };
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
115
|
+
* @param {string} url - The URL to crawl.
|
|
116
|
+
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
117
|
+
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
118
|
+
* @param {number} pollInterval - Time in seconds for job status checks.
|
|
119
|
+
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
120
|
+
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
121
|
+
*/
|
|
122
|
+
crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
123
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
124
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
125
|
+
let jsonData = { url };
|
|
126
|
+
if (params) {
|
|
127
|
+
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
128
|
+
}
|
|
129
|
+
try {
|
|
130
|
+
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
|
131
|
+
if (response.status === 200) {
|
|
132
|
+
const jobId = response.data.jobId;
|
|
133
|
+
if (waitUntilDone) {
|
|
134
|
+
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
135
|
+
}
|
|
136
|
+
else {
|
|
137
|
+
return { success: true, jobId };
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
else {
|
|
141
|
+
this.handleError(response, "start crawl job");
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
catch (error) {
|
|
145
|
+
console.log(error);
|
|
146
|
+
throw new Error(error.message);
|
|
147
|
+
}
|
|
148
|
+
return { success: false, error: "Internal server error." };
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Checks the status of a crawl job using the Firecrawl API.
|
|
153
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
154
|
+
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
155
|
+
*/
|
|
156
|
+
checkCrawlStatus(jobId) {
|
|
157
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
158
|
+
const headers = this.prepareHeaders();
|
|
159
|
+
try {
|
|
160
|
+
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
161
|
+
if (response.status === 200) {
|
|
162
|
+
return {
|
|
163
|
+
success: true,
|
|
164
|
+
status: response.data.status,
|
|
165
|
+
current: response.data.current,
|
|
166
|
+
current_url: response.data.current_url,
|
|
167
|
+
current_step: response.data.current_step,
|
|
168
|
+
total: response.data.total,
|
|
169
|
+
data: response.data.data,
|
|
170
|
+
partial_data: !response.data.data
|
|
171
|
+
? response.data.partial_data
|
|
172
|
+
: undefined,
|
|
173
|
+
};
|
|
174
|
+
}
|
|
175
|
+
else {
|
|
176
|
+
this.handleError(response, "check crawl status");
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
catch (error) {
|
|
180
|
+
throw new Error(error.message);
|
|
181
|
+
}
|
|
182
|
+
return {
|
|
183
|
+
success: false,
|
|
184
|
+
status: "unknown",
|
|
185
|
+
current: 0,
|
|
186
|
+
current_url: "",
|
|
187
|
+
current_step: "",
|
|
188
|
+
total: 0,
|
|
189
|
+
error: "Internal server error.",
|
|
190
|
+
};
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Prepares the headers for an API request.
|
|
195
|
+
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
196
|
+
*/
|
|
197
|
+
prepareHeaders(idempotencyKey) {
|
|
198
|
+
return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
|
|
199
|
+
}
|
|
200
|
+
/**
|
|
201
|
+
* Sends a POST request to the specified URL.
|
|
202
|
+
* @param {string} url - The URL to send the request to.
|
|
203
|
+
* @param {Params} data - The data to send in the request.
|
|
204
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
205
|
+
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
206
|
+
*/
|
|
207
|
+
postRequest(url, data, headers) {
|
|
208
|
+
return axios_1.default.post(url, data, { headers });
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Sends a GET request to the specified URL.
|
|
212
|
+
* @param {string} url - The URL to send the request to.
|
|
213
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
214
|
+
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
215
|
+
*/
|
|
216
|
+
getRequest(url, headers) {
|
|
217
|
+
return axios_1.default.get(url, { headers });
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Monitors the status of a crawl job until completion or failure.
|
|
221
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
222
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
223
|
+
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
224
|
+
* @returns {Promise<any>} The final job status or data.
|
|
225
|
+
*/
|
|
226
|
+
monitorJobStatus(jobId, headers, checkInterval) {
|
|
227
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
228
|
+
while (true) {
|
|
229
|
+
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
230
|
+
if (statusResponse.status === 200) {
|
|
231
|
+
const statusData = statusResponse.data;
|
|
232
|
+
if (statusData.status === "completed") {
|
|
233
|
+
if ("data" in statusData) {
|
|
234
|
+
return statusData.data;
|
|
235
|
+
}
|
|
236
|
+
else {
|
|
237
|
+
throw new Error("Crawl job completed but no data was returned");
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
|
241
|
+
if (checkInterval < 2) {
|
|
242
|
+
checkInterval = 2;
|
|
243
|
+
}
|
|
244
|
+
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
245
|
+
}
|
|
246
|
+
else {
|
|
247
|
+
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
this.handleError(statusResponse, "check crawl status");
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
});
|
|
255
|
+
}
|
|
256
|
+
/**
|
|
257
|
+
* Handles errors from API responses.
|
|
258
|
+
* @param {AxiosResponse} response - The response from the API.
|
|
259
|
+
* @param {string} action - The action being performed when the error occurred.
|
|
260
|
+
*/
|
|
261
|
+
handleError(response, action) {
|
|
262
|
+
if ([402, 408, 409, 500].includes(response.status)) {
|
|
263
|
+
const errorMessage = response.data.error || "Unknown error occurred";
|
|
264
|
+
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
|
265
|
+
}
|
|
266
|
+
else {
|
|
267
|
+
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
exports.default = FirecrawlApp;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type": "commonjs"}
|
|
@@ -157,6 +157,10 @@ export default class FirecrawlApp {
|
|
|
157
157
|
return {
|
|
158
158
|
success: true,
|
|
159
159
|
status: response.data.status,
|
|
160
|
+
current: response.data.current,
|
|
161
|
+
current_url: response.data.current_url,
|
|
162
|
+
current_step: response.data.current_step,
|
|
163
|
+
total: response.data.total,
|
|
160
164
|
data: response.data.data,
|
|
161
165
|
partial_data: !response.data.data
|
|
162
166
|
? response.data.partial_data
|
|
@@ -173,6 +177,10 @@ export default class FirecrawlApp {
|
|
|
173
177
|
return {
|
|
174
178
|
success: false,
|
|
175
179
|
status: "unknown",
|
|
180
|
+
current: 0,
|
|
181
|
+
current_url: "",
|
|
182
|
+
current_step: "",
|
|
183
|
+
total: 0,
|
|
176
184
|
error: "Internal server error.",
|
|
177
185
|
};
|
|
178
186
|
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type": "module"}
|
package/jest.config.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** @type {import('ts-jest').JestConfigWithTsJest} **/
|
|
2
|
+
export default {
|
|
3
|
+
testEnvironment: "node",
|
|
4
|
+
"moduleNameMapper": {
|
|
5
|
+
"^(\\.{1,2}/.*)\\.js$": "$1",
|
|
6
|
+
},
|
|
7
|
+
"extensionsToTreatAsEsm": [".ts"],
|
|
8
|
+
"transform": {
|
|
9
|
+
"^.+\\.(mt|t|cj|j)s$": [
|
|
10
|
+
"ts-jest",
|
|
11
|
+
{
|
|
12
|
+
"useESM": true
|
|
13
|
+
}
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
};
|
package/package.json
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "0.0.30",
|
|
3
|
+
"version": "0.0.31",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
|
-
"main": "build/index.js",
|
|
5
|
+
"main": "build/cjs/index.js",
|
|
6
6
|
"types": "types/index.d.ts",
|
|
7
7
|
"type": "module",
|
|
8
|
+
"exports": {
|
|
9
|
+
"require": "./build/cjs/index.js",
|
|
10
|
+
"import": "./build/esm/index.js"
|
|
11
|
+
},
|
|
8
12
|
"scripts": {
|
|
9
|
-
"build": "tsc",
|
|
13
|
+
"build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",
|
|
10
14
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
11
15
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
12
|
-
"test": "jest src/__tests__/**/*.test.ts"
|
|
16
|
+
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/**/*.test.ts"
|
|
13
17
|
},
|
|
14
18
|
"repository": {
|
|
15
19
|
"type": "git",
|
|
@@ -37,7 +41,7 @@
|
|
|
37
41
|
"@types/node": "^20.12.12",
|
|
38
42
|
"@types/uuid": "^9.0.8",
|
|
39
43
|
"jest": "^29.7.0",
|
|
40
|
-
"ts-jest": "^29.
|
|
44
|
+
"ts-jest": "^29.2.2",
|
|
41
45
|
"typescript": "^5.4.5"
|
|
42
46
|
},
|
|
43
47
|
"keywords": [
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import FirecrawlApp from '../../index';
|
|
2
2
|
import { v4 as uuidv4 } from 'uuid';
|
|
3
3
|
import dotenv from 'dotenv';
|
|
4
|
-
|
|
4
|
+
import { describe, test, expect } from '@jest/globals';
|
|
5
5
|
|
|
6
6
|
dotenv.config();
|
|
7
7
|
|
|
@@ -9,7 +9,7 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
|
9
9
|
const API_URL = "http://127.0.0.1:3002";
|
|
10
10
|
|
|
11
11
|
describe('FirecrawlApp E2E Tests', () => {
|
|
12
|
-
test.concurrent('should throw error for no API key', () => {
|
|
12
|
+
test.concurrent('should throw error for no API key', async () => {
|
|
13
13
|
expect(() => {
|
|
14
14
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
|
15
15
|
}).toThrow("No API key provided");
|
|
@@ -107,12 +107,16 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
107
107
|
while (statusResponse.status === 'active' && checks < maxChecks) {
|
|
108
108
|
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
109
109
|
expect(statusResponse.partial_data).not.toBeNull();
|
|
110
|
+
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
|
110
111
|
statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
111
112
|
checks++;
|
|
112
113
|
}
|
|
113
114
|
|
|
114
115
|
expect(statusResponse).not.toBeNull();
|
|
116
|
+
expect(statusResponse.success).toBe(true);
|
|
115
117
|
expect(statusResponse.status).toBe('completed');
|
|
118
|
+
expect(statusResponse.total).toEqual(statusResponse.current);
|
|
119
|
+
expect(statusResponse.current_step).not.toBeNull();
|
|
116
120
|
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
|
117
121
|
}, 35000); // 35 seconds timeout
|
|
118
122
|
|
package/src/index.ts
CHANGED
|
@@ -100,6 +100,10 @@ export interface CrawlResponse {
|
|
|
100
100
|
export interface JobStatusResponse {
|
|
101
101
|
success: boolean;
|
|
102
102
|
status: string;
|
|
103
|
+
current?: number;
|
|
104
|
+
current_url?: string;
|
|
105
|
+
current_step?: string;
|
|
106
|
+
total?: number;
|
|
103
107
|
jobId?: string;
|
|
104
108
|
data?: FirecrawlDocument[];
|
|
105
109
|
partial_data?: FirecrawlDocument[];
|
|
@@ -287,6 +291,10 @@ export default class FirecrawlApp {
|
|
|
287
291
|
return {
|
|
288
292
|
success: true,
|
|
289
293
|
status: response.data.status,
|
|
294
|
+
current: response.data.current,
|
|
295
|
+
current_url: response.data.current_url,
|
|
296
|
+
current_step: response.data.current_step,
|
|
297
|
+
total: response.data.total,
|
|
290
298
|
data: response.data.data,
|
|
291
299
|
partial_data: !response.data.data
|
|
292
300
|
? response.data.partial_data
|
|
@@ -301,6 +309,10 @@ export default class FirecrawlApp {
|
|
|
301
309
|
return {
|
|
302
310
|
success: false,
|
|
303
311
|
status: "unknown",
|
|
312
|
+
current: 0,
|
|
313
|
+
current_url: "",
|
|
314
|
+
current_step: "",
|
|
315
|
+
total: 0,
|
|
304
316
|
error: "Internal server error.",
|
|
305
317
|
};
|
|
306
318
|
}
|
package/build_and_publish.sh
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
function build_and_publish {
|
|
4
|
-
PACKAGE_NAME=$1
|
|
5
|
-
|
|
6
|
-
# Replace placeholder with the package name in package.json
|
|
7
|
-
jq --arg name "$PACKAGE_NAME" '.name = $name' package.json > temp.json && mv temp.json package.json
|
|
8
|
-
|
|
9
|
-
# Debug: show modified state
|
|
10
|
-
echo "Modified package.json for $PACKAGE_NAME:"
|
|
11
|
-
cat package.json
|
|
12
|
-
|
|
13
|
-
# Publish the package using npm
|
|
14
|
-
npm publish
|
|
15
|
-
|
|
16
|
-
# Check if publish was successful
|
|
17
|
-
if [ $? -ne 0 ]; then
|
|
18
|
-
echo "Publish failed for $PACKAGE_NAME"
|
|
19
|
-
exit 1
|
|
20
|
-
fi
|
|
21
|
-
|
|
22
|
-
# Revert the changes to the original placeholder in package.json
|
|
23
|
-
jq '.name = "PLACEHOLDER_NAME"' package.json > temp.json && mv temp.json package.json
|
|
24
|
-
|
|
25
|
-
# Debug: show reverted state
|
|
26
|
-
echo "Reverted package.json to placeholder:"
|
|
27
|
-
cat package.json
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
# Build and publish the first package to npm
|
|
31
|
-
build_and_publish "@mendable/firecrawl-js"
|
|
32
|
-
|
|
33
|
-
# Build and publish the second package to npm
|
|
34
|
-
build_and_publish "firecrawl"
|
package/jest.config.cjs
DELETED
package/types/index.d.ts
DELETED
|
@@ -1,189 +0,0 @@
|
|
|
1
|
-
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
|
-
import { z } from "zod";
|
|
3
|
-
/**
|
|
4
|
-
* Configuration interface for FirecrawlApp.
|
|
5
|
-
*/
|
|
6
|
-
export interface FirecrawlAppConfig {
|
|
7
|
-
apiKey?: string | null;
|
|
8
|
-
apiUrl?: string | null;
|
|
9
|
-
}
|
|
10
|
-
/**
|
|
11
|
-
* Metadata for a Firecrawl document.
|
|
12
|
-
*/
|
|
13
|
-
export interface FirecrawlDocumentMetadata {
|
|
14
|
-
title?: string;
|
|
15
|
-
description?: string;
|
|
16
|
-
language?: string;
|
|
17
|
-
keywords?: string;
|
|
18
|
-
robots?: string;
|
|
19
|
-
ogTitle?: string;
|
|
20
|
-
ogDescription?: string;
|
|
21
|
-
ogUrl?: string;
|
|
22
|
-
ogImage?: string;
|
|
23
|
-
ogAudio?: string;
|
|
24
|
-
ogDeterminer?: string;
|
|
25
|
-
ogLocale?: string;
|
|
26
|
-
ogLocaleAlternate?: string[];
|
|
27
|
-
ogSiteName?: string;
|
|
28
|
-
ogVideo?: string;
|
|
29
|
-
dctermsCreated?: string;
|
|
30
|
-
dcDateCreated?: string;
|
|
31
|
-
dcDate?: string;
|
|
32
|
-
dctermsType?: string;
|
|
33
|
-
dcType?: string;
|
|
34
|
-
dctermsAudience?: string;
|
|
35
|
-
dctermsSubject?: string;
|
|
36
|
-
dcSubject?: string;
|
|
37
|
-
dcDescription?: string;
|
|
38
|
-
dctermsKeywords?: string;
|
|
39
|
-
modifiedTime?: string;
|
|
40
|
-
publishedTime?: string;
|
|
41
|
-
articleTag?: string;
|
|
42
|
-
articleSection?: string;
|
|
43
|
-
sourceURL?: string;
|
|
44
|
-
pageStatusCode?: number;
|
|
45
|
-
pageError?: string;
|
|
46
|
-
[key: string]: any;
|
|
47
|
-
}
|
|
48
|
-
/**
|
|
49
|
-
* Document interface for Firecrawl.
|
|
50
|
-
*/
|
|
51
|
-
export interface FirecrawlDocument {
|
|
52
|
-
id?: string;
|
|
53
|
-
url?: string;
|
|
54
|
-
content: string;
|
|
55
|
-
markdown?: string;
|
|
56
|
-
html?: string;
|
|
57
|
-
llm_extraction?: Record<string, any>;
|
|
58
|
-
createdAt?: Date;
|
|
59
|
-
updatedAt?: Date;
|
|
60
|
-
type?: string;
|
|
61
|
-
metadata: FirecrawlDocumentMetadata;
|
|
62
|
-
childrenLinks?: string[];
|
|
63
|
-
provider?: string;
|
|
64
|
-
warning?: string;
|
|
65
|
-
index?: number;
|
|
66
|
-
}
|
|
67
|
-
/**
|
|
68
|
-
* Response interface for scraping operations.
|
|
69
|
-
*/
|
|
70
|
-
export interface ScrapeResponse {
|
|
71
|
-
success: boolean;
|
|
72
|
-
data?: FirecrawlDocument;
|
|
73
|
-
error?: string;
|
|
74
|
-
}
|
|
75
|
-
/**
|
|
76
|
-
* Response interface for searching operations.
|
|
77
|
-
*/
|
|
78
|
-
export interface SearchResponse {
|
|
79
|
-
success: boolean;
|
|
80
|
-
data?: FirecrawlDocument[];
|
|
81
|
-
error?: string;
|
|
82
|
-
}
|
|
83
|
-
/**
|
|
84
|
-
* Response interface for crawling operations.
|
|
85
|
-
*/
|
|
86
|
-
export interface CrawlResponse {
|
|
87
|
-
success: boolean;
|
|
88
|
-
jobId?: string;
|
|
89
|
-
data?: FirecrawlDocument[];
|
|
90
|
-
error?: string;
|
|
91
|
-
}
|
|
92
|
-
/**
|
|
93
|
-
* Response interface for job status checks.
|
|
94
|
-
*/
|
|
95
|
-
export interface JobStatusResponse {
|
|
96
|
-
success: boolean;
|
|
97
|
-
status: string;
|
|
98
|
-
jobId?: string;
|
|
99
|
-
data?: FirecrawlDocument[];
|
|
100
|
-
partial_data?: FirecrawlDocument[];
|
|
101
|
-
error?: string;
|
|
102
|
-
}
|
|
103
|
-
/**
|
|
104
|
-
* Generic parameter interface.
|
|
105
|
-
*/
|
|
106
|
-
export interface Params {
|
|
107
|
-
[key: string]: any;
|
|
108
|
-
extractorOptions?: {
|
|
109
|
-
extractionSchema: z.ZodSchema | any;
|
|
110
|
-
mode?: "llm-extraction";
|
|
111
|
-
extractionPrompt?: string;
|
|
112
|
-
};
|
|
113
|
-
}
|
|
114
|
-
/**
|
|
115
|
-
* Main class for interacting with the Firecrawl API.
|
|
116
|
-
*/
|
|
117
|
-
export default class FirecrawlApp {
|
|
118
|
-
private apiKey;
|
|
119
|
-
private apiUrl;
|
|
120
|
-
/**
|
|
121
|
-
* Initializes a new instance of the FirecrawlApp class.
|
|
122
|
-
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
123
|
-
*/
|
|
124
|
-
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
|
|
125
|
-
/**
|
|
126
|
-
* Scrapes a URL using the Firecrawl API.
|
|
127
|
-
* @param {string} url - The URL to scrape.
|
|
128
|
-
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
129
|
-
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
130
|
-
*/
|
|
131
|
-
scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
|
|
132
|
-
/**
|
|
133
|
-
* Searches for a query using the Firecrawl API.
|
|
134
|
-
* @param {string} query - The query to search for.
|
|
135
|
-
* @param {Params | null} params - Additional parameters for the search request.
|
|
136
|
-
* @returns {Promise<SearchResponse>} The response from the search operation.
|
|
137
|
-
*/
|
|
138
|
-
search(query: string, params?: Params | null): Promise<SearchResponse>;
|
|
139
|
-
/**
|
|
140
|
-
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
141
|
-
* @param {string} url - The URL to crawl.
|
|
142
|
-
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
143
|
-
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
144
|
-
* @param {number} pollInterval - Time in seconds for job status checks.
|
|
145
|
-
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
146
|
-
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
147
|
-
*/
|
|
148
|
-
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
|
|
149
|
-
/**
|
|
150
|
-
* Checks the status of a crawl job using the Firecrawl API.
|
|
151
|
-
* @param {string} jobId - The job ID of the crawl operation.
|
|
152
|
-
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
153
|
-
*/
|
|
154
|
-
checkCrawlStatus(jobId: string): Promise<JobStatusResponse>;
|
|
155
|
-
/**
|
|
156
|
-
* Prepares the headers for an API request.
|
|
157
|
-
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
158
|
-
*/
|
|
159
|
-
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
|
160
|
-
/**
|
|
161
|
-
* Sends a POST request to the specified URL.
|
|
162
|
-
* @param {string} url - The URL to send the request to.
|
|
163
|
-
* @param {Params} data - The data to send in the request.
|
|
164
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
165
|
-
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
166
|
-
*/
|
|
167
|
-
postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
|
168
|
-
/**
|
|
169
|
-
* Sends a GET request to the specified URL.
|
|
170
|
-
* @param {string} url - The URL to send the request to.
|
|
171
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
172
|
-
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
173
|
-
*/
|
|
174
|
-
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
|
175
|
-
/**
|
|
176
|
-
* Monitors the status of a crawl job until completion or failure.
|
|
177
|
-
* @param {string} jobId - The job ID of the crawl operation.
|
|
178
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
179
|
-
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
180
|
-
* @returns {Promise<any>} The final job status or data.
|
|
181
|
-
*/
|
|
182
|
-
monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<any>;
|
|
183
|
-
/**
|
|
184
|
-
* Handles errors from API responses.
|
|
185
|
-
* @param {AxiosResponse} response - The response from the API.
|
|
186
|
-
* @param {string} action - The action being performed when the error occurred.
|
|
187
|
-
*/
|
|
188
|
-
handleError(response: AxiosResponse, action: string): void;
|
|
189
|
-
}
|