firecrawl 0.0.30 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +88 -123
- package/build/cjs/index.js +347 -0
- package/build/cjs/package.json +1 -0
- package/build/esm/index.js +339 -0
- package/build/esm/package.json +1 -0
- package/jest.config.js +16 -0
- package/package.json +17 -5
- package/src/__tests__/e2e_withAuth/index.test.ts +298 -124
- package/src/__tests__/index.test.ts +1 -1
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +312 -0
- package/src/index.ts +349 -148
- package/tsconfig.json +3 -3
- package/types/index.d.ts +143 -73
- package/build/index.js +0 -257
- package/build_and_publish.sh +0 -34
- package/jest.config.cjs +0 -5
package/tsconfig.json
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
|
12
12
|
|
|
13
13
|
/* Language and Environment */
|
|
14
|
-
"target": "
|
|
14
|
+
"target": "es2020", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
|
|
15
15
|
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
|
16
16
|
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
|
17
17
|
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
|
26
26
|
|
|
27
27
|
/* Modules */
|
|
28
|
-
"module": "
|
|
28
|
+
"module": "commonjs", /* Specify what module code is generated. */
|
|
29
29
|
"rootDir": "./src", /* Specify the root folder within your source files. */
|
|
30
|
-
"moduleResolution": "
|
|
30
|
+
"moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
|
|
31
31
|
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
|
32
32
|
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
|
33
33
|
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
|
package/types/index.d.ts
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
import { TypedEventTarget } from "typescript-event-target";
|
|
3
4
|
/**
|
|
4
5
|
* Configuration interface for FirecrawlApp.
|
|
6
|
+
* @param apiKey - Optional API key for authentication.
|
|
7
|
+
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
|
5
8
|
*/
|
|
6
9
|
export interface FirecrawlAppConfig {
|
|
7
10
|
apiKey?: string | null;
|
|
@@ -9,6 +12,7 @@ export interface FirecrawlAppConfig {
|
|
|
9
12
|
}
|
|
10
13
|
/**
|
|
11
14
|
* Metadata for a Firecrawl document.
|
|
15
|
+
* Includes various optional properties for document metadata.
|
|
12
16
|
*/
|
|
13
17
|
export interface FirecrawlDocumentMetadata {
|
|
14
18
|
title?: string;
|
|
@@ -41,145 +45,191 @@ export interface FirecrawlDocumentMetadata {
|
|
|
41
45
|
articleTag?: string;
|
|
42
46
|
articleSection?: string;
|
|
43
47
|
sourceURL?: string;
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
statusCode?: number;
|
|
49
|
+
error?: string;
|
|
46
50
|
[key: string]: any;
|
|
47
51
|
}
|
|
48
52
|
/**
|
|
49
53
|
* Document interface for Firecrawl.
|
|
54
|
+
* Represents a document retrieved or processed by Firecrawl.
|
|
50
55
|
*/
|
|
51
56
|
export interface FirecrawlDocument {
|
|
52
|
-
id?: string;
|
|
53
57
|
url?: string;
|
|
54
|
-
content: string;
|
|
55
58
|
markdown?: string;
|
|
56
59
|
html?: string;
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
metadata
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
60
|
+
rawHtml?: string;
|
|
61
|
+
links?: string[];
|
|
62
|
+
extract?: Record<any, any>;
|
|
63
|
+
screenshot?: string;
|
|
64
|
+
metadata?: FirecrawlDocumentMetadata;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Parameters for scraping operations.
|
|
68
|
+
* Defines the options and configurations available for scraping web content.
|
|
69
|
+
*/
|
|
70
|
+
export interface ScrapeParams {
|
|
71
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
|
72
|
+
headers?: Record<string, string>;
|
|
73
|
+
includeTags?: string[];
|
|
74
|
+
excludeTags?: string[];
|
|
75
|
+
onlyMainContent?: boolean;
|
|
76
|
+
extract?: {
|
|
77
|
+
prompt?: string;
|
|
78
|
+
schema?: z.ZodSchema | any;
|
|
79
|
+
systemPrompt?: string;
|
|
80
|
+
};
|
|
81
|
+
waitFor?: number;
|
|
82
|
+
timeout?: number;
|
|
66
83
|
}
|
|
67
84
|
/**
|
|
68
85
|
* Response interface for scraping operations.
|
|
86
|
+
* Defines the structure of the response received after a scraping operation.
|
|
69
87
|
*/
|
|
70
|
-
export interface ScrapeResponse {
|
|
71
|
-
success:
|
|
72
|
-
|
|
88
|
+
export interface ScrapeResponse extends FirecrawlDocument {
|
|
89
|
+
success: true;
|
|
90
|
+
warning?: string;
|
|
73
91
|
error?: string;
|
|
74
92
|
}
|
|
75
93
|
/**
|
|
76
|
-
*
|
|
94
|
+
* Parameters for crawling operations.
|
|
95
|
+
* Includes options for both scraping and mapping during a crawl.
|
|
77
96
|
*/
|
|
78
|
-
export interface
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
97
|
+
export interface CrawlParams {
|
|
98
|
+
includePaths?: string[];
|
|
99
|
+
excludePaths?: string[];
|
|
100
|
+
maxDepth?: number;
|
|
101
|
+
limit?: number;
|
|
102
|
+
allowBackwardLinks?: boolean;
|
|
103
|
+
allowExternalLinks?: boolean;
|
|
104
|
+
ignoreSitemap?: boolean;
|
|
105
|
+
scrapeOptions?: ScrapeParams;
|
|
82
106
|
}
|
|
83
107
|
/**
|
|
84
108
|
* Response interface for crawling operations.
|
|
109
|
+
* Defines the structure of the response received after initiating a crawl.
|
|
85
110
|
*/
|
|
86
111
|
export interface CrawlResponse {
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
112
|
+
id?: string;
|
|
113
|
+
url?: string;
|
|
114
|
+
success: true;
|
|
90
115
|
error?: string;
|
|
91
116
|
}
|
|
92
117
|
/**
|
|
93
118
|
* Response interface for job status checks.
|
|
119
|
+
* Provides detailed status of a crawl job including progress and results.
|
|
94
120
|
*/
|
|
95
|
-
export interface
|
|
96
|
-
success:
|
|
97
|
-
|
|
98
|
-
|
|
121
|
+
export interface CrawlStatusResponse {
|
|
122
|
+
success: true;
|
|
123
|
+
total: number;
|
|
124
|
+
completed: number;
|
|
125
|
+
creditsUsed: number;
|
|
126
|
+
expiresAt: Date;
|
|
127
|
+
status: "scraping" | "completed" | "failed";
|
|
128
|
+
next: string;
|
|
99
129
|
data?: FirecrawlDocument[];
|
|
100
|
-
partial_data?: FirecrawlDocument[];
|
|
101
130
|
error?: string;
|
|
102
131
|
}
|
|
103
132
|
/**
|
|
104
|
-
*
|
|
133
|
+
* Parameters for mapping operations.
|
|
134
|
+
* Defines options for mapping URLs during a crawl.
|
|
105
135
|
*/
|
|
106
|
-
export interface
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
136
|
+
export interface MapParams {
|
|
137
|
+
search?: string;
|
|
138
|
+
ignoreSitemap?: boolean;
|
|
139
|
+
includeSubdomains?: boolean;
|
|
140
|
+
limit?: number;
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* Response interface for mapping operations.
|
|
144
|
+
* Defines the structure of the response received after a mapping operation.
|
|
145
|
+
*/
|
|
146
|
+
export interface MapResponse {
|
|
147
|
+
success: true;
|
|
148
|
+
links?: string[];
|
|
149
|
+
error?: string;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Error response interface.
|
|
153
|
+
* Defines the structure of the response received when an error occurs.
|
|
154
|
+
*/
|
|
155
|
+
export interface ErrorResponse {
|
|
156
|
+
success: false;
|
|
157
|
+
error: string;
|
|
113
158
|
}
|
|
114
159
|
/**
|
|
115
160
|
* Main class for interacting with the Firecrawl API.
|
|
161
|
+
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
116
162
|
*/
|
|
117
163
|
export default class FirecrawlApp {
|
|
118
|
-
|
|
119
|
-
|
|
164
|
+
apiKey: string;
|
|
165
|
+
apiUrl: string;
|
|
120
166
|
/**
|
|
121
167
|
* Initializes a new instance of the FirecrawlApp class.
|
|
122
|
-
* @param
|
|
168
|
+
* @param config - Configuration options for the FirecrawlApp instance.
|
|
123
169
|
*/
|
|
124
170
|
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
|
|
125
171
|
/**
|
|
126
172
|
* Scrapes a URL using the Firecrawl API.
|
|
127
|
-
* @param
|
|
128
|
-
* @param
|
|
129
|
-
* @returns
|
|
173
|
+
* @param url - The URL to scrape.
|
|
174
|
+
* @param params - Additional parameters for the scrape request.
|
|
175
|
+
* @returns The response from the scrape operation.
|
|
130
176
|
*/
|
|
131
|
-
scrapeUrl(url: string, params?:
|
|
177
|
+
scrapeUrl(url: string, params?: ScrapeParams): Promise<ScrapeResponse | ErrorResponse>;
|
|
132
178
|
/**
|
|
133
|
-
*
|
|
134
|
-
* @param
|
|
135
|
-
* @param
|
|
136
|
-
* @returns
|
|
179
|
+
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
|
180
|
+
* @param query - The search query string.
|
|
181
|
+
* @param params - Additional parameters for the search.
|
|
182
|
+
* @returns Throws an error advising to use version 0 of the API.
|
|
137
183
|
*/
|
|
138
|
-
search(query: string, params?:
|
|
184
|
+
search(query: string, params?: any): Promise<any>;
|
|
139
185
|
/**
|
|
140
186
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
141
|
-
* @param
|
|
142
|
-
* @param
|
|
143
|
-
* @param
|
|
144
|
-
* @param
|
|
145
|
-
* @
|
|
146
|
-
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
187
|
+
* @param url - The URL to crawl.
|
|
188
|
+
* @param params - Additional parameters for the crawl request.
|
|
189
|
+
* @param pollInterval - Time in seconds for job status checks.
|
|
190
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
191
|
+
* @returns The response from the crawl operation.
|
|
147
192
|
*/
|
|
148
|
-
crawlUrl(url: string, params?:
|
|
193
|
+
crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
194
|
+
asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlResponse | ErrorResponse>;
|
|
149
195
|
/**
|
|
150
196
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
151
|
-
* @param
|
|
152
|
-
* @returns
|
|
197
|
+
* @param id - The ID of the crawl operation.
|
|
198
|
+
* @returns The response containing the job status.
|
|
153
199
|
*/
|
|
154
|
-
checkCrawlStatus(
|
|
200
|
+
checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
201
|
+
crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
|
202
|
+
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
|
|
155
203
|
/**
|
|
156
204
|
* Prepares the headers for an API request.
|
|
157
|
-
* @
|
|
205
|
+
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
206
|
+
* @returns The prepared headers.
|
|
158
207
|
*/
|
|
159
208
|
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
|
160
209
|
/**
|
|
161
210
|
* Sends a POST request to the specified URL.
|
|
162
|
-
* @param
|
|
163
|
-
* @param
|
|
164
|
-
* @param
|
|
165
|
-
* @returns
|
|
211
|
+
* @param url - The URL to send the request to.
|
|
212
|
+
* @param data - The data to send in the request.
|
|
213
|
+
* @param headers - The headers for the request.
|
|
214
|
+
* @returns The response from the POST request.
|
|
166
215
|
*/
|
|
167
|
-
postRequest(url: string, data:
|
|
216
|
+
postRequest(url: string, data: any, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
|
168
217
|
/**
|
|
169
218
|
* Sends a GET request to the specified URL.
|
|
170
|
-
* @param
|
|
171
|
-
* @param
|
|
172
|
-
* @returns
|
|
219
|
+
* @param url - The URL to send the request to.
|
|
220
|
+
* @param headers - The headers for the request.
|
|
221
|
+
* @returns The response from the GET request.
|
|
173
222
|
*/
|
|
174
223
|
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
|
175
224
|
/**
|
|
176
225
|
* Monitors the status of a crawl job until completion or failure.
|
|
177
|
-
* @param
|
|
178
|
-
* @param
|
|
179
|
-
* @param
|
|
180
|
-
* @
|
|
226
|
+
* @param id - The ID of the crawl operation.
|
|
227
|
+
* @param headers - The headers for the request.
|
|
228
|
+
* @param checkInterval - Interval in seconds for job status checks.
|
|
229
|
+
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
|
230
|
+
* @returns The final job status or data.
|
|
181
231
|
*/
|
|
182
|
-
monitorJobStatus(
|
|
232
|
+
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse>;
|
|
183
233
|
/**
|
|
184
234
|
* Handles errors from API responses.
|
|
185
235
|
* @param {AxiosResponse} response - The response from the API.
|
|
@@ -187,3 +237,23 @@ export default class FirecrawlApp {
|
|
|
187
237
|
*/
|
|
188
238
|
handleError(response: AxiosResponse, action: string): void;
|
|
189
239
|
}
|
|
240
|
+
interface CrawlWatcherEvents {
|
|
241
|
+
document: CustomEvent<FirecrawlDocument>;
|
|
242
|
+
done: CustomEvent<{
|
|
243
|
+
status: CrawlStatusResponse["status"];
|
|
244
|
+
data: FirecrawlDocument[];
|
|
245
|
+
}>;
|
|
246
|
+
error: CustomEvent<{
|
|
247
|
+
status: CrawlStatusResponse["status"];
|
|
248
|
+
data: FirecrawlDocument[];
|
|
249
|
+
error: string;
|
|
250
|
+
}>;
|
|
251
|
+
}
|
|
252
|
+
export declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
253
|
+
private ws;
|
|
254
|
+
data: FirecrawlDocument[];
|
|
255
|
+
status: CrawlStatusResponse["status"];
|
|
256
|
+
constructor(id: string, app: FirecrawlApp);
|
|
257
|
+
close(): void;
|
|
258
|
+
}
|
|
259
|
+
export {};
|
package/build/index.js
DELETED
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
|
-
import axios from "axios";
|
|
11
|
-
import { z } from "zod";
|
|
12
|
-
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
13
|
-
/**
|
|
14
|
-
* Main class for interacting with the Firecrawl API.
|
|
15
|
-
*/
|
|
16
|
-
export default class FirecrawlApp {
|
|
17
|
-
/**
|
|
18
|
-
* Initializes a new instance of the FirecrawlApp class.
|
|
19
|
-
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
20
|
-
*/
|
|
21
|
-
constructor({ apiKey = null, apiUrl = null }) {
|
|
22
|
-
this.apiKey = apiKey || "";
|
|
23
|
-
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
24
|
-
if (!this.apiKey) {
|
|
25
|
-
throw new Error("No API key provided");
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
/**
|
|
29
|
-
* Scrapes a URL using the Firecrawl API.
|
|
30
|
-
* @param {string} url - The URL to scrape.
|
|
31
|
-
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
32
|
-
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
33
|
-
*/
|
|
34
|
-
scrapeUrl(url, params = null) {
|
|
35
|
-
var _a;
|
|
36
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
37
|
-
const headers = {
|
|
38
|
-
"Content-Type": "application/json",
|
|
39
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
40
|
-
};
|
|
41
|
-
let jsonData = Object.assign({ url }, params);
|
|
42
|
-
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
|
|
43
|
-
let schema = params.extractorOptions.extractionSchema;
|
|
44
|
-
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
45
|
-
if (schema instanceof z.ZodSchema) {
|
|
46
|
-
schema = zodToJsonSchema(schema);
|
|
47
|
-
}
|
|
48
|
-
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
|
49
|
-
}
|
|
50
|
-
try {
|
|
51
|
-
const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
|
52
|
-
if (response.status === 200) {
|
|
53
|
-
const responseData = response.data;
|
|
54
|
-
if (responseData.success) {
|
|
55
|
-
return responseData;
|
|
56
|
-
}
|
|
57
|
-
else {
|
|
58
|
-
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
else {
|
|
62
|
-
this.handleError(response, "scrape URL");
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
catch (error) {
|
|
66
|
-
throw new Error(error.message);
|
|
67
|
-
}
|
|
68
|
-
return { success: false, error: "Internal server error." };
|
|
69
|
-
});
|
|
70
|
-
}
|
|
71
|
-
/**
|
|
72
|
-
* Searches for a query using the Firecrawl API.
|
|
73
|
-
* @param {string} query - The query to search for.
|
|
74
|
-
* @param {Params | null} params - Additional parameters for the search request.
|
|
75
|
-
* @returns {Promise<SearchResponse>} The response from the search operation.
|
|
76
|
-
*/
|
|
77
|
-
search(query, params = null) {
|
|
78
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
79
|
-
const headers = {
|
|
80
|
-
"Content-Type": "application/json",
|
|
81
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
82
|
-
};
|
|
83
|
-
let jsonData = { query };
|
|
84
|
-
if (params) {
|
|
85
|
-
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
86
|
-
}
|
|
87
|
-
try {
|
|
88
|
-
const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
89
|
-
if (response.status === 200) {
|
|
90
|
-
const responseData = response.data;
|
|
91
|
-
if (responseData.success) {
|
|
92
|
-
return responseData;
|
|
93
|
-
}
|
|
94
|
-
else {
|
|
95
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
else {
|
|
99
|
-
this.handleError(response, "search");
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
catch (error) {
|
|
103
|
-
throw new Error(error.message);
|
|
104
|
-
}
|
|
105
|
-
return { success: false, error: "Internal server error." };
|
|
106
|
-
});
|
|
107
|
-
}
|
|
108
|
-
/**
|
|
109
|
-
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
110
|
-
* @param {string} url - The URL to crawl.
|
|
111
|
-
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
112
|
-
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
113
|
-
* @param {number} pollInterval - Time in seconds for job status checks.
|
|
114
|
-
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
115
|
-
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
116
|
-
*/
|
|
117
|
-
crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
118
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
119
|
-
const headers = this.prepareHeaders(idempotencyKey);
|
|
120
|
-
let jsonData = { url };
|
|
121
|
-
if (params) {
|
|
122
|
-
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
123
|
-
}
|
|
124
|
-
try {
|
|
125
|
-
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
|
126
|
-
if (response.status === 200) {
|
|
127
|
-
const jobId = response.data.jobId;
|
|
128
|
-
if (waitUntilDone) {
|
|
129
|
-
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
130
|
-
}
|
|
131
|
-
else {
|
|
132
|
-
return { success: true, jobId };
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
else {
|
|
136
|
-
this.handleError(response, "start crawl job");
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
catch (error) {
|
|
140
|
-
console.log(error);
|
|
141
|
-
throw new Error(error.message);
|
|
142
|
-
}
|
|
143
|
-
return { success: false, error: "Internal server error." };
|
|
144
|
-
});
|
|
145
|
-
}
|
|
146
|
-
/**
|
|
147
|
-
* Checks the status of a crawl job using the Firecrawl API.
|
|
148
|
-
* @param {string} jobId - The job ID of the crawl operation.
|
|
149
|
-
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
150
|
-
*/
|
|
151
|
-
checkCrawlStatus(jobId) {
|
|
152
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
153
|
-
const headers = this.prepareHeaders();
|
|
154
|
-
try {
|
|
155
|
-
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
156
|
-
if (response.status === 200) {
|
|
157
|
-
return {
|
|
158
|
-
success: true,
|
|
159
|
-
status: response.data.status,
|
|
160
|
-
data: response.data.data,
|
|
161
|
-
partial_data: !response.data.data
|
|
162
|
-
? response.data.partial_data
|
|
163
|
-
: undefined,
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
|
-
else {
|
|
167
|
-
this.handleError(response, "check crawl status");
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
catch (error) {
|
|
171
|
-
throw new Error(error.message);
|
|
172
|
-
}
|
|
173
|
-
return {
|
|
174
|
-
success: false,
|
|
175
|
-
status: "unknown",
|
|
176
|
-
error: "Internal server error.",
|
|
177
|
-
};
|
|
178
|
-
});
|
|
179
|
-
}
|
|
180
|
-
/**
|
|
181
|
-
* Prepares the headers for an API request.
|
|
182
|
-
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
183
|
-
*/
|
|
184
|
-
prepareHeaders(idempotencyKey) {
|
|
185
|
-
return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
|
|
186
|
-
}
|
|
187
|
-
/**
|
|
188
|
-
* Sends a POST request to the specified URL.
|
|
189
|
-
* @param {string} url - The URL to send the request to.
|
|
190
|
-
* @param {Params} data - The data to send in the request.
|
|
191
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
192
|
-
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
193
|
-
*/
|
|
194
|
-
postRequest(url, data, headers) {
|
|
195
|
-
return axios.post(url, data, { headers });
|
|
196
|
-
}
|
|
197
|
-
/**
|
|
198
|
-
* Sends a GET request to the specified URL.
|
|
199
|
-
* @param {string} url - The URL to send the request to.
|
|
200
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
201
|
-
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
202
|
-
*/
|
|
203
|
-
getRequest(url, headers) {
|
|
204
|
-
return axios.get(url, { headers });
|
|
205
|
-
}
|
|
206
|
-
/**
|
|
207
|
-
* Monitors the status of a crawl job until completion or failure.
|
|
208
|
-
* @param {string} jobId - The job ID of the crawl operation.
|
|
209
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
210
|
-
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
211
|
-
* @returns {Promise<any>} The final job status or data.
|
|
212
|
-
*/
|
|
213
|
-
monitorJobStatus(jobId, headers, checkInterval) {
|
|
214
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
215
|
-
while (true) {
|
|
216
|
-
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
217
|
-
if (statusResponse.status === 200) {
|
|
218
|
-
const statusData = statusResponse.data;
|
|
219
|
-
if (statusData.status === "completed") {
|
|
220
|
-
if ("data" in statusData) {
|
|
221
|
-
return statusData.data;
|
|
222
|
-
}
|
|
223
|
-
else {
|
|
224
|
-
throw new Error("Crawl job completed but no data was returned");
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
|
228
|
-
if (checkInterval < 2) {
|
|
229
|
-
checkInterval = 2;
|
|
230
|
-
}
|
|
231
|
-
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
232
|
-
}
|
|
233
|
-
else {
|
|
234
|
-
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
else {
|
|
238
|
-
this.handleError(statusResponse, "check crawl status");
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
});
|
|
242
|
-
}
|
|
243
|
-
/**
|
|
244
|
-
* Handles errors from API responses.
|
|
245
|
-
* @param {AxiosResponse} response - The response from the API.
|
|
246
|
-
* @param {string} action - The action being performed when the error occurred.
|
|
247
|
-
*/
|
|
248
|
-
handleError(response, action) {
|
|
249
|
-
if ([402, 408, 409, 500].includes(response.status)) {
|
|
250
|
-
const errorMessage = response.data.error || "Unknown error occurred";
|
|
251
|
-
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
|
252
|
-
}
|
|
253
|
-
else {
|
|
254
|
-
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
}
|
package/build_and_publish.sh
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
function build_and_publish {
|
|
4
|
-
PACKAGE_NAME=$1
|
|
5
|
-
|
|
6
|
-
# Replace placeholder with the package name in package.json
|
|
7
|
-
jq --arg name "$PACKAGE_NAME" '.name = $name' package.json > temp.json && mv temp.json package.json
|
|
8
|
-
|
|
9
|
-
# Debug: show modified state
|
|
10
|
-
echo "Modified package.json for $PACKAGE_NAME:"
|
|
11
|
-
cat package.json
|
|
12
|
-
|
|
13
|
-
# Publish the package using npm
|
|
14
|
-
npm publish
|
|
15
|
-
|
|
16
|
-
# Check if publish was successful
|
|
17
|
-
if [ $? -ne 0 ]; then
|
|
18
|
-
echo "Publish failed for $PACKAGE_NAME"
|
|
19
|
-
exit 1
|
|
20
|
-
fi
|
|
21
|
-
|
|
22
|
-
# Revert the changes to the original placeholder in package.json
|
|
23
|
-
jq '.name = "PLACEHOLDER_NAME"' package.json > temp.json && mv temp.json package.json
|
|
24
|
-
|
|
25
|
-
# Debug: show reverted state
|
|
26
|
-
echo "Reverted package.json to placeholder:"
|
|
27
|
-
cat package.json
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
# Build and publish the first package to npm
|
|
31
|
-
build_and_publish "@mendable/firecrawl-js"
|
|
32
|
-
|
|
33
|
-
# Build and publish the second package to npm
|
|
34
|
-
build_and_publish "firecrawl"
|