@mendable/firecrawl-js 1.2.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +410 -0
- package/dist/index.d.cts +264 -0
- package/{types → dist}/index.d.ts +38 -34
- package/dist/index.js +375 -0
- package/package.json +12 -14
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +111 -88
- package/src/index.ts +62 -43
- package/tsconfig.json +19 -105
- package/tsup.config.ts +9 -0
- package/build/cjs/index.js +0 -354
- package/build/cjs/package.json +0 -1
- package/build/esm/index.js +0 -346
- package/build/esm/package.json +0 -1
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { TypedEventTarget } from
|
|
1
|
+
import { AxiosRequestHeaders, AxiosResponse } from 'axios';
|
|
2
|
+
import { ZodSchema, infer } from 'zod';
|
|
3
|
+
import { TypedEventTarget } from 'typescript-event-target';
|
|
4
|
+
|
|
4
5
|
/**
|
|
5
6
|
* Configuration interface for FirecrawlApp.
|
|
6
7
|
* @param apiKey - Optional API key for authentication.
|
|
7
8
|
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
|
8
9
|
*/
|
|
9
|
-
|
|
10
|
+
interface FirecrawlAppConfig {
|
|
10
11
|
apiKey?: string | null;
|
|
11
12
|
apiUrl?: string | null;
|
|
12
13
|
}
|
|
@@ -14,7 +15,7 @@ export interface FirecrawlAppConfig {
|
|
|
14
15
|
* Metadata for a Firecrawl document.
|
|
15
16
|
* Includes various optional properties for document metadata.
|
|
16
17
|
*/
|
|
17
|
-
|
|
18
|
+
interface FirecrawlDocumentMetadata {
|
|
18
19
|
title?: string;
|
|
19
20
|
description?: string;
|
|
20
21
|
language?: string;
|
|
@@ -53,13 +54,13 @@ export interface FirecrawlDocumentMetadata {
|
|
|
53
54
|
* Document interface for Firecrawl.
|
|
54
55
|
* Represents a document retrieved or processed by Firecrawl.
|
|
55
56
|
*/
|
|
56
|
-
|
|
57
|
+
interface FirecrawlDocument<T> {
|
|
57
58
|
url?: string;
|
|
58
59
|
markdown?: string;
|
|
59
60
|
html?: string;
|
|
60
61
|
rawHtml?: string;
|
|
61
62
|
links?: string[];
|
|
62
|
-
extract?:
|
|
63
|
+
extract?: T;
|
|
63
64
|
screenshot?: string;
|
|
64
65
|
metadata?: FirecrawlDocumentMetadata;
|
|
65
66
|
}
|
|
@@ -67,25 +68,27 @@ export interface FirecrawlDocument {
|
|
|
67
68
|
* Parameters for scraping operations.
|
|
68
69
|
* Defines the options and configurations available for scraping web content.
|
|
69
70
|
*/
|
|
70
|
-
|
|
71
|
+
interface CrawlScrapeOptions {
    /**
     * Output formats to request for each scraped page.
     *
     * "screenshot@fullPage" is the correctly spelled full-page screenshot
     * format. "full@scrennshot" is a misspelling that shipped in earlier
     * typings; it is kept in the union only so existing callers keep
     * compiling and should not be used in new code.
     */
    formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "full@scrennshot")[];
    /** Optional header name/value pairs. */
    headers?: Record<string, string>;
    includeTags?: string[];
    excludeTags?: string[];
    onlyMainContent?: boolean;
    // NOTE(review): units of waitFor/timeout are not visible here — confirm against the API reference.
    waitFor?: number;
    timeout?: number;
}
|
|
80
|
+
interface ScrapeParams<LLMSchema extends ZodSchema> extends CrawlScrapeOptions {
|
|
76
81
|
extract?: {
|
|
77
82
|
prompt?: string;
|
|
78
|
-
schema?:
|
|
83
|
+
schema?: LLMSchema;
|
|
79
84
|
systemPrompt?: string;
|
|
80
85
|
};
|
|
81
|
-
waitFor?: number;
|
|
82
|
-
timeout?: number;
|
|
83
86
|
}
|
|
84
87
|
/**
|
|
85
88
|
* Response interface for scraping operations.
|
|
86
89
|
* Defines the structure of the response received after a scraping operation.
|
|
87
90
|
*/
|
|
88
|
-
|
|
91
|
+
interface ScrapeResponse<LLMResult> extends FirecrawlDocument<LLMResult> {
|
|
89
92
|
success: true;
|
|
90
93
|
warning?: string;
|
|
91
94
|
error?: string;
|
|
@@ -94,7 +97,7 @@ export interface ScrapeResponse extends FirecrawlDocument {
|
|
|
94
97
|
* Parameters for crawling operations.
|
|
95
98
|
* Includes options for both scraping and mapping during a crawl.
|
|
96
99
|
*/
|
|
97
|
-
|
|
100
|
+
interface CrawlParams {
|
|
98
101
|
includePaths?: string[];
|
|
99
102
|
excludePaths?: string[];
|
|
100
103
|
maxDepth?: number;
|
|
@@ -102,14 +105,14 @@ export interface CrawlParams {
|
|
|
102
105
|
allowBackwardLinks?: boolean;
|
|
103
106
|
allowExternalLinks?: boolean;
|
|
104
107
|
ignoreSitemap?: boolean;
|
|
105
|
-
scrapeOptions?:
|
|
108
|
+
scrapeOptions?: CrawlScrapeOptions;
|
|
106
109
|
webhook?: string;
|
|
107
110
|
}
|
|
108
111
|
/**
|
|
109
112
|
* Response interface for crawling operations.
|
|
110
113
|
* Defines the structure of the response received after initiating a crawl.
|
|
111
114
|
*/
|
|
112
|
-
|
|
115
|
+
interface CrawlResponse {
|
|
113
116
|
id?: string;
|
|
114
117
|
url?: string;
|
|
115
118
|
success: true;
|
|
@@ -119,22 +122,21 @@ export interface CrawlResponse {
|
|
|
119
122
|
* Response interface for job status checks.
|
|
120
123
|
* Provides detailed status of a crawl job including progress and results.
|
|
121
124
|
*/
|
|
122
|
-
|
|
125
|
+
interface CrawlStatusResponse {
|
|
123
126
|
success: true;
|
|
124
|
-
|
|
127
|
+
status: "scraping" | "completed" | "failed" | "cancelled";
|
|
125
128
|
completed: number;
|
|
129
|
+
total: number;
|
|
126
130
|
creditsUsed: number;
|
|
127
131
|
expiresAt: Date;
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
data?: FirecrawlDocument[];
|
|
131
|
-
error?: string;
|
|
132
|
+
next?: string;
|
|
133
|
+
data: FirecrawlDocument<undefined>[];
|
|
132
134
|
}
|
|
133
135
|
/**
|
|
134
136
|
* Parameters for mapping operations.
|
|
135
137
|
* Defines options for mapping URLs during a crawl.
|
|
136
138
|
*/
|
|
137
|
-
|
|
139
|
+
interface MapParams {
|
|
138
140
|
search?: string;
|
|
139
141
|
ignoreSitemap?: boolean;
|
|
140
142
|
includeSubdomains?: boolean;
|
|
@@ -144,7 +146,7 @@ export interface MapParams {
|
|
|
144
146
|
* Response interface for mapping operations.
|
|
145
147
|
* Defines the structure of the response received after a mapping operation.
|
|
146
148
|
*/
|
|
147
|
-
|
|
149
|
+
interface MapResponse {
|
|
148
150
|
success: true;
|
|
149
151
|
links?: string[];
|
|
150
152
|
error?: string;
|
|
@@ -153,7 +155,7 @@ export interface MapResponse {
|
|
|
153
155
|
* Error response interface.
|
|
154
156
|
* Defines the structure of the response received when an error occurs.
|
|
155
157
|
*/
|
|
156
|
-
|
|
158
|
+
interface ErrorResponse {
|
|
157
159
|
success: false;
|
|
158
160
|
error: string;
|
|
159
161
|
}
|
|
@@ -161,7 +163,7 @@ export interface ErrorResponse {
|
|
|
161
163
|
* Main class for interacting with the Firecrawl API.
|
|
162
164
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
163
165
|
*/
|
|
164
|
-
|
|
166
|
+
declare class FirecrawlApp {
|
|
165
167
|
apiKey: string;
|
|
166
168
|
apiUrl: string;
|
|
167
169
|
/**
|
|
@@ -175,7 +177,7 @@ export default class FirecrawlApp {
|
|
|
175
177
|
* @param params - Additional parameters for the scrape request.
|
|
176
178
|
* @returns The response from the scrape operation.
|
|
177
179
|
*/
|
|
178
|
-
scrapeUrl(url: string, params?: ScrapeParams): Promise<ScrapeResponse | ErrorResponse>;
|
|
180
|
+
scrapeUrl<T extends ZodSchema>(url: string, params?: ScrapeParams<T>): Promise<ScrapeResponse<infer<T>> | ErrorResponse>;
|
|
179
181
|
/**
|
|
180
182
|
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
|
181
183
|
* @param query - The search query string.
|
|
@@ -196,9 +198,10 @@ export default class FirecrawlApp {
|
|
|
196
198
|
/**
|
|
197
199
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
198
200
|
* @param id - The ID of the crawl operation.
|
|
201
|
+
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
199
202
|
* @returns The response containing the job status.
|
|
200
203
|
*/
|
|
201
|
-
checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
204
|
+
checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
202
205
|
crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
|
203
206
|
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
|
|
204
207
|
/**
|
|
@@ -230,7 +233,7 @@ export default class FirecrawlApp {
|
|
|
230
233
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
|
231
234
|
* @returns The final job status or data.
|
|
232
235
|
*/
|
|
233
|
-
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse>;
|
|
236
|
+
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
234
237
|
/**
|
|
235
238
|
* Handles errors from API responses.
|
|
236
239
|
* @param {AxiosResponse} response - The response from the API.
|
|
@@ -239,22 +242,23 @@ export default class FirecrawlApp {
|
|
|
239
242
|
handleError(response: AxiosResponse, action: string): void;
|
|
240
243
|
}
|
|
241
244
|
interface CrawlWatcherEvents {
|
|
242
|
-
document: CustomEvent<FirecrawlDocument
|
|
245
|
+
document: CustomEvent<FirecrawlDocument<undefined>>;
|
|
243
246
|
done: CustomEvent<{
|
|
244
247
|
status: CrawlStatusResponse["status"];
|
|
245
|
-
data: FirecrawlDocument[];
|
|
248
|
+
data: FirecrawlDocument<undefined>[];
|
|
246
249
|
}>;
|
|
247
250
|
error: CustomEvent<{
|
|
248
251
|
status: CrawlStatusResponse["status"];
|
|
249
|
-
data: FirecrawlDocument[];
|
|
252
|
+
data: FirecrawlDocument<undefined>[];
|
|
250
253
|
error: string;
|
|
251
254
|
}>;
|
|
252
255
|
}
|
|
253
|
-
|
|
256
|
+
declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
254
257
|
private ws;
|
|
255
|
-
data: FirecrawlDocument[];
|
|
258
|
+
data: FirecrawlDocument<undefined>[];
|
|
256
259
|
status: CrawlStatusResponse["status"];
|
|
257
260
|
constructor(id: string, app: FirecrawlApp);
|
|
258
261
|
close(): void;
|
|
259
262
|
}
|
|
260
|
-
|
|
263
|
+
|
|
264
|
+
export { type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, FirecrawlApp as default };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
// src/index.ts
|
|
2
|
+
import axios from "axios";
|
|
3
|
+
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
|
+
import { WebSocket } from "isows";
|
|
5
|
+
import { TypedEventTarget } from "typescript-event-target";
|
|
6
|
+
/**
 * Builds the Error thrown for a failed HTTP request.
 * When the rejected request carries a response body with an `error` field,
 * the status code, server message and optional `details` are included;
 * otherwise only the original message is kept.
 */
function buildRequestError(error) {
  const payload = error.response?.data;
  if (payload?.error) {
    const detailSuffix = payload.details ? ` - ${JSON.stringify(payload.details)}` : "";
    return new Error(`Request failed with status code ${error.response.status}. Error: ${payload.error} ${detailSuffix}`);
  }
  return new Error(error.message);
}

/**
 * Follows `next` links starting from `firstPage` and concatenates every
 * page's `data` array. Returns the merged documents and the last page fetched.
 */
async function collectPaginatedData(app, firstPage, headers) {
  let documents = firstPage.data ?? [];
  let lastPage = firstPage;
  // Test the value, not the key: a page with `next: null` is the final page.
  while (lastPage.next) {
    lastPage = (await app.getRequest(lastPage.next, headers)).data;
    documents = documents.concat(lastPage.data ?? []);
  }
  return { documents, lastPage };
}

var FirecrawlApp = class {
  apiKey;
  apiUrl;
  /**
   * Initializes a new instance of the FirecrawlApp class.
   * @param config - Configuration options for the FirecrawlApp instance.
   *   `apiKey` must be a string; `apiUrl` falls back to 'https://api.firecrawl.dev'.
   * @throws Error("No API key provided") when `apiKey` is missing or not a
   *   string, including when the constructor is called with no argument.
   */
  constructor({ apiKey = null, apiUrl = null } = {}) {
    if (typeof apiKey !== "string") {
      throw new Error("No API key provided");
    }
    this.apiKey = apiKey;
    this.apiUrl = apiUrl || "https://api.firecrawl.dev";
  }
  /**
   * Scrapes a URL using the Firecrawl API.
   * @param url - The URL to scrape.
   * @param params - Additional parameters for the scrape request. When
   *   `params.extract.schema` is set it is passed through `zodToJsonSchema`
   *   before being sent.
   * @returns The response from the scrape operation.
   * @throws Error when the request fails or the API reports `success: false`.
   */
  async scrapeUrl(url, params) {
    const headers = this.prepareHeaders();
    let jsonData = { url, ...params };
    if (jsonData?.extract?.schema) {
      let schema = jsonData.extract.schema;
      try {
        schema = zodToJsonSchema(schema);
      } catch (error) {
        // Best effort: if conversion throws, the schema is sent unchanged.
      }
      jsonData = {
        ...jsonData,
        extract: {
          ...jsonData.extract,
          schema
        }
      };
    }
    try {
      const response = await this.postRequest(
        this.apiUrl + `/v1/scrape`,
        jsonData,
        headers
      );
      if (response.status === 200) {
        const responseData = response.data;
        if (responseData.success) {
          return {
            success: true,
            warning: responseData.warning,
            error: responseData.error,
            ...responseData.data
          };
        } else {
          throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
        }
      } else {
        this.handleError(response, "scrape URL");
      }
    } catch (error) {
      throw buildRequestError(error);
    }
    return { success: false, error: "Internal server error." };
  }
  /**
   * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
   * @param query - The search query string.
   * @param params - Additional parameters for the search.
   * @returns Throws an error advising to use version 0 of the API.
   */
  async search(query, params) {
    throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
  }
  /**
   * Initiates a crawl job for a URL using the Firecrawl API and waits for it
   * to finish by polling its status.
   * @param url - The URL to crawl.
   * @param params - Additional parameters for the crawl request.
   * @param pollInterval - Time in seconds for job status checks.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns The response from the crawl operation.
   * @throws Error when the crawl cannot be started.
   */
  async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
    const headers = this.prepareHeaders(idempotencyKey);
    const jsonData = { url, ...params };
    try {
      const response = await this.postRequest(
        this.apiUrl + `/v1/crawl`,
        jsonData,
        headers
      );
      if (response.status === 200) {
        const id = response.data.id;
        return this.monitorJobStatus(id, headers, pollInterval);
      } else {
        this.handleError(response, "start crawl job");
      }
    } catch (error) {
      throw buildRequestError(error);
    }
    return { success: false, error: "Internal server error." };
  }
  /**
   * Starts a crawl job and returns the API's response body without waiting
   * for the crawl to finish.
   * @param url - The URL to crawl.
   * @param params - Additional parameters for the crawl request.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns The body of the crawl-start response.
   * @throws Error when the crawl cannot be started.
   */
  async asyncCrawlUrl(url, params, idempotencyKey) {
    const headers = this.prepareHeaders(idempotencyKey);
    const jsonData = { url, ...params };
    try {
      const response = await this.postRequest(
        this.apiUrl + `/v1/crawl`,
        jsonData,
        headers
      );
      if (response.status === 200) {
        return response.data;
      } else {
        this.handleError(response, "start crawl job");
      }
    } catch (error) {
      throw buildRequestError(error);
    }
    return { success: false, error: "Internal server error." };
  }
  /**
   * Checks the status of a crawl job using the Firecrawl API.
   * @param id - The ID of the crawl operation.
   * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
   * @returns The response containing the job status. When every page was
   *   collected, `next` is `undefined` because nothing is left to fetch.
   * @throws Error when no id is given or the request fails.
   */
  async checkCrawlStatus(id, getAllData = false) {
    if (!id) {
      throw new Error("No crawl ID provided");
    }
    const headers = this.prepareHeaders();
    try {
      const response = await this.getRequest(
        `${this.apiUrl}/v1/crawl/${id}`,
        headers
      );
      if (response.status === 200) {
        const firstPage = response.data;
        let allData = firstPage.data;
        let next = firstPage.next;
        if (getAllData && firstPage.status === "completed" && "data" in firstPage) {
          const collected = await collectPaginatedData(this, firstPage, headers);
          allData = collected.documents;
          // All pages are merged into `data`, so there is no further page to point at.
          next = undefined;
        }
        return {
          success: firstPage.success,
          status: firstPage.status,
          total: firstPage.total,
          completed: firstPage.completed,
          creditsUsed: firstPage.creditsUsed,
          expiresAt: new Date(firstPage.expiresAt),
          next,
          data: allData,
          error: firstPage.error
        };
      } else {
        this.handleError(response, "check crawl status");
      }
    } catch (error) {
      throw buildRequestError(error);
    }
    return { success: false, error: "Internal server error." };
  }
  /**
   * Starts a crawl job and returns a CrawlWatcher for it.
   * @param url - The URL to crawl.
   * @param params - Additional parameters for the crawl request.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns A CrawlWatcher constructed with the new job's id.
   * @throws Error("Crawl job failed to start") when the start response has no
   *   truthy `success` and `id`.
   */
  async crawlUrlAndWatch(url, params, idempotencyKey) {
    const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
    if (crawl.success && crawl.id) {
      return new CrawlWatcher(crawl.id, this);
    }
    throw new Error("Crawl job failed to start");
  }
  /**
   * Sends a map request for a URL to the Firecrawl API.
   * @param url - The URL to map.
   * @param params - Additional parameters for the map request.
   * @returns The body of the map response.
   * @throws Error when the request fails.
   */
  async mapUrl(url, params) {
    const headers = this.prepareHeaders();
    const jsonData = { url, ...params };
    try {
      const response = await this.postRequest(
        this.apiUrl + `/v1/map`,
        jsonData,
        headers
      );
      if (response.status === 200) {
        return response.data;
      } else {
        this.handleError(response, "map");
      }
    } catch (error) {
      throw buildRequestError(error);
    }
    return { success: false, error: "Internal server error." };
  }
  /**
   * Prepares the headers for an API request.
   * @param idempotencyKey - Optional key to ensure idempotency.
   * @returns The prepared headers.
   */
  prepareHeaders(idempotencyKey) {
    return {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      ...idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}
    };
  }
  /**
   * Sends a POST request to the specified URL.
   * @param url - The URL to send the request to.
   * @param data - The data to send in the request.
   * @param headers - The headers for the request.
   * @returns The response from the POST request.
   */
  postRequest(url, data, headers) {
    return axios.post(url, data, { headers });
  }
  /**
   * Sends a GET request to the specified URL.
   * @param url - The URL to send the request to.
   * @param headers - The headers for the request.
   * @returns The response from the GET request.
   */
  getRequest(url, headers) {
    return axios.get(url, { headers });
  }
  /**
   * Monitors the status of a crawl job until completion or failure.
   * @param id - The ID of the crawl operation.
   * @param headers - The headers for the request.
   * @param checkInterval - Interval in seconds for job status checks; values
   *   below 2 are raised to 2.
   * @returns The final status page, with `data` holding the documents of
   *   every result page.
   * @throws Error when the job completes without data or reaches a status
   *   that is neither completed nor in progress.
   */
  async monitorJobStatus(id, headers, checkInterval) {
    const inProgress = ["active", "paused", "pending", "queued", "waiting", "scraping"];
    while (true) {
      const statusResponse = await this.getRequest(
        `${this.apiUrl}/v1/crawl/${id}`,
        headers
      );
      if (statusResponse.status === 200) {
        const statusData = statusResponse.data;
        if (statusData.status === "completed") {
          if ("data" in statusData) {
            const { documents, lastPage } = await collectPaginatedData(this, statusData, headers);
            lastPage.data = documents;
            return lastPage;
          } else {
            throw new Error("Crawl job completed but no data was returned");
          }
        } else if (inProgress.includes(statusData.status)) {
          checkInterval = Math.max(checkInterval, 2);
          await new Promise(
            (resolve) => setTimeout(resolve, checkInterval * 1e3)
          );
        } else {
          throw new Error(
            `Crawl job failed or was stopped. Status: ${statusData.status}`
          );
        }
      } else {
        this.handleError(statusResponse, "check crawl status");
      }
    }
  }
  /**
   * Handles errors from API responses. Always throws.
   * @param {AxiosResponse} response - The response from the API.
   * @param {string} action - The action being performed when the error occurred.
   * @throws Error describing the failed action and the status code.
   */
  handleError(response, action) {
    if ([402, 408, 409, 500].includes(response.status)) {
      const errorMessage = response.data.error || "Unknown error occurred";
      throw new Error(
        `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`
      );
    } else {
      throw new Error(
        `Unexpected error occurred while trying to ${action}. Status code: ${response.status}`
      );
    }
  }
};
|
|
304
|
+
/**
 * Watches a crawl job over a WebSocket and re-emits its progress as typed
 * events: "document" for each document, "done" when the crawl finishes and
 * "error" on failure.
 */
var CrawlWatcher = class extends TypedEventTarget {
  ws;
  data;
  status;
  /**
   * Opens the WebSocket for the crawl job and wires up the event handlers.
   * @param id - The ID of the crawl operation.
   * @param app - The FirecrawlApp whose `apiUrl` and `apiKey` are used for the connection.
   */
  constructor(id, app) {
    super();
    this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
    this.status = "scraping";
    this.data = [];
    // Marks the watcher as failed and emits an "error" event with the documents held so far.
    const emitError = (error) => {
      this.status = "failed";
      this.dispatchTypedEvent("error", new CustomEvent("error", {
        detail: {
          status: this.status,
          data: this.data,
          error
        }
      }));
    };
    // Parses a JSON payload; returns undefined for anything that is not a JSON object.
    const parseMessage = (raw) => {
      try {
        const parsed = JSON.parse(raw);
        return parsed !== null && typeof parsed === "object" ? parsed : void 0;
      } catch {
        return void 0;
      }
    };
    const messageHandler = (msg) => {
      if (msg.type === "done") {
        this.status = "completed";
        this.dispatchTypedEvent("done", new CustomEvent("done", {
          detail: {
            status: this.status,
            data: this.data
          }
        }));
      } else if (msg.type === "error") {
        emitError(msg.error);
      } else if (msg.type === "catchup") {
        this.status = msg.data.status;
        this.data.push(...msg.data.data ?? []);
        for (const doc of this.data) {
          this.dispatchTypedEvent("document", new CustomEvent("document", {
            detail: doc
          }));
        }
      } else if (msg.type === "document") {
        // NOTE(review): streamed documents are dispatched but not appended to
        // this.data, so "done"/"error" details only contain catch-up documents.
        // Confirm whether that is intended.
        this.dispatchTypedEvent("document", new CustomEvent("document", {
          detail: msg.data
        }));
      }
    };
    this.ws.onmessage = (ev) => {
      if (typeof ev.data !== "string") {
        this.ws.close();
        return;
      }
      const msg = parseMessage(ev.data);
      // Frames that are not JSON objects are skipped instead of throwing inside the handler.
      if (msg) {
        messageHandler(msg);
      }
    };
    this.ws.onclose = (ev) => {
      // The close reason is only treated as a message when it holds a JSON
      // object; an empty or plain-text reason is ignored rather than
      // crashing in JSON.parse.
      const msg = parseMessage(ev.reason);
      if (msg) {
        messageHandler(msg);
      }
    };
    this.ws.onerror = (_) => {
      emitError("WebSocket error");
    };
  }
  /** Closes the underlying WebSocket. */
  close() {
    this.ws.close();
  }
};
|
|
372
|
+
export {
|
|
373
|
+
CrawlWatcher,
|
|
374
|
+
FirecrawlApp as default
|
|
375
|
+
};
|
package/package.json
CHANGED
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
|
-
"main": "
|
|
6
|
-
"types": "
|
|
7
|
-
"type": "module",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
8
7
|
"exports": {
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
"import": {
|
|
14
|
-
"types": "./types/index.d.ts",
|
|
15
|
-
"default": "./build/esm/index.js"
|
|
8
|
+
"./package.json": "./package.json",
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"default": "./dist/index.cjs"
|
|
16
12
|
}
|
|
17
13
|
},
|
|
14
|
+
"type": "module",
|
|
18
15
|
"scripts": {
|
|
19
|
-
"build": "
|
|
16
|
+
"build": "tsup",
|
|
20
17
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
21
18
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
22
19
|
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
@@ -29,10 +26,8 @@
|
|
|
29
26
|
"license": "MIT",
|
|
30
27
|
"dependencies": {
|
|
31
28
|
"axios": "^1.6.8",
|
|
32
|
-
"dotenv": "^16.4.5",
|
|
33
29
|
"isows": "^1.0.4",
|
|
34
30
|
"typescript-event-target": "^1.1.1",
|
|
35
|
-
"uuid": "^9.0.1",
|
|
36
31
|
"zod": "^3.23.8",
|
|
37
32
|
"zod-to-json-schema": "^3.23.0"
|
|
38
33
|
},
|
|
@@ -41,6 +36,8 @@
|
|
|
41
36
|
},
|
|
42
37
|
"homepage": "https://github.com/mendableai/firecrawl#readme",
|
|
43
38
|
"devDependencies": {
|
|
39
|
+
"uuid": "^9.0.1",
|
|
40
|
+
"dotenv": "^16.4.5",
|
|
44
41
|
"@jest/globals": "^29.7.0",
|
|
45
42
|
"@types/axios": "^0.14.0",
|
|
46
43
|
"@types/dotenv": "^8.2.0",
|
|
@@ -50,6 +47,7 @@
|
|
|
50
47
|
"@types/uuid": "^9.0.8",
|
|
51
48
|
"jest": "^29.7.0",
|
|
52
49
|
"ts-jest": "^29.2.2",
|
|
50
|
+
"tsup": "^8.2.4",
|
|
53
51
|
"typescript": "^5.4.5"
|
|
54
52
|
},
|
|
55
53
|
"keywords": [
|