firecrawl 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +410 -0
- package/dist/index.d.cts +262 -0
- package/{types → dist}/index.d.ts +28 -25
- package/dist/index.js +375 -0
- package/package.json +12 -14
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +111 -88
- package/src/index.ts +49 -25
- package/tsconfig.json +19 -105
- package/tsup.config.ts +9 -0
- package/build/cjs/index.js +0 -347
- package/build/cjs/package.json +0 -1
- package/build/esm/index.js +0 -339
- package/build/esm/package.json +0 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse,
|
|
1
|
+
import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index';
|
|
2
2
|
import { v4 as uuidv4 } from 'uuid';
|
|
3
3
|
import dotenv from 'dotenv';
|
|
4
4
|
import { describe, test, expect } from '@jest/globals';
|
|
@@ -6,7 +6,7 @@ import { describe, test, expect } from '@jest/globals';
|
|
|
6
6
|
dotenv.config();
|
|
7
7
|
|
|
8
8
|
const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
9
|
-
const API_URL = "
|
|
9
|
+
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
|
10
10
|
|
|
11
11
|
describe('FirecrawlApp E2E Tests', () => {
|
|
12
12
|
test.concurrent('should throw error for no API key', async () => {
|
|
@@ -71,6 +71,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
71
71
|
expect(response.links?.length).toBeGreaterThan(0);
|
|
72
72
|
expect(response.links?.[0]).toContain("https://");
|
|
73
73
|
expect(response.metadata).not.toBeNull();
|
|
74
|
+
expect(response.metadata).not.toBeUndefined();
|
|
74
75
|
expect(response.metadata).toHaveProperty("title");
|
|
75
76
|
expect(response.metadata).toHaveProperty("description");
|
|
76
77
|
expect(response.metadata).toHaveProperty("keywords");
|
|
@@ -85,19 +86,21 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
85
86
|
expect(response.metadata).not.toHaveProperty("pageStatusCode");
|
|
86
87
|
expect(response.metadata).toHaveProperty("statusCode");
|
|
87
88
|
expect(response.metadata).not.toHaveProperty("pageError");
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
89
|
+
if (response.metadata !== undefined) {
|
|
90
|
+
expect(response.metadata.error).toBeUndefined();
|
|
91
|
+
expect(response.metadata.title).toBe("Roast My Website");
|
|
92
|
+
expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
|
|
93
|
+
expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
|
|
94
|
+
expect(response.metadata.robots).toBe("follow, index");
|
|
95
|
+
expect(response.metadata.ogTitle).toBe("Roast My Website");
|
|
96
|
+
expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
|
|
97
|
+
expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
|
|
98
|
+
expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
|
|
99
|
+
expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
|
|
100
|
+
expect(response.metadata.ogSiteName).toBe("Roast My Website");
|
|
101
|
+
expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
|
|
102
|
+
expect(response.metadata.statusCode).toBe(200);
|
|
103
|
+
}
|
|
101
104
|
}, 30000); // 30 seconds timeout
|
|
102
105
|
|
|
103
106
|
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
|
@@ -127,7 +130,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
127
130
|
|
|
128
131
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
129
132
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
130
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', {},
|
|
133
|
+
const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
|
|
131
134
|
expect(response).not.toBeNull();
|
|
132
135
|
expect(response).toHaveProperty("total");
|
|
133
136
|
expect(response.total).toBeGreaterThan(0);
|
|
@@ -138,21 +141,25 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
138
141
|
expect(response).toHaveProperty("status");
|
|
139
142
|
expect(response.status).toBe("completed");
|
|
140
143
|
expect(response).not.toHaveProperty("next"); // wait until done
|
|
141
|
-
expect(response.data
|
|
142
|
-
expect(response.data
|
|
143
|
-
expect(response.data
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
144
|
+
expect(response.data.length).toBeGreaterThan(0);
|
|
145
|
+
expect(response.data[0]).not.toBeNull();
|
|
146
|
+
expect(response.data[0]).not.toBeUndefined();
|
|
147
|
+
if (response.data[0]) {
|
|
148
|
+
expect(response.data[0]).toHaveProperty("markdown");
|
|
149
|
+
expect(response.data[0].markdown).toContain("_Roast_");
|
|
150
|
+
expect(response.data[0]).not.toHaveProperty('content'); // v0
|
|
151
|
+
expect(response.data[0]).not.toHaveProperty("html");
|
|
152
|
+
expect(response.data[0]).not.toHaveProperty("rawHtml");
|
|
153
|
+
expect(response.data[0]).not.toHaveProperty("screenshot");
|
|
154
|
+
expect(response.data[0]).not.toHaveProperty("links");
|
|
155
|
+
expect(response.data[0]).toHaveProperty("metadata");
|
|
156
|
+
expect(response.data[0].metadata).toHaveProperty("title");
|
|
157
|
+
expect(response.data[0].metadata).toHaveProperty("description");
|
|
158
|
+
expect(response.data[0].metadata).toHaveProperty("language");
|
|
159
|
+
expect(response.data[0].metadata).toHaveProperty("sourceURL");
|
|
160
|
+
expect(response.data[0].metadata).toHaveProperty("statusCode");
|
|
161
|
+
expect(response.data[0].metadata).not.toHaveProperty("error");
|
|
162
|
+
}
|
|
156
163
|
}, 60000); // 60 seconds timeout
|
|
157
164
|
|
|
158
165
|
test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
|
|
@@ -173,7 +180,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
173
180
|
onlyMainContent: true,
|
|
174
181
|
waitFor: 1000
|
|
175
182
|
}
|
|
176
|
-
} as CrawlParams,
|
|
183
|
+
} as CrawlParams, 30) as CrawlStatusResponse;
|
|
177
184
|
expect(response).not.toBeNull();
|
|
178
185
|
expect(response).toHaveProperty("total");
|
|
179
186
|
expect(response.total).toBeGreaterThan(0);
|
|
@@ -184,41 +191,45 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
184
191
|
expect(response).toHaveProperty("status");
|
|
185
192
|
expect(response.status).toBe("completed");
|
|
186
193
|
expect(response).not.toHaveProperty("next");
|
|
187
|
-
expect(response.data
|
|
188
|
-
expect(response.data
|
|
189
|
-
expect(response.data
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
194
|
+
expect(response.data.length).toBeGreaterThan(0);
|
|
195
|
+
expect(response.data[0]).not.toBeNull();
|
|
196
|
+
expect(response.data[0]).not.toBeUndefined();
|
|
197
|
+
if (response.data[0]) {
|
|
198
|
+
expect(response.data[0]).toHaveProperty("markdown");
|
|
199
|
+
expect(response.data[0].markdown).toContain("_Roast_");
|
|
200
|
+
expect(response.data[0]).not.toHaveProperty('content'); // v0
|
|
201
|
+
expect(response.data[0]).toHaveProperty("html");
|
|
202
|
+
expect(response.data[0].html).toContain("<h1");
|
|
203
|
+
expect(response.data[0]).toHaveProperty("rawHtml");
|
|
204
|
+
expect(response.data[0].rawHtml).toContain("<h1");
|
|
205
|
+
expect(response.data[0]).toHaveProperty("screenshot");
|
|
206
|
+
expect(response.data[0].screenshot).toContain("https://");
|
|
207
|
+
expect(response.data[0]).toHaveProperty("links");
|
|
208
|
+
expect(response.data[0].links).not.toBeNull();
|
|
209
|
+
expect(response.data[0].links?.length).toBeGreaterThan(0);
|
|
210
|
+
expect(response.data[0]).toHaveProperty("metadata");
|
|
211
|
+
expect(response.data[0].metadata).toHaveProperty("title");
|
|
212
|
+
expect(response.data[0].metadata).toHaveProperty("description");
|
|
213
|
+
expect(response.data[0].metadata).toHaveProperty("language");
|
|
214
|
+
expect(response.data[0].metadata).toHaveProperty("sourceURL");
|
|
215
|
+
expect(response.data[0].metadata).toHaveProperty("statusCode");
|
|
216
|
+
expect(response.data[0].metadata).not.toHaveProperty("error");
|
|
217
|
+
}
|
|
207
218
|
}, 60000); // 60 seconds timeout
|
|
208
219
|
|
|
209
220
|
test.concurrent('should handle idempotency key for crawl', async () => {
|
|
210
221
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
211
222
|
const uniqueIdempotencyKey = uuidv4();
|
|
212
|
-
const response = await app.
|
|
223
|
+
const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse;
|
|
213
224
|
expect(response).not.toBeNull();
|
|
214
225
|
expect(response.id).toBeDefined();
|
|
215
226
|
|
|
216
|
-
await expect(app.crawlUrl('https://roastmywebsite.ai', {},
|
|
227
|
+
await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
|
217
228
|
});
|
|
218
229
|
|
|
219
230
|
test.concurrent('should check crawl status', async () => {
|
|
220
231
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
221
|
-
const response = await app.
|
|
232
|
+
const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
|
|
222
233
|
expect(response).not.toBeNull();
|
|
223
234
|
expect(response.id).toBeDefined();
|
|
224
235
|
|
|
@@ -226,7 +237,8 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
226
237
|
const maxChecks = 15;
|
|
227
238
|
let checks = 0;
|
|
228
239
|
|
|
229
|
-
|
|
240
|
+
expect(statusResponse.success).toBe(true);
|
|
241
|
+
while ((statusResponse as any).status === 'scraping' && checks < maxChecks) {
|
|
230
242
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
|
231
243
|
expect(statusResponse).not.toHaveProperty("partial_data"); // v0
|
|
232
244
|
expect(statusResponse).not.toHaveProperty("current"); // v0
|
|
@@ -236,44 +248,55 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
236
248
|
expect(statusResponse).toHaveProperty("expiresAt");
|
|
237
249
|
expect(statusResponse).toHaveProperty("status");
|
|
238
250
|
expect(statusResponse).toHaveProperty("next");
|
|
239
|
-
expect(statusResponse.
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
251
|
+
expect(statusResponse.success).toBe(true);
|
|
252
|
+
if (statusResponse.success === true) {
|
|
253
|
+
expect(statusResponse.total).toBeGreaterThan(0);
|
|
254
|
+
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
|
|
255
|
+
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
|
|
256
|
+
expect(statusResponse.status).toBe("scraping");
|
|
257
|
+
expect(statusResponse.next).toContain("/v1/crawl/");
|
|
258
|
+
}
|
|
244
259
|
statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
|
|
260
|
+
expect(statusResponse.success).toBe(true);
|
|
245
261
|
checks++;
|
|
246
262
|
}
|
|
247
263
|
|
|
248
264
|
expect(statusResponse).not.toBeNull();
|
|
249
265
|
expect(statusResponse).toHaveProperty("total");
|
|
250
|
-
expect(statusResponse.
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
266
|
+
expect(statusResponse.success).toBe(true);
|
|
267
|
+
if (statusResponse.success === true) {
|
|
268
|
+
expect(statusResponse.total).toBeGreaterThan(0);
|
|
269
|
+
expect(statusResponse).toHaveProperty("creditsUsed");
|
|
270
|
+
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
|
|
271
|
+
expect(statusResponse).toHaveProperty("expiresAt");
|
|
272
|
+
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
|
|
273
|
+
expect(statusResponse).toHaveProperty("status");
|
|
274
|
+
expect(statusResponse.status).toBe("completed");
|
|
275
|
+
expect(statusResponse.data.length).toBeGreaterThan(0);
|
|
276
|
+
expect(statusResponse.data[0]).not.toBeNull();
|
|
277
|
+
expect(statusResponse.data[0]).not.toBeUndefined();
|
|
278
|
+
if (statusResponse.data[0]) {
|
|
279
|
+
expect(statusResponse.data[0]).toHaveProperty("markdown");
|
|
280
|
+
expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
|
|
281
|
+
expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
|
|
282
|
+
expect(statusResponse.data[0]).toHaveProperty("html");
|
|
283
|
+
expect(statusResponse.data[0].html).toContain("<div");
|
|
284
|
+
expect(statusResponse.data[0]).toHaveProperty("rawHtml");
|
|
285
|
+
expect(statusResponse.data[0].rawHtml).toContain("<div");
|
|
286
|
+
expect(statusResponse.data[0]).toHaveProperty("screenshot");
|
|
287
|
+
expect(statusResponse.data[0].screenshot).toContain("https://");
|
|
288
|
+
expect(statusResponse.data[0]).toHaveProperty("links");
|
|
289
|
+
expect(statusResponse.data[0].links).not.toBeNull();
|
|
290
|
+
expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
|
|
291
|
+
expect(statusResponse.data[0]).toHaveProperty("metadata");
|
|
292
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("title");
|
|
293
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("description");
|
|
294
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("language");
|
|
295
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
|
|
296
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
|
|
297
|
+
expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
|
|
298
|
+
}
|
|
299
|
+
}
|
|
277
300
|
}, 60000); // 60 seconds timeout
|
|
278
301
|
|
|
279
302
|
test.concurrent('should throw error for invalid API key on map', async () => {
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
|
-
import {
|
|
1
|
+
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
|
|
2
|
+
import type { ZodSchema } from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
4
|
import { WebSocket } from "isows";
|
|
5
5
|
import { TypedEventTarget } from "typescript-event-target";
|
|
@@ -81,7 +81,7 @@ export interface ScrapeParams {
|
|
|
81
81
|
onlyMainContent?: boolean;
|
|
82
82
|
extract?: {
|
|
83
83
|
prompt?: string;
|
|
84
|
-
schema?:
|
|
84
|
+
schema?: ZodSchema | any;
|
|
85
85
|
systemPrompt?: string;
|
|
86
86
|
};
|
|
87
87
|
waitFor?: number;
|
|
@@ -111,6 +111,7 @@ export interface CrawlParams {
|
|
|
111
111
|
allowExternalLinks?: boolean;
|
|
112
112
|
ignoreSitemap?: boolean;
|
|
113
113
|
scrapeOptions?: ScrapeParams;
|
|
114
|
+
webhook?: string;
|
|
114
115
|
}
|
|
115
116
|
|
|
116
117
|
/**
|
|
@@ -130,15 +131,14 @@ export interface CrawlResponse {
|
|
|
130
131
|
*/
|
|
131
132
|
export interface CrawlStatusResponse {
|
|
132
133
|
success: true;
|
|
133
|
-
|
|
134
|
+
status: "scraping" | "completed" | "failed" | "cancelled";
|
|
134
135
|
completed: number;
|
|
136
|
+
total: number;
|
|
135
137
|
creditsUsed: number;
|
|
136
138
|
expiresAt: Date;
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
error?: string;
|
|
141
|
-
}
|
|
139
|
+
next?: string;
|
|
140
|
+
data: FirecrawlDocument[];
|
|
141
|
+
};
|
|
142
142
|
|
|
143
143
|
/**
|
|
144
144
|
* Parameters for mapping operations.
|
|
@@ -183,7 +183,11 @@ export default class FirecrawlApp {
|
|
|
183
183
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
184
184
|
*/
|
|
185
185
|
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
|
186
|
-
|
|
186
|
+
if (typeof apiKey !== "string") {
|
|
187
|
+
throw new Error("No API key provided");
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
this.apiKey = apiKey;
|
|
187
191
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
188
192
|
}
|
|
189
193
|
|
|
@@ -328,9 +332,10 @@ export default class FirecrawlApp {
|
|
|
328
332
|
/**
|
|
329
333
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
330
334
|
* @param id - The ID of the crawl operation.
|
|
335
|
+
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
331
336
|
* @returns The response containing the job status.
|
|
332
337
|
*/
|
|
333
|
-
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
338
|
+
async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
334
339
|
if (!id) {
|
|
335
340
|
throw new Error("No crawl ID provided");
|
|
336
341
|
}
|
|
@@ -342,16 +347,28 @@ export default class FirecrawlApp {
|
|
|
342
347
|
headers
|
|
343
348
|
);
|
|
344
349
|
if (response.status === 200) {
|
|
350
|
+
let allData = response.data.data;
|
|
351
|
+
if (getAllData && response.data.status === "completed") {
|
|
352
|
+
let statusData = response.data
|
|
353
|
+
if ("data" in statusData) {
|
|
354
|
+
let data = statusData.data;
|
|
355
|
+
while ('next' in statusData) {
|
|
356
|
+
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
357
|
+
data = data.concat(statusData.data);
|
|
358
|
+
}
|
|
359
|
+
allData = data;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
345
362
|
return ({
|
|
346
|
-
success:
|
|
363
|
+
success: response.data.success,
|
|
347
364
|
status: response.data.status,
|
|
348
365
|
total: response.data.total,
|
|
349
366
|
completed: response.data.completed,
|
|
350
367
|
creditsUsed: response.data.creditsUsed,
|
|
351
368
|
expiresAt: new Date(response.data.expiresAt),
|
|
352
369
|
next: response.data.next,
|
|
353
|
-
data:
|
|
354
|
-
error: response.data.error
|
|
370
|
+
data: allData,
|
|
371
|
+
error: response.data.error,
|
|
355
372
|
})
|
|
356
373
|
} else {
|
|
357
374
|
this.handleError(response, "check crawl status");
|
|
@@ -451,22 +468,29 @@ export default class FirecrawlApp {
|
|
|
451
468
|
id: string,
|
|
452
469
|
headers: AxiosRequestHeaders,
|
|
453
470
|
checkInterval: number
|
|
454
|
-
): Promise<CrawlStatusResponse> {
|
|
471
|
+
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
455
472
|
while (true) {
|
|
456
|
-
|
|
473
|
+
let statusResponse: AxiosResponse = await this.getRequest(
|
|
457
474
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
458
475
|
headers
|
|
459
476
|
);
|
|
460
477
|
if (statusResponse.status === 200) {
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
478
|
+
let statusData = statusResponse.data;
|
|
479
|
+
if (statusData.status === "completed") {
|
|
480
|
+
if ("data" in statusData) {
|
|
481
|
+
let data = statusData.data;
|
|
482
|
+
while ('next' in statusData) {
|
|
483
|
+
statusResponse = await this.getRequest(statusData.next, headers);
|
|
484
|
+
statusData = statusResponse.data;
|
|
485
|
+
data = data.concat(statusData.data);
|
|
486
|
+
}
|
|
487
|
+
statusData.data = data;
|
|
488
|
+
return statusData;
|
|
489
|
+
} else {
|
|
490
|
+
throw new Error("Crawl job completed but no data was returned");
|
|
491
|
+
}
|
|
492
|
+
} else if (
|
|
493
|
+
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
|
|
470
494
|
) {
|
|
471
495
|
checkInterval = Math.max(checkInterval, 2);
|
|
472
496
|
await new Promise((resolve) =>
|
package/tsconfig.json
CHANGED
|
@@ -1,110 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"compilerOptions": {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
|
|
23
|
-
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
|
|
24
|
-
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
|
|
25
|
-
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
|
26
|
-
|
|
27
|
-
/* Modules */
|
|
28
|
-
"module": "commonjs", /* Specify what module code is generated. */
|
|
29
|
-
"rootDir": "./src", /* Specify the root folder within your source files. */
|
|
30
|
-
"moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
|
|
31
|
-
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
|
32
|
-
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
|
33
|
-
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
|
|
34
|
-
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
|
|
35
|
-
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
|
|
36
|
-
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
|
|
37
|
-
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
|
|
38
|
-
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
|
|
39
|
-
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
|
|
40
|
-
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
|
|
41
|
-
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
|
|
42
|
-
// "resolveJsonModule": true, /* Enable importing .json files. */
|
|
43
|
-
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
|
|
44
|
-
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
|
|
45
|
-
|
|
46
|
-
/* JavaScript Support */
|
|
47
|
-
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
|
|
48
|
-
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
|
|
49
|
-
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
|
|
50
|
-
|
|
51
|
-
/* Emit */
|
|
52
|
-
"declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
|
|
53
|
-
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
|
|
54
|
-
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
|
|
55
|
-
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
|
|
56
|
-
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
|
|
57
|
-
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
|
|
58
|
-
"outDir": "./build", /* Specify an output folder for all emitted files. */
|
|
59
|
-
// "removeComments": true, /* Disable emitting comments. */
|
|
60
|
-
// "noEmit": true, /* Disable emitting files from a compilation. */
|
|
61
|
-
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
|
|
62
|
-
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
|
|
63
|
-
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
|
|
64
|
-
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
|
|
65
|
-
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
|
|
66
|
-
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
|
|
67
|
-
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
|
|
68
|
-
// "newLine": "crlf", /* Set the newline character for emitting files. */
|
|
69
|
-
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
|
|
70
|
-
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
|
|
71
|
-
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
|
|
72
|
-
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
|
|
73
|
-
"declarationDir": "./types", /* Specify the output directory for generated declaration files. */
|
|
74
|
-
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
|
|
75
|
-
|
|
76
|
-
/* Interop Constraints */
|
|
77
|
-
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
|
|
78
|
-
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
|
|
79
|
-
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
|
|
80
|
-
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
|
|
81
|
-
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
|
|
82
|
-
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
|
|
83
|
-
|
|
84
|
-
/* Type Checking */
|
|
85
|
-
"strict": true, /* Enable all strict type-checking options. */
|
|
86
|
-
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
|
|
87
|
-
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
|
|
88
|
-
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
|
|
89
|
-
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
|
|
90
|
-
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
|
|
91
|
-
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
|
|
92
|
-
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
|
|
93
|
-
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
|
|
94
|
-
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
|
|
95
|
-
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
|
|
96
|
-
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
|
|
97
|
-
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
|
|
98
|
-
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
|
|
99
|
-
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
|
|
100
|
-
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
|
|
101
|
-
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
|
|
102
|
-
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
|
|
103
|
-
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
|
|
104
|
-
|
|
105
|
-
/* Completeness */
|
|
106
|
-
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
|
|
107
|
-
"skipLibCheck": true /* Skip type checking all .d.ts files. */
|
|
3
|
+
// See https://www.totaltypescript.com/tsconfig-cheat-sheet
|
|
4
|
+
/* Base Options: */
|
|
5
|
+
"esModuleInterop": true,
|
|
6
|
+
"skipLibCheck": true,
|
|
7
|
+
"target": "es2022",
|
|
8
|
+
"allowJs": true,
|
|
9
|
+
"resolveJsonModule": true,
|
|
10
|
+
"moduleDetection": "force",
|
|
11
|
+
"isolatedModules": true,
|
|
12
|
+
"verbatimModuleSyntax": true,
|
|
13
|
+
|
|
14
|
+
/* Strictness */
|
|
15
|
+
"strict": true,
|
|
16
|
+
"noUncheckedIndexedAccess": true,
|
|
17
|
+
"noImplicitOverride": true,
|
|
18
|
+
|
|
19
|
+
/* If NOT transpiling with TypeScript: */
|
|
20
|
+
"module": "NodeNext",
|
|
21
|
+
"noEmit": true,
|
|
108
22
|
},
|
|
109
23
|
"include": ["src/**/*"],
|
|
110
24
|
"exclude": ["node_modules", "dist", "**/__tests__/*"]
|