firecrawl 1.25.5 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +0 -0
- package/LICENSE +0 -0
- package/README.md +0 -0
- package/dist/index.cjs +4 -4
- package/dist/index.d.cts +3 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.js +3 -3
- package/dist/{package-FAEOWTCQ.js → package-6BJUVGSJ.js} +2 -2
- package/dump.rdb +0 -0
- package/jest.config.js +0 -0
- package/package.json +1 -1
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +16 -167
- package/src/index.ts +4 -1
- package/tsconfig.json +0 -0
- package/tsup.config.ts +0 -0
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -330
- package/src/__tests__/fixtures/scrape.json +0 -22
- package/src/__tests__/index.test.ts +0 -57
package/.env.example
CHANGED
|
File without changes
|
package/LICENSE
CHANGED
|
File without changes
|
package/README.md
CHANGED
|
File without changes
|
package/dist/index.cjs
CHANGED
|
@@ -34,8 +34,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
34
34
|
var require_package = __commonJS({
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
|
-
name: "
|
|
38
|
-
version: "1.25.5",
|
|
37
|
+
name: "firecrawl",
|
|
38
|
+
version: "1.26.0",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -516,9 +516,9 @@ var FirecrawlApp = class {
|
|
|
516
516
|
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
517
517
|
* @returns The response from the crawl operation.
|
|
518
518
|
*/
|
|
519
|
-
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
519
|
+
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs, maxConcurrency) {
|
|
520
520
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
521
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
521
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, maxConcurrency, ...params, origin: `js-sdk@${this.version}` };
|
|
522
522
|
if (jsonData?.extract?.schema) {
|
|
523
523
|
let schema = jsonData.extract.schema;
|
|
524
524
|
try {
|
package/dist/index.d.cts
CHANGED
|
@@ -199,6 +199,7 @@ interface CrawlParams {
|
|
|
199
199
|
maxDiscoveryDepth?: number;
|
|
200
200
|
limit?: number;
|
|
201
201
|
allowBackwardLinks?: boolean;
|
|
202
|
+
crawlEntireDomain?: boolean;
|
|
202
203
|
allowExternalLinks?: boolean;
|
|
203
204
|
ignoreSitemap?: boolean;
|
|
204
205
|
scrapeOptions?: CrawlScrapeOptions;
|
|
@@ -216,6 +217,7 @@ interface CrawlParams {
|
|
|
216
217
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
217
218
|
*/
|
|
218
219
|
delay?: number;
|
|
220
|
+
maxConcurrency?: number;
|
|
219
221
|
}
|
|
220
222
|
/**
|
|
221
223
|
* Response interface for crawling operations.
|
|
@@ -606,7 +608,7 @@ declare class FirecrawlApp {
|
|
|
606
608
|
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
607
609
|
* @returns The response from the crawl operation.
|
|
608
610
|
*/
|
|
609
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
611
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean, maxConcurrency?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
610
612
|
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
611
613
|
/**
|
|
612
614
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
package/dist/index.d.ts
CHANGED
|
@@ -199,6 +199,7 @@ interface CrawlParams {
|
|
|
199
199
|
maxDiscoveryDepth?: number;
|
|
200
200
|
limit?: number;
|
|
201
201
|
allowBackwardLinks?: boolean;
|
|
202
|
+
crawlEntireDomain?: boolean;
|
|
202
203
|
allowExternalLinks?: boolean;
|
|
203
204
|
ignoreSitemap?: boolean;
|
|
204
205
|
scrapeOptions?: CrawlScrapeOptions;
|
|
@@ -216,6 +217,7 @@ interface CrawlParams {
|
|
|
216
217
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
217
218
|
*/
|
|
218
219
|
delay?: number;
|
|
220
|
+
maxConcurrency?: number;
|
|
219
221
|
}
|
|
220
222
|
/**
|
|
221
223
|
* Response interface for crawling operations.
|
|
@@ -606,7 +608,7 @@ declare class FirecrawlApp {
|
|
|
606
608
|
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
607
609
|
* @returns The response from the crawl operation.
|
|
608
610
|
*/
|
|
609
|
-
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
611
|
+
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean, maxConcurrency?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
610
612
|
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
|
|
611
613
|
/**
|
|
612
614
|
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
|
package/dist/index.js
CHANGED
|
@@ -29,7 +29,7 @@ var FirecrawlApp = class {
|
|
|
29
29
|
}
|
|
30
30
|
async getVersion() {
|
|
31
31
|
try {
|
|
32
|
-
const packageJson = await import("./package-FAEOWTCQ.js");
|
|
32
|
+
const packageJson = await import("./package-6BJUVGSJ.js");
|
|
33
33
|
return packageJson.default.version;
|
|
34
34
|
} catch (error) {
|
|
35
35
|
console.error("Error getting version:", error);
|
|
@@ -407,9 +407,9 @@ var FirecrawlApp = class {
|
|
|
407
407
|
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
|
408
408
|
* @returns The response from the crawl operation.
|
|
409
409
|
*/
|
|
410
|
-
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
410
|
+
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs, maxConcurrency) {
|
|
411
411
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
412
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
412
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, maxConcurrency, ...params, origin: `js-sdk@${this.version}` };
|
|
413
413
|
if (jsonData?.extract?.schema) {
|
|
414
414
|
let schema = jsonData.extract.schema;
|
|
415
415
|
try {
|
package/dump.rdb
ADDED
|
Binary file
|
package/jest.config.js
CHANGED
|
File without changes
|
package/package.json
CHANGED
|
@@ -26,7 +26,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
26
26
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
|
27
27
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
28
28
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
29
|
-
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code:
|
|
29
|
+
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401");
|
|
30
30
|
} else {
|
|
31
31
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
32
32
|
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
@@ -36,36 +36,16 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
36
36
|
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
|
37
37
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
38
38
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
|
39
|
-
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("
|
|
39
|
+
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("This website is no longer supported");
|
|
40
40
|
});
|
|
41
41
|
|
|
42
|
-
test.concurrent('should return successful response with valid preview token', async () => {
|
|
43
|
-
const app = new FirecrawlApp({ apiKey: process.env.PREVIEW_TOKEN, apiUrl: API_URL });
|
|
44
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
45
|
-
if (!response.success) {
|
|
46
|
-
throw new Error(response.error);
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
expect(response).not.toBeNull();
|
|
50
|
-
expect(response?.markdown).toContain("_Roast_");
|
|
51
|
-
}, 30000); // 30 seconds timeout
|
|
52
|
-
|
|
53
42
|
test.concurrent('should return successful response for valid scrape', async () => {
|
|
54
43
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
44
|
+
|
|
55
45
|
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
56
46
|
if (!response.success) {
|
|
57
47
|
throw new Error(response.error);
|
|
58
48
|
}
|
|
59
|
-
|
|
60
|
-
expect(response).not.toBeNull();
|
|
61
|
-
expect(response).not.toHaveProperty('content'); // v0
|
|
62
|
-
expect(response).not.toHaveProperty('html');
|
|
63
|
-
expect(response).not.toHaveProperty('rawHtml');
|
|
64
|
-
expect(response).not.toHaveProperty('screenshot');
|
|
65
|
-
expect(response).not.toHaveProperty('links');
|
|
66
|
-
|
|
67
|
-
expect(response).toHaveProperty('markdown');
|
|
68
|
-
expect(response).toHaveProperty('metadata');
|
|
69
49
|
}, 30000); // 30 seconds timeout
|
|
70
50
|
|
|
71
51
|
test.concurrent('should return successful response with valid API key and options', async () => {
|
|
@@ -80,53 +60,10 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
80
60
|
timeout: 30000,
|
|
81
61
|
waitFor: 1000
|
|
82
62
|
});
|
|
63
|
+
|
|
83
64
|
if (!response.success) {
|
|
84
65
|
throw new Error(response.error);
|
|
85
66
|
}
|
|
86
|
-
|
|
87
|
-
expect(response).not.toBeNull();
|
|
88
|
-
expect(response).not.toHaveProperty('content'); // v0
|
|
89
|
-
expect(response.markdown).toContain("_Roast_");
|
|
90
|
-
expect(response.html).toContain("<h1");
|
|
91
|
-
expect(response.rawHtml).toContain("<h1");
|
|
92
|
-
expect(response.screenshot).not.toBeUndefined();
|
|
93
|
-
expect(response.screenshot).not.toBeNull();
|
|
94
|
-
expect(response.screenshot).toContain("https://");
|
|
95
|
-
expect(response.links).not.toBeNull();
|
|
96
|
-
expect(response.links?.length).toBeGreaterThan(0);
|
|
97
|
-
expect(response.links?.[0]).toContain("https://");
|
|
98
|
-
expect(response.metadata).not.toBeNull();
|
|
99
|
-
expect(response.metadata).not.toBeUndefined();
|
|
100
|
-
expect(response.metadata).toHaveProperty("title");
|
|
101
|
-
expect(response.metadata).toHaveProperty("description");
|
|
102
|
-
expect(response.metadata).toHaveProperty("keywords");
|
|
103
|
-
expect(response.metadata).toHaveProperty("robots");
|
|
104
|
-
expect(response.metadata).toHaveProperty("ogTitle");
|
|
105
|
-
expect(response.metadata).toHaveProperty("ogDescription");
|
|
106
|
-
expect(response.metadata).toHaveProperty("ogUrl");
|
|
107
|
-
expect(response.metadata).toHaveProperty("ogImage");
|
|
108
|
-
expect(response.metadata).toHaveProperty("ogLocaleAlternate");
|
|
109
|
-
expect(response.metadata).toHaveProperty("ogSiteName");
|
|
110
|
-
expect(response.metadata).toHaveProperty("sourceURL");
|
|
111
|
-
expect(response.metadata).not.toHaveProperty("pageStatusCode");
|
|
112
|
-
expect(response.metadata).toHaveProperty("statusCode");
|
|
113
|
-
expect(response.metadata).not.toHaveProperty("pageError");
|
|
114
|
-
|
|
115
|
-
if (response.metadata !== undefined) {
|
|
116
|
-
expect(response.metadata.error).toBeUndefined();
|
|
117
|
-
expect(response.metadata.title).toBe("Roast My Website");
|
|
118
|
-
expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
|
|
119
|
-
expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
|
|
120
|
-
expect(response.metadata.robots).toBe("follow, index");
|
|
121
|
-
expect(response.metadata.ogTitle).toBe("Roast My Website");
|
|
122
|
-
expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
|
|
123
|
-
expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
|
|
124
|
-
expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
|
|
125
|
-
expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
|
|
126
|
-
expect(response.metadata.ogSiteName).toBe("Roast My Website");
|
|
127
|
-
expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
|
|
128
|
-
expect(response.metadata.statusCode).toBe(200);
|
|
129
|
-
}
|
|
130
67
|
}, 30000); // 30 seconds timeout
|
|
131
68
|
|
|
132
69
|
test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
|
|
@@ -139,7 +76,6 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
139
76
|
throw new Error(response.error);
|
|
140
77
|
}
|
|
141
78
|
|
|
142
|
-
expect(response).not.toBeNull();
|
|
143
79
|
expect(response.screenshot).not.toBeUndefined();
|
|
144
80
|
expect(response.screenshot).not.toBeNull();
|
|
145
81
|
expect(response.screenshot).toContain("https://");
|
|
@@ -170,7 +106,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
170
106
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
171
107
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
172
108
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
173
|
-
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code
|
|
109
|
+
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
174
110
|
} else {
|
|
175
111
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
176
112
|
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
@@ -180,34 +116,10 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
180
116
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
181
117
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
182
118
|
const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
|
|
183
|
-
expect(response).not.toBeNull();
|
|
184
|
-
expect(response).toHaveProperty("total");
|
|
185
|
-
expect(response.total).toBeGreaterThan(0);
|
|
186
|
-
expect(response).toHaveProperty("creditsUsed");
|
|
187
|
-
expect(response.creditsUsed).toBeGreaterThan(0);
|
|
188
|
-
expect(response).toHaveProperty("expiresAt");
|
|
189
|
-
expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
|
|
190
|
-
expect(response).toHaveProperty("status");
|
|
191
|
-
expect(response.status).toBe("completed");
|
|
192
119
|
expect(response).not.toHaveProperty("next"); // wait until done
|
|
193
120
|
expect(response.data.length).toBeGreaterThan(0);
|
|
194
|
-
expect(response.data[0]).not.toBeNull();
|
|
195
|
-
expect(response.data[0]).not.toBeUndefined();
|
|
196
121
|
if (response.data[0]) {
|
|
197
122
|
expect(response.data[0]).toHaveProperty("markdown");
|
|
198
|
-
expect(response.data[0].markdown).toContain("_Roast_");
|
|
199
|
-
expect(response.data[0]).not.toHaveProperty('content'); // v0
|
|
200
|
-
expect(response.data[0]).not.toHaveProperty("html");
|
|
201
|
-
expect(response.data[0]).not.toHaveProperty("rawHtml");
|
|
202
|
-
expect(response.data[0]).not.toHaveProperty("screenshot");
|
|
203
|
-
expect(response.data[0]).not.toHaveProperty("links");
|
|
204
|
-
expect(response.data[0]).toHaveProperty("metadata");
|
|
205
|
-
expect(response.data[0].metadata).toHaveProperty("title");
|
|
206
|
-
expect(response.data[0].metadata).toHaveProperty("description");
|
|
207
|
-
expect(response.data[0].metadata).toHaveProperty("language");
|
|
208
|
-
expect(response.data[0].metadata).toHaveProperty("sourceURL");
|
|
209
|
-
expect(response.data[0].metadata).toHaveProperty("statusCode");
|
|
210
|
-
expect(response.data[0].metadata).not.toHaveProperty("error");
|
|
211
123
|
}
|
|
212
124
|
}, 60000); // 60 seconds timeout
|
|
213
125
|
|
|
@@ -230,39 +142,15 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
230
142
|
waitFor: 1000
|
|
231
143
|
}
|
|
232
144
|
} as CrawlParams, 30) as CrawlStatusResponse;
|
|
233
|
-
expect(response).not.toBeNull();
|
|
234
|
-
expect(response).toHaveProperty("total");
|
|
235
|
-
expect(response.total).toBeGreaterThan(0);
|
|
236
|
-
expect(response).toHaveProperty("creditsUsed");
|
|
237
|
-
expect(response.creditsUsed).toBeGreaterThan(0);
|
|
238
|
-
expect(response).toHaveProperty("expiresAt");
|
|
239
|
-
expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
|
|
240
|
-
expect(response).toHaveProperty("status");
|
|
241
|
-
expect(response.status).toBe("completed");
|
|
242
145
|
expect(response).not.toHaveProperty("next");
|
|
243
146
|
expect(response.data.length).toBeGreaterThan(0);
|
|
244
|
-
expect(response.data[0]).not.toBeNull();
|
|
245
|
-
expect(response.data[0]).not.toBeUndefined();
|
|
246
147
|
if (response.data[0]) {
|
|
247
148
|
expect(response.data[0]).toHaveProperty("markdown");
|
|
248
|
-
expect(response.data[0].markdown).toContain("_Roast_");
|
|
249
149
|
expect(response.data[0]).not.toHaveProperty('content'); // v0
|
|
250
150
|
expect(response.data[0]).toHaveProperty("html");
|
|
251
|
-
expect(response.data[0].html).toContain("<h1");
|
|
252
151
|
expect(response.data[0]).toHaveProperty("rawHtml");
|
|
253
|
-
expect(response.data[0].rawHtml).toContain("<h1");
|
|
254
152
|
expect(response.data[0]).toHaveProperty("screenshot");
|
|
255
|
-
expect(response.data[0].screenshot).toContain("https://");
|
|
256
153
|
expect(response.data[0]).toHaveProperty("links");
|
|
257
|
-
expect(response.data[0].links).not.toBeNull();
|
|
258
|
-
expect(response.data[0].links?.length).toBeGreaterThan(0);
|
|
259
|
-
expect(response.data[0]).toHaveProperty("metadata");
|
|
260
|
-
expect(response.data[0].metadata).toHaveProperty("title");
|
|
261
|
-
expect(response.data[0].metadata).toHaveProperty("description");
|
|
262
|
-
expect(response.data[0].metadata).toHaveProperty("language");
|
|
263
|
-
expect(response.data[0].metadata).toHaveProperty("sourceURL");
|
|
264
|
-
expect(response.data[0].metadata).toHaveProperty("statusCode");
|
|
265
|
-
expect(response.data[0].metadata).not.toHaveProperty("error");
|
|
266
154
|
}
|
|
267
155
|
}, 60000); // 60 seconds timeout
|
|
268
156
|
|
|
@@ -278,7 +166,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
278
166
|
|
|
279
167
|
test.concurrent('should check crawl status', async () => {
|
|
280
168
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
281
|
-
const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
|
|
169
|
+
const response = await app.asyncCrawlUrl('https://firecrawl.dev', { limit: 20, scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
|
|
282
170
|
expect(response).not.toBeNull();
|
|
283
171
|
expect(response.id).toBeDefined();
|
|
284
172
|
|
|
@@ -314,44 +202,15 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
314
202
|
expect(statusResponse).toHaveProperty("total");
|
|
315
203
|
expect(statusResponse.success).toBe(true);
|
|
316
204
|
if (statusResponse.success === true) {
|
|
317
|
-
expect(statusResponse.total).toBeGreaterThan(0);
|
|
318
|
-
expect(statusResponse).toHaveProperty("creditsUsed");
|
|
319
|
-
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
|
|
320
|
-
expect(statusResponse).toHaveProperty("expiresAt");
|
|
321
|
-
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
|
|
322
|
-
expect(statusResponse).toHaveProperty("status");
|
|
323
205
|
expect(statusResponse.status).toBe("completed");
|
|
324
206
|
expect(statusResponse.data.length).toBeGreaterThan(0);
|
|
325
|
-
expect(statusResponse.data[0]).not.toBeNull();
|
|
326
|
-
expect(statusResponse.data[0]).not.toBeUndefined();
|
|
327
|
-
if (statusResponse.data[0]) {
|
|
328
|
-
expect(statusResponse.data[0]).toHaveProperty("markdown");
|
|
329
|
-
expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
|
|
330
|
-
expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
|
|
331
|
-
expect(statusResponse.data[0]).toHaveProperty("html");
|
|
332
|
-
expect(statusResponse.data[0].html).toContain("<div");
|
|
333
|
-
expect(statusResponse.data[0]).toHaveProperty("rawHtml");
|
|
334
|
-
expect(statusResponse.data[0].rawHtml).toContain("<div");
|
|
335
|
-
expect(statusResponse.data[0]).toHaveProperty("screenshot");
|
|
336
|
-
expect(statusResponse.data[0].screenshot).toContain("https://");
|
|
337
|
-
expect(statusResponse.data[0]).toHaveProperty("links");
|
|
338
|
-
expect(statusResponse.data[0].links).not.toBeNull();
|
|
339
|
-
expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
|
|
340
|
-
expect(statusResponse.data[0]).toHaveProperty("metadata");
|
|
341
|
-
expect(statusResponse.data[0].metadata).toHaveProperty("title");
|
|
342
|
-
expect(statusResponse.data[0].metadata).toHaveProperty("description");
|
|
343
|
-
expect(statusResponse.data[0].metadata).toHaveProperty("language");
|
|
344
|
-
expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
|
|
345
|
-
expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
|
|
346
|
-
expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
|
|
347
|
-
}
|
|
348
207
|
}
|
|
349
208
|
}, 60000); // 60 seconds timeout
|
|
350
209
|
|
|
351
210
|
test.concurrent('should throw error for invalid API key on map', async () => {
|
|
352
211
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
353
212
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
354
|
-
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code
|
|
213
|
+
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
355
214
|
} else {
|
|
356
215
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
357
216
|
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
@@ -361,16 +220,9 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
361
220
|
test.concurrent('should throw error for blocklisted URL on map', async () => {
|
|
362
221
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
363
222
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
|
364
|
-
await expect(app.mapUrl(blocklistedUrl)).rejects.toThrow("
|
|
223
|
+
await expect(app.mapUrl(blocklistedUrl)).rejects.toThrow("403");
|
|
365
224
|
});
|
|
366
225
|
|
|
367
|
-
test.concurrent('should return successful response with valid preview token', async () => {
|
|
368
|
-
const app = new FirecrawlApp({ apiKey: process.env.PREVIEW_TOKEN, apiUrl: API_URL });
|
|
369
|
-
const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
370
|
-
expect(response).not.toBeNull();
|
|
371
|
-
expect(response.links?.length).toBeGreaterThan(0);
|
|
372
|
-
}, 30000); // 30 seconds timeout
|
|
373
|
-
|
|
374
226
|
test.concurrent('should return successful response for valid map', async () => {
|
|
375
227
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
376
228
|
expect(response).not.toBeNull();
|
|
@@ -387,13 +239,11 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
387
239
|
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
|
|
388
240
|
const response = await app.search("firecrawl");
|
|
389
241
|
expect(response.success).toBe(true);
|
|
390
|
-
console.log(response.data);
|
|
391
242
|
expect(response.data?.length).toBeGreaterThan(0);
|
|
392
|
-
expect(response.data?.[0]?.markdown).toBeDefined();
|
|
393
|
-
expect(response.data?.[0]?.
|
|
394
|
-
expect(response.data?.[0]?.
|
|
395
|
-
|
|
396
|
-
});
|
|
243
|
+
expect(response.data?.[0]?.markdown).not.toBeDefined();
|
|
244
|
+
expect(response.data?.[0]?.title).toBeDefined();
|
|
245
|
+
expect(response.data?.[0]?.description).toBeDefined();
|
|
246
|
+
}, 30000); // 30 seconds timeout
|
|
397
247
|
|
|
398
248
|
test('should search with params object', async () => {
|
|
399
249
|
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
|
|
@@ -412,14 +262,13 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
412
262
|
expect(doc.markdown).toBeDefined();
|
|
413
263
|
expect(doc.html).toBeDefined();
|
|
414
264
|
expect(doc.links).toBeDefined();
|
|
415
|
-
expect(doc.
|
|
416
|
-
expect(doc.
|
|
417
|
-
expect(doc.metadata?.description).toBeDefined();
|
|
265
|
+
expect(doc.title).toBeDefined();
|
|
266
|
+
expect(doc.description).toBeDefined();
|
|
418
267
|
}
|
|
419
|
-
});
|
|
268
|
+
}, 30000); // 30 seconds timeout
|
|
420
269
|
|
|
421
270
|
test('should handle invalid API key for search', async () => {
|
|
422
271
|
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" });
|
|
423
|
-
await expect(app.search("test query")).rejects.toThrow("Request failed with status code
|
|
272
|
+
await expect(app.search("test query")).rejects.toThrow("Request failed with status code 401");
|
|
424
273
|
});
|
|
425
274
|
});
|
package/src/index.ts
CHANGED
|
@@ -209,6 +209,7 @@ export interface CrawlParams {
|
|
|
209
209
|
maxDiscoveryDepth?: number;
|
|
210
210
|
limit?: number;
|
|
211
211
|
allowBackwardLinks?: boolean;
|
|
212
|
+
crawlEntireDomain?: boolean;
|
|
212
213
|
allowExternalLinks?: boolean;
|
|
213
214
|
ignoreSitemap?: boolean;
|
|
214
215
|
scrapeOptions?: CrawlScrapeOptions;
|
|
@@ -226,6 +227,7 @@ export interface CrawlParams {
|
|
|
226
227
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
227
228
|
*/
|
|
228
229
|
delay?: number;
|
|
230
|
+
maxConcurrency?: number;
|
|
229
231
|
}
|
|
230
232
|
|
|
231
233
|
/**
|
|
@@ -1012,9 +1014,10 @@ export default class FirecrawlApp {
|
|
|
1012
1014
|
idempotencyKey?: string,
|
|
1013
1015
|
webhook?: CrawlParams["webhook"],
|
|
1014
1016
|
ignoreInvalidURLs?: boolean,
|
|
1017
|
+
maxConcurrency?: number,
|
|
1015
1018
|
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
1016
1019
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
1017
|
-
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
1020
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, maxConcurrency, ...params, origin: `js-sdk@${this.version}` };
|
|
1018
1021
|
if (jsonData?.extract?.schema) {
|
|
1019
1022
|
let schema = jsonData.extract.schema;
|
|
1020
1023
|
|
package/tsconfig.json
CHANGED
|
File without changes
|
package/tsup.config.ts
CHANGED
|
File without changes
|
|
@@ -1,330 +0,0 @@
|
|
|
1
|
-
import FirecrawlApp, {
|
|
2
|
-
CrawlResponseV0,
|
|
3
|
-
CrawlStatusResponse,
|
|
4
|
-
CrawlStatusResponseV0,
|
|
5
|
-
FirecrawlDocumentV0,
|
|
6
|
-
ScrapeResponseV0,
|
|
7
|
-
SearchResponseV0,
|
|
8
|
-
} from "../../index";
|
|
9
|
-
import { v4 as uuidv4 } from "uuid";
|
|
10
|
-
import dotenv from "dotenv";
|
|
11
|
-
import { describe, test, expect } from "@jest/globals";
|
|
12
|
-
|
|
13
|
-
dotenv.config();
|
|
14
|
-
|
|
15
|
-
const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
16
|
-
const API_URL = "http://127.0.0.1:3002";
|
|
17
|
-
|
|
18
|
-
describe('FirecrawlApp<"v0"> E2E Tests', () => {
|
|
19
|
-
test.concurrent("should throw error for no API key", async () => {
|
|
20
|
-
expect(() => {
|
|
21
|
-
new FirecrawlApp<"v0">({ apiKey: null, apiUrl: API_URL, version: "v0" });
|
|
22
|
-
}).toThrow("No API key provided");
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
test.concurrent(
|
|
26
|
-
"should throw error for invalid API key on scrape",
|
|
27
|
-
async () => {
|
|
28
|
-
const invalidApp = new FirecrawlApp<"v0">({
|
|
29
|
-
apiKey: "invalid_api_key",
|
|
30
|
-
apiUrl: API_URL,
|
|
31
|
-
version: "v0",
|
|
32
|
-
});
|
|
33
|
-
await expect(
|
|
34
|
-
invalidApp.scrapeUrl("https://roastmywebsite.ai")
|
|
35
|
-
).rejects.toThrow("Request failed with status code 401");
|
|
36
|
-
}
|
|
37
|
-
);
|
|
38
|
-
|
|
39
|
-
test.concurrent(
|
|
40
|
-
"should throw error for blocklisted URL on scrape",
|
|
41
|
-
async () => {
|
|
42
|
-
const app = new FirecrawlApp<"v0">({
|
|
43
|
-
apiKey: TEST_API_KEY,
|
|
44
|
-
apiUrl: API_URL,
|
|
45
|
-
version: "v0",
|
|
46
|
-
});
|
|
47
|
-
const blocklistedUrl = "https://facebook.com/fake-test";
|
|
48
|
-
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow(
|
|
49
|
-
"Request failed with status code 403"
|
|
50
|
-
);
|
|
51
|
-
}
|
|
52
|
-
);
|
|
53
|
-
|
|
54
|
-
test.concurrent(
|
|
55
|
-
"should return successful response with valid preview token",
|
|
56
|
-
async () => {
|
|
57
|
-
const app = new FirecrawlApp<"v0">({
|
|
58
|
-
apiKey: process.env.PREVIEW_TOKEN,
|
|
59
|
-
apiUrl: API_URL,
|
|
60
|
-
version: "v0",
|
|
61
|
-
});
|
|
62
|
-
const response = (await app.scrapeUrl(
|
|
63
|
-
"https://roastmywebsite.ai"
|
|
64
|
-
)) as ScrapeResponseV0;
|
|
65
|
-
expect(response).not.toBeNull();
|
|
66
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
67
|
-
},
|
|
68
|
-
30000
|
|
69
|
-
); // 30 seconds timeout
|
|
70
|
-
|
|
71
|
-
test.concurrent(
|
|
72
|
-
"should return successful response for valid scrape",
|
|
73
|
-
async () => {
|
|
74
|
-
const app = new FirecrawlApp<"v0">({
|
|
75
|
-
apiKey: TEST_API_KEY,
|
|
76
|
-
apiUrl: API_URL,
|
|
77
|
-
version: "v0",
|
|
78
|
-
});
|
|
79
|
-
const response = (await app.scrapeUrl(
|
|
80
|
-
"https://roastmywebsite.ai"
|
|
81
|
-
)) as ScrapeResponseV0;
|
|
82
|
-
expect(response).not.toBeNull();
|
|
83
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
84
|
-
expect(response.data).toHaveProperty("markdown");
|
|
85
|
-
expect(response.data).toHaveProperty("metadata");
|
|
86
|
-
expect(response.data).not.toHaveProperty("html");
|
|
87
|
-
},
|
|
88
|
-
30000
|
|
89
|
-
); // 30 seconds timeout
|
|
90
|
-
|
|
91
|
-
test.concurrent(
|
|
92
|
-
"should return successful response with valid API key and include HTML",
|
|
93
|
-
async () => {
|
|
94
|
-
const app = new FirecrawlApp<"v0">({
|
|
95
|
-
apiKey: TEST_API_KEY,
|
|
96
|
-
apiUrl: API_URL,
|
|
97
|
-
version: "v0",
|
|
98
|
-
});
|
|
99
|
-
const response = (await app.scrapeUrl("https://roastmywebsite.ai", {
|
|
100
|
-
pageOptions: { includeHtml: true },
|
|
101
|
-
})) as ScrapeResponseV0;
|
|
102
|
-
expect(response).not.toBeNull();
|
|
103
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
104
|
-
expect(response.data?.markdown).toContain("_Roast_");
|
|
105
|
-
expect(response.data?.html).toContain("<h1");
|
|
106
|
-
},
|
|
107
|
-
30000
|
|
108
|
-
); // 30 seconds timeout
|
|
109
|
-
|
|
110
|
-
test.concurrent(
|
|
111
|
-
"should return successful response for valid scrape with PDF file",
|
|
112
|
-
async () => {
|
|
113
|
-
const app = new FirecrawlApp<"v0">({
|
|
114
|
-
apiKey: TEST_API_KEY,
|
|
115
|
-
apiUrl: API_URL,
|
|
116
|
-
version: "v0",
|
|
117
|
-
});
|
|
118
|
-
const response = (await app.scrapeUrl(
|
|
119
|
-
"https://arxiv.org/pdf/astro-ph/9301001.pdf"
|
|
120
|
-
)) as ScrapeResponseV0;
|
|
121
|
-
expect(response).not.toBeNull();
|
|
122
|
-
expect(response.data?.content).toContain(
|
|
123
|
-
"We present spectrophotometric observations of the Broad Line Radio Galaxy"
|
|
124
|
-
);
|
|
125
|
-
},
|
|
126
|
-
30000
|
|
127
|
-
); // 30 seconds timeout
|
|
128
|
-
|
|
129
|
-
test.concurrent(
|
|
130
|
-
"should return successful response for valid scrape with PDF file without explicit extension",
|
|
131
|
-
async () => {
|
|
132
|
-
const app = new FirecrawlApp<"v0">({
|
|
133
|
-
apiKey: TEST_API_KEY,
|
|
134
|
-
apiUrl: API_URL,
|
|
135
|
-
version: "v0",
|
|
136
|
-
});
|
|
137
|
-
const response = (await app.scrapeUrl(
|
|
138
|
-
"https://arxiv.org/pdf/astro-ph/9301001"
|
|
139
|
-
)) as ScrapeResponseV0;
|
|
140
|
-
expect(response).not.toBeNull();
|
|
141
|
-
expect(response.data?.content).toContain(
|
|
142
|
-
"We present spectrophotometric observations of the Broad Line Radio Galaxy"
|
|
143
|
-
);
|
|
144
|
-
},
|
|
145
|
-
30000
|
|
146
|
-
); // 30 seconds timeout
|
|
147
|
-
|
|
148
|
-
test.concurrent(
|
|
149
|
-
"should throw error for invalid API key on crawl",
|
|
150
|
-
async () => {
|
|
151
|
-
const invalidApp = new FirecrawlApp<"v0">({
|
|
152
|
-
apiKey: "invalid_api_key",
|
|
153
|
-
apiUrl: API_URL,
|
|
154
|
-
version: "v0",
|
|
155
|
-
});
|
|
156
|
-
await expect(
|
|
157
|
-
invalidApp.crawlUrl("https://roastmywebsite.ai")
|
|
158
|
-
).rejects.toThrow("Request failed with status code 401");
|
|
159
|
-
}
|
|
160
|
-
);
|
|
161
|
-
|
|
162
|
-
test.concurrent(
|
|
163
|
-
"should throw error for blocklisted URL on crawl",
|
|
164
|
-
async () => {
|
|
165
|
-
const app = new FirecrawlApp<"v0">({
|
|
166
|
-
apiKey: TEST_API_KEY,
|
|
167
|
-
apiUrl: API_URL,
|
|
168
|
-
version: "v0",
|
|
169
|
-
});
|
|
170
|
-
const blocklistedUrl = "https://twitter.com/fake-test";
|
|
171
|
-
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow(
|
|
172
|
-
"Request failed with status code 403"
|
|
173
|
-
);
|
|
174
|
-
}
|
|
175
|
-
);
|
|
176
|
-
|
|
177
|
-
test.concurrent(
|
|
178
|
-
"should return successful response for crawl and wait for completion",
|
|
179
|
-
async () => {
|
|
180
|
-
const app = new FirecrawlApp<"v0">({
|
|
181
|
-
apiKey: TEST_API_KEY,
|
|
182
|
-
apiUrl: API_URL,
|
|
183
|
-
version: "v0",
|
|
184
|
-
});
|
|
185
|
-
const response = (await app.crawlUrl(
|
|
186
|
-
"https://roastmywebsite.ai",
|
|
187
|
-
{ crawlerOptions: { excludes: ["blog/*"] } },
|
|
188
|
-
true,
|
|
189
|
-
10
|
|
190
|
-
)) as FirecrawlDocumentV0[];
|
|
191
|
-
expect(response).not.toBeNull();
|
|
192
|
-
expect(response[0].content).toContain("_Roast_");
|
|
193
|
-
},
|
|
194
|
-
60000
|
|
195
|
-
); // 60 seconds timeout
|
|
196
|
-
|
|
197
|
-
test.concurrent("should handle idempotency key for crawl", async () => {
|
|
198
|
-
const app = new FirecrawlApp<"v0">({
|
|
199
|
-
apiKey: TEST_API_KEY,
|
|
200
|
-
apiUrl: API_URL,
|
|
201
|
-
version: "v0",
|
|
202
|
-
});
|
|
203
|
-
const uniqueIdempotencyKey = uuidv4();
|
|
204
|
-
const response = (await app.crawlUrl(
|
|
205
|
-
"https://roastmywebsite.ai",
|
|
206
|
-
{ crawlerOptions: { excludes: ["blog/*"] } },
|
|
207
|
-
false,
|
|
208
|
-
2,
|
|
209
|
-
uniqueIdempotencyKey
|
|
210
|
-
)) as CrawlResponseV0;
|
|
211
|
-
expect(response).not.toBeNull();
|
|
212
|
-
expect(response.jobId).toBeDefined();
|
|
213
|
-
|
|
214
|
-
await expect(
|
|
215
|
-
app.crawlUrl(
|
|
216
|
-
"https://roastmywebsite.ai",
|
|
217
|
-
{ crawlerOptions: { excludes: ["blog/*"] } },
|
|
218
|
-
true,
|
|
219
|
-
2,
|
|
220
|
-
uniqueIdempotencyKey
|
|
221
|
-
)
|
|
222
|
-
).rejects.toThrow("Request failed with status code 409");
|
|
223
|
-
});
|
|
224
|
-
|
|
225
|
-
test.concurrent(
|
|
226
|
-
"should check crawl status",
|
|
227
|
-
async () => {
|
|
228
|
-
const app = new FirecrawlApp<"v0">({
|
|
229
|
-
apiKey: TEST_API_KEY,
|
|
230
|
-
apiUrl: API_URL,
|
|
231
|
-
version: "v0",
|
|
232
|
-
});
|
|
233
|
-
const response: any = (await app.crawlUrl(
|
|
234
|
-
"https://roastmywebsite.ai",
|
|
235
|
-
{ crawlerOptions: { excludes: ["blog/*"] } },
|
|
236
|
-
false
|
|
237
|
-
)) as CrawlResponseV0;
|
|
238
|
-
expect(response).not.toBeNull();
|
|
239
|
-
expect(response.jobId).toBeDefined();
|
|
240
|
-
|
|
241
|
-
let statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
242
|
-
const maxChecks = 15;
|
|
243
|
-
let checks = 0;
|
|
244
|
-
|
|
245
|
-
while (statusResponse.status === "active" && checks < maxChecks) {
|
|
246
|
-
await new Promise((resolve) => setTimeout(resolve, 5000));
|
|
247
|
-
expect(statusResponse.partial_data).not.toBeNull();
|
|
248
|
-
// expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
|
249
|
-
statusResponse = (await app.checkCrawlStatus(
|
|
250
|
-
response.jobId
|
|
251
|
-
)) as CrawlStatusResponseV0;
|
|
252
|
-
checks++;
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
expect(statusResponse).not.toBeNull();
|
|
256
|
-
expect(statusResponse.success).toBe(true);
|
|
257
|
-
expect(statusResponse.status).toBe("completed");
|
|
258
|
-
expect(statusResponse.total).toEqual(statusResponse.current);
|
|
259
|
-
expect(statusResponse.current_step).not.toBeNull();
|
|
260
|
-
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
|
261
|
-
|
|
262
|
-
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
|
263
|
-
},
|
|
264
|
-
35000
|
|
265
|
-
); // 35 seconds timeout
|
|
266
|
-
|
|
267
|
-
test.concurrent(
|
|
268
|
-
"should return successful response for search",
|
|
269
|
-
async () => {
|
|
270
|
-
const app = new FirecrawlApp<"v0">({
|
|
271
|
-
apiKey: TEST_API_KEY,
|
|
272
|
-
apiUrl: API_URL,
|
|
273
|
-
version: "v0",
|
|
274
|
-
});
|
|
275
|
-
const response = (await app.search("test query")) as SearchResponseV0;
|
|
276
|
-
expect(response).not.toBeNull();
|
|
277
|
-
expect(response?.data?.[0]?.content).toBeDefined();
|
|
278
|
-
expect(response?.data?.length).toBeGreaterThan(2);
|
|
279
|
-
},
|
|
280
|
-
30000
|
|
281
|
-
); // 30 seconds timeout
|
|
282
|
-
|
|
283
|
-
test.concurrent(
|
|
284
|
-
"should throw error for invalid API key on search",
|
|
285
|
-
async () => {
|
|
286
|
-
const invalidApp = new FirecrawlApp<"v0">({
|
|
287
|
-
apiKey: "invalid_api_key",
|
|
288
|
-
apiUrl: API_URL,
|
|
289
|
-
version: "v0",
|
|
290
|
-
});
|
|
291
|
-
await expect(invalidApp.search("test query")).rejects.toThrow(
|
|
292
|
-
"Request failed with status code 401"
|
|
293
|
-
);
|
|
294
|
-
}
|
|
295
|
-
);
|
|
296
|
-
|
|
297
|
-
test.concurrent(
|
|
298
|
-
"should perform LLM extraction",
|
|
299
|
-
async () => {
|
|
300
|
-
const app = new FirecrawlApp<"v0">({
|
|
301
|
-
apiKey: TEST_API_KEY,
|
|
302
|
-
apiUrl: API_URL,
|
|
303
|
-
version: "v0",
|
|
304
|
-
});
|
|
305
|
-
const response = (await app.scrapeUrl("https://mendable.ai", {
|
|
306
|
-
extractorOptions: {
|
|
307
|
-
mode: "llm-extraction",
|
|
308
|
-
extractionPrompt:
|
|
309
|
-
"Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
|
310
|
-
extractionSchema: {
|
|
311
|
-
type: "object",
|
|
312
|
-
properties: {
|
|
313
|
-
company_mission: { type: "string" },
|
|
314
|
-
supports_sso: { type: "boolean" },
|
|
315
|
-
is_open_source: { type: "boolean" },
|
|
316
|
-
},
|
|
317
|
-
required: ["company_mission", "supports_sso", "is_open_source"],
|
|
318
|
-
},
|
|
319
|
-
},
|
|
320
|
-
})) as ScrapeResponseV0;
|
|
321
|
-
expect(response).not.toBeNull();
|
|
322
|
-
expect(response.data?.llm_extraction).toBeDefined();
|
|
323
|
-
const llmExtraction = response.data?.llm_extraction;
|
|
324
|
-
expect(llmExtraction?.company_mission).toBeDefined();
|
|
325
|
-
expect(typeof llmExtraction?.supports_sso).toBe("boolean");
|
|
326
|
-
expect(typeof llmExtraction?.is_open_source).toBe("boolean");
|
|
327
|
-
},
|
|
328
|
-
30000
|
|
329
|
-
); // 30 seconds timeout
|
|
330
|
-
});
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"success": true,
|
|
3
|
-
"data": {
|
|
4
|
-
"content": "\n\n[Mendable](/)\n\n* Getting started\n* Use Cases\n* [Docs](https://docs.mendable.ai)\n \n* [Pricing](/pricing)\n \n* [Blog](/blog)\n \n\nOpen main menu\n\n[Sign In](/signin)\n[Get Started](/signup)\n\n\n\n[$ npm i @mendable/search](https://docs.mendable.ai)\n\nJust in time answers \nfor Sales and Support\n============================================\n\nTrain a secure AI on your technical resources that answers customer and employee questions so your team doesn't have to\n\nGet Started\n\nTalk to Us\n\nBacked BY\n\nCombinator\n\ninvisible\n\nAssistant\n\nHi, how can I help you?\n\nGenerating\n\nLoading...\n\n\n\nFrom small startups to Fortune 500\n\nTrusted by top companies\n------------------------\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDeploy a knowledgable technical AI anywhere\n\nUse Mendable for\n----------------\n\n[Docs & Knowledge Base](/usecases/documentation)\n\n-------------------------------------------------\n\nDecrease tickets & activation times with an AI assistant\n\n[Customer Success Enablement](/usecases/cs-enablement)\n\n-------------------------------------------------------\n\nUse a technical AI copilot to increase retention\n\n[Sales Enablement](/usecases/sales-enablement)\n\n-----------------------------------------------\n\nUse a technical AI copilot to build trust with prospects\n\n[Product Copilot](/usecases/productcopilot)\n\n--------------------------------------------\n\nSpeed up adoption with a technical assistant in your app\n\nSee how companies implement Mendable\n------------------------------------\n\n\n\n[Langchain Docs](https://python.langchain.com)\n\n-----------------------------------------------\n\nOne of the most popular frameworks for developing AI applications\n\nhttps://python.langchain.com\n\n\n\n[0x Docs](https://0x.org/docs)\n\n-------------------------------\n\n0x offers the core building blocks to create the most powerful Web3 
apps\n\nhttps://0x.org/docs\n\n\n\n[Zenlytics](https://docs.zenlytic.com)\n\n---------------------------------------\n\nSelf-serve analytics tool that helps you answer the deeper questions you have about your data\n\nhttps://docs.zenlytic.com\n\n\n\n[Llama Index](http://gpt-index.readthedocs.io)\n\n-----------------------------------------------\n\nA central interface to connect your LLM’s with external data.\n\nhttp://gpt-index.readthedocs.io\n\n\n\n[Spectrocloud](https://docs.spectrocloud.com/)\n\n-----------------------------------------------\n\nK8s management uniquely built for scale. Manage the lifecycle of any type of cluster.\n\nhttps://docs.spectrocloud.com/\n\n\n\n[Code GPT](https://www.codegpt.co/)\n\n------------------------------------\n\nWith over 450,000 installs, CodeGPT brings AI inside your code editor.\n\nhttps://www.codegpt.co/\n\nAnd many more...\n\nFrom SSO to BYOK\n\nEnterprise-grade security\n-------------------------\n\n#### SOC 2 Type II\n\nMendable is SOC 2 Type II certified. Check out our [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-website)\n for additional information.\n\n#### SSO (SAML, OIDC, OAuth)\n\nSupports SAML 2.0, OpenID Connect, and OAuth 2.0 for single sign-on (SSO) and identity federation.\n\n#### RBAC (Project and chunk level)\n\nRole-based access control to ensure that only the right people have access to the right data.\n\n#### Secure Data Connectors\n\nIntegrate securely to Google Drive, Salesforce, Zendesk and more using OAuth 2.0.\n\n#### BYOK / BYOM\n\nBring your own key or custom model to Mendable to ensure compliance.\n\n#### Rate Limiting\n\nProject and user rate limit protection to prevent abuse and ensure availability.\n\nOver 20+ data connectors\n\nStart by connecting your data\n-----------------------------\n\nMendable offers managed ingestion through a simple online GUI and through our API. 
You can easily add, modify, or delete different types of sources.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nEasily Teach Your Model\n\nCustomize your model\n--------------------\n\nCustomize base model properties\n\nGPT-3.5-Turbo and GPT-4 are supported with a variety of base models coming soon\n\nTraining through answer correction\n\nCorrect the answers generated by the model and it will instantly learn from your feedback\n\nCustom prompt edits\n\nEdit the prompt to prevent hallucinations, maintain voice and format requirements\n\nKeep your data always updated\n\nMendable reingestion process offers CRON jobs and webhooks to keep your data synced and always up to date\n\nSupport Link\n\nHave customers redirected to your customer support link when the bot can't answer their questions\n\nPrivacy-first features\n\nMendable provides custom private, open source LLMs depending on your needs\n\n### Make it perfect for your use case\n\nWe know every uses case is slightly different so the Mendable platform allows you to customize your model to fit your company's needs through multiple features.\n\n* Support for multiple base LLM models (including privacy first models)\n* Training through answer correction\n* Custom prompt edits\n* Model creativity control\n\nTeach Model\n\nContinuous Training\n-------------------\n\nCoach the model by correcting the wrong responses, keeping your chat applications always up to date\n\nMore than just a chatbot\n\nTools and Actions\n-----------------\n\nGive your AI access to tools for augmentation and actions for automation. Integrate with any API\n\n\n\n\n\nReact, Vanilla JS, API\n\nChoose your component\n---------------------\n\nMendable provides a variety of components ranging from search bars, to chat bubbles, to full CLIs built on our API. 
Customize them or easily build your own\n\n\n\n \n import { MendableSearchBar } from '@mendable/search'\n <MendableSearchBar anon_key={MENDABLE_ANON_KEY} />\n \n\nFrom zero to production in minutes\n\nDeploy anywhere\n---------------\n\nDeploy Mendable internally, externally, or both. Our API allows you to send and query data from anywhere.\n\nView Documentation\n\n\n\nMendables integration on Nylas is a goldmine of data. Now, the product team has a direct source of user feedback, questions, and problems. It's amazing!\n\nSaif Khan \\- Product @ NylasKarl Cardenas \\- Director @ SpectroCloudGuillermo Rauch \\- CEO @ Vercel\n\nAI Chat Infrastructure built for production\n\nEnterprise ready out of the box\n-------------------------------\n\nVerified Sources\n----------------\n\nReduce hallucinations by grounding answers with sources from your documentation\n\nEnterprise Grade Security\n-------------------------\n\nOur platform is built for enterprises in mind. We provide RBAC, bring your own model, and SLAs\n\nReady for the whole team\n------------------------\n\nMendable supports single-sign-on so your entire team can train, manage your custom AI\n\nExplore your dashboard\n\nGet insights from usage\n-----------------------\n\nUsage\n\nNumber of chat messages per month\n\n### Understand all interactions\n\nUnravel your users' queries, track their interactions, customize responses, and monitor your product usage effortlessly.\n\n* \\-Gain key insights into user queries\n* \\-Monitor real-time product-user interactions\n* \\-Fine-tune your model for optimized responses\n* \\-Track and evaluate Mendable usage\n\n### Insights beyond conversations\n\nLearn what your users are asking, how they are asking, and their satisfaction level with the answers. 
Teach the model based on the answers rating and improve the model's performance.\n\nOur wall of love\n\nDon't take our word for it\n--------------------------\n\nEmpower your users with AI powered search\n\nBuild an AI technical assistant in minutes\n------------------------------------------\n\nTry it out\n\nFrequently asked questions\n--------------------------\n\nIf you have anything else you want to ask,[reach out to us](mailto:hello@mendable.ai)\n.\n\n* * ### Is it free?\n \n We have a free plan that gives you 500 message credits. It is also free for certain Open source projects. Contact us to see if your project is eligible.\n \n * ### Do you train your AI model with my code?\n \n Currently, Mendable does not look at any of your repository's code. However, in the future we may add it. We will always give you the option to opt out of sharing your data.\n \n* * ### How do I remove the Powered by Mendable?\n \n To remove the Powered by Mendable, you need to upgrade to an enterprise or custom plan. Contact us at [garrett@mendable.ai](mailto:garrett@mendable.ai)\n and we can help you out.\n \n * ### How do I get an anon key?\n \n To get your anon key you need to sign up at [mendable.ai](https://mendable.ai)\n and create a project. Then you can find your anon key in the API Keys section of the dashboard. Anon keys are used for client-side while API keys are used for server-side.\n \n* * ### Which model does Mendable use?\n \n Mendable offers gpt-3.5-turbo, gpt-4, claude-2 and more. If you'd like a custom model, contact us and we can help you out.\n \n * ### Is GPT-4 pricing different?\n \n Yes, right now GPT-4 will cost 3 requests per message instead of 1 (gpt-3.5-turbo). 
That means that instead of 500 messages, you will get around 166 messages if you only use GPT-4.\n \n* * ### Can you correct the AI response?\n \n Yes, Mendable offers a 'teach the model' functionality where you can correct the AI response and it will learn from it.\n \n * ### How can I integrate Mendable with my application?\n \n Probably! Check out the Mendable documentation here [https://docs.mendable.ai](https://docs.mendable.ai)\n to better understand how you can start integrating.\n \n* * ### Is it 100% accurate?\n \n Like Humans, AI will never be 100% accurate. So we can't assure you that every solution will be correct.\n \n * ### How do I cancel my subscription?\n \n Simply log into our platform, go to your account and click on \"Open customer portal\" button. There you will be able to cancel/modify it through Stripe.\n \n* * ### How does Mendable work?\n \n Our application syncs with your documentation and support channels, then uses your docs and previously answered questions to suggest possible answers.\n \n * ### Are you open-source?\n \n Currently not - although we have some open source components and integrations. If you have input here, please message us at.[hello@mendable.ai](mailto:hello@mendable.ai)\n .\n \n* * ### How does Mendable price custom plans?\n \n #### 1\\. Use case\n \n * Mendable differentiates between internal and external use cases.\n * With Mendable, we give you the ability to use our chat bots for a variety of use cases, both for internal efficiency and external communication to your customers.\n \n #### 2\\. Total usage\n \n * For specifically external use cases, you will only pay for the value you're receiving.\n * Mendable will look at the total number of messages sent during a month.\n \n #### 3\\. Custom work\n \n * If there are any special feature requests (custom data connectors, etc.), we are happy to discuss these requirements!\n \n\nWe use tracking cookies to understand how you use the product and help us improve it! 
\nPlease accept cookies to help us improve.\n\nAccept CookiesDecline Cookies\n\n[Mendable](#_)\n\n[Instagram](https://instagram.com/sideguide.dev)\n[Twitter](https://twitter.com/mendableai)\n[GitHub](https://github.com/sideguide)\n[Discord](https://discord.com/invite/kJufGDb7AA)\n\n\n\nDocumentation\n\n* [Getting Started](/signup)\n \n\n* [API Docs](https://docs.mendable.ai)\n \n\n* [Integrations](https://docs.mendable.ai/integrations/slack)\n \n\n* [Examples](https://docs.mendable.ai/examples)\n \n\n* [Tools & Actions](https://docs.mendable.ai/tools)\n \n\nUse Cases\n\n* [Sales Enablement](/usecases/sales-enablement)\n \n\n* [Knowledge Base](/usecases/documentation)\n \n\n* [CS Enablement](/usecases/cs-enablement)\n \n\n* [Product Copilot](/usecases/productcopilot)\n \n\nResources\n\n* [Pricing](/pricing)\n \n\n* [Changelog](https://docs.mendable.ai/changelog)\n \n\n* [Security](/security)\n \n\n* [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-footer)\n \n\nCompany\n\n* [Blog](/blog)\n \n\n* [Contact](mailto:garrett@mendable.ai)\n \n\n© 2024 SideGuide - SideGuide Technologies Inc.\n\n[System Status](https://mendable.betteruptime.com)\n\n[Status](https://mendable.betteruptime.com)\n[Privacy Policy](/privacy-policy)\n[Privacy](/privacy-policy)\n[Terms](/terms-of-conditions)",
|
|
5
|
-
"markdown": "\n\n[.png)Mendable](/)\n\n* Getting started\n* Use Cases\n* [Docs](https://docs.mendable.ai)\n \n* [Pricing](/pricing)\n \n* [Blog](/blog)\n \n\nOpen main menu\n\n[Sign In](/signin)\n[Get Started](/signup)\n\n\n\n[$ npm i @mendable/search](https://docs.mendable.ai)\n\nJust in time answers \nfor Sales and Support\n============================================\n\nTrain a secure AI on your technical resources that answers customer and employee questions so your team doesn't have to\n\nGet Started\n\nTalk to Us\n\nBacked BY\n\nCombinator\n\ninvisible\n\nAssistant\n\nHi, how can I help you?\n\nGenerating\n\nLoading...\n\n\n\nFrom small startups to Fortune 500\n\nTrusted by top companies\n------------------------\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDeploy a knowledgable technical AI anywhere\n\nUse Mendable for\n----------------\n\n[Docs & Knowledge Base](/usecases/documentation)\n\n-------------------------------------------------\n\nDecrease tickets & activation times with an AI assistant\n\n[Customer Success Enablement](/usecases/cs-enablement)\n\n-------------------------------------------------------\n\nUse a technical AI copilot to increase retention\n\n[Sales Enablement](/usecases/sales-enablement)\n\n-----------------------------------------------\n\nUse a technical AI copilot to build trust with prospects\n\n[Product Copilot](/usecases/productcopilot)\n\n--------------------------------------------\n\nSpeed up adoption with a technical assistant in your app\n\nSee how companies implement Mendable\n------------------------------------\n\n\n\n[Langchain Docs](https://python.langchain.com)\n\n-----------------------------------------------\n\nOne of the most popular frameworks for developing AI applications\n\nhttps://python.langchain.com\n\n\n\n[0x Docs](https://0x.org/docs)\n\n-------------------------------\n\n0x offers the core building blocks to create the most powerful Web3 
apps\n\nhttps://0x.org/docs\n\n\n\n[Zenlytics](https://docs.zenlytic.com)\n\n---------------------------------------\n\nSelf-serve analytics tool that helps you answer the deeper questions you have about your data\n\nhttps://docs.zenlytic.com\n\n\n\n[Llama Index](http://gpt-index.readthedocs.io)\n\n-----------------------------------------------\n\nA central interface to connect your LLM’s with external data.\n\nhttp://gpt-index.readthedocs.io\n\n\n\n[Spectrocloud](https://docs.spectrocloud.com/)\n\n-----------------------------------------------\n\nK8s management uniquely built for scale. Manage the lifecycle of any type of cluster.\n\nhttps://docs.spectrocloud.com/\n\n\n\n[Code GPT](https://www.codegpt.co/)\n\n------------------------------------\n\nWith over 450,000 installs, CodeGPT brings AI inside your code editor.\n\nhttps://www.codegpt.co/\n\nAnd many more...\n\nFrom SSO to BYOK\n\nEnterprise-grade security\n-------------------------\n\n#### SOC 2 Type II\n\nMendable is SOC 2 Type II certified. Check out our [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-website)\n for additional information.\n\n#### SSO (SAML, OIDC, OAuth)\n\nSupports SAML 2.0, OpenID Connect, and OAuth 2.0 for single sign-on (SSO) and identity federation.\n\n#### RBAC (Project and chunk level)\n\nRole-based access control to ensure that only the right people have access to the right data.\n\n#### Secure Data Connectors\n\nIntegrate securely to Google Drive, Salesforce, Zendesk and more using OAuth 2.0.\n\n#### BYOK / BYOM\n\nBring your own key or custom model to Mendable to ensure compliance.\n\n#### Rate Limiting\n\nProject and user rate limit protection to prevent abuse and ensure availability.\n\nOver 20+ data connectors\n\nStart by connecting your data\n-----------------------------\n\nMendable offers managed ingestion through a simple online GUI and through our API. 
You can easily add, modify, or delete different types of sources.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nEasily Teach Your Model\n\nCustomize your model\n--------------------\n\nCustomize base model properties\n\nGPT-3.5-Turbo and GPT-4 are supported with a variety of base models coming soon\n\nTraining through answer correction\n\nCorrect the answers generated by the model and it will instantly learn from your feedback\n\nCustom prompt edits\n\nEdit the prompt to prevent hallucinations, maintain voice and format requirements\n\nKeep your data always updated\n\nMendable reingestion process offers CRON jobs and webhooks to keep your data synced and always up to date\n\nSupport Link\n\nHave customers redirected to your customer support link when the bot can't answer their questions\n\nPrivacy-first features\n\nMendable provides custom private, open source LLMs depending on your needs\n\n### Make it perfect for your use case\n\nWe know every uses case is slightly different so the Mendable platform allows you to customize your model to fit your company's needs through multiple features.\n\n* Support for multiple base LLM models (including privacy first models)\n* Training through answer correction\n* Custom prompt edits\n* Model creativity control\n\nTeach Model\n\nContinuous Training\n-------------------\n\nCoach the model by correcting the wrong responses, keeping your chat applications always up to date\n\nMore than just a chatbot\n\nTools and Actions\n-----------------\n\nGive your AI access to tools for augmentation and actions for automation. Integrate with any API\n\n\n\n\n\nReact, Vanilla JS, API\n\nChoose your component\n---------------------\n\nMendable provides a variety of components ranging from search bars, to chat bubbles, to full CLIs built on our API. 
Customize them or easily build your own\n\n.png)\n\n \n import { MendableSearchBar } from '@mendable/search'\n <MendableSearchBar anon_key={MENDABLE_ANON_KEY} />\n \n\nFrom zero to production in minutes\n\nDeploy anywhere\n---------------\n\nDeploy Mendable internally, externally, or both. Our API allows you to send and query data from anywhere.\n\nView Documentation\n\n\n\nMendables integration on Nylas is a goldmine of data. Now, the product team has a direct source of user feedback, questions, and problems. It's amazing!\n\nSaif Khan \\- Product @ NylasKarl Cardenas \\- Director @ SpectroCloudGuillermo Rauch \\- CEO @ Vercel\n\nAI Chat Infrastructure built for production\n\nEnterprise ready out of the box\n-------------------------------\n\nVerified Sources\n----------------\n\nReduce hallucinations by grounding answers with sources from your documentation\n\nEnterprise Grade Security\n-------------------------\n\nOur platform is built for enterprises in mind. We provide RBAC, bring your own model, and SLAs\n\nReady for the whole team\n------------------------\n\nMendable supports single-sign-on so your entire team can train, manage your custom AI\n\nExplore your dashboard\n\nGet insights from usage\n-----------------------\n\nUsage\n\nNumber of chat messages per month\n\n### Understand all interactions\n\nUnravel your users' queries, track their interactions, customize responses, and monitor your product usage effortlessly.\n\n* \\-Gain key insights into user queries\n* \\-Monitor real-time product-user interactions\n* \\-Fine-tune your model for optimized responses\n* \\-Track and evaluate Mendable usage\n\n### Insights beyond conversations\n\nLearn what your users are asking, how they are asking, and their satisfaction level with the answers. 
Teach the model based on the answers rating and improve the model's performance.\n\nOur wall of love\n\nDon't take our word for it\n--------------------------\n\nEmpower your users with AI powered search\n\nBuild an AI technical assistant in minutes\n------------------------------------------\n\nTry it out\n\nFrequently asked questions\n--------------------------\n\nIf you have anything else you want to ask,[reach out to us](mailto:hello@mendable.ai)\n.\n\n* * ### Is it free?\n \n We have a free plan that gives you 500 message credits. It is also free for certain Open source projects. Contact us to see if your project is eligible.\n \n * ### Do you train your AI model with my code?\n \n Currently, Mendable does not look at any of your repository's code. However, in the future we may add it. We will always give you the option to opt out of sharing your data.\n \n* * ### How do I remove the Powered by Mendable?\n \n To remove the Powered by Mendable, you need to upgrade to an enterprise or custom plan. Contact us at [garrett@mendable.ai](mailto:garrett@mendable.ai)\n and we can help you out.\n \n * ### How do I get an anon key?\n \n To get your anon key you need to sign up at [mendable.ai](https://mendable.ai)\n and create a project. Then you can find your anon key in the API Keys section of the dashboard. Anon keys are used for client-side while API keys are used for server-side.\n \n* * ### Which model does Mendable use?\n \n Mendable offers gpt-3.5-turbo, gpt-4, claude-2 and more. If you'd like a custom model, contact us and we can help you out.\n \n * ### Is GPT-4 pricing different?\n \n Yes, right now GPT-4 will cost 3 requests per message instead of 1 (gpt-3.5-turbo). 
That means that instead of 500 messages, you will get around 166 messages if you only use GPT-4.\n \n* * ### Can you correct the AI response?\n \n Yes, Mendable offers a 'teach the model' functionality where you can correct the AI response and it will learn from it.\n \n * ### How can I integrate Mendable with my application?\n \n Probably! Check out the Mendable documentation here [https://docs.mendable.ai](https://docs.mendable.ai)\n to better understand how you can start integrating.\n \n* * ### Is it 100% accurate?\n \n Like Humans, AI will never be 100% accurate. So we can't assure you that every solution will be correct.\n \n * ### How do I cancel my subscription?\n \n Simply log into our platform, go to your account and click on \"Open customer portal\" button. There you will be able to cancel/modify it through Stripe.\n \n* * ### How does Mendable work?\n \n Our application syncs with your documentation and support channels, then uses your docs and previously answered questions to suggest possible answers.\n \n * ### Are you open-source?\n \n Currently not - although we have some open source components and integrations. If you have input here, please message us at.[hello@mendable.ai](mailto:hello@mendable.ai)\n .\n \n* * ### How does Mendable price custom plans?\n \n #### 1\\. Use case\n \n * Mendable differentiates between internal and external use cases.\n * With Mendable, we give you the ability to use our chat bots for a variety of use cases, both for internal efficiency and external communication to your customers.\n \n #### 2\\. Total usage\n \n * For specifically external use cases, you will only pay for the value you're receiving.\n * Mendable will look at the total number of messages sent during a month.\n \n #### 3\\. Custom work\n \n * If there are any special feature requests (custom data connectors, etc.), we are happy to discuss these requirements!\n \n\nWe use tracking cookies to understand how you use the product and help us improve it! 
\nPlease accept cookies to help us improve.\n\nAccept CookiesDecline Cookies\n\n.png)[Mendable](#_)\n\n[Instagram](https://instagram.com/sideguide.dev)\n[Twitter](https://twitter.com/mendableai)\n[GitHub](https://github.com/sideguide)\n[Discord](https://discord.com/invite/kJufGDb7AA)\n\n\n\nDocumentation\n\n* [Getting Started](/signup)\n \n\n* [API Docs](https://docs.mendable.ai)\n \n\n* [Integrations](https://docs.mendable.ai/integrations/slack)\n \n\n* [Examples](https://docs.mendable.ai/examples)\n \n\n* [Tools & Actions](https://docs.mendable.ai/tools)\n \n\nUse Cases\n\n* [Sales Enablement](/usecases/sales-enablement)\n \n\n* [Knowledge Base](/usecases/documentation)\n \n\n* [CS Enablement](/usecases/cs-enablement)\n \n\n* [Product Copilot](/usecases/productcopilot)\n \n\nResources\n\n* [Pricing](/pricing)\n \n\n* [Changelog](https://docs.mendable.ai/changelog)\n \n\n* [Security](/security)\n \n\n* [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-footer)\n \n\nCompany\n\n* [Blog](/blog)\n \n\n* [Contact](mailto:garrett@mendable.ai)\n \n\n© 2024 SideGuide - SideGuide Technologies Inc.\n\n[System Status](https://mendable.betteruptime.com)\n\n[Status](https://mendable.betteruptime.com)\n[Privacy Policy](/privacy-policy)\n[Privacy](/privacy-policy)\n[Terms](/terms-of-conditions)",
|
|
6
|
-
"metadata": {
|
|
7
|
-
"title": "Mendable",
|
|
8
|
-
"description": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide",
|
|
9
|
-
"robots": "follow, index",
|
|
10
|
-
"ogTitle": "Mendable",
|
|
11
|
-
"ogDescription": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide",
|
|
12
|
-
"ogUrl": "https://mendable.ai/",
|
|
13
|
-
"ogImage": "https://mendable.ai/mendable_new_og1.png",
|
|
14
|
-
"ogLocaleAlternate": [],
|
|
15
|
-
"ogSiteName": "Mendable",
|
|
16
|
-
"sourceURL": "https://mendable.ai",
|
|
17
|
-
"sitemap": {
|
|
18
|
-
"changefreq": "hourly"
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
import { describe, expect, jest, test } from '@jest/globals';
|
|
2
|
-
|
|
3
|
-
import FirecrawlApp from '../index';
|
|
4
|
-
import axios from 'axios';
|
|
5
|
-
import { join } from 'path';
|
|
6
|
-
import { readFile } from 'fs/promises';
|
|
7
|
-
|
|
8
|
-
// Mock jest and set the type
|
|
9
|
-
jest.mock('axios');
|
|
10
|
-
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
|
11
|
-
|
|
12
|
-
// Get the fixure data from the JSON file in ./fixtures
|
|
13
|
-
async function loadFixture(name: string): Promise<string> {
|
|
14
|
-
return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
|
18
|
-
|
|
19
|
-
describe('the firecrawl JS SDK', () => {
|
|
20
|
-
|
|
21
|
-
test('Should require an API key only for cloud service', async () => {
|
|
22
|
-
if (API_URL.includes('api.firecrawl.dev')) {
|
|
23
|
-
// Should throw for cloud service
|
|
24
|
-
expect(() => {
|
|
25
|
-
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
|
26
|
-
}).toThrow('No API key provided');
|
|
27
|
-
} else {
|
|
28
|
-
// Should not throw for self-hosted
|
|
29
|
-
expect(() => {
|
|
30
|
-
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
|
31
|
-
}).not.toThrow();
|
|
32
|
-
}
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
test('Should return scraped data from a /scrape API call', async () => {
|
|
36
|
-
const mockData = await loadFixture('scrape');
|
|
37
|
-
mockedAxios.post.mockResolvedValue({
|
|
38
|
-
status: 200,
|
|
39
|
-
data: JSON.parse(mockData),
|
|
40
|
-
});
|
|
41
|
-
|
|
42
|
-
const apiKey = 'YOUR_API_KEY'
|
|
43
|
-
const app = new FirecrawlApp<"v0">({ apiKey });
|
|
44
|
-
// Scrape a single URL
|
|
45
|
-
const url = 'https://mendable.ai';
|
|
46
|
-
const scrapedData = await app.scrapeUrl(url);
|
|
47
|
-
|
|
48
|
-
expect(mockedAxios.post).toHaveBeenCalledTimes(1);
|
|
49
|
-
expect(mockedAxios.post).toHaveBeenCalledWith(
|
|
50
|
-
expect.stringMatching(/^https:\/\/api.firecrawl.dev/),
|
|
51
|
-
expect.objectContaining({ url }),
|
|
52
|
-
expect.objectContaining({ headers: expect.objectContaining({'Authorization': `Bearer ${apiKey}`}) }),
|
|
53
|
-
)
|
|
54
|
-
expect(scrapedData.success).toBe(true);
|
|
55
|
-
expect(scrapedData?.data?.metadata.title).toEqual('Mendable');
|
|
56
|
-
});
|
|
57
|
-
})
|