firecrawl 1.29.3 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -2
- package/README.md +85 -78
- package/audit-ci.jsonc +4 -0
- package/dist/chunk-OIZ6OKY4.js +85 -0
- package/dist/index.cjs +961 -35
- package/dist/index.d.cts +524 -11
- package/dist/index.d.ts +524 -11
- package/dist/index.js +953 -27
- package/dist/package-V5IPFKBE.js +4 -0
- package/package.json +6 -6
- package/src/__tests__/e2e/v2/batch.test.ts +74 -0
- package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
- package/src/__tests__/e2e/v2/extract.test.ts +70 -0
- package/src/__tests__/e2e/v2/map.test.ts +55 -0
- package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
- package/src/__tests__/e2e/v2/search.test.ts +247 -0
- package/src/__tests__/e2e/v2/usage.test.ts +36 -0
- package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
- package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
- package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
- package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
- package/src/__tests__/unit/v2/validation.test.ts +59 -0
- package/src/index.backup.ts +2146 -0
- package/src/index.ts +27 -2134
- package/src/v1/index.ts +2158 -0
- package/src/v2/client.ts +283 -0
- package/src/v2/methods/batch.ts +119 -0
- package/src/v2/methods/crawl.ts +144 -0
- package/src/v2/methods/extract.ts +86 -0
- package/src/v2/methods/map.ts +37 -0
- package/src/v2/methods/scrape.ts +26 -0
- package/src/v2/methods/search.ts +69 -0
- package/src/v2/methods/usage.ts +39 -0
- package/src/v2/types.ts +337 -0
- package/src/v2/utils/errorHandler.ts +18 -0
- package/src/v2/utils/getVersion.ts +14 -0
- package/src/v2/utils/httpClient.ts +99 -0
- package/src/v2/utils/validation.ts +50 -0
- package/src/v2/watcher.ts +159 -0
- package/tsconfig.json +2 -1
- package/dist/package-Z6F7JDXI.js +0 -111
- /package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +0 -0
- /package/src/__tests__/{v1/unit → unit/v1}/monitor-job-status-retry.test.ts +0 -0
package/.env.example
CHANGED
package/README.md
CHANGED
|
@@ -18,171 +18,178 @@ npm install @mendable/firecrawl-js
|
|
|
18
18
|
Here's an example of how to use the SDK with error handling:
|
|
19
19
|
|
|
20
20
|
```js
|
|
21
|
-
import
|
|
21
|
+
import Firecrawl from '@mendable/firecrawl-js';
|
|
22
22
|
|
|
23
|
-
const app = new
|
|
23
|
+
const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
|
|
24
24
|
|
|
25
25
|
// Scrape a website
|
|
26
|
-
const scrapeResponse = await app.
|
|
26
|
+
const scrapeResponse = await app.scrape('https://firecrawl.dev', {
|
|
27
27
|
formats: ['markdown', 'html'],
|
|
28
28
|
});
|
|
29
|
+
console.log(scrapeResponse);
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// Crawl a website
|
|
35
|
-
const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
|
|
31
|
+
// Crawl a website (waiter)
|
|
32
|
+
const crawlResponse = await app.crawl('https://firecrawl.dev', {
|
|
36
33
|
limit: 100,
|
|
37
|
-
scrapeOptions: {
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
console.log(crawlResponse)
|
|
34
|
+
scrapeOptions: { formats: ['markdown', 'html'] },
|
|
35
|
+
pollInterval: 2,
|
|
36
|
+
});
|
|
37
|
+
console.log(crawlResponse);
|
|
43
38
|
```
|
|
44
39
|
|
|
45
40
|
### Scraping a URL
|
|
46
41
|
|
|
47
|
-
To scrape a single URL with error handling, use the `
|
|
42
|
+
To scrape a single URL with error handling, use the `scrape` method. It takes the URL as a parameter and returns the scraped data.
|
|
48
43
|
|
|
49
44
|
```js
|
|
50
|
-
const url =
|
|
51
|
-
const scrapedData = await app.
|
|
45
|
+
const url = 'https://example.com';
|
|
46
|
+
const scrapedData = await app.scrape(url);
|
|
52
47
|
```
|
|
53
48
|
|
|
54
49
|
### Crawling a Website
|
|
55
50
|
|
|
56
|
-
To crawl a website with error handling, use the `
|
|
51
|
+
To crawl a website with error handling, use the `crawl` method. It takes the starting URL and optional parameters, including limits and per‑page `scrapeOptions`.
|
|
57
52
|
|
|
58
53
|
```js
|
|
59
|
-
const crawlResponse = await app.
|
|
54
|
+
const crawlResponse = await app.crawl('https://firecrawl.dev', {
|
|
60
55
|
limit: 100,
|
|
61
|
-
scrapeOptions: {
|
|
62
|
-
|
|
63
|
-
}
|
|
64
|
-
})
|
|
56
|
+
scrapeOptions: { formats: ['markdown', 'html'] },
|
|
57
|
+
});
|
|
65
58
|
```
|
|
66
59
|
|
|
67
60
|
|
|
68
61
|
### Asynchronous Crawl
|
|
69
62
|
|
|
70
|
-
To
|
|
63
|
+
To start an asynchronous crawl, use `startCrawl`. It returns a job ID you can poll with `getCrawlStatus`.
|
|
71
64
|
|
|
72
65
|
```js
|
|
73
|
-
const
|
|
66
|
+
const start = await app.startCrawl('https://mendable.ai', {
|
|
67
|
+
excludePaths: ['blog/*'],
|
|
68
|
+
limit: 5,
|
|
69
|
+
});
|
|
74
70
|
```
|
|
75
71
|
|
|
76
72
|
### Checking Crawl Status
|
|
77
73
|
|
|
78
|
-
To check the status of a crawl job with error handling, use the `
|
|
74
|
+
To check the status of a crawl job with error handling, use the `getCrawlStatus` method. It takes the job ID as a parameter and returns the current status.
|
|
79
75
|
|
|
80
76
|
```js
|
|
81
|
-
const status = await app.
|
|
77
|
+
const status = await app.getCrawlStatus(id);
|
|
82
78
|
```
|
|
83
79
|
|
|
84
|
-
### Extracting structured data from
|
|
80
|
+
### Extracting structured data from URLs
|
|
85
81
|
|
|
86
|
-
|
|
82
|
+
Use `extract` with a prompt and schema. Zod schemas are supported directly.
|
|
87
83
|
|
|
88
84
|
```js
|
|
89
|
-
import
|
|
90
|
-
import { z } from
|
|
85
|
+
import Firecrawl from '@mendable/firecrawl-js';
|
|
86
|
+
import { z } from 'zod';
|
|
91
87
|
|
|
92
|
-
const app = new
|
|
93
|
-
apiKey: "fc-YOUR_API_KEY",
|
|
94
|
-
});
|
|
88
|
+
const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
|
|
95
89
|
|
|
96
|
-
// Define schema to extract contents into
|
|
97
90
|
const schema = z.object({
|
|
98
|
-
|
|
99
|
-
.array(
|
|
100
|
-
z.object({
|
|
101
|
-
title: z.string(),
|
|
102
|
-
points: z.number(),
|
|
103
|
-
by: z.string(),
|
|
104
|
-
commentsURL: z.string(),
|
|
105
|
-
})
|
|
106
|
-
)
|
|
107
|
-
.length(5)
|
|
108
|
-
.describe("Top 5 stories on Hacker News"),
|
|
91
|
+
title: z.string(),
|
|
109
92
|
});
|
|
110
93
|
|
|
111
|
-
const
|
|
112
|
-
|
|
94
|
+
const result = await app.extract({
|
|
95
|
+
urls: ['https://firecrawl.dev'],
|
|
96
|
+
prompt: 'Extract the page title',
|
|
97
|
+
schema,
|
|
98
|
+
showSources: true,
|
|
113
99
|
});
|
|
114
100
|
|
|
115
|
-
console.log(
|
|
101
|
+
console.log(result.data);
|
|
116
102
|
```
|
|
117
103
|
|
|
118
104
|
### Map a Website
|
|
119
105
|
|
|
120
|
-
Use `
|
|
106
|
+
Use `map` to generate a list of URLs from a website. Options let you customize the mapping process, including whether to utilize the sitemap or include subdomains.
|
|
121
107
|
|
|
122
108
|
```js
|
|
123
|
-
const mapResult = await app.
|
|
124
|
-
console.log(mapResult)
|
|
109
|
+
const mapResult = await app.map('https://example.com');
|
|
110
|
+
console.log(mapResult);
|
|
125
111
|
```
|
|
126
112
|
|
|
127
|
-
### Crawl a website with
|
|
113
|
+
### Crawl a website with real‑time updates
|
|
128
114
|
|
|
129
|
-
To
|
|
115
|
+
To receive real‑time updates, start a crawl and attach a watcher.
|
|
130
116
|
|
|
131
117
|
```js
|
|
132
|
-
|
|
133
|
-
const watch =
|
|
118
|
+
const start = await app.startCrawl('https://mendable.ai', { excludePaths: ['blog/*'], limit: 5 });
|
|
119
|
+
const watch = app.watcher(start.id, { kind: 'crawl', pollInterval: 2 });
|
|
134
120
|
|
|
135
|
-
watch.
|
|
136
|
-
|
|
121
|
+
watch.on('document', (doc) => {
|
|
122
|
+
console.log('DOC', doc);
|
|
137
123
|
});
|
|
138
124
|
|
|
139
|
-
watch.
|
|
140
|
-
|
|
125
|
+
watch.on('error', (err) => {
|
|
126
|
+
console.error('ERR', err);
|
|
141
127
|
});
|
|
142
128
|
|
|
143
|
-
watch.
|
|
144
|
-
|
|
129
|
+
watch.on('done', (state) => {
|
|
130
|
+
console.log('DONE', state.status);
|
|
145
131
|
});
|
|
132
|
+
|
|
133
|
+
await watch.start();
|
|
146
134
|
```
|
|
147
135
|
|
|
148
136
|
### Batch scraping multiple URLs
|
|
149
137
|
|
|
150
|
-
To batch scrape multiple URLs with error handling, use the `
|
|
138
|
+
To batch scrape multiple URLs with error handling, use the `batchScrape` method.
|
|
151
139
|
|
|
152
140
|
```js
|
|
153
|
-
const batchScrapeResponse = await app.
|
|
141
|
+
const batchScrapeResponse = await app.batchScrape(['https://firecrawl.dev', 'https://mendable.ai'], {
|
|
154
142
|
formats: ['markdown', 'html'],
|
|
155
|
-
})
|
|
143
|
+
});
|
|
156
144
|
```
|
|
157
145
|
|
|
158
146
|
|
|
159
147
|
#### Asynchronous batch scrape
|
|
160
148
|
|
|
161
|
-
To
|
|
149
|
+
To start an asynchronous batch scrape, use `startBatchScrape` and poll with `getBatchScrapeStatus`.
|
|
162
150
|
|
|
163
151
|
```js
|
|
164
|
-
const asyncBatchScrapeResult = await app.
|
|
152
|
+
const asyncBatchScrapeResult = await app.startBatchScrape(['https://firecrawl.dev', 'https://mendable.ai'], {
|
|
153
|
+
formats: ['markdown', 'html'],
|
|
154
|
+
});
|
|
165
155
|
```
|
|
166
156
|
|
|
167
|
-
#### Batch scrape with
|
|
157
|
+
#### Batch scrape with real‑time updates
|
|
168
158
|
|
|
169
|
-
To use batch scrape with
|
|
159
|
+
To use batch scrape with real‑time updates, start the job and watch it using the watcher.
|
|
170
160
|
|
|
171
161
|
```js
|
|
172
|
-
|
|
173
|
-
const watch =
|
|
162
|
+
const start = await app.startBatchScrape(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
|
|
163
|
+
const watch = app.watcher(start.id, { kind: 'batch', pollInterval: 2 });
|
|
174
164
|
|
|
175
|
-
watch.
|
|
176
|
-
|
|
165
|
+
watch.on('document', (doc) => {
|
|
166
|
+
console.log('DOC', doc);
|
|
177
167
|
});
|
|
178
168
|
|
|
179
|
-
watch.
|
|
180
|
-
|
|
169
|
+
watch.on('error', (err) => {
|
|
170
|
+
console.error('ERR', err);
|
|
181
171
|
});
|
|
182
172
|
|
|
183
|
-
watch.
|
|
184
|
-
|
|
173
|
+
watch.on('done', (state) => {
|
|
174
|
+
console.log('DONE', state.status);
|
|
185
175
|
});
|
|
176
|
+
|
|
177
|
+
await watch.start();
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## v1 compatibility
|
|
181
|
+
|
|
182
|
+
The feature‑frozen v1 is still available under `app.v1` with the original method names.
|
|
183
|
+
|
|
184
|
+
```js
|
|
185
|
+
import Firecrawl from '@mendable/firecrawl-js';
|
|
186
|
+
|
|
187
|
+
const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
|
|
188
|
+
|
|
189
|
+
// v1 methods (feature‑frozen)
|
|
190
|
+
const scrapeV1 = await app.v1.scrapeUrl('https://firecrawl.dev', { formats: ['markdown', 'html'] });
|
|
191
|
+
const crawlV1 = await app.v1.crawlUrl('https://firecrawl.dev', { limit: 100 });
|
|
192
|
+
const mapV1 = await app.v1.mapUrl('https://firecrawl.dev');
|
|
186
193
|
```
|
|
187
194
|
|
|
188
195
|
## Error Handling
|
package/dist/chunk-OIZ6OKY4.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
2
|
+
var __commonJS = (cb, mod) => function __require() {
|
|
3
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
4
|
+
};
|
|
5
|
+
|
|
6
|
+
// package.json
|
|
7
|
+
var require_package = __commonJS({
|
|
8
|
+
"package.json"(exports, module) {
|
|
9
|
+
module.exports = {
|
|
10
|
+
name: "@mendable/firecrawl-js",
|
|
11
|
+
version: "3.0.3",
|
|
12
|
+
description: "JavaScript SDK for Firecrawl API",
|
|
13
|
+
main: "dist/index.js",
|
|
14
|
+
types: "dist/index.d.ts",
|
|
15
|
+
exports: {
|
|
16
|
+
"./package.json": "./package.json",
|
|
17
|
+
".": {
|
|
18
|
+
import: "./dist/index.js",
|
|
19
|
+
default: "./dist/index.cjs"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
type: "module",
|
|
23
|
+
scripts: {
|
|
24
|
+
build: "tsup",
|
|
25
|
+
"build-and-publish": "npm run build && npm publish --access public",
|
|
26
|
+
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
27
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/e2e/v2/*.test.ts --detectOpenHandles",
|
|
28
|
+
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/unit/v2/*.test.ts"
|
|
29
|
+
},
|
|
30
|
+
repository: {
|
|
31
|
+
type: "git",
|
|
32
|
+
url: "git+https://github.com/firecrawl/firecrawl.git"
|
|
33
|
+
},
|
|
34
|
+
author: "Mendable.ai",
|
|
35
|
+
license: "MIT",
|
|
36
|
+
dependencies: {
|
|
37
|
+
axios: "^1.11.0",
|
|
38
|
+
"typescript-event-target": "^1.1.1",
|
|
39
|
+
zod: "^3.23.8",
|
|
40
|
+
"zod-to-json-schema": "^3.23.0"
|
|
41
|
+
},
|
|
42
|
+
bugs: {
|
|
43
|
+
url: "https://github.com/firecrawl/firecrawl/issues"
|
|
44
|
+
},
|
|
45
|
+
homepage: "https://github.com/firecrawl/firecrawl#readme",
|
|
46
|
+
devDependencies: {
|
|
47
|
+
"@jest/globals": "^30.0.5",
|
|
48
|
+
"@types/dotenv": "^8.2.0",
|
|
49
|
+
"@types/jest": "^30.0.0",
|
|
50
|
+
"@types/mocha": "^10.0.6",
|
|
51
|
+
"@types/node": "^20.12.12",
|
|
52
|
+
"@types/uuid": "^9.0.8",
|
|
53
|
+
dotenv: "^16.4.5",
|
|
54
|
+
jest: "^30.0.5",
|
|
55
|
+
"ts-jest": "^29.4.0",
|
|
56
|
+
tsup: "^8.5.0",
|
|
57
|
+
typescript: "^5.4.5",
|
|
58
|
+
uuid: "^9.0.1"
|
|
59
|
+
},
|
|
60
|
+
keywords: [
|
|
61
|
+
"firecrawl",
|
|
62
|
+
"mendable",
|
|
63
|
+
"crawler",
|
|
64
|
+
"web",
|
|
65
|
+
"scraper",
|
|
66
|
+
"api",
|
|
67
|
+
"sdk"
|
|
68
|
+
],
|
|
69
|
+
engines: {
|
|
70
|
+
node: ">=22.0.0"
|
|
71
|
+
},
|
|
72
|
+
pnpm: {
|
|
73
|
+
overrides: {
|
|
74
|
+
"@babel/helpers@<7.26.10": ">=7.26.10",
|
|
75
|
+
"brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
|
|
76
|
+
"brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
export {
|
|
84
|
+
require_package
|
|
85
|
+
};
|