@mendable/firecrawl-js 1.29.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/.env.example +4 -2
  2. package/README.md +85 -78
  3. package/audit-ci.jsonc +4 -0
  4. package/dist/chunk-WNGXI3ZW.js +85 -0
  5. package/dist/index.cjs +961 -35
  6. package/dist/index.d.cts +540 -11
  7. package/dist/index.d.ts +540 -11
  8. package/dist/index.js +953 -27
  9. package/dist/package-KMFB7KZD.js +4 -0
  10. package/package.json +6 -6
  11. package/src/__tests__/e2e/v2/batch.test.ts +74 -0
  12. package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
  13. package/src/__tests__/e2e/v2/extract.test.ts +70 -0
  14. package/src/__tests__/e2e/v2/map.test.ts +55 -0
  15. package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
  16. package/src/__tests__/e2e/v2/search.test.ts +247 -0
  17. package/src/__tests__/e2e/v2/usage.test.ts +36 -0
  18. package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
  19. package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
  20. package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
  21. package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
  22. package/src/__tests__/unit/v2/validation.test.ts +59 -0
  23. package/src/index.backup.ts +2146 -0
  24. package/src/index.ts +27 -2134
  25. package/src/v1/index.ts +2158 -0
  26. package/src/v2/client.ts +283 -0
  27. package/src/v2/methods/batch.ts +119 -0
  28. package/src/v2/methods/crawl.ts +144 -0
  29. package/src/v2/methods/extract.ts +86 -0
  30. package/src/v2/methods/map.ts +37 -0
  31. package/src/v2/methods/scrape.ts +26 -0
  32. package/src/v2/methods/search.ts +69 -0
  33. package/src/v2/methods/usage.ts +39 -0
  34. package/src/v2/types.ts +355 -0
  35. package/src/v2/utils/errorHandler.ts +18 -0
  36. package/src/v2/utils/getVersion.ts +14 -0
  37. package/src/v2/utils/httpClient.ts +99 -0
  38. package/src/v2/utils/validation.ts +50 -0
  39. package/src/v2/watcher.ts +159 -0
  40. package/tsconfig.json +2 -1
  41. package/dist/package-Z6F7JDXI.js +0 -111
  42. /package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +0 -0
  43. /package/src/__tests__/{v1/unit → unit/v1}/monitor-job-status-retry.test.ts +0 -0
package/.env.example CHANGED
@@ -1,3 +1,5 @@
1
- API_URL=http://localhost:3002
2
- TEST_API_KEY=fc-YOUR_API_KEY
1
+ FIRECRAWL_API_KEY=<your api key>
2
+ FIRECRAWL_API_URL=https://api.firecrawl.dev
3
3
 
4
+ # TESTING PURPOSE:
5
+ IDMUX_URL=
package/README.md CHANGED
@@ -18,171 +18,178 @@ npm install @mendable/firecrawl-js
18
18
  Here's an example of how to use the SDK with error handling:
19
19
 
20
20
  ```js
21
- import FirecrawlApp, { CrawlParams, CrawlStatusResponse } from '@mendable/firecrawl-js';
21
+ import Firecrawl from '@mendable/firecrawl-js';
22
22
 
23
- const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
23
+ const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
24
24
 
25
25
  // Scrape a website
26
- const scrapeResponse = await app.scrapeUrl('https://firecrawl.dev', {
26
+ const scrapeResponse = await app.scrape('https://firecrawl.dev', {
27
27
  formats: ['markdown', 'html'],
28
28
  });
29
+ console.log(scrapeResponse);
29
30
 
30
- if (scrapeResponse) {
31
- console.log(scrapeResponse)
32
- }
33
-
34
- // Crawl a website
35
- const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
31
+ // Crawl a website (waiter)
32
+ const crawlResponse = await app.crawl('https://firecrawl.dev', {
36
33
  limit: 100,
37
- scrapeOptions: {
38
- formats: ['markdown', 'html'],
39
- }
40
- })
41
-
42
- console.log(crawlResponse)
34
+ scrapeOptions: { formats: ['markdown', 'html'] },
35
+ pollInterval: 2,
36
+ });
37
+ console.log(crawlResponse);
43
38
  ```
44
39
 
45
40
  ### Scraping a URL
46
41
 
47
- To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
42
+ To scrape a single URL with error handling, use the `scrape` method. It takes the URL as a parameter and returns the scraped data.
48
43
 
49
44
  ```js
50
- const url = "https://example.com";
51
- const scrapedData = await app.scrapeUrl(url);
45
+ const url = 'https://example.com';
46
+ const scrapedData = await app.scrape(url);
52
47
  ```
53
48
 
54
49
  ### Crawling a Website
55
50
 
56
- To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
51
+ To crawl a website with error handling, use the `crawl` method. It takes the starting URL and optional parameters, including limits and per‑page `scrapeOptions`.
57
52
 
58
53
  ```js
59
- const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
54
+ const crawlResponse = await app.crawl('https://firecrawl.dev', {
60
55
  limit: 100,
61
- scrapeOptions: {
62
- formats: ['markdown', 'html'],
63
- }
64
- })
56
+ scrapeOptions: { formats: ['markdown', 'html'] },
57
+ });
65
58
  ```
66
59
 
67
60
 
68
61
  ### Asynchronous Crawl
69
62
 
70
- To initiate an asynchronous crawl of a website, utilize the AsyncCrawlURL method. This method requires the starting URL and optional parameters as inputs. The params argument enables you to define various settings for the asynchronous crawl, such as the maximum number of pages to crawl, permitted domains, and the output format. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the crawl.
63
+ To start an asynchronous crawl, use `startCrawl`. It returns a job ID you can poll with `getCrawlStatus`.
71
64
 
72
65
  ```js
73
- const asyncCrawlResult = await app.asyncCrawlUrl('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
66
+ const start = await app.startCrawl('https://mendable.ai', {
67
+ excludePaths: ['blog/*'],
68
+ limit: 5,
69
+ });
74
70
  ```
75
71
 
76
72
  ### Checking Crawl Status
77
73
 
78
- To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job`
74
+ To check the status of a crawl job with error handling, use the `getCrawlStatus` method. It takes the job ID as a parameter and returns the current status.
79
75
 
80
76
  ```js
81
- const status = await app.checkCrawlStatus(id);
77
+ const status = await app.getCrawlStatus(id);
82
78
  ```
83
79
 
84
- ### Extracting structured data from a URL
80
+ ### Extracting structured data from URLs
85
81
 
86
- With LLM extraction, you can easily extract structured data from any URL. We support zod schema to make it easier for you too. Here is how you to use it:
82
+ Use `extract` with a prompt and schema. Zod schemas are supported directly.
87
83
 
88
84
  ```js
89
- import FirecrawlApp from "@mendable/firecrawl-js";
90
- import { z } from "zod";
85
+ import Firecrawl from '@mendable/firecrawl-js';
86
+ import { z } from 'zod';
91
87
 
92
- const app = new FirecrawlApp({
93
- apiKey: "fc-YOUR_API_KEY",
94
- });
88
+ const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
95
89
 
96
- // Define schema to extract contents into
97
90
  const schema = z.object({
98
- top: z
99
- .array(
100
- z.object({
101
- title: z.string(),
102
- points: z.number(),
103
- by: z.string(),
104
- commentsURL: z.string(),
105
- })
106
- )
107
- .length(5)
108
- .describe("Top 5 stories on Hacker News"),
91
+ title: z.string(),
109
92
  });
110
93
 
111
- const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", {
112
- extractorOptions: { extractionSchema: schema },
94
+ const result = await app.extract({
95
+ urls: ['https://firecrawl.dev'],
96
+ prompt: 'Extract the page title',
97
+ schema,
98
+ showSources: true,
113
99
  });
114
100
 
115
- console.log(scrapeResult.data["llm_extraction"]);
101
+ console.log(result.data);
116
102
  ```
117
103
 
118
104
  ### Map a Website
119
105
 
120
- Use `map_url` to generate a list of URLs from a website. The `params` argument let you customize the mapping process, including options to exclude subdomains or to utilize the sitemap.
106
+ Use `map` to generate a list of URLs from a website. Options let you customize the mapping process, including whether to utilize the sitemap or include subdomains.
121
107
 
122
108
  ```js
123
- const mapResult = await app.mapUrl('https://example.com') as MapResponse;
124
- console.log(mapResult)
109
+ const mapResult = await app.map('https://example.com');
110
+ console.log(mapResult);
125
111
  ```
126
112
 
127
- ### Crawl a website with WebSockets
113
+ ### Crawl a website with real‑time updates
128
114
 
129
- To crawl a website with WebSockets, use the `crawlUrlAndWatch` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
115
+ To receive real‑time updates, start a crawl and attach a watcher.
130
116
 
131
117
  ```js
132
- // Crawl a website with WebSockets:
133
- const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
118
+ const start = await app.startCrawl('https://mendable.ai', { excludePaths: ['blog/*'], limit: 5 });
119
+ const watch = app.watcher(start.id, { kind: 'crawl', pollInterval: 2 });
134
120
 
135
- watch.addEventListener("document", doc => {
136
- console.log("DOC", doc.detail);
121
+ watch.on('document', (doc) => {
122
+ console.log('DOC', doc);
137
123
  });
138
124
 
139
- watch.addEventListener("error", err => {
140
- console.error("ERR", err.detail.error);
125
+ watch.on('error', (err) => {
126
+ console.error('ERR', err);
141
127
  });
142
128
 
143
- watch.addEventListener("done", state => {
144
- console.log("DONE", state.detail.status);
129
+ watch.on('done', (state) => {
130
+ console.log('DONE', state.status);
145
131
  });
132
+
133
+ await watch.start();
146
134
  ```
147
135
 
148
136
  ### Batch scraping multiple URLs
149
137
 
150
- To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.
138
+ To batch scrape multiple URLs with error handling, use the `batchScrape` method.
151
139
 
152
140
  ```js
153
- const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], {
141
+ const batchScrapeResponse = await app.batchScrape(['https://firecrawl.dev', 'https://mendable.ai'], {
154
142
  formats: ['markdown', 'html'],
155
- })
143
+ });
156
144
  ```
157
145
 
158
146
 
159
147
  #### Asynchronous batch scrape
160
148
 
161
- To initiate an asynchronous batch scrape, utilize the `asyncBatchScrapeUrls` method. This method requires the starting URLs and optional parameters as inputs. The params argument enables you to define various settings for the scrape, such as the output formats. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the batch scrape.
149
+ To start an asynchronous batch scrape, use `startBatchScrape` and poll with `getBatchScrapeStatus`.
162
150
 
163
151
  ```js
164
- const asyncBatchScrapeResult = await app.asyncBatchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
152
+ const asyncBatchScrapeResult = await app.startBatchScrape(['https://firecrawl.dev', 'https://mendable.ai'], {
153
+ formats: ['markdown', 'html'],
154
+ });
165
155
  ```
166
156
 
167
- #### Batch scrape with WebSockets
157
+ #### Batch scrape with real‑time updates
168
158
 
169
- To use batch scrape with WebSockets, use the `batchScrapeUrlsAndWatch` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.
159
+ To use batch scrape with real‑time updates, start the job and watch it using the watcher.
170
160
 
171
161
  ```js
172
- // Batch scrape multiple URLs with WebSockets:
173
- const watch = await app.batchScrapeUrlsAndWatch(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
162
+ const start = await app.startBatchScrape(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
163
+ const watch = app.watcher(start.id, { kind: 'batch', pollInterval: 2 });
174
164
 
175
- watch.addEventListener("document", doc => {
176
- console.log("DOC", doc.detail);
165
+ watch.on('document', (doc) => {
166
+ console.log('DOC', doc);
177
167
  });
178
168
 
179
- watch.addEventListener("error", err => {
180
- console.error("ERR", err.detail.error);
169
+ watch.on('error', (err) => {
170
+ console.error('ERR', err);
181
171
  });
182
172
 
183
- watch.addEventListener("done", state => {
184
- console.log("DONE", state.detail.status);
173
+ watch.on('done', (state) => {
174
+ console.log('DONE', state.status);
185
175
  });
176
+
177
+ await watch.start();
178
+ ```
179
+
180
+ ## v1 compatibility
181
+
182
+ The feature‑frozen v1 is still available under `app.v1` with the original method names.
183
+
184
+ ```js
185
+ import Firecrawl from '@mendable/firecrawl-js';
186
+
187
+ const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
188
+
189
+ // v1 methods (feature‑frozen)
190
+ const scrapeV1 = await app.v1.scrapeUrl('https://firecrawl.dev', { formats: ['markdown', 'html'] });
191
+ const crawlV1 = await app.v1.crawlUrl('https://firecrawl.dev', { limit: 100 });
192
+ const mapV1 = await app.v1.mapUrl('https://firecrawl.dev');
186
193
  ```
187
194
 
188
195
  ## Error Handling
package/audit-ci.jsonc ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json",
3
+ "low": true
4
+ }
package/dist/chunk-WNGXI3ZW.js ADDED
@@ -0,0 +1,85 @@
1
+ var __getOwnPropNames = Object.getOwnPropertyNames;
2
+ var __commonJS = (cb, mod) => function __require() {
3
+ return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
4
+ };
5
+
6
+ // package.json
7
+ var require_package = __commonJS({
8
+ "package.json"(exports, module) {
9
+ module.exports = {
10
+ name: "@mendable/firecrawl-js",
11
+ version: "3.1.0",
12
+ description: "JavaScript SDK for Firecrawl API",
13
+ main: "dist/index.js",
14
+ types: "dist/index.d.ts",
15
+ exports: {
16
+ "./package.json": "./package.json",
17
+ ".": {
18
+ import: "./dist/index.js",
19
+ default: "./dist/index.cjs"
20
+ }
21
+ },
22
+ type: "module",
23
+ scripts: {
24
+ build: "tsup",
25
+ "build-and-publish": "npm run build && npm publish --access public",
26
+ "publish-beta": "npm run build && npm publish --access public --tag beta",
27
+ test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/e2e/v2/*.test.ts --detectOpenHandles",
28
+ "test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/unit/v2/*.test.ts"
29
+ },
30
+ repository: {
31
+ type: "git",
32
+ url: "git+https://github.com/firecrawl/firecrawl.git"
33
+ },
34
+ author: "Mendable.ai",
35
+ license: "MIT",
36
+ dependencies: {
37
+ axios: "^1.11.0",
38
+ "typescript-event-target": "^1.1.1",
39
+ zod: "^3.23.8",
40
+ "zod-to-json-schema": "^3.23.0"
41
+ },
42
+ bugs: {
43
+ url: "https://github.com/firecrawl/firecrawl/issues"
44
+ },
45
+ homepage: "https://github.com/firecrawl/firecrawl#readme",
46
+ devDependencies: {
47
+ "@jest/globals": "^30.0.5",
48
+ "@types/dotenv": "^8.2.0",
49
+ "@types/jest": "^30.0.0",
50
+ "@types/mocha": "^10.0.6",
51
+ "@types/node": "^20.12.12",
52
+ "@types/uuid": "^9.0.8",
53
+ dotenv: "^16.4.5",
54
+ jest: "^30.0.5",
55
+ "ts-jest": "^29.4.0",
56
+ tsup: "^8.5.0",
57
+ typescript: "^5.4.5",
58
+ uuid: "^9.0.1"
59
+ },
60
+ keywords: [
61
+ "firecrawl",
62
+ "mendable",
63
+ "crawler",
64
+ "web",
65
+ "scraper",
66
+ "api",
67
+ "sdk"
68
+ ],
69
+ engines: {
70
+ node: ">=22.0.0"
71
+ },
72
+ pnpm: {
73
+ overrides: {
74
+ "@babel/helpers@<7.26.10": ">=7.26.10",
75
+ "brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
76
+ "brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
77
+ }
78
+ }
79
+ };
80
+ }
81
+ });
82
+
83
+ export {
84
+ require_package
85
+ };