@mendable/firecrawl-js 1.29.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/.env.example +4 -2
  2. package/README.md +85 -78
  3. package/audit-ci.jsonc +4 -0
  4. package/dist/chunk-WNGXI3ZW.js +85 -0
  5. package/dist/index.cjs +961 -35
  6. package/dist/index.d.cts +540 -11
  7. package/dist/index.d.ts +540 -11
  8. package/dist/index.js +953 -27
  9. package/dist/package-KMFB7KZD.js +4 -0
  10. package/package.json +6 -6
  11. package/src/__tests__/e2e/v2/batch.test.ts +74 -0
  12. package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
  13. package/src/__tests__/e2e/v2/extract.test.ts +70 -0
  14. package/src/__tests__/e2e/v2/map.test.ts +55 -0
  15. package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
  16. package/src/__tests__/e2e/v2/search.test.ts +247 -0
  17. package/src/__tests__/e2e/v2/usage.test.ts +36 -0
  18. package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
  19. package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
  20. package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
  21. package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
  22. package/src/__tests__/unit/v2/validation.test.ts +59 -0
  23. package/src/index.backup.ts +2146 -0
  24. package/src/index.ts +27 -2134
  25. package/src/v1/index.ts +2158 -0
  26. package/src/v2/client.ts +283 -0
  27. package/src/v2/methods/batch.ts +119 -0
  28. package/src/v2/methods/crawl.ts +144 -0
  29. package/src/v2/methods/extract.ts +86 -0
  30. package/src/v2/methods/map.ts +37 -0
  31. package/src/v2/methods/scrape.ts +26 -0
  32. package/src/v2/methods/search.ts +69 -0
  33. package/src/v2/methods/usage.ts +39 -0
  34. package/src/v2/types.ts +355 -0
  35. package/src/v2/utils/errorHandler.ts +18 -0
  36. package/src/v2/utils/getVersion.ts +14 -0
  37. package/src/v2/utils/httpClient.ts +99 -0
  38. package/src/v2/utils/validation.ts +50 -0
  39. package/src/v2/watcher.ts +159 -0
  40. package/tsconfig.json +2 -1
  41. package/dist/package-Z6F7JDXI.js +0 -111
  42. /package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +0 -0
  43. /package/src/__tests__/{v1/unit → unit/v1}/monitor-job-status-retry.test.ts +0 -0
package/.env.example CHANGED
@@ -1,3 +1,5 @@
1
- API_URL=http://localhost:3002
2
- TEST_API_KEY=fc-YOUR_API_KEY
1
+ FIRECRAWL_API_KEY=<your api key>
2
+ FIRECRAWL_API_URL=https://api.firecrawl.dev
3
3
 
4
+ # TESTING PURPOSE:
5
+ IDMUX_URL=
package/README.md CHANGED
@@ -18,171 +18,178 @@ npm install @mendable/firecrawl-js
18
18
  Here's an example of how to use the SDK with error handling:
19
19
 
20
20
  ```js
21
- import FirecrawlApp, { CrawlParams, CrawlStatusResponse } from '@mendable/firecrawl-js';
21
+ import Firecrawl from '@mendable/firecrawl-js';
22
22
 
23
- const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
23
+ const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
24
24
 
25
25
  // Scrape a website
26
- const scrapeResponse = await app.scrapeUrl('https://firecrawl.dev', {
26
+ const scrapeResponse = await app.scrape('https://firecrawl.dev', {
27
27
  formats: ['markdown', 'html'],
28
28
  });
29
+ console.log(scrapeResponse);
29
30
 
30
- if (scrapeResponse) {
31
- console.log(scrapeResponse)
32
- }
33
-
34
- // Crawl a website
35
- const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
31
+ // Crawl a website (waiter)
32
+ const crawlResponse = await app.crawl('https://firecrawl.dev', {
36
33
  limit: 100,
37
- scrapeOptions: {
38
- formats: ['markdown', 'html'],
39
- }
40
- })
41
-
42
- console.log(crawlResponse)
34
+ scrapeOptions: { formats: ['markdown', 'html'] },
35
+ pollInterval: 2,
36
+ });
37
+ console.log(crawlResponse);
43
38
  ```
44
39
 
45
40
  ### Scraping a URL
46
41
 
47
- To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
42
+ To scrape a single URL with error handling, use the `scrape` method. It takes the URL as a parameter and returns the scraped data.
48
43
 
49
44
  ```js
50
- const url = "https://example.com";
51
- const scrapedData = await app.scrapeUrl(url);
45
+ const url = 'https://example.com';
46
+ const scrapedData = await app.scrape(url);
52
47
  ```
53
48
 
54
49
  ### Crawling a Website
55
50
 
56
- To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
51
+ To crawl a website with error handling, use the `crawl` method. It takes the starting URL and optional parameters, including limits and per‑page `scrapeOptions`.
57
52
 
58
53
  ```js
59
- const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
54
+ const crawlResponse = await app.crawl('https://firecrawl.dev', {
60
55
  limit: 100,
61
- scrapeOptions: {
62
- formats: ['markdown', 'html'],
63
- }
64
- })
56
+ scrapeOptions: { formats: ['markdown', 'html'] },
57
+ });
65
58
  ```
66
59
 
67
60
 
68
61
  ### Asynchronous Crawl
69
62
 
70
- To initiate an asynchronous crawl of a website, utilize the AsyncCrawlURL method. This method requires the starting URL and optional parameters as inputs. The params argument enables you to define various settings for the asynchronous crawl, such as the maximum number of pages to crawl, permitted domains, and the output format. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the crawl.
63
+ To start an asynchronous crawl, use `startCrawl`. It returns a job ID you can poll with `getCrawlStatus`.
71
64
 
72
65
  ```js
73
- const asyncCrawlResult = await app.asyncCrawlUrl('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
66
+ const start = await app.startCrawl('https://mendable.ai', {
67
+ excludePaths: ['blog/*'],
68
+ limit: 5,
69
+ });
74
70
  ```
75
71
 
76
72
  ### Checking Crawl Status
77
73
 
78
- To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job`
74
+ To check the status of a crawl job with error handling, use the `getCrawlStatus` method. It takes the job ID as a parameter and returns the current status.
79
75
 
80
76
  ```js
81
- const status = await app.checkCrawlStatus(id);
77
+ const status = await app.getCrawlStatus(id);
82
78
  ```
83
79
 
84
- ### Extracting structured data from a URL
80
+ ### Extracting structured data from URLs
85
81
 
86
- With LLM extraction, you can easily extract structured data from any URL. We support zod schema to make it easier for you too. Here is how you to use it:
82
+ Use `extract` with a prompt and schema. Zod schemas are supported directly.
87
83
 
88
84
  ```js
89
- import FirecrawlApp from "@mendable/firecrawl-js";
90
- import { z } from "zod";
85
+ import Firecrawl from '@mendable/firecrawl-js';
86
+ import { z } from 'zod';
91
87
 
92
- const app = new FirecrawlApp({
93
- apiKey: "fc-YOUR_API_KEY",
94
- });
88
+ const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
95
89
 
96
- // Define schema to extract contents into
97
90
  const schema = z.object({
98
- top: z
99
- .array(
100
- z.object({
101
- title: z.string(),
102
- points: z.number(),
103
- by: z.string(),
104
- commentsURL: z.string(),
105
- })
106
- )
107
- .length(5)
108
- .describe("Top 5 stories on Hacker News"),
91
+ title: z.string(),
109
92
  });
110
93
 
111
- const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", {
112
- extractorOptions: { extractionSchema: schema },
94
+ const result = await app.extract({
95
+ urls: ['https://firecrawl.dev'],
96
+ prompt: 'Extract the page title',
97
+ schema,
98
+ showSources: true,
113
99
  });
114
100
 
115
- console.log(scrapeResult.data["llm_extraction"]);
101
+ console.log(result.data);
116
102
  ```
117
103
 
118
104
  ### Map a Website
119
105
 
120
- Use `map_url` to generate a list of URLs from a website. The `params` argument let you customize the mapping process, including options to exclude subdomains or to utilize the sitemap.
106
+ Use `map` to generate a list of URLs from a website. Options let you customize the mapping process, including whether to utilize the sitemap or include subdomains.
121
107
 
122
108
  ```js
123
- const mapResult = await app.mapUrl('https://example.com') as MapResponse;
124
- console.log(mapResult)
109
+ const mapResult = await app.map('https://example.com');
110
+ console.log(mapResult);
125
111
  ```
126
112
 
127
- ### Crawl a website with WebSockets
113
+ ### Crawl a website with real‑time updates
128
114
 
129
- To crawl a website with WebSockets, use the `crawlUrlAndWatch` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
115
+ To receive real‑time updates, start a crawl and attach a watcher.
130
116
 
131
117
  ```js
132
- // Crawl a website with WebSockets:
133
- const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
118
+ const start = await app.startCrawl('https://mendable.ai', { excludePaths: ['blog/*'], limit: 5 });
119
+ const watch = app.watcher(start.id, { kind: 'crawl', pollInterval: 2 });
134
120
 
135
- watch.addEventListener("document", doc => {
136
- console.log("DOC", doc.detail);
121
+ watch.on('document', (doc) => {
122
+ console.log('DOC', doc);
137
123
  });
138
124
 
139
- watch.addEventListener("error", err => {
140
- console.error("ERR", err.detail.error);
125
+ watch.on('error', (err) => {
126
+ console.error('ERR', err);
141
127
  });
142
128
 
143
- watch.addEventListener("done", state => {
144
- console.log("DONE", state.detail.status);
129
+ watch.on('done', (state) => {
130
+ console.log('DONE', state.status);
145
131
  });
132
+
133
+ await watch.start();
146
134
  ```
147
135
 
148
136
  ### Batch scraping multiple URLs
149
137
 
150
- To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.
138
+ To batch scrape multiple URLs with error handling, use the `batchScrape` method.
151
139
 
152
140
  ```js
153
- const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], {
141
+ const batchScrapeResponse = await app.batchScrape(['https://firecrawl.dev', 'https://mendable.ai'], {
154
142
  formats: ['markdown', 'html'],
155
- })
143
+ });
156
144
  ```
157
145
 
158
146
 
159
147
  #### Asynchronous batch scrape
160
148
 
161
- To initiate an asynchronous batch scrape, utilize the `asyncBatchScrapeUrls` method. This method requires the starting URLs and optional parameters as inputs. The params argument enables you to define various settings for the scrape, such as the output formats. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the batch scrape.
149
+ To start an asynchronous batch scrape, use `startBatchScrape` and poll with `getBatchScrapeStatus`.
162
150
 
163
151
  ```js
164
- const asyncBatchScrapeResult = await app.asyncBatchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
152
+ const asyncBatchScrapeResult = await app.startBatchScrape(['https://firecrawl.dev', 'https://mendable.ai'], {
153
+ formats: ['markdown', 'html'],
154
+ });
165
155
  ```
166
156
 
167
- #### Batch scrape with WebSockets
157
+ #### Batch scrape with real‑time updates
168
158
 
169
- To use batch scrape with WebSockets, use the `batchScrapeUrlsAndWatch` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.
159
+ To use batch scrape with real‑time updates, start the job and watch it using the watcher.
170
160
 
171
161
  ```js
172
- // Batch scrape multiple URLs with WebSockets:
173
- const watch = await app.batchScrapeUrlsAndWatch(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
162
+ const start = await app.startBatchScrape(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
163
+ const watch = app.watcher(start.id, { kind: 'batch', pollInterval: 2 });
174
164
 
175
- watch.addEventListener("document", doc => {
176
- console.log("DOC", doc.detail);
165
+ watch.on('document', (doc) => {
166
+ console.log('DOC', doc);
177
167
  });
178
168
 
179
- watch.addEventListener("error", err => {
180
- console.error("ERR", err.detail.error);
169
+ watch.on('error', (err) => {
170
+ console.error('ERR', err);
181
171
  });
182
172
 
183
- watch.addEventListener("done", state => {
184
- console.log("DONE", state.detail.status);
173
+ watch.on('done', (state) => {
174
+ console.log('DONE', state.status);
185
175
  });
176
+
177
+ await watch.start();
178
+ ```
179
+
180
+ ## v1 compatibility
181
+
182
+ The feature‑frozen v1 is still available under `app.v1` with the original method names.
183
+
184
+ ```js
185
+ import Firecrawl from '@mendable/firecrawl-js';
186
+
187
+ const app = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });
188
+
189
+ // v1 methods (feature‑frozen)
190
+ const scrapeV1 = await app.v1.scrapeUrl('https://firecrawl.dev', { formats: ['markdown', 'html'] });
191
+ const crawlV1 = await app.v1.crawlUrl('https://firecrawl.dev', { limit: 100 });
192
+ const mapV1 = await app.v1.mapUrl('https://firecrawl.dev');
186
193
  ```
187
194
 
188
195
  ## Error Handling
package/audit-ci.jsonc ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json",
3
+ "low": true
4
+ }
package/dist/chunk-WNGXI3ZW.js ADDED
@@ -0,0 +1,85 @@
1
+ var __getOwnPropNames = Object.getOwnPropertyNames;
2
+ var __commonJS = (cb, mod) => function __require() {
3
+ return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
4
+ };
5
+
6
+ // package.json
7
+ var require_package = __commonJS({
8
+ "package.json"(exports, module) {
9
+ module.exports = {
10
+ name: "@mendable/firecrawl-js",
11
+ version: "3.1.0",
12
+ description: "JavaScript SDK for Firecrawl API",
13
+ main: "dist/index.js",
14
+ types: "dist/index.d.ts",
15
+ exports: {
16
+ "./package.json": "./package.json",
17
+ ".": {
18
+ import: "./dist/index.js",
19
+ default: "./dist/index.cjs"
20
+ }
21
+ },
22
+ type: "module",
23
+ scripts: {
24
+ build: "tsup",
25
+ "build-and-publish": "npm run build && npm publish --access public",
26
+ "publish-beta": "npm run build && npm publish --access public --tag beta",
27
+ test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/e2e/v2/*.test.ts --detectOpenHandles",
28
+ "test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/unit/v2/*.test.ts"
29
+ },
30
+ repository: {
31
+ type: "git",
32
+ url: "git+https://github.com/firecrawl/firecrawl.git"
33
+ },
34
+ author: "Mendable.ai",
35
+ license: "MIT",
36
+ dependencies: {
37
+ axios: "^1.11.0",
38
+ "typescript-event-target": "^1.1.1",
39
+ zod: "^3.23.8",
40
+ "zod-to-json-schema": "^3.23.0"
41
+ },
42
+ bugs: {
43
+ url: "https://github.com/firecrawl/firecrawl/issues"
44
+ },
45
+ homepage: "https://github.com/firecrawl/firecrawl#readme",
46
+ devDependencies: {
47
+ "@jest/globals": "^30.0.5",
48
+ "@types/dotenv": "^8.2.0",
49
+ "@types/jest": "^30.0.0",
50
+ "@types/mocha": "^10.0.6",
51
+ "@types/node": "^20.12.12",
52
+ "@types/uuid": "^9.0.8",
53
+ dotenv: "^16.4.5",
54
+ jest: "^30.0.5",
55
+ "ts-jest": "^29.4.0",
56
+ tsup: "^8.5.0",
57
+ typescript: "^5.4.5",
58
+ uuid: "^9.0.1"
59
+ },
60
+ keywords: [
61
+ "firecrawl",
62
+ "mendable",
63
+ "crawler",
64
+ "web",
65
+ "scraper",
66
+ "api",
67
+ "sdk"
68
+ ],
69
+ engines: {
70
+ node: ">=22.0.0"
71
+ },
72
+ pnpm: {
73
+ overrides: {
74
+ "@babel/helpers@<7.26.10": ">=7.26.10",
75
+ "brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
76
+ "brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
77
+ }
78
+ }
79
+ };
80
+ }
81
+ });
82
+
83
+ export {
84
+ require_package
85
+ };