@mendable/firecrawl 3.2.1 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-QPAPMZLC.js → chunk-Y3QF4XAJ.js} +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.d.cts +16 -3
- package/dist/index.d.ts +16 -3
- package/dist/index.js +2 -2
- package/dist/{package-VNFDXLYR.js → package-LI2S3JCZ.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/e2e/v2/scrape.test.ts +33 -0
- package/src/v2/types.ts +18 -2
|
@@ -8,7 +8,7 @@ var require_package = __commonJS({
|
|
|
8
8
|
"package.json"(exports, module) {
|
|
9
9
|
module.exports = {
|
|
10
10
|
name: "@mendable/firecrawl-js",
|
|
11
|
-
version: "3.2.1",
|
|
11
|
+
version: "3.3.0",
|
|
12
12
|
description: "JavaScript SDK for Firecrawl API",
|
|
13
13
|
main: "dist/index.js",
|
|
14
14
|
types: "dist/index.d.ts",
|
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "3.2.1",
|
|
38
|
+
version: "3.3.0",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
package/dist/index.d.cts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "screenshot" | "summary" | "changeTracking" | "json";
|
|
7
|
+
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes";
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -33,7 +33,14 @@ interface ChangeTrackingFormat extends Format {
|
|
|
33
33
|
prompt?: string;
|
|
34
34
|
tag?: string;
|
|
35
35
|
}
|
|
36
|
-
|
|
36
|
+
interface AttributesFormat extends Format {
|
|
37
|
+
type: "attributes";
|
|
38
|
+
selectors: Array<{
|
|
39
|
+
selector: string;
|
|
40
|
+
attribute: string;
|
|
41
|
+
}>;
|
|
42
|
+
}
|
|
43
|
+
type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat;
|
|
37
44
|
interface LocationConfig {
|
|
38
45
|
country?: string;
|
|
39
46
|
languages?: string[];
|
|
@@ -133,7 +140,13 @@ interface Document {
|
|
|
133
140
|
summary?: string;
|
|
134
141
|
metadata?: DocumentMetadata;
|
|
135
142
|
links?: string[];
|
|
143
|
+
images?: string[];
|
|
136
144
|
screenshot?: string;
|
|
145
|
+
attributes?: Array<{
|
|
146
|
+
selector: string;
|
|
147
|
+
attribute: string;
|
|
148
|
+
values: string[];
|
|
149
|
+
}>;
|
|
137
150
|
actions?: Record<string, unknown>;
|
|
138
151
|
warning?: string;
|
|
139
152
|
changeTracking?: Record<string, unknown>;
|
|
@@ -1348,4 +1361,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
1348
1361
|
get v1(): FirecrawlApp;
|
|
1349
1362
|
}
|
|
1350
1363
|
|
|
1351
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
1364
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "screenshot" | "summary" | "changeTracking" | "json";
|
|
7
|
+
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes";
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -33,7 +33,14 @@ interface ChangeTrackingFormat extends Format {
|
|
|
33
33
|
prompt?: string;
|
|
34
34
|
tag?: string;
|
|
35
35
|
}
|
|
36
|
-
|
|
36
|
+
interface AttributesFormat extends Format {
|
|
37
|
+
type: "attributes";
|
|
38
|
+
selectors: Array<{
|
|
39
|
+
selector: string;
|
|
40
|
+
attribute: string;
|
|
41
|
+
}>;
|
|
42
|
+
}
|
|
43
|
+
type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat;
|
|
37
44
|
interface LocationConfig {
|
|
38
45
|
country?: string;
|
|
39
46
|
languages?: string[];
|
|
@@ -133,7 +140,13 @@ interface Document {
|
|
|
133
140
|
summary?: string;
|
|
134
141
|
metadata?: DocumentMetadata;
|
|
135
142
|
links?: string[];
|
|
143
|
+
images?: string[];
|
|
136
144
|
screenshot?: string;
|
|
145
|
+
attributes?: Array<{
|
|
146
|
+
selector: string;
|
|
147
|
+
attribute: string;
|
|
148
|
+
values: string[];
|
|
149
|
+
}>;
|
|
137
150
|
actions?: Record<string, unknown>;
|
|
138
151
|
warning?: string;
|
|
139
152
|
changeTracking?: Record<string, unknown>;
|
|
@@ -1348,4 +1361,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
1348
1361
|
get v1(): FirecrawlApp;
|
|
1349
1362
|
}
|
|
1350
1363
|
|
|
1351
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
1364
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-QPAPMZLC.js";
|
|
3
|
+
} from "./chunk-Y3QF4XAJ.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -933,7 +933,7 @@ var FirecrawlApp = class {
|
|
|
933
933
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
934
934
|
return process.env.npm_package_version;
|
|
935
935
|
}
|
|
936
|
-
const packageJson = await import("./package-VNFDXLYR.js");
|
|
936
|
+
const packageJson = await import("./package-LI2S3JCZ.js");
|
|
937
937
|
return packageJson.default.version;
|
|
938
938
|
} catch (error) {
|
|
939
939
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
package/package.json
CHANGED
|
package/src/__tests__/e2e/v2/scrape.test.ts
CHANGED
|
@@ -121,6 +121,39 @@ describe("v2.scrape e2e", () => {
|
|
|
121
121
|
}
|
|
122
122
|
}, 90_000);
|
|
123
123
|
|
|
124
|
+
test("images format: extract all images from webpage", async () => {
|
|
125
|
+
if (!client) throw new Error();
|
|
126
|
+
const doc = await client.scrape("https://firecrawl.dev", {
|
|
127
|
+
formats: ["images"],
|
|
128
|
+
});
|
|
129
|
+
expect(doc.images).toBeTruthy();
|
|
130
|
+
expect(Array.isArray(doc.images)).toBe(true);
|
|
131
|
+
expect(doc.images.length).toBeGreaterThan(0);
|
|
132
|
+
// Should find firecrawl logo/branding images
|
|
133
|
+
expect(doc.images.some(img => img.includes("firecrawl") || img.includes("logo"))).toBe(true);
|
|
134
|
+
}, 60_000);
|
|
135
|
+
|
|
136
|
+
test("images format: works with multiple formats", async () => {
|
|
137
|
+
if (!client) throw new Error();
|
|
138
|
+
const doc = await client.scrape("https://github.com", {
|
|
139
|
+
formats: ["markdown", "links", "images"],
|
|
140
|
+
});
|
|
141
|
+
expect(doc.markdown).toBeTruthy();
|
|
142
|
+
expect(doc.links).toBeTruthy();
|
|
143
|
+
expect(doc.images).toBeTruthy();
|
|
144
|
+
expect(Array.isArray(doc.images)).toBe(true);
|
|
145
|
+
expect(doc.images.length).toBeGreaterThan(0);
|
|
146
|
+
|
|
147
|
+
// Images should find things not available in links format
|
|
148
|
+
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico'];
|
|
149
|
+
const linkImages = doc.links?.filter(link =>
|
|
150
|
+
imageExtensions.some(ext => link.toLowerCase().includes(ext))
|
|
151
|
+
) || [];
|
|
152
|
+
|
|
153
|
+
// Should discover additional images beyond those with obvious extensions
|
|
154
|
+
expect(doc.images.length).toBeGreaterThanOrEqual(linkImages.length);
|
|
155
|
+
}, 60_000);
|
|
156
|
+
|
|
124
157
|
test("invalid url should throw", async () => {
|
|
125
158
|
if (!client) throw new Error();
|
|
126
159
|
await expect(client.scrape("")).rejects.toThrow("URL cannot be empty");
|
package/src/v2/types.ts
CHANGED
|
@@ -6,10 +6,12 @@ export type FormatString =
|
|
|
6
6
|
| "html"
|
|
7
7
|
| "rawHtml"
|
|
8
8
|
| "links"
|
|
9
|
+
| "images"
|
|
9
10
|
| "screenshot"
|
|
10
11
|
| "summary"
|
|
11
12
|
| "changeTracking"
|
|
12
|
-
| "json"
|
|
13
|
+
| "json"
|
|
14
|
+
| "attributes";
|
|
13
15
|
|
|
14
16
|
export interface Viewport {
|
|
15
17
|
width: number;
|
|
@@ -40,13 +42,21 @@ export interface ChangeTrackingFormat extends Format {
|
|
|
40
42
|
prompt?: string;
|
|
41
43
|
tag?: string;
|
|
42
44
|
}
|
|
45
|
+
export interface AttributesFormat extends Format {
|
|
46
|
+
type: "attributes";
|
|
47
|
+
selectors: Array<{
|
|
48
|
+
selector: string;
|
|
49
|
+
attribute: string;
|
|
50
|
+
}>;
|
|
51
|
+
}
|
|
43
52
|
|
|
44
53
|
export type FormatOption =
|
|
45
54
|
| FormatString
|
|
46
55
|
| Format
|
|
47
56
|
| JsonFormat
|
|
48
57
|
| ChangeTrackingFormat
|
|
49
|
-
| ScreenshotFormat
|
|
58
|
+
| ScreenshotFormat
|
|
59
|
+
| AttributesFormat;
|
|
50
60
|
|
|
51
61
|
export interface LocationConfig {
|
|
52
62
|
country?: string;
|
|
@@ -167,7 +177,13 @@ export interface Document {
|
|
|
167
177
|
summary?: string;
|
|
168
178
|
metadata?: DocumentMetadata;
|
|
169
179
|
links?: string[];
|
|
180
|
+
images?: string[];
|
|
170
181
|
screenshot?: string;
|
|
182
|
+
attributes?: Array<{
|
|
183
|
+
selector: string;
|
|
184
|
+
attribute: string;
|
|
185
|
+
values: string[];
|
|
186
|
+
}>;
|
|
171
187
|
actions?: Record<string, unknown>;
|
|
172
188
|
warning?: string;
|
|
173
189
|
changeTracking?: Record<string, unknown>;
|