firecrawl 4.22.2 → 4.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -1
- package/dist/{chunk-HGRZHWZU.js → chunk-ZR3KTUEQ.js} +1 -1
- package/dist/index.cjs +7 -1
- package/dist/index.d.cts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +8 -2
- package/dist/{package-WSP46L7M.js → package-APBHZ5F3.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/unit/v2/parse.unit.test.ts +18 -0
- package/src/__tests__/unit/v2/validation.test.ts +5 -0
- package/src/v2/client.ts +1 -0
- package/src/v2/types.ts +4 -2
- package/src/v2/utils/validation.ts +6 -0
package/README.md
CHANGED
|
@@ -46,10 +46,22 @@ const url = 'https://example.com';
|
|
|
46
46
|
const scrapedData = await app.scrape(url);
|
|
47
47
|
```
|
|
48
48
|
|
|
49
|
+
### Video extraction
|
|
50
|
+
|
|
51
|
+
Use the `video` format on supported video URLs, including YouTube and TikTok. The returned `video` field is a signed URL to the extracted video file.
|
|
52
|
+
|
|
53
|
+
```js
|
|
54
|
+
const doc = await app.scrape('https://www.youtube.com/watch?v=dQw4w9WgXcQ', {
|
|
55
|
+
formats: ['video'],
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
console.log(doc.video);
|
|
59
|
+
```
|
|
60
|
+
|
|
49
61
|
### Parsing uploaded files
|
|
50
62
|
|
|
51
63
|
Use `parse` to upload a file (`html`, `pdf`, `docx`, etc.) as multipart form data and process it through the same parsing pipeline.
|
|
52
|
-
Parse does not support browser-only formats/options like `changeTracking`, `screenshot`, `branding`, `actions`, `waitFor`, `location`, or `mobile`.
|
|
64
|
+
Parse does not support browser-only formats/options like `changeTracking`, `screenshot`, `branding`, `audio`, `video`, `actions`, `waitFor`, `location`, or `mobile`.
|
|
53
65
|
|
|
54
66
|
```js
|
|
55
67
|
const parsed = await app.parse(
|
|
@@ -8,7 +8,7 @@ var require_package = __commonJS({
|
|
|
8
8
|
"package.json"(exports, module) {
|
|
9
9
|
module.exports = {
|
|
10
10
|
name: "@mendable/firecrawl-js",
|
|
11
|
-
version: "4.22.2",
|
|
11
|
+
version: "4.23.0",
|
|
12
12
|
description: "JavaScript SDK for Firecrawl API",
|
|
13
13
|
main: "dist/index.js",
|
|
14
14
|
types: "dist/index.d.ts",
|
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "4.22.2",
|
|
38
|
+
version: "4.23.0",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -426,6 +426,9 @@ function ensureValidParseFormats(formats) {
|
|
|
426
426
|
if (fmt === "branding") {
|
|
427
427
|
throw new Error("parse does not support branding format");
|
|
428
428
|
}
|
|
429
|
+
if (fmt === "audio" || fmt === "video") {
|
|
430
|
+
throw new Error(`parse does not support ${fmt} format`);
|
|
431
|
+
}
|
|
429
432
|
continue;
|
|
430
433
|
}
|
|
431
434
|
const type = fmt.type;
|
|
@@ -438,6 +441,9 @@ function ensureValidParseFormats(formats) {
|
|
|
438
441
|
if (type === "branding") {
|
|
439
442
|
throw new Error("parse does not support branding format");
|
|
440
443
|
}
|
|
444
|
+
if (type === "audio" || type === "video") {
|
|
445
|
+
throw new Error(`parse does not support ${type} format`);
|
|
446
|
+
}
|
|
441
447
|
if (fmt.type === "json") {
|
|
442
448
|
const j = fmt;
|
|
443
449
|
if (!j.prompt && !j.schema) {
|
package/dist/index.d.cts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = 'markdown' | 'html' | 'rawHtml' | 'links' | 'images' | 'screenshot' | 'summary' | 'changeTracking' | 'json' | 'attributes' | 'branding' | 'audio';
|
|
7
|
+
type FormatString = 'markdown' | 'html' | 'rawHtml' | 'links' | 'images' | 'screenshot' | 'summary' | 'changeTracking' | 'json' | 'attributes' | 'branding' | 'audio' | 'video';
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -55,7 +55,7 @@ interface QueryFormat {
|
|
|
55
55
|
mode?: 'freeform' | 'directQuote';
|
|
56
56
|
}
|
|
57
57
|
type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QuestionFormat | HighlightsFormat | QueryFormat;
|
|
58
|
-
type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
|
|
58
|
+
type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding' | 'audio' | 'video'>;
|
|
59
59
|
interface ParseFormat {
|
|
60
60
|
type: ParseFormatString;
|
|
61
61
|
}
|
|
@@ -347,6 +347,7 @@ interface Document {
|
|
|
347
347
|
images?: string[];
|
|
348
348
|
screenshot?: string;
|
|
349
349
|
audio?: string;
|
|
350
|
+
video?: string;
|
|
350
351
|
attributes?: Array<{
|
|
351
352
|
selector: string;
|
|
352
353
|
attribute: string;
|
|
@@ -958,6 +959,7 @@ declare class FirecrawlClient {
|
|
|
958
959
|
* @param file File payload (data, filename, optional contentType).
|
|
959
960
|
* @param options Optional parse options (formats, parsers, etc.).
|
|
960
961
|
* Note: parse does not support changeTracking, screenshot, branding,
|
|
962
|
+
* audio, video,
|
|
961
963
|
* actions, waitFor, location, or mobile options.
|
|
962
964
|
* @returns Parsed document with requested formats.
|
|
963
965
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = 'markdown' | 'html' | 'rawHtml' | 'links' | 'images' | 'screenshot' | 'summary' | 'changeTracking' | 'json' | 'attributes' | 'branding' | 'audio';
|
|
7
|
+
type FormatString = 'markdown' | 'html' | 'rawHtml' | 'links' | 'images' | 'screenshot' | 'summary' | 'changeTracking' | 'json' | 'attributes' | 'branding' | 'audio' | 'video';
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -55,7 +55,7 @@ interface QueryFormat {
|
|
|
55
55
|
mode?: 'freeform' | 'directQuote';
|
|
56
56
|
}
|
|
57
57
|
type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QuestionFormat | HighlightsFormat | QueryFormat;
|
|
58
|
-
type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
|
|
58
|
+
type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding' | 'audio' | 'video'>;
|
|
59
59
|
interface ParseFormat {
|
|
60
60
|
type: ParseFormatString;
|
|
61
61
|
}
|
|
@@ -347,6 +347,7 @@ interface Document {
|
|
|
347
347
|
images?: string[];
|
|
348
348
|
screenshot?: string;
|
|
349
349
|
audio?: string;
|
|
350
|
+
video?: string;
|
|
350
351
|
attributes?: Array<{
|
|
351
352
|
selector: string;
|
|
352
353
|
attribute: string;
|
|
@@ -958,6 +959,7 @@ declare class FirecrawlClient {
|
|
|
958
959
|
* @param file File payload (data, filename, optional contentType).
|
|
959
960
|
* @param options Optional parse options (formats, parsers, etc.).
|
|
960
961
|
* Note: parse does not support changeTracking, screenshot, branding,
|
|
962
|
+
* audio, video,
|
|
961
963
|
* actions, waitFor, location, or mobile options.
|
|
962
964
|
* @returns Parsed document with requested formats.
|
|
963
965
|
*/
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-HGRZHWZU.js";
|
|
3
|
+
} from "./chunk-ZR3KTUEQ.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -302,6 +302,9 @@ function ensureValidParseFormats(formats) {
|
|
|
302
302
|
if (fmt === "branding") {
|
|
303
303
|
throw new Error("parse does not support branding format");
|
|
304
304
|
}
|
|
305
|
+
if (fmt === "audio" || fmt === "video") {
|
|
306
|
+
throw new Error(`parse does not support ${fmt} format`);
|
|
307
|
+
}
|
|
305
308
|
continue;
|
|
306
309
|
}
|
|
307
310
|
const type = fmt.type;
|
|
@@ -314,6 +317,9 @@ function ensureValidParseFormats(formats) {
|
|
|
314
317
|
if (type === "branding") {
|
|
315
318
|
throw new Error("parse does not support branding format");
|
|
316
319
|
}
|
|
320
|
+
if (type === "audio" || type === "video") {
|
|
321
|
+
throw new Error(`parse does not support ${type} format`);
|
|
322
|
+
}
|
|
317
323
|
if (fmt.type === "json") {
|
|
318
324
|
const j = fmt;
|
|
319
325
|
if (!j.prompt && !j.schema) {
|
|
@@ -2036,7 +2042,7 @@ var FirecrawlApp = class {
|
|
|
2036
2042
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
2037
2043
|
return process.env.npm_package_version;
|
|
2038
2044
|
}
|
|
2039
|
-
const packageJson = await import("./package-WSP46L7M.js");
|
|
2045
|
+
const packageJson = await import("./package-APBHZ5F3.js");
|
|
2040
2046
|
return packageJson.default.version;
|
|
2041
2047
|
} catch (error) {
|
|
2042
2048
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
package/package.json
CHANGED
|
@@ -38,6 +38,24 @@ describe("v2.parse unit", () => {
|
|
|
38
38
|
).rejects.toThrow("parse does not support changeTracking format");
|
|
39
39
|
});
|
|
40
40
|
|
|
41
|
+
test("rejects video format before making requests", async () => {
|
|
42
|
+
const client = new FirecrawlClient({
|
|
43
|
+
apiKey: "test-key",
|
|
44
|
+
apiUrl: "https://localhost:3002",
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
await expect(
|
|
48
|
+
client.parse(
|
|
49
|
+
{
|
|
50
|
+
data: Buffer.from("<html></html>"),
|
|
51
|
+
filename: "upload.html",
|
|
52
|
+
contentType: "text/html",
|
|
53
|
+
},
|
|
54
|
+
{ formats: ["video" as any] },
|
|
55
|
+
),
|
|
56
|
+
).rejects.toThrow("parse does not support video format");
|
|
57
|
+
});
|
|
58
|
+
|
|
41
59
|
test("rejects lockdown option before making requests", async () => {
|
|
42
60
|
const client = new FirecrawlClient({
|
|
43
61
|
apiKey: "test-key",
|
|
@@ -9,6 +9,11 @@ describe("v2 utils: validation", () => {
|
|
|
9
9
|
expect(() => ensureValidFormats(formats)).toThrow(/json format must be an object/i);
|
|
10
10
|
});
|
|
11
11
|
|
|
12
|
+
test("ensureValidFormats: accepts video string format", () => {
|
|
13
|
+
const formats: FormatOption[] = ["markdown", "video"];
|
|
14
|
+
expect(() => ensureValidFormats(formats)).not.toThrow();
|
|
15
|
+
});
|
|
16
|
+
|
|
12
17
|
test("ensureValidFormats: json format requires prompt or schema", () => {
|
|
13
18
|
// Valid cases - should not throw
|
|
14
19
|
const valid1: FormatOption[] = [{ type: "json", prompt: "p" } as any];
|
package/src/v2/client.ts
CHANGED
|
@@ -204,6 +204,7 @@ export class FirecrawlClient {
|
|
|
204
204
|
* @param file File payload (data, filename, optional contentType).
|
|
205
205
|
* @param options Optional parse options (formats, parsers, etc.).
|
|
206
206
|
* Note: parse does not support changeTracking, screenshot, branding,
|
|
207
|
+
* audio, video,
|
|
207
208
|
* actions, waitFor, location, or mobile options.
|
|
208
209
|
* @returns Parsed document with requested formats.
|
|
209
210
|
*/
|
package/src/v2/types.ts
CHANGED
|
@@ -13,7 +13,8 @@ export type FormatString =
|
|
|
13
13
|
| 'json'
|
|
14
14
|
| 'attributes'
|
|
15
15
|
| 'branding'
|
|
16
|
-
| 'audio'
|
|
16
|
+
| 'audio'
|
|
17
|
+
| 'video';
|
|
17
18
|
|
|
18
19
|
export interface Viewport {
|
|
19
20
|
width: number;
|
|
@@ -82,7 +83,7 @@ export type FormatOption =
|
|
|
82
83
|
|
|
83
84
|
export type ParseFormatString = Exclude<
|
|
84
85
|
FormatString,
|
|
85
|
-
'screenshot' | 'changeTracking' | 'branding'
|
|
86
|
+
'screenshot' | 'changeTracking' | 'branding' | 'audio' | 'video'
|
|
86
87
|
>;
|
|
87
88
|
|
|
88
89
|
export interface ParseFormat {
|
|
@@ -458,6 +459,7 @@ export interface Document {
|
|
|
458
459
|
images?: string[];
|
|
459
460
|
screenshot?: string;
|
|
460
461
|
audio?: string;
|
|
462
|
+
video?: string;
|
|
461
463
|
attributes?: Array<{
|
|
462
464
|
selector: string;
|
|
463
465
|
attribute: string;
|
|
@@ -114,6 +114,9 @@ export function ensureValidParseFormats(formats?: ParseFormatOption[]): void {
|
|
|
114
114
|
if (fmt === "branding") {
|
|
115
115
|
throw new Error("parse does not support branding format");
|
|
116
116
|
}
|
|
117
|
+
if (fmt === "audio" || fmt === "video") {
|
|
118
|
+
throw new Error(`parse does not support ${fmt} format`);
|
|
119
|
+
}
|
|
117
120
|
continue;
|
|
118
121
|
}
|
|
119
122
|
|
|
@@ -127,6 +130,9 @@ export function ensureValidParseFormats(formats?: ParseFormatOption[]): void {
|
|
|
127
130
|
if (type === "branding") {
|
|
128
131
|
throw new Error("parse does not support branding format");
|
|
129
132
|
}
|
|
133
|
+
if (type === "audio" || type === "video") {
|
|
134
|
+
throw new Error(`parse does not support ${type} format`);
|
|
135
|
+
}
|
|
130
136
|
|
|
131
137
|
if ((fmt as JsonFormat).type === "json") {
|
|
132
138
|
const j = fmt as JsonFormat;
|