firecrawl 4.18.4 → 4.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/dist/{chunk-ZUJQPZTD.js → chunk-JJY4NJXL.js} +1 -1
- package/dist/index.cjs +150 -3
- package/dist/index.d.cts +30 -1
- package/dist/index.d.ts +30 -1
- package/dist/index.js +151 -4
- package/dist/{package-43GY3VT3.js → package-HMEPZJ3J.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/e2e/v2/parse.test.ts +67 -0
- package/src/__tests__/unit/v2/parse.unit.test.ts +58 -0
- package/src/v2/client.ts +21 -0
- package/src/v2/methods/parse.ts +90 -0
- package/src/v2/types.ts +48 -0
- package/src/v2/utils/httpClient.ts +37 -4
- package/src/v2/utils/validation.ts +93 -1
package/README.md
CHANGED
|
@@ -46,6 +46,26 @@ const url = 'https://example.com';
|
|
|
46
46
|
const scrapedData = await app.scrape(url);
|
|
47
47
|
```
|
|
48
48
|
|
|
49
|
+
### Parsing uploaded files
|
|
50
|
+
|
|
51
|
+
Use `parse` to upload a file (`html`, `pdf`, `docx`, etc.) as multipart form data and process it through the same parsing pipeline.
|
|
52
|
+
Parse does not support browser-only formats/options like `changeTracking`, `screenshot`, `branding`, `actions`, `waitFor`, `location`, or `mobile`.
|
|
53
|
+
|
|
54
|
+
```js
|
|
55
|
+
const parsed = await app.parse(
|
|
56
|
+
{
|
|
57
|
+
data: '<html><body><h1>Hello parse</h1></body></html>',
|
|
58
|
+
filename: 'upload.html',
|
|
59
|
+
contentType: 'text/html',
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
formats: ['markdown'],
|
|
63
|
+
}
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
console.log(parsed.markdown);
|
|
67
|
+
```
|
|
68
|
+
|
|
49
69
|
### Crawling a Website
|
|
50
70
|
|
|
51
71
|
To crawl a website with error handling, use the `crawl` method. It takes the starting URL and optional parameters, including limits and per‑page `scrapeOptions`.
|
|
@@ -8,7 +8,7 @@ var require_package = __commonJS({
|
|
|
8
8
|
"package.json"(exports, module) {
|
|
9
9
|
module.exports = {
|
|
10
10
|
name: "@mendable/firecrawl-js",
|
|
11
|
-
version: "4.18.4",
|
|
11
|
+
version: "4.19.0",
|
|
12
12
|
description: "JavaScript SDK for Firecrawl API",
|
|
13
13
|
main: "dist/index.js",
|
|
14
14
|
types: "dist/index.d.ts",
|
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "4.18.4",
|
|
38
|
+
version: "4.19.0",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -158,7 +158,6 @@ var HttpClient = class {
|
|
|
158
158
|
baseURL: this.apiUrl,
|
|
159
159
|
timeout: options.timeoutMs ?? 3e5,
|
|
160
160
|
headers: {
|
|
161
|
-
"Content-Type": "application/json",
|
|
162
161
|
Authorization: `Bearer ${this.apiKey}`
|
|
163
162
|
},
|
|
164
163
|
transitional: { clarifyTimeoutError: true }
|
|
@@ -179,13 +178,20 @@ var HttpClient = class {
|
|
|
179
178
|
for (let attempt = 0; attempt < this.maxRetries; attempt++) {
|
|
180
179
|
try {
|
|
181
180
|
const cfg = { ...config };
|
|
182
|
-
|
|
181
|
+
const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
|
|
182
|
+
const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
|
|
183
|
+
if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
|
|
183
184
|
const data = cfg.data ?? {};
|
|
184
185
|
cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
|
|
185
186
|
if (typeof data.timeout === "number") {
|
|
186
187
|
cfg.timeout = data.timeout + 5e3;
|
|
187
188
|
}
|
|
188
189
|
}
|
|
190
|
+
if (isFormDataBody) {
|
|
191
|
+
cfg.headers = { ...cfg.headers || {} };
|
|
192
|
+
delete cfg.headers["Content-Type"];
|
|
193
|
+
delete cfg.headers["content-type"];
|
|
194
|
+
}
|
|
189
195
|
const res = await this.instance.request(cfg);
|
|
190
196
|
if (res.status === 502 && attempt < this.maxRetries - 1) {
|
|
191
197
|
await this.sleep(this.backoffFactor * Math.pow(2, attempt));
|
|
@@ -210,6 +216,15 @@ var HttpClient = class {
|
|
|
210
216
|
post(endpoint, body, headers) {
|
|
211
217
|
return this.request({ method: "post", url: endpoint, data: body, headers });
|
|
212
218
|
}
|
|
219
|
+
postMultipart(endpoint, formData, headers, timeoutMs) {
|
|
220
|
+
return this.request({
|
|
221
|
+
method: "post",
|
|
222
|
+
url: endpoint,
|
|
223
|
+
data: formData,
|
|
224
|
+
headers,
|
|
225
|
+
timeout: timeoutMs
|
|
226
|
+
});
|
|
227
|
+
}
|
|
213
228
|
get(endpoint, headers) {
|
|
214
229
|
return this.request({ method: "get", url: endpoint, headers });
|
|
215
230
|
}
|
|
@@ -356,6 +371,76 @@ function ensureValidScrapeOptions(options) {
|
|
|
356
371
|
}
|
|
357
372
|
ensureValidFormats(options.formats);
|
|
358
373
|
}
|
|
374
|
+
function ensureValidParseFormats(formats) {
|
|
375
|
+
if (!formats) return;
|
|
376
|
+
for (const fmt of formats) {
|
|
377
|
+
if (typeof fmt === "string") {
|
|
378
|
+
if (fmt === "json") {
|
|
379
|
+
throw new Error("json format must be an object with { type: 'json', prompt, schema }");
|
|
380
|
+
}
|
|
381
|
+
if (fmt === "screenshot") {
|
|
382
|
+
throw new Error("parse does not support screenshot format");
|
|
383
|
+
}
|
|
384
|
+
if (fmt === "changeTracking") {
|
|
385
|
+
throw new Error("parse does not support changeTracking format");
|
|
386
|
+
}
|
|
387
|
+
if (fmt === "branding") {
|
|
388
|
+
throw new Error("parse does not support branding format");
|
|
389
|
+
}
|
|
390
|
+
continue;
|
|
391
|
+
}
|
|
392
|
+
const type = fmt.type;
|
|
393
|
+
if (type === "changeTracking") {
|
|
394
|
+
throw new Error("parse does not support changeTracking format");
|
|
395
|
+
}
|
|
396
|
+
if (type === "screenshot") {
|
|
397
|
+
throw new Error("parse does not support screenshot format");
|
|
398
|
+
}
|
|
399
|
+
if (type === "branding") {
|
|
400
|
+
throw new Error("parse does not support branding format");
|
|
401
|
+
}
|
|
402
|
+
if (fmt.type === "json") {
|
|
403
|
+
const j = fmt;
|
|
404
|
+
if (!j.prompt && !j.schema) {
|
|
405
|
+
throw new Error("json format requires either 'prompt' or 'schema' (or both)");
|
|
406
|
+
}
|
|
407
|
+
const maybeSchema = j.schema;
|
|
408
|
+
if (isZodSchema(maybeSchema)) {
|
|
409
|
+
j.schema = zodSchemaToJsonSchema(maybeSchema);
|
|
410
|
+
} else if (looksLikeZodShape(maybeSchema)) {
|
|
411
|
+
throw new Error(
|
|
412
|
+
"json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
|
|
413
|
+
);
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
function ensureValidParseOptions(options) {
|
|
419
|
+
if (!options) return;
|
|
420
|
+
if (options.timeout != null && options.timeout <= 0) {
|
|
421
|
+
throw new Error("timeout must be positive");
|
|
422
|
+
}
|
|
423
|
+
const raw = options;
|
|
424
|
+
if (raw.waitFor !== void 0) {
|
|
425
|
+
throw new Error("parse does not support waitFor");
|
|
426
|
+
}
|
|
427
|
+
if (raw.actions !== void 0) {
|
|
428
|
+
throw new Error("parse does not support actions");
|
|
429
|
+
}
|
|
430
|
+
if (raw.location !== void 0) {
|
|
431
|
+
throw new Error("parse does not support location overrides");
|
|
432
|
+
}
|
|
433
|
+
if (raw.mobile !== void 0) {
|
|
434
|
+
throw new Error("parse does not support mobile rendering");
|
|
435
|
+
}
|
|
436
|
+
if (raw.maxAge !== void 0 || raw.minAge !== void 0 || raw.storeInCache !== void 0 || raw.lockdown !== void 0) {
|
|
437
|
+
throw new Error("parse does not support cache/index options");
|
|
438
|
+
}
|
|
439
|
+
if (raw.proxy !== void 0 && raw.proxy !== "basic" && raw.proxy !== "auto") {
|
|
440
|
+
throw new Error("parse only supports proxy values of 'basic' or 'auto'");
|
|
441
|
+
}
|
|
442
|
+
ensureValidParseFormats(options.formats);
|
|
443
|
+
}
|
|
359
444
|
|
|
360
445
|
// src/v2/utils/errorHandler.ts
|
|
361
446
|
var import_axios2 = require("axios");
|
|
@@ -456,6 +541,65 @@ async function stopInteraction(http, jobId) {
|
|
|
456
541
|
}
|
|
457
542
|
}
|
|
458
543
|
|
|
544
|
+
// src/v2/methods/parse.ts
|
|
545
|
+
function toUploadBlob(input, contentType) {
|
|
546
|
+
if (typeof Blob !== "undefined" && input instanceof Blob) {
|
|
547
|
+
if (contentType && input.type !== contentType) {
|
|
548
|
+
return new Blob([input], { type: contentType });
|
|
549
|
+
}
|
|
550
|
+
return input;
|
|
551
|
+
}
|
|
552
|
+
if (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) {
|
|
553
|
+
return new Blob([input], { type: contentType });
|
|
554
|
+
}
|
|
555
|
+
if (input instanceof ArrayBuffer) {
|
|
556
|
+
return new Blob([input], { type: contentType });
|
|
557
|
+
}
|
|
558
|
+
if (ArrayBuffer.isView(input)) {
|
|
559
|
+
return new Blob([input], { type: contentType });
|
|
560
|
+
}
|
|
561
|
+
if (typeof input === "string") {
|
|
562
|
+
return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" });
|
|
563
|
+
}
|
|
564
|
+
throw new Error("Unsupported parse file data type");
|
|
565
|
+
}
|
|
566
|
+
async function parse(http, file, options) {
|
|
567
|
+
if (!file || !file.filename || !file.filename.trim()) {
|
|
568
|
+
throw new Error("filename cannot be empty");
|
|
569
|
+
}
|
|
570
|
+
if (file.data == null) {
|
|
571
|
+
throw new Error("file data cannot be empty");
|
|
572
|
+
}
|
|
573
|
+
const blob = toUploadBlob(file.data, file.contentType);
|
|
574
|
+
if (blob.size === 0) {
|
|
575
|
+
throw new Error("file data cannot be empty");
|
|
576
|
+
}
|
|
577
|
+
if (options) ensureValidParseOptions(options);
|
|
578
|
+
const version = getVersion();
|
|
579
|
+
const normalizedOptions = {
|
|
580
|
+
...options ?? {},
|
|
581
|
+
origin: typeof options?.origin === "string" && options.origin.includes("mcp") ? options.origin : options?.origin ?? `js-sdk@${version}`
|
|
582
|
+
};
|
|
583
|
+
const formData = new FormData();
|
|
584
|
+
formData.append("options", JSON.stringify(normalizedOptions));
|
|
585
|
+
formData.append(
|
|
586
|
+
"file",
|
|
587
|
+
toUploadBlob(file.data, file.contentType),
|
|
588
|
+
file.filename.trim()
|
|
589
|
+
);
|
|
590
|
+
const requestTimeoutMs = typeof normalizedOptions.timeout === "number" ? normalizedOptions.timeout + 5e3 : void 0;
|
|
591
|
+
try {
|
|
592
|
+
const res = await http.postMultipart("/v2/parse", formData, void 0, requestTimeoutMs);
|
|
593
|
+
if (res.status !== 200 || !res.data?.success) {
|
|
594
|
+
throwForBadResponse(res, "parse");
|
|
595
|
+
}
|
|
596
|
+
return res.data.data || {};
|
|
597
|
+
} catch (err) {
|
|
598
|
+
if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
|
|
599
|
+
throw err;
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
459
603
|
// src/v2/methods/search.ts
|
|
460
604
|
function prepareSearchPayload(req) {
|
|
461
605
|
if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
|
|
@@ -1404,6 +1548,9 @@ var FirecrawlClient = class {
|
|
|
1404
1548
|
async deleteScrapeBrowser(jobId) {
|
|
1405
1549
|
return this.stopInteraction(jobId);
|
|
1406
1550
|
}
|
|
1551
|
+
async parse(file, options) {
|
|
1552
|
+
return parse(this.http, file, options);
|
|
1553
|
+
}
|
|
1407
1554
|
// Search
|
|
1408
1555
|
/**
|
|
1409
1556
|
* Search the web and optionally scrape each result.
|
package/dist/index.d.cts
CHANGED
|
@@ -45,6 +45,11 @@ interface QueryFormat {
|
|
|
45
45
|
prompt: string;
|
|
46
46
|
}
|
|
47
47
|
type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QueryFormat;
|
|
48
|
+
type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
|
|
49
|
+
interface ParseFormat {
|
|
50
|
+
type: ParseFormatString;
|
|
51
|
+
}
|
|
52
|
+
type ParseFormatOption = ParseFormatString | ParseFormat | JsonFormat | AttributesFormat | QueryFormat;
|
|
48
53
|
interface LocationConfig$1 {
|
|
49
54
|
country?: string;
|
|
50
55
|
languages?: string[];
|
|
@@ -119,6 +124,7 @@ interface ScrapeOptions {
|
|
|
119
124
|
maxAge?: number;
|
|
120
125
|
minAge?: number;
|
|
121
126
|
storeInCache?: boolean;
|
|
127
|
+
lockdown?: boolean;
|
|
122
128
|
profile?: {
|
|
123
129
|
name: string;
|
|
124
130
|
saveChanges?: boolean;
|
|
@@ -126,6 +132,16 @@ interface ScrapeOptions {
|
|
|
126
132
|
integration?: string;
|
|
127
133
|
origin?: string;
|
|
128
134
|
}
|
|
135
|
+
type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;
|
|
136
|
+
interface ParseFile {
|
|
137
|
+
data: ParseFileData;
|
|
138
|
+
filename: string;
|
|
139
|
+
contentType?: string;
|
|
140
|
+
}
|
|
141
|
+
type ParseOptions = Omit<ScrapeOptions, 'formats' | 'waitFor' | 'mobile' | 'actions' | 'location' | 'maxAge' | 'minAge' | 'storeInCache' | 'lockdown' | 'proxy'> & {
|
|
142
|
+
formats?: ParseFormatOption[];
|
|
143
|
+
proxy?: 'basic' | 'auto';
|
|
144
|
+
};
|
|
129
145
|
interface WebhookConfig {
|
|
130
146
|
url: string;
|
|
131
147
|
headers?: Record<string, string>;
|
|
@@ -645,6 +661,7 @@ declare class HttpClient {
|
|
|
645
661
|
private request;
|
|
646
662
|
private sleep;
|
|
647
663
|
post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
|
|
664
|
+
postMultipart<T = any>(endpoint: string, formData: FormData, headers?: Record<string, string>, timeoutMs?: number): Promise<AxiosResponse<T, any, {}>>;
|
|
648
665
|
get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
|
|
649
666
|
delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
|
|
650
667
|
prepareHeaders(idempotencyKey?: string): Record<string, string>;
|
|
@@ -796,6 +813,18 @@ declare class FirecrawlClient {
|
|
|
796
813
|
* @deprecated Use stopInteraction().
|
|
797
814
|
*/
|
|
798
815
|
deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
|
|
816
|
+
/**
|
|
817
|
+
* Parse an uploaded file via the v2 parse endpoint.
|
|
818
|
+
* @param file File payload (data, filename, optional contentType).
|
|
819
|
+
* @param options Optional parse options (formats, parsers, etc.).
|
|
820
|
+
* Note: parse does not support changeTracking, screenshot, branding,
|
|
821
|
+
* actions, waitFor, location, or mobile options.
|
|
822
|
+
* @returns Parsed document with requested formats.
|
|
823
|
+
*/
|
|
824
|
+
parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & {
|
|
825
|
+
json?: InferredJsonFromOptions<Opts>;
|
|
826
|
+
}>;
|
|
827
|
+
parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
|
|
799
828
|
/**
|
|
800
829
|
* Search the web and optionally scrape each result.
|
|
801
830
|
* @param query Search query string.
|
|
@@ -1892,4 +1921,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
1892
1921
|
get v1(): FirecrawlApp;
|
|
1893
1922
|
}
|
|
1894
1923
|
|
|
1895
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
1924
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -45,6 +45,11 @@ interface QueryFormat {
|
|
|
45
45
|
prompt: string;
|
|
46
46
|
}
|
|
47
47
|
type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QueryFormat;
|
|
48
|
+
type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
|
|
49
|
+
interface ParseFormat {
|
|
50
|
+
type: ParseFormatString;
|
|
51
|
+
}
|
|
52
|
+
type ParseFormatOption = ParseFormatString | ParseFormat | JsonFormat | AttributesFormat | QueryFormat;
|
|
48
53
|
interface LocationConfig$1 {
|
|
49
54
|
country?: string;
|
|
50
55
|
languages?: string[];
|
|
@@ -119,6 +124,7 @@ interface ScrapeOptions {
|
|
|
119
124
|
maxAge?: number;
|
|
120
125
|
minAge?: number;
|
|
121
126
|
storeInCache?: boolean;
|
|
127
|
+
lockdown?: boolean;
|
|
122
128
|
profile?: {
|
|
123
129
|
name: string;
|
|
124
130
|
saveChanges?: boolean;
|
|
@@ -126,6 +132,16 @@ interface ScrapeOptions {
|
|
|
126
132
|
integration?: string;
|
|
127
133
|
origin?: string;
|
|
128
134
|
}
|
|
135
|
+
type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;
|
|
136
|
+
interface ParseFile {
|
|
137
|
+
data: ParseFileData;
|
|
138
|
+
filename: string;
|
|
139
|
+
contentType?: string;
|
|
140
|
+
}
|
|
141
|
+
type ParseOptions = Omit<ScrapeOptions, 'formats' | 'waitFor' | 'mobile' | 'actions' | 'location' | 'maxAge' | 'minAge' | 'storeInCache' | 'lockdown' | 'proxy'> & {
|
|
142
|
+
formats?: ParseFormatOption[];
|
|
143
|
+
proxy?: 'basic' | 'auto';
|
|
144
|
+
};
|
|
129
145
|
interface WebhookConfig {
|
|
130
146
|
url: string;
|
|
131
147
|
headers?: Record<string, string>;
|
|
@@ -645,6 +661,7 @@ declare class HttpClient {
|
|
|
645
661
|
private request;
|
|
646
662
|
private sleep;
|
|
647
663
|
post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
|
|
664
|
+
postMultipart<T = any>(endpoint: string, formData: FormData, headers?: Record<string, string>, timeoutMs?: number): Promise<AxiosResponse<T, any, {}>>;
|
|
648
665
|
get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
|
|
649
666
|
delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
|
|
650
667
|
prepareHeaders(idempotencyKey?: string): Record<string, string>;
|
|
@@ -796,6 +813,18 @@ declare class FirecrawlClient {
|
|
|
796
813
|
* @deprecated Use stopInteraction().
|
|
797
814
|
*/
|
|
798
815
|
deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
|
|
816
|
+
/**
|
|
817
|
+
* Parse an uploaded file via the v2 parse endpoint.
|
|
818
|
+
* @param file File payload (data, filename, optional contentType).
|
|
819
|
+
* @param options Optional parse options (formats, parsers, etc.).
|
|
820
|
+
* Note: parse does not support changeTracking, screenshot, branding,
|
|
821
|
+
* actions, waitFor, location, or mobile options.
|
|
822
|
+
* @returns Parsed document with requested formats.
|
|
823
|
+
*/
|
|
824
|
+
parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & {
|
|
825
|
+
json?: InferredJsonFromOptions<Opts>;
|
|
826
|
+
}>;
|
|
827
|
+
parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
|
|
799
828
|
/**
|
|
800
829
|
* Search the web and optionally scrape each result.
|
|
801
830
|
* @param query Search query string.
|
|
@@ -1892,4 +1921,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
1892
1921
|
get v1(): FirecrawlApp;
|
|
1893
1922
|
}
|
|
1894
1923
|
|
|
1895
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
1924
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-ZUJQPZTD.js";
|
|
3
|
+
} from "./chunk-JJY4NJXL.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -34,7 +34,6 @@ var HttpClient = class {
|
|
|
34
34
|
baseURL: this.apiUrl,
|
|
35
35
|
timeout: options.timeoutMs ?? 3e5,
|
|
36
36
|
headers: {
|
|
37
|
-
"Content-Type": "application/json",
|
|
38
37
|
Authorization: `Bearer ${this.apiKey}`
|
|
39
38
|
},
|
|
40
39
|
transitional: { clarifyTimeoutError: true }
|
|
@@ -55,13 +54,20 @@ var HttpClient = class {
|
|
|
55
54
|
for (let attempt = 0; attempt < this.maxRetries; attempt++) {
|
|
56
55
|
try {
|
|
57
56
|
const cfg = { ...config };
|
|
58
|
-
|
|
57
|
+
const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
|
|
58
|
+
const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
|
|
59
|
+
if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
|
|
59
60
|
const data = cfg.data ?? {};
|
|
60
61
|
cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
|
|
61
62
|
if (typeof data.timeout === "number") {
|
|
62
63
|
cfg.timeout = data.timeout + 5e3;
|
|
63
64
|
}
|
|
64
65
|
}
|
|
66
|
+
if (isFormDataBody) {
|
|
67
|
+
cfg.headers = { ...cfg.headers || {} };
|
|
68
|
+
delete cfg.headers["Content-Type"];
|
|
69
|
+
delete cfg.headers["content-type"];
|
|
70
|
+
}
|
|
65
71
|
const res = await this.instance.request(cfg);
|
|
66
72
|
if (res.status === 502 && attempt < this.maxRetries - 1) {
|
|
67
73
|
await this.sleep(this.backoffFactor * Math.pow(2, attempt));
|
|
@@ -86,6 +92,15 @@ var HttpClient = class {
|
|
|
86
92
|
post(endpoint, body, headers) {
|
|
87
93
|
return this.request({ method: "post", url: endpoint, data: body, headers });
|
|
88
94
|
}
|
|
95
|
+
postMultipart(endpoint, formData, headers, timeoutMs) {
|
|
96
|
+
return this.request({
|
|
97
|
+
method: "post",
|
|
98
|
+
url: endpoint,
|
|
99
|
+
data: formData,
|
|
100
|
+
headers,
|
|
101
|
+
timeout: timeoutMs
|
|
102
|
+
});
|
|
103
|
+
}
|
|
89
104
|
get(endpoint, headers) {
|
|
90
105
|
return this.request({ method: "get", url: endpoint, headers });
|
|
91
106
|
}
|
|
@@ -232,6 +247,76 @@ function ensureValidScrapeOptions(options) {
|
|
|
232
247
|
}
|
|
233
248
|
ensureValidFormats(options.formats);
|
|
234
249
|
}
|
|
250
|
+
function ensureValidParseFormats(formats) {
|
|
251
|
+
if (!formats) return;
|
|
252
|
+
for (const fmt of formats) {
|
|
253
|
+
if (typeof fmt === "string") {
|
|
254
|
+
if (fmt === "json") {
|
|
255
|
+
throw new Error("json format must be an object with { type: 'json', prompt, schema }");
|
|
256
|
+
}
|
|
257
|
+
if (fmt === "screenshot") {
|
|
258
|
+
throw new Error("parse does not support screenshot format");
|
|
259
|
+
}
|
|
260
|
+
if (fmt === "changeTracking") {
|
|
261
|
+
throw new Error("parse does not support changeTracking format");
|
|
262
|
+
}
|
|
263
|
+
if (fmt === "branding") {
|
|
264
|
+
throw new Error("parse does not support branding format");
|
|
265
|
+
}
|
|
266
|
+
continue;
|
|
267
|
+
}
|
|
268
|
+
const type = fmt.type;
|
|
269
|
+
if (type === "changeTracking") {
|
|
270
|
+
throw new Error("parse does not support changeTracking format");
|
|
271
|
+
}
|
|
272
|
+
if (type === "screenshot") {
|
|
273
|
+
throw new Error("parse does not support screenshot format");
|
|
274
|
+
}
|
|
275
|
+
if (type === "branding") {
|
|
276
|
+
throw new Error("parse does not support branding format");
|
|
277
|
+
}
|
|
278
|
+
if (fmt.type === "json") {
|
|
279
|
+
const j = fmt;
|
|
280
|
+
if (!j.prompt && !j.schema) {
|
|
281
|
+
throw new Error("json format requires either 'prompt' or 'schema' (or both)");
|
|
282
|
+
}
|
|
283
|
+
const maybeSchema = j.schema;
|
|
284
|
+
if (isZodSchema(maybeSchema)) {
|
|
285
|
+
j.schema = zodSchemaToJsonSchema(maybeSchema);
|
|
286
|
+
} else if (looksLikeZodShape(maybeSchema)) {
|
|
287
|
+
throw new Error(
|
|
288
|
+
"json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
|
|
289
|
+
);
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
function ensureValidParseOptions(options) {
|
|
295
|
+
if (!options) return;
|
|
296
|
+
if (options.timeout != null && options.timeout <= 0) {
|
|
297
|
+
throw new Error("timeout must be positive");
|
|
298
|
+
}
|
|
299
|
+
const raw = options;
|
|
300
|
+
if (raw.waitFor !== void 0) {
|
|
301
|
+
throw new Error("parse does not support waitFor");
|
|
302
|
+
}
|
|
303
|
+
if (raw.actions !== void 0) {
|
|
304
|
+
throw new Error("parse does not support actions");
|
|
305
|
+
}
|
|
306
|
+
if (raw.location !== void 0) {
|
|
307
|
+
throw new Error("parse does not support location overrides");
|
|
308
|
+
}
|
|
309
|
+
if (raw.mobile !== void 0) {
|
|
310
|
+
throw new Error("parse does not support mobile rendering");
|
|
311
|
+
}
|
|
312
|
+
if (raw.maxAge !== void 0 || raw.minAge !== void 0 || raw.storeInCache !== void 0 || raw.lockdown !== void 0) {
|
|
313
|
+
throw new Error("parse does not support cache/index options");
|
|
314
|
+
}
|
|
315
|
+
if (raw.proxy !== void 0 && raw.proxy !== "basic" && raw.proxy !== "auto") {
|
|
316
|
+
throw new Error("parse only supports proxy values of 'basic' or 'auto'");
|
|
317
|
+
}
|
|
318
|
+
ensureValidParseFormats(options.formats);
|
|
319
|
+
}
|
|
235
320
|
|
|
236
321
|
// src/v2/utils/errorHandler.ts
|
|
237
322
|
import "axios";
|
|
@@ -332,6 +417,65 @@ async function stopInteraction(http, jobId) {
|
|
|
332
417
|
}
|
|
333
418
|
}
|
|
334
419
|
|
|
420
|
+
// src/v2/methods/parse.ts
|
|
421
|
+
/**
 * Normalise the supported upload payload types into a Blob.
 *
 * @param {Blob|Buffer|ArrayBuffer|ArrayBufferView|string} input - File contents.
 * @param {string} [contentType] - MIME type to stamp on the resulting Blob.
 * @returns {Blob} The input itself when it is already a Blob with a matching
 *   (or unspecified) content type, otherwise a new Blob wrapping the input.
 * @throws {Error} When the input is none of the supported types.
 */
function toUploadBlob(input, contentType) {
  const blobAvailable = typeof Blob !== "undefined";
  if (blobAvailable && input instanceof Blob) {
    // Only re-wrap when the caller asked for a type the Blob does not already carry.
    const needsRetype = Boolean(contentType) && input.type !== contentType;
    return needsRetype ? new Blob([input], { type: contentType }) : input;
  }

  const isBinary =
    (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) ||
    input instanceof ArrayBuffer ||
    ArrayBuffer.isView(input);
  if (isBinary) {
    return new Blob([input], { type: contentType });
  }

  if (typeof input !== "string") {
    throw new Error("Unsupported parse file data type");
  }
  // Strings are the only payload that gets a default content type.
  return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" });
}
|
|
442
|
+
/**
 * Upload a file to the v2 parse endpoint as multipart form data.
 *
 * @param {object} http - Client exposing `postMultipart(endpoint, formData, headers, timeoutMs)`.
 * @param {{data: *, filename: string, contentType?: string}} file - File payload;
 *   the filename is trimmed before upload and must not be blank.
 * @param {object} [options] - Parse options; validated with ensureValidParseOptions when given.
 * @returns {Promise<object>} The `data` document of the response, or `{}` when it has none.
 * @throws {Error} When the filename is blank, the data is missing or empty,
 *   the options are invalid, or the response is not a successful 200.
 */
async function parse(http, file, options) {
  if (!file || !file.filename || !file.filename.trim()) {
    throw new Error("filename cannot be empty");
  }
  if (file.data == null) {
    throw new Error("file data cannot be empty");
  }

  // Convert exactly once: the same Blob serves the emptiness check and the
  // upload, so the payload is not copied into a second Blob.
  const blob = toUploadBlob(file.data, file.contentType);
  if (blob.size === 0) {
    throw new Error("file data cannot be empty");
  }

  if (options) ensureValidParseOptions(options);

  const version = getVersion();
  const normalizedOptions = {
    ...(options ?? {}),
    // A caller-supplied origin is always kept; only a missing one gets the SDK tag.
    origin: options?.origin ?? `js-sdk@${version}`,
  };

  const formData = new FormData();
  formData.append("options", JSON.stringify(normalizedOptions));
  formData.append("file", blob, file.filename.trim());

  // Give the HTTP request 5 seconds of headroom on top of the requested timeout.
  const requestTimeoutMs =
    typeof normalizedOptions.timeout === "number" ? normalizedOptions.timeout + 5e3 : void 0;

  try {
    const res = await http.postMultipart("/v2/parse", formData, void 0, requestTimeoutMs);
    if (res.status !== 200 || !res.data?.success) {
      throwForBadResponse(res, "parse");
    }
    return res.data.data || {};
  } catch (err) {
    // NOTE(review): normalizeAxiosError presumably throws a normalised SDK error — confirm.
    if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
    throw err;
  }
}
|
|
478
|
+
|
|
335
479
|
// src/v2/methods/search.ts
|
|
336
480
|
function prepareSearchPayload(req) {
|
|
337
481
|
if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
|
|
@@ -1280,6 +1424,9 @@ var FirecrawlClient = class {
|
|
|
1280
1424
|
async deleteScrapeBrowser(jobId) {
|
|
1281
1425
|
return this.stopInteraction(jobId);
|
|
1282
1426
|
}
|
|
1427
|
+
async parse(file, options) {
|
|
1428
|
+
return parse(this.http, file, options);
|
|
1429
|
+
}
|
|
1283
1430
|
// Search
|
|
1284
1431
|
/**
|
|
1285
1432
|
* Search the web and optionally scrape each result.
|
|
@@ -1560,7 +1707,7 @@ var FirecrawlApp = class {
|
|
|
1560
1707
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
1561
1708
|
return process.env.npm_package_version;
|
|
1562
1709
|
}
|
|
1563
|
-
const packageJson = await import("./package-
|
|
1710
|
+
const packageJson = await import("./package-HMEPZJ3J.js");
|
|
1564
1711
|
return packageJson.default.version;
|
|
1565
1712
|
} catch (error) {
|
|
1566
1713
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
package/package.json
CHANGED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import Firecrawl from "../../../index";
|
|
2
|
+
import { config } from "dotenv";
|
|
3
|
+
import { getIdentity, getApiUrl } from "./utils/idmux";
|
|
4
|
+
import { describe, test, expect, beforeAll } from "@jest/globals";
|
|
5
|
+
|
|
6
|
+
// Load environment variables before the API URL and credentials are read.
config();

const API_URL = getApiUrl();
let client: Firecrawl;

// Minimal HTML document uploaded by the happy-path test.
const UPLOAD_HTML = [
  "",
  "<!DOCTYPE html>",
  "<html>",
  "<body>",
  "<h1>JS SDK Parse E2E</h1>",
  "<p>multipart upload body</p>",
  "</body>",
  "</html>",
  "",
].join("\n");

// Fail with a readable message when beforeAll did not produce a client,
// instead of an anonymous empty Error.
function requireClient(): Firecrawl {
  if (!client) {
    throw new Error("Firecrawl client was not initialised in beforeAll");
  }
  return client;
}

beforeAll(async () => {
  const { apiKey } = await getIdentity({ name: "js-e2e-parse" });
  client = new Firecrawl({ apiKey, apiUrl: API_URL });
});

describe("v2.parse e2e", () => {
  test(
    "parses uploaded HTML files",
    async () => {
      const doc = await requireClient().parse(
        {
          data: UPLOAD_HTML,
          filename: "parse-e2e.html",
          contentType: "text/html",
        },
        {
          formats: ["markdown"],
        },
      );

      expect(doc.markdown).toContain("JS SDK Parse E2E");
      expect(doc.metadata?.creditsUsed).toBe(1);
    },
    60_000,
  );

  test(
    "returns errors for unsupported file types",
    async () => {
      const upload = requireClient().parse(
        {
          data: Buffer.from("image-data"),
          filename: "parse-e2e.png",
          contentType: "image/png",
        },
        {
          formats: ["markdown"],
        },
      );

      await expect(upload).rejects.toThrow();
    },
    60_000,
  );
});
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { describe, test, expect } from "@jest/globals";
|
|
2
|
+
import { FirecrawlClient } from "../../../v2/client";
|
|
3
|
+
|
|
4
|
+
describe("v2.parse unit", () => {
  // Every case must be rejected by client-side validation, so the API URL is
  // never actually contacted.
  const newClient = () =>
    new FirecrawlClient({
      apiKey: "test-key",
      apiUrl: "https://localhost:3002",
    });

  // Small HTML upload whose only varying part is the filename.
  const htmlFile = (filename: string) => ({
    data: "<html><body>test</body></html>",
    filename,
    contentType: "text/html",
  });

  test("rejects empty filenames before making requests", async () => {
    const attempt = newClient().parse(htmlFile(" "), { formats: ["markdown"] });

    await expect(attempt).rejects.toThrow("filename cannot be empty");
  });

  test("rejects changeTracking format before making requests", async () => {
    const attempt = newClient().parse(htmlFile("upload.html"), {
      formats: ["markdown", { type: "changeTracking" } as any],
    });

    await expect(attempt).rejects.toThrow("parse does not support changeTracking format");
  });

  test("rejects lockdown option before making requests", async () => {
    const attempt = newClient().parse(
      htmlFile("upload.html"),
      { formats: ["markdown"], lockdown: true } as any,
    );

    await expect(attempt).rejects.toThrow("parse does not support cache/index options");
  });
});
|
package/src/v2/client.ts
CHANGED
|
@@ -4,6 +4,7 @@ import {
|
|
|
4
4
|
interact as interactMethod,
|
|
5
5
|
stopInteraction as stopInteractionMethod,
|
|
6
6
|
} from "./methods/scrape";
|
|
7
|
+
import { parse as parseMethod } from "./methods/parse";
|
|
7
8
|
import { search } from "./methods/search";
|
|
8
9
|
import { map as mapMethod } from "./methods/map";
|
|
9
10
|
import {
|
|
@@ -33,6 +34,8 @@ import {
|
|
|
33
34
|
import { getConcurrency, getCreditUsage, getQueueStatus, getTokenUsage, getCreditUsageHistorical, getTokenUsageHistorical } from "./methods/usage";
|
|
34
35
|
import type {
|
|
35
36
|
Document,
|
|
37
|
+
ParseFile,
|
|
38
|
+
ParseOptions,
|
|
36
39
|
ScrapeOptions,
|
|
37
40
|
SearchData,
|
|
38
41
|
SearchRequest,
|
|
@@ -177,6 +180,24 @@ export class FirecrawlClient {
|
|
|
177
180
|
return this.stopInteraction(jobId);
|
|
178
181
|
}
|
|
179
182
|
|
|
183
|
+
// Parse
|
|
184
|
+
/**
 * Upload a file and run it through the v2 parse endpoint.
 *
 * Client-side validation rejects, before any request is made: the
 * `screenshot`, `changeTracking` and `branding` formats; the `waitFor`,
 * `actions`, `location` and `mobile` options; the cache options
 * (`maxAge`, `minAge`, `storeInCache`, `lockdown`); and any `proxy`
 * value other than `'basic'` or `'auto'`.
 *
 * @param file File payload (data, filename, optional contentType).
 * @param options Optional parse options (formats, parsers, etc.).
 * @returns Parsed document with the requested formats.
 */
async parse<Opts extends ParseOptions>(
  file: ParseFile,
  options: Opts
): Promise<Omit<Document, "json"> & { json?: InferredJsonFromOptions<Opts> }>;
async parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
async parse(file: ParseFile, options?: ParseOptions): Promise<Document> {
  const { http } = this;
  return parseMethod(http, file, options);
}
|
|
200
|
+
|
|
180
201
|
// Search
|
|
181
202
|
/**
|
|
182
203
|
* Search the web and optionally scrape each result.
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { type Document, type ParseFile, type ParseOptions } from "../types";
|
|
2
|
+
import { HttpClient } from "../utils/httpClient";
|
|
3
|
+
import { ensureValidParseOptions } from "../utils/validation";
|
|
4
|
+
import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";
|
|
5
|
+
import { getVersion } from "../utils/getVersion";
|
|
6
|
+
|
|
7
|
+
/**
 * Normalise the supported upload payload types into a Blob.
 *
 * @param input File contents (Blob, Buffer, ArrayBuffer, typed array/view, or string).
 * @param contentType MIME type to stamp on the resulting Blob.
 * @returns The input itself when it is already a Blob with a matching (or
 *   unspecified) content type, otherwise a new Blob wrapping the input.
 * @throws Error when the input is none of the supported types.
 */
function toUploadBlob(input: ParseFile["data"], contentType?: string): Blob {
  if (typeof Blob !== "undefined" && input instanceof Blob) {
    // Only re-wrap when the caller asked for a type the Blob does not already carry.
    const needsRetype = Boolean(contentType) && input.type !== contentType;
    return needsRetype ? new Blob([input], { type: contentType }) : input;
  }

  if (
    (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) ||
    input instanceof ArrayBuffer ||
    ArrayBuffer.isView(input)
  ) {
    return new Blob([input], { type: contentType });
  }

  if (typeof input !== "string") {
    throw new Error("Unsupported parse file data type");
  }

  // Strings are the only payload that gets a default content type.
  return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" });
}
|
|
33
|
+
|
|
34
|
+
/**
 * Upload a file to the v2 parse endpoint as multipart form data.
 *
 * @param http HTTP client used to send the multipart request.
 * @param file File payload; the filename is trimmed before upload and must not be blank.
 * @param options Optional parse options; validated with ensureValidParseOptions when given.
 * @returns The `data` document of the response, or `{}` when it has none.
 * @throws Error when the filename is blank, the data is missing or empty,
 *   the options are invalid, or the response is not a successful 200.
 */
export async function parse(
  http: HttpClient,
  file: ParseFile,
  options?: ParseOptions,
): Promise<Document> {
  if (!file || !file.filename || !file.filename.trim()) {
    throw new Error("filename cannot be empty");
  }

  if (file.data == null) {
    throw new Error("file data cannot be empty");
  }

  // Convert exactly once: the same Blob serves the emptiness check and the
  // upload, so the payload is not copied into a second Blob.
  const blob = toUploadBlob(file.data, file.contentType);
  if (blob.size === 0) {
    throw new Error("file data cannot be empty");
  }

  if (options) ensureValidParseOptions(options);

  const version = getVersion();
  const normalizedOptions: ParseOptions = {
    ...(options ?? {}),
    // A caller-supplied origin is always kept; only a missing one gets the SDK tag.
    origin: options?.origin ?? `js-sdk@${version}`,
  };

  const formData = new FormData();
  formData.append("options", JSON.stringify(normalizedOptions));
  formData.append("file", blob, file.filename.trim());

  // Give the HTTP request 5 seconds of headroom on top of the requested timeout.
  const requestTimeoutMs =
    typeof normalizedOptions.timeout === "number"
      ? normalizedOptions.timeout + 5000
      : undefined;

  try {
    const res = await http.postMultipart<{
      success: boolean;
      data?: Document;
      error?: string;
    }>("/v2/parse", formData, undefined, requestTimeoutMs);
    if (res.status !== 200 || !res.data?.success) {
      throwForBadResponse(res, "parse");
    }
    return (res.data.data || {}) as Document;
  } catch (err: any) {
    // NOTE(review): normalizeAxiosError presumably throws a normalised SDK error — confirm.
    if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
    throw err;
  }
}
|
package/src/v2/types.ts
CHANGED
|
@@ -66,6 +66,22 @@ export type FormatOption =
|
|
|
66
66
|
| AttributesFormat
|
|
67
67
|
| QueryFormat;
|
|
68
68
|
|
|
69
|
+
/**
 * String format names accepted by parse: every `FormatString` except
 * `screenshot`, `changeTracking` and `branding`.
 */
export type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;

/** Object form of a parse format, e.g. `{ type: 'markdown' }`. */
export interface ParseFormat {
  type: ParseFormatString;
}

/** Any single entry allowed in the `formats` array of `ParseOptions`. */
export type ParseFormatOption = ParseFormatString | ParseFormat | JsonFormat | AttributesFormat | QueryFormat;
|
|
84
|
+
|
|
69
85
|
export interface LocationConfig {
|
|
70
86
|
country?: string;
|
|
71
87
|
languages?: string[];
|
|
@@ -164,6 +180,7 @@ export interface ScrapeOptions {
|
|
|
164
180
|
maxAge?: number;
|
|
165
181
|
minAge?: number;
|
|
166
182
|
storeInCache?: boolean;
|
|
183
|
+
lockdown?: boolean;
|
|
167
184
|
profile?: {
|
|
168
185
|
name: string;
|
|
169
186
|
saveChanges?: boolean;
|
|
@@ -172,6 +189,37 @@ export interface ScrapeOptions {
|
|
|
172
189
|
origin?: string;
|
|
173
190
|
}
|
|
174
191
|
|
|
192
|
+
/** Payload types that can be uploaded to the parse endpoint. */
export type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;

/** A file to upload for parsing. */
export interface ParseFile {
  /** File contents. */
  data: ParseFileData;
  /** Name sent with the upload. */
  filename: string;
  /** Optional MIME type of `data`. */
  contentType?: string;
}

/**
 * Options accepted by parse: `ScrapeOptions` without the browser and cache
 * related fields, with `formats` and `proxy` narrowed to the values parse
 * supports.
 */
export type ParseOptions = Omit<
  ScrapeOptions,
  | 'formats' | 'proxy'
  | 'waitFor' | 'mobile' | 'actions' | 'location'
  | 'maxAge' | 'minAge' | 'storeInCache' | 'lockdown'
> & {
  formats?: ParseFormatOption[];
  proxy?: 'basic' | 'auto';
};
|
|
222
|
+
|
|
175
223
|
export interface WebhookConfig {
|
|
176
224
|
url: string;
|
|
177
225
|
headers?: Record<string, string>;
|
|
@@ -25,7 +25,6 @@ export class HttpClient {
|
|
|
25
25
|
baseURL: this.apiUrl,
|
|
26
26
|
timeout: options.timeoutMs ?? 300000,
|
|
27
27
|
headers: {
|
|
28
|
-
"Content-Type": "application/json",
|
|
29
28
|
Authorization: `Bearer ${this.apiKey}`,
|
|
30
29
|
},
|
|
31
30
|
transitional: { clarifyTimeoutError: true },
|
|
@@ -50,16 +49,35 @@ export class HttpClient {
|
|
|
50
49
|
for (let attempt = 0; attempt < this.maxRetries; attempt++) {
|
|
51
50
|
try {
|
|
52
51
|
const cfg: AxiosRequestConfig = { ...config };
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
const isFormDataBody =
|
|
53
|
+
typeof FormData !== "undefined" && cfg.data instanceof FormData;
|
|
54
|
+
const isPlainObjectBody =
|
|
55
|
+
!isFormDataBody &&
|
|
56
|
+
cfg.data != null &&
|
|
57
|
+
typeof cfg.data === "object" &&
|
|
58
|
+
!Array.isArray(cfg.data);
|
|
59
|
+
|
|
60
|
+
// For JSON POST/PUT/PATCH, ensure origin is present in body
|
|
61
|
+
if (
|
|
62
|
+
isPlainObjectBody &&
|
|
63
|
+
cfg.method &&
|
|
64
|
+
["post", "put", "patch"].includes(cfg.method.toLowerCase())
|
|
65
|
+
) {
|
|
55
66
|
const data = (cfg.data ?? {}) as Record<string, unknown>;
|
|
56
67
|
cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
|
|
57
|
-
|
|
68
|
+
|
|
58
69
|
// If timeout is specified in the body, use it to override the request timeout
|
|
59
70
|
if (typeof data.timeout === "number") {
|
|
60
71
|
cfg.timeout = data.timeout + 5000;
|
|
61
72
|
}
|
|
62
73
|
}
|
|
74
|
+
|
|
75
|
+
if (isFormDataBody) {
|
|
76
|
+
cfg.headers = { ...(cfg.headers || {}) };
|
|
77
|
+
delete (cfg.headers as Record<string, unknown>)["Content-Type"];
|
|
78
|
+
delete (cfg.headers as Record<string, unknown>)["content-type"];
|
|
79
|
+
}
|
|
80
|
+
|
|
63
81
|
const res = await this.instance.request<T>(cfg);
|
|
64
82
|
if (res.status === 502 && attempt < this.maxRetries - 1) {
|
|
65
83
|
await this.sleep(this.backoffFactor * Math.pow(2, attempt));
|
|
@@ -87,6 +105,21 @@ export class HttpClient {
|
|
|
87
105
|
return this.request<T>({ method: "post", url: endpoint, data: body, headers });
|
|
88
106
|
}
|
|
89
107
|
|
|
108
|
+
/**
 * POST a `FormData` body through the shared `request` pipeline.
 *
 * @param endpoint Path of the endpoint to call.
 * @param formData Multipart body to send.
 * @param headers Optional extra request headers.
 * @param timeoutMs Optional per-request timeout in milliseconds.
 * @returns The result of `request` for this call.
 */
postMultipart<T = any>(
  endpoint: string,
  formData: FormData,
  headers?: Record<string, string>,
  timeoutMs?: number,
) {
  const config: AxiosRequestConfig = {
    method: "post",
    url: endpoint,
    data: formData,
    headers,
    timeout: timeoutMs,
  };
  return this.request<T>(config);
}
|
|
122
|
+
|
|
90
123
|
/**
 * Issue a GET request through the shared `request` pipeline.
 *
 * @param endpoint Path of the endpoint to call.
 * @param headers Optional extra request headers.
 * @returns The result of `request` for this call.
 */
get<T = any>(endpoint: string, headers?: Record<string, string>) {
  const config: AxiosRequestConfig = { method: "get", url: endpoint, headers };
  return this.request<T>(config);
}
|
|
@@ -1,4 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
type ChangeTrackingFormat,
|
|
3
|
+
type FormatOption,
|
|
4
|
+
type JsonFormat,
|
|
5
|
+
type ParseFormatOption,
|
|
6
|
+
type ParseOptions,
|
|
7
|
+
type ScrapeOptions,
|
|
8
|
+
type ScreenshotFormat,
|
|
9
|
+
} from "../types";
|
|
2
10
|
import { isZodSchema, zodSchemaToJsonSchema, looksLikeZodShape } from "../../utils/zodSchemaToJson";
|
|
3
11
|
|
|
4
12
|
export function ensureValidFormats(formats?: FormatOption[]): void {
|
|
@@ -62,3 +70,87 @@ export function ensureValidScrapeOptions(options?: ScrapeOptions): void {
|
|
|
62
70
|
ensureValidFormats(options.formats);
|
|
63
71
|
}
|
|
64
72
|
|
|
73
|
+
/**
 * Validate the `formats` array of a parse request.
 *
 * Rejects the formats parse does not support (as strings or as `{ type }`
 * objects), rejects the bare string `"json"`, and checks json format objects.
 * A Zod schema found on a json format is converted to JSON Schema in place,
 * i.e. the caller's format object is mutated.
 *
 * @param formats Formats to validate; a falsy value is accepted as-is.
 * @throws Error on the first invalid or unsupported entry.
 */
export function ensureValidParseFormats(formats?: ParseFormatOption[]): void {
  if (!formats) return;

  // Error message for each format name that parse rejects.
  const rejections = new Map<unknown, string>([
    ["screenshot", "parse does not support screenshot format"],
    ["changeTracking", "parse does not support changeTracking format"],
    ["branding", "parse does not support branding format"],
  ]);

  for (const fmt of formats) {
    const isName = typeof fmt === "string";

    if (isName && fmt === "json") {
      throw new Error("json format must be an object with { type: 'json', prompt, schema }");
    }

    const kind = isName ? fmt : (fmt as any).type;
    const rejection = rejections.get(kind);
    if (rejection !== undefined) {
      throw new Error(rejection);
    }

    // Only json format objects need further checks.
    if (isName || kind !== "json") continue;

    const jsonFormat = fmt as JsonFormat;
    if (!jsonFormat.prompt && !jsonFormat.schema) {
      throw new Error("json format requires either 'prompt' or 'schema' (or both)");
    }

    const candidate = jsonFormat.schema;
    if (isZodSchema(candidate)) {
      (jsonFormat as any).schema = zodSchemaToJsonSchema(candidate);
      continue;
    }
    if (looksLikeZodShape(candidate)) {
      throw new Error(
        "json format schema appears to be a Zod schema's .shape property. " +
        "Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. " +
        "The SDK will automatically convert Zod schemas to JSON Schema format."
      );
    }
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Validate parse options on the client before any request is sent.
 *
 * Checks run in a fixed order and the first failure wins: timeout, the
 * browser-only options, the cache/index options, the proxy value, and
 * finally the requested formats.
 *
 * @param options Parse options; a falsy value is accepted as-is.
 * @throws Error when an option is unsupported by parse or has an invalid value.
 */
export function ensureValidParseOptions(options?: ParseOptions): void {
  if (!options) return;

  const { timeout } = options;
  if (timeout != null && timeout <= 0) {
    throw new Error("timeout must be positive");
  }

  // Untyped view: these keys are not part of ParseOptions but may still be
  // present on objects coming from untyped callers.
  const raw = options as Record<string, unknown>;

  // Browser-only options: the mere presence of the key (any value other than
  // undefined) is rejected, so `mobile: false` fails as well.
  const rejectedKeys: Array<[string, string]> = [
    ["waitFor", "parse does not support waitFor"],
    ["actions", "parse does not support actions"],
    ["location", "parse does not support location overrides"],
    ["mobile", "parse does not support mobile rendering"],
  ];
  for (const [key, message] of rejectedKeys) {
    if (raw[key] !== undefined) {
      throw new Error(message);
    }
  }

  const cacheKeys = ["maxAge", "minAge", "storeInCache", "lockdown"];
  if (cacheKeys.some((key) => raw[key] !== undefined)) {
    throw new Error("parse does not support cache/index options");
  }

  const { proxy } = raw;
  const proxyIsAllowed = proxy === undefined || proxy === "basic" || proxy === "auto";
  if (!proxyIsAllowed) {
    throw new Error("parse only supports proxy values of 'basic' or 'auto'");
  }

  ensureValidParseFormats(options.formats);
}
|
|
156
|
+
|