firecrawl 4.18.4 → 4.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -46,6 +46,26 @@ const url = 'https://example.com';
46
46
  const scrapedData = await app.scrape(url);
47
47
  ```
48
48
 
49
+ ### Parsing uploaded files
50
+
51
+ Use `parse` to upload a file (`html`, `pdf`, `docx`, etc.) as multipart form data and process it through the same parsing pipeline.
52
+ Parse does not support browser-only formats/options like `changeTracking`, `screenshot`, `branding`, `actions`, `waitFor`, `location`, or `mobile`.
53
+
54
+ ```js
55
+ const parsed = await app.parse(
56
+ {
57
+ data: '<html><body><h1>Hello parse</h1></body></html>',
58
+ filename: 'upload.html',
59
+ contentType: 'text/html',
60
+ },
61
+ {
62
+ formats: ['markdown'],
63
+ }
64
+ );
65
+
66
+ console.log(parsed.markdown);
67
+ ```
68
+
49
69
  ### Crawling a Website
50
70
 
51
71
  To crawl a website with error handling, use the `crawl` method. It takes the starting URL and optional parameters, including limits and per‑page `scrapeOptions`.
@@ -8,7 +8,7 @@ var require_package = __commonJS({
8
8
  "package.json"(exports, module) {
9
9
  module.exports = {
10
10
  name: "@mendable/firecrawl-js",
11
- version: "4.18.4",
11
+ version: "4.19.0",
12
12
  description: "JavaScript SDK for Firecrawl API",
13
13
  main: "dist/index.js",
14
14
  types: "dist/index.d.ts",
package/dist/index.cjs CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
35
35
  "package.json"(exports2, module2) {
36
36
  module2.exports = {
37
37
  name: "@mendable/firecrawl-js",
38
- version: "4.18.4",
38
+ version: "4.19.0",
39
39
  description: "JavaScript SDK for Firecrawl API",
40
40
  main: "dist/index.js",
41
41
  types: "dist/index.d.ts",
@@ -158,7 +158,6 @@ var HttpClient = class {
158
158
  baseURL: this.apiUrl,
159
159
  timeout: options.timeoutMs ?? 3e5,
160
160
  headers: {
161
- "Content-Type": "application/json",
162
161
  Authorization: `Bearer ${this.apiKey}`
163
162
  },
164
163
  transitional: { clarifyTimeoutError: true }
@@ -179,13 +178,20 @@ var HttpClient = class {
179
178
  for (let attempt = 0; attempt < this.maxRetries; attempt++) {
180
179
  try {
181
180
  const cfg = { ...config };
182
- if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
181
+ const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
182
+ const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
183
+ if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
183
184
  const data = cfg.data ?? {};
184
185
  cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
185
186
  if (typeof data.timeout === "number") {
186
187
  cfg.timeout = data.timeout + 5e3;
187
188
  }
188
189
  }
190
+ if (isFormDataBody) {
191
+ cfg.headers = { ...cfg.headers || {} };
192
+ delete cfg.headers["Content-Type"];
193
+ delete cfg.headers["content-type"];
194
+ }
189
195
  const res = await this.instance.request(cfg);
190
196
  if (res.status === 502 && attempt < this.maxRetries - 1) {
191
197
  await this.sleep(this.backoffFactor * Math.pow(2, attempt));
@@ -210,6 +216,15 @@ var HttpClient = class {
210
216
  post(endpoint, body, headers) {
211
217
  return this.request({ method: "post", url: endpoint, data: body, headers });
212
218
  }
219
+ postMultipart(endpoint, formData, headers, timeoutMs) {
220
+ return this.request({
221
+ method: "post",
222
+ url: endpoint,
223
+ data: formData,
224
+ headers,
225
+ timeout: timeoutMs
226
+ });
227
+ }
213
228
  get(endpoint, headers) {
214
229
  return this.request({ method: "get", url: endpoint, headers });
215
230
  }
@@ -356,6 +371,76 @@ function ensureValidScrapeOptions(options) {
356
371
  }
357
372
  ensureValidFormats(options.formats);
358
373
  }
374
+ function ensureValidParseFormats(formats) {
375
+ if (!formats) return;
376
+ for (const fmt of formats) {
377
+ if (typeof fmt === "string") {
378
+ if (fmt === "json") {
379
+ throw new Error("json format must be an object with { type: 'json', prompt, schema }");
380
+ }
381
+ if (fmt === "screenshot") {
382
+ throw new Error("parse does not support screenshot format");
383
+ }
384
+ if (fmt === "changeTracking") {
385
+ throw new Error("parse does not support changeTracking format");
386
+ }
387
+ if (fmt === "branding") {
388
+ throw new Error("parse does not support branding format");
389
+ }
390
+ continue;
391
+ }
392
+ const type = fmt.type;
393
+ if (type === "changeTracking") {
394
+ throw new Error("parse does not support changeTracking format");
395
+ }
396
+ if (type === "screenshot") {
397
+ throw new Error("parse does not support screenshot format");
398
+ }
399
+ if (type === "branding") {
400
+ throw new Error("parse does not support branding format");
401
+ }
402
+ if (fmt.type === "json") {
403
+ const j = fmt;
404
+ if (!j.prompt && !j.schema) {
405
+ throw new Error("json format requires either 'prompt' or 'schema' (or both)");
406
+ }
407
+ const maybeSchema = j.schema;
408
+ if (isZodSchema(maybeSchema)) {
409
+ j.schema = zodSchemaToJsonSchema(maybeSchema);
410
+ } else if (looksLikeZodShape(maybeSchema)) {
411
+ throw new Error(
412
+ "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
413
+ );
414
+ }
415
+ }
416
+ }
417
+ }
418
+ function ensureValidParseOptions(options) {
419
+ if (!options) return;
420
+ if (options.timeout != null && options.timeout <= 0) {
421
+ throw new Error("timeout must be positive");
422
+ }
423
+ const raw = options;
424
+ if (raw.waitFor !== void 0) {
425
+ throw new Error("parse does not support waitFor");
426
+ }
427
+ if (raw.actions !== void 0) {
428
+ throw new Error("parse does not support actions");
429
+ }
430
+ if (raw.location !== void 0) {
431
+ throw new Error("parse does not support location overrides");
432
+ }
433
+ if (raw.mobile !== void 0) {
434
+ throw new Error("parse does not support mobile rendering");
435
+ }
436
+ if (raw.maxAge !== void 0 || raw.minAge !== void 0 || raw.storeInCache !== void 0 || raw.lockdown !== void 0) {
437
+ throw new Error("parse does not support cache/index options");
438
+ }
439
+ if (raw.proxy !== void 0 && raw.proxy !== "basic" && raw.proxy !== "auto") {
440
+ throw new Error("parse only supports proxy values of 'basic' or 'auto'");
441
+ }
442
+ ensureValidParseFormats(options.formats);
443
+ }
359
444
 
360
445
  // src/v2/utils/errorHandler.ts
361
446
  var import_axios2 = require("axios");
@@ -456,6 +541,65 @@ async function stopInteraction(http, jobId) {
456
541
  }
457
542
  }
458
543
 
544
+ // src/v2/methods/parse.ts
545
+ function toUploadBlob(input, contentType) {
546
+ if (typeof Blob !== "undefined" && input instanceof Blob) {
547
+ if (contentType && input.type !== contentType) {
548
+ return new Blob([input], { type: contentType });
549
+ }
550
+ return input;
551
+ }
552
+ if (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) {
553
+ return new Blob([input], { type: contentType });
554
+ }
555
+ if (input instanceof ArrayBuffer) {
556
+ return new Blob([input], { type: contentType });
557
+ }
558
+ if (ArrayBuffer.isView(input)) {
559
+ return new Blob([input], { type: contentType });
560
+ }
561
+ if (typeof input === "string") {
562
+ return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" });
563
+ }
564
+ throw new Error("Unsupported parse file data type");
565
+ }
566
+ async function parse(http, file, options) {
567
+ if (!file || !file.filename || !file.filename.trim()) {
568
+ throw new Error("filename cannot be empty");
569
+ }
570
+ if (file.data == null) {
571
+ throw new Error("file data cannot be empty");
572
+ }
573
+ const blob = toUploadBlob(file.data, file.contentType);
574
+ if (blob.size === 0) {
575
+ throw new Error("file data cannot be empty");
576
+ }
577
+ if (options) ensureValidParseOptions(options);
578
+ const version = getVersion();
579
+ const normalizedOptions = {
580
+ ...options ?? {},
581
+ origin: typeof options?.origin === "string" && options.origin.includes("mcp") ? options.origin : options?.origin ?? `js-sdk@${version}`
582
+ };
583
+ const formData = new FormData();
584
+ formData.append("options", JSON.stringify(normalizedOptions));
585
+ formData.append(
586
+ "file",
587
+ toUploadBlob(file.data, file.contentType),
588
+ file.filename.trim()
589
+ );
590
+ const requestTimeoutMs = typeof normalizedOptions.timeout === "number" ? normalizedOptions.timeout + 5e3 : void 0;
591
+ try {
592
+ const res = await http.postMultipart("/v2/parse", formData, void 0, requestTimeoutMs);
593
+ if (res.status !== 200 || !res.data?.success) {
594
+ throwForBadResponse(res, "parse");
595
+ }
596
+ return res.data.data || {};
597
+ } catch (err) {
598
+ if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
599
+ throw err;
600
+ }
601
+ }
602
+
459
603
  // src/v2/methods/search.ts
460
604
  function prepareSearchPayload(req) {
461
605
  if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
@@ -1404,6 +1548,9 @@ var FirecrawlClient = class {
1404
1548
  async deleteScrapeBrowser(jobId) {
1405
1549
  return this.stopInteraction(jobId);
1406
1550
  }
1551
+ async parse(file, options) {
1552
+ return parse(this.http, file, options);
1553
+ }
1407
1554
  // Search
1408
1555
  /**
1409
1556
  * Search the web and optionally scrape each result.
package/dist/index.d.cts CHANGED
@@ -45,6 +45,11 @@ interface QueryFormat {
45
45
  prompt: string;
46
46
  }
47
47
  type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QueryFormat;
48
+ type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
49
+ interface ParseFormat {
50
+ type: ParseFormatString;
51
+ }
52
+ type ParseFormatOption = ParseFormatString | ParseFormat | JsonFormat | AttributesFormat | QueryFormat;
48
53
  interface LocationConfig$1 {
49
54
  country?: string;
50
55
  languages?: string[];
@@ -119,6 +124,7 @@ interface ScrapeOptions {
119
124
  maxAge?: number;
120
125
  minAge?: number;
121
126
  storeInCache?: boolean;
127
+ lockdown?: boolean;
122
128
  profile?: {
123
129
  name: string;
124
130
  saveChanges?: boolean;
@@ -126,6 +132,16 @@ interface ScrapeOptions {
126
132
  integration?: string;
127
133
  origin?: string;
128
134
  }
135
+ type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;
136
+ interface ParseFile {
137
+ data: ParseFileData;
138
+ filename: string;
139
+ contentType?: string;
140
+ }
141
+ type ParseOptions = Omit<ScrapeOptions, 'formats' | 'waitFor' | 'mobile' | 'actions' | 'location' | 'maxAge' | 'minAge' | 'storeInCache' | 'lockdown' | 'proxy'> & {
142
+ formats?: ParseFormatOption[];
143
+ proxy?: 'basic' | 'auto';
144
+ };
129
145
  interface WebhookConfig {
130
146
  url: string;
131
147
  headers?: Record<string, string>;
@@ -645,6 +661,7 @@ declare class HttpClient {
645
661
  private request;
646
662
  private sleep;
647
663
  post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
664
+ postMultipart<T = any>(endpoint: string, formData: FormData, headers?: Record<string, string>, timeoutMs?: number): Promise<AxiosResponse<T, any, {}>>;
648
665
  get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
649
666
  delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
650
667
  prepareHeaders(idempotencyKey?: string): Record<string, string>;
@@ -796,6 +813,18 @@ declare class FirecrawlClient {
796
813
  * @deprecated Use stopInteraction().
797
814
  */
798
815
  deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
816
+ /**
817
+ * Parse an uploaded file via the v2 parse endpoint.
818
+ * @param file File payload (data, filename, optional contentType).
819
+ * @param options Optional parse options (formats, parsers, etc.).
820
+ * Note: parse does not support changeTracking, screenshot, branding,
821
+ * actions, waitFor, location, or mobile options.
822
+ * @returns Parsed document with requested formats.
823
+ */
824
+ parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & {
825
+ json?: InferredJsonFromOptions<Opts>;
826
+ }>;
827
+ parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
799
828
  /**
800
829
  * Search the web and optionally scrape each result.
801
830
  * @param query Search query string.
@@ -1892,4 +1921,4 @@ declare class Firecrawl extends FirecrawlClient {
1892
1921
  get v1(): FirecrawlApp;
1893
1922
  }
1894
1923
 
1895
- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
1924
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.d.ts CHANGED
@@ -45,6 +45,11 @@ interface QueryFormat {
45
45
  prompt: string;
46
46
  }
47
47
  type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QueryFormat;
48
+ type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
49
+ interface ParseFormat {
50
+ type: ParseFormatString;
51
+ }
52
+ type ParseFormatOption = ParseFormatString | ParseFormat | JsonFormat | AttributesFormat | QueryFormat;
48
53
  interface LocationConfig$1 {
49
54
  country?: string;
50
55
  languages?: string[];
@@ -119,6 +124,7 @@ interface ScrapeOptions {
119
124
  maxAge?: number;
120
125
  minAge?: number;
121
126
  storeInCache?: boolean;
127
+ lockdown?: boolean;
122
128
  profile?: {
123
129
  name: string;
124
130
  saveChanges?: boolean;
@@ -126,6 +132,16 @@ interface ScrapeOptions {
126
132
  integration?: string;
127
133
  origin?: string;
128
134
  }
135
+ type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;
136
+ interface ParseFile {
137
+ data: ParseFileData;
138
+ filename: string;
139
+ contentType?: string;
140
+ }
141
+ type ParseOptions = Omit<ScrapeOptions, 'formats' | 'waitFor' | 'mobile' | 'actions' | 'location' | 'maxAge' | 'minAge' | 'storeInCache' | 'lockdown' | 'proxy'> & {
142
+ formats?: ParseFormatOption[];
143
+ proxy?: 'basic' | 'auto';
144
+ };
129
145
  interface WebhookConfig {
130
146
  url: string;
131
147
  headers?: Record<string, string>;
@@ -645,6 +661,7 @@ declare class HttpClient {
645
661
  private request;
646
662
  private sleep;
647
663
  post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
664
+ postMultipart<T = any>(endpoint: string, formData: FormData, headers?: Record<string, string>, timeoutMs?: number): Promise<AxiosResponse<T, any, {}>>;
648
665
  get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
649
666
  delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
650
667
  prepareHeaders(idempotencyKey?: string): Record<string, string>;
@@ -796,6 +813,18 @@ declare class FirecrawlClient {
796
813
  * @deprecated Use stopInteraction().
797
814
  */
798
815
  deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
816
+ /**
817
+ * Parse an uploaded file via the v2 parse endpoint.
818
+ * @param file File payload (data, filename, optional contentType).
819
+ * @param options Optional parse options (formats, parsers, etc.).
820
+ * Note: parse does not support changeTracking, screenshot, branding,
821
+ * actions, waitFor, location, or mobile options.
822
+ * @returns Parsed document with requested formats.
823
+ */
824
+ parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & {
825
+ json?: InferredJsonFromOptions<Opts>;
826
+ }>;
827
+ parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
799
828
  /**
800
829
  * Search the web and optionally scrape each result.
801
830
  * @param query Search query string.
@@ -1892,4 +1921,4 @@ declare class Firecrawl extends FirecrawlClient {
1892
1921
  get v1(): FirecrawlApp;
1893
1922
  }
1894
1923
 
1895
- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
1924
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-ZUJQPZTD.js";
3
+ } from "./chunk-JJY4NJXL.js";
4
4
 
5
5
  // src/v2/utils/httpClient.ts
6
6
  import axios from "axios";
@@ -34,7 +34,6 @@ var HttpClient = class {
34
34
  baseURL: this.apiUrl,
35
35
  timeout: options.timeoutMs ?? 3e5,
36
36
  headers: {
37
- "Content-Type": "application/json",
38
37
  Authorization: `Bearer ${this.apiKey}`
39
38
  },
40
39
  transitional: { clarifyTimeoutError: true }
@@ -55,13 +54,20 @@ var HttpClient = class {
55
54
  for (let attempt = 0; attempt < this.maxRetries; attempt++) {
56
55
  try {
57
56
  const cfg = { ...config };
58
- if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
57
+ const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
58
+ const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
59
+ if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
59
60
  const data = cfg.data ?? {};
60
61
  cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
61
62
  if (typeof data.timeout === "number") {
62
63
  cfg.timeout = data.timeout + 5e3;
63
64
  }
64
65
  }
66
+ if (isFormDataBody) {
67
+ cfg.headers = { ...cfg.headers || {} };
68
+ delete cfg.headers["Content-Type"];
69
+ delete cfg.headers["content-type"];
70
+ }
65
71
  const res = await this.instance.request(cfg);
66
72
  if (res.status === 502 && attempt < this.maxRetries - 1) {
67
73
  await this.sleep(this.backoffFactor * Math.pow(2, attempt));
@@ -86,6 +92,15 @@ var HttpClient = class {
86
92
  post(endpoint, body, headers) {
87
93
  return this.request({ method: "post", url: endpoint, data: body, headers });
88
94
  }
95
+ postMultipart(endpoint, formData, headers, timeoutMs) {
96
+ return this.request({
97
+ method: "post",
98
+ url: endpoint,
99
+ data: formData,
100
+ headers,
101
+ timeout: timeoutMs
102
+ });
103
+ }
89
104
  get(endpoint, headers) {
90
105
  return this.request({ method: "get", url: endpoint, headers });
91
106
  }
@@ -232,6 +247,76 @@ function ensureValidScrapeOptions(options) {
232
247
  }
233
248
  ensureValidFormats(options.formats);
234
249
  }
250
+ function ensureValidParseFormats(formats) {
251
+ if (!formats) return;
252
+ for (const fmt of formats) {
253
+ if (typeof fmt === "string") {
254
+ if (fmt === "json") {
255
+ throw new Error("json format must be an object with { type: 'json', prompt, schema }");
256
+ }
257
+ if (fmt === "screenshot") {
258
+ throw new Error("parse does not support screenshot format");
259
+ }
260
+ if (fmt === "changeTracking") {
261
+ throw new Error("parse does not support changeTracking format");
262
+ }
263
+ if (fmt === "branding") {
264
+ throw new Error("parse does not support branding format");
265
+ }
266
+ continue;
267
+ }
268
+ const type = fmt.type;
269
+ if (type === "changeTracking") {
270
+ throw new Error("parse does not support changeTracking format");
271
+ }
272
+ if (type === "screenshot") {
273
+ throw new Error("parse does not support screenshot format");
274
+ }
275
+ if (type === "branding") {
276
+ throw new Error("parse does not support branding format");
277
+ }
278
+ if (fmt.type === "json") {
279
+ const j = fmt;
280
+ if (!j.prompt && !j.schema) {
281
+ throw new Error("json format requires either 'prompt' or 'schema' (or both)");
282
+ }
283
+ const maybeSchema = j.schema;
284
+ if (isZodSchema(maybeSchema)) {
285
+ j.schema = zodSchemaToJsonSchema(maybeSchema);
286
+ } else if (looksLikeZodShape(maybeSchema)) {
287
+ throw new Error(
288
+ "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
289
+ );
290
+ }
291
+ }
292
+ }
293
+ }
294
+ function ensureValidParseOptions(options) {
295
+ if (!options) return;
296
+ if (options.timeout != null && options.timeout <= 0) {
297
+ throw new Error("timeout must be positive");
298
+ }
299
+ const raw = options;
300
+ if (raw.waitFor !== void 0) {
301
+ throw new Error("parse does not support waitFor");
302
+ }
303
+ if (raw.actions !== void 0) {
304
+ throw new Error("parse does not support actions");
305
+ }
306
+ if (raw.location !== void 0) {
307
+ throw new Error("parse does not support location overrides");
308
+ }
309
+ if (raw.mobile !== void 0) {
310
+ throw new Error("parse does not support mobile rendering");
311
+ }
312
+ if (raw.maxAge !== void 0 || raw.minAge !== void 0 || raw.storeInCache !== void 0 || raw.lockdown !== void 0) {
313
+ throw new Error("parse does not support cache/index options");
314
+ }
315
+ if (raw.proxy !== void 0 && raw.proxy !== "basic" && raw.proxy !== "auto") {
316
+ throw new Error("parse only supports proxy values of 'basic' or 'auto'");
317
+ }
318
+ ensureValidParseFormats(options.formats);
319
+ }
235
320
 
236
321
  // src/v2/utils/errorHandler.ts
237
322
  import "axios";
@@ -332,6 +417,65 @@ async function stopInteraction(http, jobId) {
332
417
  }
333
418
  }
334
419
 
420
+ // src/v2/methods/parse.ts
421
+ function toUploadBlob(input, contentType) {
422
+ if (typeof Blob !== "undefined" && input instanceof Blob) {
423
+ if (contentType && input.type !== contentType) {
424
+ return new Blob([input], { type: contentType });
425
+ }
426
+ return input;
427
+ }
428
+ if (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) {
429
+ return new Blob([input], { type: contentType });
430
+ }
431
+ if (input instanceof ArrayBuffer) {
432
+ return new Blob([input], { type: contentType });
433
+ }
434
+ if (ArrayBuffer.isView(input)) {
435
+ return new Blob([input], { type: contentType });
436
+ }
437
+ if (typeof input === "string") {
438
+ return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" });
439
+ }
440
+ throw new Error("Unsupported parse file data type");
441
+ }
442
+ async function parse(http, file, options) {
443
+ if (!file || !file.filename || !file.filename.trim()) {
444
+ throw new Error("filename cannot be empty");
445
+ }
446
+ if (file.data == null) {
447
+ throw new Error("file data cannot be empty");
448
+ }
449
+ const blob = toUploadBlob(file.data, file.contentType);
450
+ if (blob.size === 0) {
451
+ throw new Error("file data cannot be empty");
452
+ }
453
+ if (options) ensureValidParseOptions(options);
454
+ const version = getVersion();
455
+ const normalizedOptions = {
456
+ ...options ?? {},
457
+ origin: typeof options?.origin === "string" && options.origin.includes("mcp") ? options.origin : options?.origin ?? `js-sdk@${version}`
458
+ };
459
+ const formData = new FormData();
460
+ formData.append("options", JSON.stringify(normalizedOptions));
461
+ formData.append(
462
+ "file",
463
+ toUploadBlob(file.data, file.contentType),
464
+ file.filename.trim()
465
+ );
466
+ const requestTimeoutMs = typeof normalizedOptions.timeout === "number" ? normalizedOptions.timeout + 5e3 : void 0;
467
+ try {
468
+ const res = await http.postMultipart("/v2/parse", formData, void 0, requestTimeoutMs);
469
+ if (res.status !== 200 || !res.data?.success) {
470
+ throwForBadResponse(res, "parse");
471
+ }
472
+ return res.data.data || {};
473
+ } catch (err) {
474
+ if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
475
+ throw err;
476
+ }
477
+ }
478
+
335
479
  // src/v2/methods/search.ts
336
480
  function prepareSearchPayload(req) {
337
481
  if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
@@ -1280,6 +1424,9 @@ var FirecrawlClient = class {
1280
1424
  async deleteScrapeBrowser(jobId) {
1281
1425
  return this.stopInteraction(jobId);
1282
1426
  }
1427
+ async parse(file, options) {
1428
+ return parse(this.http, file, options);
1429
+ }
1283
1430
  // Search
1284
1431
  /**
1285
1432
  * Search the web and optionally scrape each result.
@@ -1560,7 +1707,7 @@ var FirecrawlApp = class {
1560
1707
  if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
1561
1708
  return process.env.npm_package_version;
1562
1709
  }
1563
- const packageJson = await import("./package-43GY3VT3.js");
1710
+ const packageJson = await import("./package-HMEPZJ3J.js");
1564
1711
  return packageJson.default.version;
1565
1712
  } catch (error) {
1566
1713
  const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
@@ -1,4 +1,4 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-ZUJQPZTD.js";
3
+ } from "./chunk-JJY4NJXL.js";
4
4
  export default require_package();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "4.18.4",
3
+ "version": "4.19.0",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,67 @@
1
+ import Firecrawl from "../../../index";
2
+ import { config } from "dotenv";
3
+ import { getIdentity, getApiUrl } from "./utils/idmux";
4
+ import { describe, test, expect, beforeAll } from "@jest/globals";
5
+
6
config();

const API_URL = getApiUrl();
const E2E_TIMEOUT_MS = 60_000;
let client: Firecrawl;

// Small HTML document uploaded by the happy-path test.
const HTML_FIXTURE = [
  "",
  "<!DOCTYPE html>",
  "<html>",
  "<body>",
  "<h1>JS SDK Parse E2E</h1>",
  "<p>multipart upload body</p>",
  "</body>",
  "</html>",
  "",
].join("\n");

/** Returns the shared client, throwing if `beforeAll` never assigned it. */
function requireClient(): Firecrawl {
  if (!client) throw new Error();
  return client;
}

beforeAll(async () => {
  const identity = await getIdentity({ name: "js-e2e-parse" });
  client = new Firecrawl({ apiKey: identity.apiKey, apiUrl: API_URL });
});

describe("v2.parse e2e", () => {
  test(
    "parses uploaded HTML files",
    async () => {
      const doc = await requireClient().parse(
        {
          data: HTML_FIXTURE,
          filename: "parse-e2e.html",
          contentType: "text/html",
        },
        { formats: ["markdown"] },
      );

      expect(doc.markdown).toContain("JS SDK Parse E2E");
      expect(doc.metadata?.creditsUsed).toBe(1);
    },
    E2E_TIMEOUT_MS,
  );

  test(
    "returns errors for unsupported file types",
    async () => {
      const upload = {
        data: Buffer.from("image-data"),
        filename: "parse-e2e.png",
        contentType: "image/png",
      };

      await expect(
        requireClient().parse(upload, { formats: ["markdown"] }),
      ).rejects.toThrow();
    },
    E2E_TIMEOUT_MS,
  );
});
@@ -0,0 +1,58 @@
1
+ import { describe, test, expect } from "@jest/globals";
2
+ import { FirecrawlClient } from "../../../v2/client";
3
+
4
describe("v2.parse unit", () => {
  /** Builds a fresh client per test, configured with a localhost API URL. */
  const makeClient = () =>
    new FirecrawlClient({
      apiKey: "test-key",
      apiUrl: "https://localhost:3002",
    });

  /** HTML upload payload that only varies by filename. */
  const htmlUpload = (filename: string) => ({
    data: "<html><body>test</body></html>",
    filename,
    contentType: "text/html",
  });

  test("rejects empty filenames before making requests", async () => {
    await expect(
      makeClient().parse(htmlUpload(" "), { formats: ["markdown"] }),
    ).rejects.toThrow("filename cannot be empty");
  });

  test("rejects changeTracking format before making requests", async () => {
    await expect(
      makeClient().parse(htmlUpload("upload.html"), {
        formats: ["markdown", { type: "changeTracking" } as any],
      }),
    ).rejects.toThrow("parse does not support changeTracking format");
  });

  test("rejects lockdown option before making requests", async () => {
    await expect(
      makeClient().parse(
        htmlUpload("upload.html"),
        { formats: ["markdown"], lockdown: true } as any,
      ),
    ).rejects.toThrow("parse does not support cache/index options");
  });
});
package/src/v2/client.ts CHANGED
@@ -4,6 +4,7 @@ import {
4
4
  interact as interactMethod,
5
5
  stopInteraction as stopInteractionMethod,
6
6
  } from "./methods/scrape";
7
+ import { parse as parseMethod } from "./methods/parse";
7
8
  import { search } from "./methods/search";
8
9
  import { map as mapMethod } from "./methods/map";
9
10
  import {
@@ -33,6 +34,8 @@ import {
33
34
  import { getConcurrency, getCreditUsage, getQueueStatus, getTokenUsage, getCreditUsageHistorical, getTokenUsageHistorical } from "./methods/usage";
34
35
  import type {
35
36
  Document,
37
+ ParseFile,
38
+ ParseOptions,
36
39
  ScrapeOptions,
37
40
  SearchData,
38
41
  SearchRequest,
@@ -177,6 +180,24 @@ export class FirecrawlClient {
177
180
  return this.stopInteraction(jobId);
178
181
  }
179
182
 
183
// Parse
/**
 * Upload a file and process it through the v2 parse endpoint.
 *
 * Unsupported for parse (rejected by client-side validation): the
 * `changeTracking`, `screenshot` and `branding` formats; the `actions`,
 * `waitFor`, `location` and `mobile` options; the cache options `maxAge`,
 * `minAge`, `storeInCache` and `lockdown`; and any `proxy` value other than
 * `'basic'` or `'auto'`.
 *
 * @param file File payload (data, filename, optional contentType).
 * @param options Optional parse options (formats, parsers, etc.).
 * @returns Parsed document with the requested formats.
 */
async parse<Opts extends ParseOptions>(
  file: ParseFile,
  options: Opts
): Promise<Omit<Document, "json"> & { json?: InferredJsonFromOptions<Opts> }>;
async parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
async parse(file: ParseFile, options?: ParseOptions): Promise<Document> {
  const parsed = await parseMethod(this.http, file, options);
  return parsed;
}
200
+
180
201
  // Search
181
202
  /**
182
203
  * Search the web and optionally scrape each result.
@@ -0,0 +1,90 @@
1
+ import { type Document, type ParseFile, type ParseOptions } from "../types";
2
+ import { HttpClient } from "../utils/httpClient";
3
+ import { ensureValidParseOptions } from "../utils/validation";
4
+ import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";
5
+ import { getVersion } from "../utils/getVersion";
6
+
7
/**
 * Normalize a supported upload payload into a `Blob`.
 *
 * - A `Blob` is returned unchanged unless `contentType` is given and differs
 *   from the blob's own type, in which case it is re-wrapped with that type.
 * - A string becomes a blob typed `contentType`, defaulting to
 *   "text/plain; charset=utf-8".
 * - An `ArrayBuffer` or any ArrayBuffer view (Buffer, Uint8Array, other typed
 *   arrays, DataView) becomes a blob typed `contentType`.
 *
 * @param input File contents in one of the supported representations.
 * @param contentType Optional MIME type to stamp on the resulting blob.
 * @returns A `Blob` holding the payload.
 * @throws Error when the runtime has no global `Blob`, or when `input` is not
 *   one of the supported types.
 */
function toUploadBlob(input: ParseFile["data"], contentType?: string): Blob {
  // Fail with an explicit message instead of a bare ReferenceError from
  // `new Blob(...)` on runtimes that lack a global Blob.
  if (typeof Blob === "undefined") {
    throw new Error("Blob is not available in this runtime");
  }

  if (input instanceof Blob) {
    const needsRetype = Boolean(contentType) && input.type !== contentType;
    return needsRetype ? new Blob([input], { type: contentType }) : input;
  }

  if (typeof input === "string") {
    return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" });
  }

  // Buffer is a Uint8Array subclass, so the view check covers it as well.
  if (input instanceof ArrayBuffer || ArrayBuffer.isView(input)) {
    return new Blob([input], { type: contentType });
  }

  throw new Error("Unsupported parse file data type");
}
33
+
34
/**
 * Upload a file to the v2 parse endpoint as multipart form data.
 *
 * All validation happens before the request is sent: the filename must be
 * non-blank, the data must be present and non-empty, and `options` (when
 * given) must pass `ensureValidParseOptions`.
 *
 * @param http HTTP client used to send the multipart request.
 * @param file File payload (data, filename, optional contentType).
 * @param options Optional parse options, serialized into the `options` form field.
 * @returns The response's `data` document, or an empty object when absent.
 * @throws Error "filename cannot be empty" or "file data cannot be empty" for
 *   invalid payloads; non-200 / unsuccessful responses are handed to
 *   `throwForBadResponse`, Axios errors to `normalizeAxiosError`.
 */
export async function parse(
  http: HttpClient,
  file: ParseFile,
  options?: ParseOptions,
): Promise<Document> {
  const filename = file?.filename?.trim();
  if (!filename) {
    throw new Error("filename cannot be empty");
  }

  if (file.data == null) {
    throw new Error("file data cannot be empty");
  }

  // Convert the payload once: the same Blob is used for the emptiness check
  // and for the upload, so byte payloads are not copied a second time.
  const blob = toUploadBlob(file.data, file.contentType);
  if (blob.size === 0) {
    throw new Error("file data cannot be empty");
  }

  if (options) ensureValidParseOptions(options);

  const version = getVersion();
  const normalizedOptions: ParseOptions = {
    ...options,
    // A caller-supplied origin is always kept as-is; only a missing origin
    // falls back to the SDK identifier.
    origin: options?.origin ?? `js-sdk@${version}`,
  };

  const formData = new FormData();
  formData.append("options", JSON.stringify(normalizedOptions));
  formData.append("file", blob, filename);

  // Give the HTTP request 5 seconds more than the parse timeout itself.
  const requestTimeoutMs =
    typeof normalizedOptions.timeout === "number"
      ? normalizedOptions.timeout + 5000
      : undefined;

  try {
    const res = await http.postMultipart<{
      success: boolean;
      data?: Document;
      error?: string;
    }>("/v2/parse", formData, undefined, requestTimeoutMs);
    if (res.status !== 200 || !res.data?.success) {
      throwForBadResponse(res, "parse");
    }
    return (res.data.data || {}) as Document;
  } catch (err: any) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
    throw err;
  }
}
package/src/v2/types.ts CHANGED
@@ -66,6 +66,22 @@ export type FormatOption =
66
66
  | AttributesFormat
67
67
  | QueryFormat;
68
68
 
69
/**
 * String format names usable with parse: every `FormatString` except
 * 'screenshot', 'changeTracking' and 'branding'.
 */
export type ParseFormatString = Exclude<
  FormatString,
  'branding' | 'changeTracking' | 'screenshot'
>;

/** Object form of a plain parse format, identified by its `type`. */
export interface ParseFormat {
  type: ParseFormatString;
}

/** Any single entry allowed in `ParseOptions.formats`. */
export type ParseFormatOption =
  | ParseFormat
  | ParseFormatString
  | QueryFormat
  | AttributesFormat
  | JsonFormat;
84
+
69
85
  export interface LocationConfig {
70
86
  country?: string;
71
87
  languages?: string[];
@@ -164,6 +180,7 @@ export interface ScrapeOptions {
164
180
  maxAge?: number;
165
181
  minAge?: number;
166
182
  storeInCache?: boolean;
183
+ lockdown?: boolean;
167
184
  profile?: {
168
185
  name: string;
169
186
  saveChanges?: boolean;
@@ -172,6 +189,37 @@ export interface ScrapeOptions {
172
189
  origin?: string;
173
190
  }
174
191
 
192
/** In-memory representations accepted as the contents of an uploaded file. */
export type ParseFileData =
  | string
  | ArrayBuffer
  | Uint8Array
  | Buffer
  | File
  | Blob;

/** A file to upload to the parse endpoint. */
export interface ParseFile {
  /** File contents. */
  data: ParseFileData;
  /** Name sent with the upload. */
  filename: string;
  /** Optional MIME type for the uploaded part. */
  contentType?: string;
}

/**
 * Options accepted by parse: `ScrapeOptions` minus the keys listed below,
 * with `formats` and `proxy` re-declared using parse-specific types.
 */
export type ParseOptions = Omit<
  ScrapeOptions,
  // Re-declared below with narrower types.
  | 'proxy'
  | 'formats'
  // Removed from the parse option set entirely.
  | 'lockdown'
  | 'storeInCache'
  | 'minAge'
  | 'maxAge'
  | 'location'
  | 'actions'
  | 'mobile'
  | 'waitFor'
> & {
  proxy?: 'basic' | 'auto';
  formats?: ParseFormatOption[];
};
222
+
175
223
  export interface WebhookConfig {
176
224
  url: string;
177
225
  headers?: Record<string, string>;
@@ -25,7 +25,6 @@ export class HttpClient {
25
25
  baseURL: this.apiUrl,
26
26
  timeout: options.timeoutMs ?? 300000,
27
27
  headers: {
28
- "Content-Type": "application/json",
29
28
  Authorization: `Bearer ${this.apiKey}`,
30
29
  },
31
30
  transitional: { clarifyTimeoutError: true },
@@ -50,16 +49,35 @@ export class HttpClient {
50
49
  for (let attempt = 0; attempt < this.maxRetries; attempt++) {
51
50
  try {
52
51
  const cfg: AxiosRequestConfig = { ...config };
53
- // For POST/PUT, ensure origin is present in JSON body too
54
- if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
52
+ const isFormDataBody =
53
+ typeof FormData !== "undefined" && cfg.data instanceof FormData;
54
+ const isPlainObjectBody =
55
+ !isFormDataBody &&
56
+ cfg.data != null &&
57
+ typeof cfg.data === "object" &&
58
+ !Array.isArray(cfg.data);
59
+
60
+ // For JSON POST/PUT/PATCH, ensure origin is present in body
61
+ if (
62
+ isPlainObjectBody &&
63
+ cfg.method &&
64
+ ["post", "put", "patch"].includes(cfg.method.toLowerCase())
65
+ ) {
55
66
  const data = (cfg.data ?? {}) as Record<string, unknown>;
56
67
  cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
57
-
68
+
58
69
  // If timeout is specified in the body, use it to override the request timeout
59
70
  if (typeof data.timeout === "number") {
60
71
  cfg.timeout = data.timeout + 5000;
61
72
  }
62
73
  }
74
+
75
+ if (isFormDataBody) {
76
+ cfg.headers = { ...(cfg.headers || {}) };
77
+ delete (cfg.headers as Record<string, unknown>)["Content-Type"];
78
+ delete (cfg.headers as Record<string, unknown>)["content-type"];
79
+ }
80
+
63
81
  const res = await this.instance.request<T>(cfg);
64
82
  if (res.status === 502 && attempt < this.maxRetries - 1) {
65
83
  await this.sleep(this.backoffFactor * Math.pow(2, attempt));
@@ -87,6 +105,21 @@ export class HttpClient {
87
105
  return this.request<T>({ method: "post", url: endpoint, data: body, headers });
88
106
  }
89
107
 
108
/**
 * Send a POST request whose body is a `FormData` instance.
 *
 * @param endpoint Request URL, passed as the request config's `url`.
 * @param formData Multipart body to send.
 * @param headers Optional extra request headers.
 * @param timeoutMs Optional request timeout in milliseconds.
 * @returns Whatever `request` returns for the assembled config.
 */
postMultipart<T = any>(
  endpoint: string,
  formData: FormData,
  headers?: Record<string, string>,
  timeoutMs?: number,
) {
  const multipartConfig: AxiosRequestConfig = {
    method: "post",
    url: endpoint,
    data: formData,
    headers,
    timeout: timeoutMs,
  };
  return this.request<T>(multipartConfig);
}
122
+
90
123
  get<T = any>(endpoint: string, headers?: Record<string, string>) {
91
124
  return this.request<T>({ method: "get", url: endpoint, headers });
92
125
  }
@@ -1,4 +1,12 @@
1
- import { type FormatOption, type JsonFormat, type ScrapeOptions, type ScreenshotFormat, type ChangeTrackingFormat } from "../types";
1
+ import {
2
+ type ChangeTrackingFormat,
3
+ type FormatOption,
4
+ type JsonFormat,
5
+ type ParseFormatOption,
6
+ type ParseOptions,
7
+ type ScrapeOptions,
8
+ type ScreenshotFormat,
9
+ } from "../types";
2
10
  import { isZodSchema, zodSchemaToJsonSchema, looksLikeZodShape } from "../../utils/zodSchemaToJson";
3
11
 
4
12
  export function ensureValidFormats(formats?: FormatOption[]): void {
@@ -62,3 +70,87 @@ export function ensureValidScrapeOptions(options?: ScrapeOptions): void {
62
70
  ensureValidFormats(options.formats);
63
71
  }
64
72
 
73
/**
 * Validate the `formats` list of a parse request.
 *
 * Rejects the bare string "json" and the screenshot / changeTracking /
 * branding formats in either string or object form. For object-form json
 * formats it requires a prompt or a schema, and converts a Zod schema to JSON
 * Schema in place on the given format object.
 *
 * @param formats Formats to validate; nothing is checked when omitted.
 * @throws Error describing the first invalid entry.
 */
export function ensureValidParseFormats(formats?: ParseFormatOption[]): void {
  if (!formats) return;

  // Format names parse rejects in both string and object form.
  const unsupported: unknown[] = ["changeTracking", "screenshot", "branding"];

  for (const entry of formats) {
    if (typeof entry === "string") {
      const name: string = entry;
      if (name === "json") {
        throw new Error("json format must be an object with { type: 'json', prompt, schema }");
      }
      if (unsupported.includes(name)) {
        throw new Error(`parse does not support ${name} format`);
      }
      continue;
    }

    const kind = (entry as any).type;
    if (unsupported.includes(kind)) {
      throw new Error(`parse does not support ${kind} format`);
    }
    if (kind !== "json") continue;

    const jsonFormat = entry as JsonFormat;
    if (!jsonFormat.prompt && !jsonFormat.schema) {
      throw new Error("json format requires either 'prompt' or 'schema' (or both)");
    }

    const candidate = jsonFormat.schema;
    if (isZodSchema(candidate)) {
      // Written back onto the caller's format object so the converted schema
      // is what gets serialized later.
      (jsonFormat as any).schema = zodSchemaToJsonSchema(candidate);
      continue;
    }
    if (looksLikeZodShape(candidate)) {
      const message = [
        "json format schema appears to be a Zod schema's .shape property. ",
        "Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. ",
        "The SDK will automatically convert Zod schemas to JSON Schema format.",
      ].join("");
      throw new Error(message);
    }
  }
}
122
+
123
/**
 * Validate parse options before a request is built.
 *
 * Checks, in order: a non-positive `timeout`; the unsupported `waitFor`,
 * `actions`, `location` and `mobile` options; the cache/index options
 * (`maxAge`, `minAge`, `storeInCache`, `lockdown`); a `proxy` value other
 * than 'basic' or 'auto'; and finally the `formats` list.
 *
 * @param options Options to validate; nothing is checked when omitted.
 * @throws Error describing the first violated rule.
 */
export function ensureValidParseOptions(options?: ParseOptions): void {
  if (!options) return;

  const { timeout } = options;
  if (timeout != null && timeout <= 0) {
    throw new Error("timeout must be positive");
  }

  // Looked up by plain key so options outside the ParseOptions type (passed
  // by untyped callers) are still caught.
  const given = options as Record<string, unknown>;
  const rejected: ReadonlyArray<readonly [string[], string]> = [
    [["waitFor"], "parse does not support waitFor"],
    [["actions"], "parse does not support actions"],
    [["location"], "parse does not support location overrides"],
    [["mobile"], "parse does not support mobile rendering"],
    [
      ["maxAge", "minAge", "storeInCache", "lockdown"],
      "parse does not support cache/index options",
    ],
  ];
  for (const [keys, message] of rejected) {
    if (keys.some((key) => given[key] !== undefined)) {
      throw new Error(message);
    }
  }

  const proxy = given.proxy;
  const proxyAllowed = proxy === undefined || proxy === "basic" || proxy === "auto";
  if (!proxyAllowed) {
    throw new Error("parse only supports proxy values of 'basic' or 'auto'");
  }

  ensureValidParseFormats(options.formats);
}
156
+