firecrawl 4.18.3 → 4.18.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -46,6 +46,26 @@ const url = 'https://example.com';
46
46
  const scrapedData = await app.scrape(url);
47
47
  ```
48
48
 
49
+ ### Parsing uploaded files
50
+
51
+ Use `parse` to upload a file (`html`, `pdf`, `docx`, etc.) as multipart form data and process it through the same parsing pipeline.
52
+ Parse does not support browser-only formats/options like `changeTracking`, `screenshot`, `branding`, `actions`, `waitFor`, `location`, or `mobile`.
53
+
54
+ ```js
55
+ const parsed = await app.parse(
56
+ {
57
+ data: '<html><body><h1>Hello parse</h1></body></html>',
58
+ filename: 'upload.html',
59
+ contentType: 'text/html',
60
+ },
61
+ {
62
+ formats: ['markdown'],
63
+ }
64
+ );
65
+
66
+ console.log(parsed.markdown);
67
+ ```
68
+
49
69
  ### Crawling a Website
50
70
 
51
71
  To crawl a website with error handling, use the `crawl` method. It takes the starting URL and optional parameters, including limits and per‑page `scrapeOptions`.
@@ -8,7 +8,7 @@ var require_package = __commonJS({
8
8
  "package.json"(exports, module) {
9
9
  module.exports = {
10
10
  name: "@mendable/firecrawl-js",
11
- version: "4.18.3",
11
+ version: "4.18.5",
12
12
  description: "JavaScript SDK for Firecrawl API",
13
13
  main: "dist/index.js",
14
14
  types: "dist/index.d.ts",
@@ -78,7 +78,8 @@ var require_package = __commonJS({
78
78
  "picomatch@<4.0.4": ">=4.0.4",
79
79
  handlebars: ">=4.7.9",
80
80
  "brace-expansion": ">=5.0.5",
81
- "axios@<1.15.0": "1.15.0"
81
+ "axios@<1.15.0": "1.15.0",
82
+ "follow-redirects@<1.16.0": ">=1.16.0 <2.0.0"
82
83
  }
83
84
  }
84
85
  };
package/dist/index.cjs CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
35
35
  "package.json"(exports2, module2) {
36
36
  module2.exports = {
37
37
  name: "@mendable/firecrawl-js",
38
- version: "4.18.3",
38
+ version: "4.18.5",
39
39
  description: "JavaScript SDK for Firecrawl API",
40
40
  main: "dist/index.js",
41
41
  types: "dist/index.d.ts",
@@ -105,7 +105,8 @@ var require_package = __commonJS({
105
105
  "picomatch@<4.0.4": ">=4.0.4",
106
106
  handlebars: ">=4.7.9",
107
107
  "brace-expansion": ">=5.0.5",
108
- "axios@<1.15.0": "1.15.0"
108
+ "axios@<1.15.0": "1.15.0",
109
+ "follow-redirects@<1.16.0": ">=1.16.0 <2.0.0"
109
110
  }
110
111
  }
111
112
  };
@@ -157,7 +158,6 @@ var HttpClient = class {
157
158
  baseURL: this.apiUrl,
158
159
  timeout: options.timeoutMs ?? 3e5,
159
160
  headers: {
160
- "Content-Type": "application/json",
161
161
  Authorization: `Bearer ${this.apiKey}`
162
162
  },
163
163
  transitional: { clarifyTimeoutError: true }
@@ -178,13 +178,20 @@ var HttpClient = class {
178
178
  for (let attempt = 0; attempt < this.maxRetries; attempt++) {
179
179
  try {
180
180
  const cfg = { ...config };
181
- if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
181
+ const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
182
+ const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
183
+ if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
182
184
  const data = cfg.data ?? {};
183
185
  cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
184
186
  if (typeof data.timeout === "number") {
185
187
  cfg.timeout = data.timeout + 5e3;
186
188
  }
187
189
  }
190
+ if (isFormDataBody) {
191
+ cfg.headers = { ...cfg.headers || {} };
192
+ delete cfg.headers["Content-Type"];
193
+ delete cfg.headers["content-type"];
194
+ }
188
195
  const res = await this.instance.request(cfg);
189
196
  if (res.status === 502 && attempt < this.maxRetries - 1) {
190
197
  await this.sleep(this.backoffFactor * Math.pow(2, attempt));
@@ -209,6 +216,15 @@ var HttpClient = class {
209
216
  post(endpoint, body, headers) {
210
217
  return this.request({ method: "post", url: endpoint, data: body, headers });
211
218
  }
219
+ postMultipart(endpoint, formData, headers, timeoutMs) {
220
+ return this.request({
221
+ method: "post",
222
+ url: endpoint,
223
+ data: formData,
224
+ headers,
225
+ timeout: timeoutMs
226
+ });
227
+ }
212
228
  get(endpoint, headers) {
213
229
  return this.request({ method: "get", url: endpoint, headers });
214
230
  }
@@ -355,6 +371,76 @@ function ensureValidScrapeOptions(options) {
355
371
  }
356
372
  ensureValidFormats(options.formats);
357
373
  }
374
/**
 * Validate the `formats` array of a parse request.
 *
 * Rejects the browser-only formats (`screenshot`, `changeTracking`,
 * `branding`) in both their string and object forms, requires `json` to be
 * given as an object with a prompt and/or schema, and converts a Zod schema
 * on a json format to JSON Schema in place (the caller's format object is
 * mutated so the converted schema is what gets serialized later).
 *
 * @param formats Optional list of format strings / format objects.
 * @throws Error when an entry is null/undefined, unsupported, or an invalid json format.
 */
function ensureValidParseFormats(formats) {
  if (!formats) return;
  // Formats parse cannot produce; checked for both string and object entries.
  const unsupportedTypes = ["screenshot", "changeTracking", "branding"];
  for (const fmt of formats) {
    if (typeof fmt === "string") {
      if (fmt === "json") {
        throw new Error("json format must be an object with { type: 'json', prompt, schema }");
      }
      if (unsupportedTypes.includes(fmt)) {
        throw new Error(`parse does not support ${fmt} format`);
      }
      continue;
    }
    // Fail with a clear message instead of a TypeError on `fmt.type`.
    if (fmt == null) {
      throw new Error("parse format entries cannot be null or undefined");
    }
    const type = fmt.type;
    if (unsupportedTypes.includes(type)) {
      throw new Error(`parse does not support ${type} format`);
    }
    if (type !== "json") continue;
    if (!fmt.prompt && !fmt.schema) {
      throw new Error("json format requires either 'prompt' or 'schema' (or both)");
    }
    const maybeSchema = fmt.schema;
    if (isZodSchema(maybeSchema)) {
      fmt.schema = zodSchemaToJsonSchema(maybeSchema);
    } else if (looksLikeZodShape(maybeSchema)) {
      throw new Error(
        "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
      );
    }
  }
}
418
/**
 * Validate the options object of a parse request before it is sent.
 *
 * Checks run in a fixed order: timeout, then the options parse does not
 * accept (waitFor, actions, location, mobile, cache/index settings), then
 * the proxy value, and finally the formats list via ensureValidParseFormats.
 *
 * @param options Optional parse options; nothing is validated when falsy.
 * @throws Error describing the first invalid or unsupported option found.
 */
function ensureValidParseOptions(options) {
  if (!options) return;
  const { timeout } = options;
  // NaN is not caught by `<= 0`, and would otherwise turn into a NaN request timeout.
  if (timeout != null && (timeout <= 0 || Number.isNaN(timeout))) {
    throw new Error("timeout must be positive");
  }
  // [option key, error message], in the order the checks must be reported.
  const rejectedOptions = [
    ["waitFor", "parse does not support waitFor"],
    ["actions", "parse does not support actions"],
    ["location", "parse does not support location overrides"],
    ["mobile", "parse does not support mobile rendering"],
    ["maxAge", "parse does not support cache/index options"],
    ["minAge", "parse does not support cache/index options"],
    ["storeInCache", "parse does not support cache/index options"]
  ];
  for (const [key, message] of rejectedOptions) {
    if (options[key] !== void 0) {
      throw new Error(message);
    }
  }
  const { proxy } = options;
  if (proxy !== void 0 && proxy !== "basic" && proxy !== "auto") {
    throw new Error("parse only supports proxy values of 'basic' or 'auto'");
  }
  ensureValidParseFormats(options.formats);
}
358
444
 
359
445
  // src/v2/utils/errorHandler.ts
360
446
  var import_axios2 = require("axios");
@@ -455,6 +541,65 @@ async function stopInteraction(http, jobId) {
455
541
  }
456
542
  }
457
543
 
544
+ // src/v2/methods/parse.ts
545
/**
 * Normalize supported file payloads into a Blob suitable for a form upload.
 *
 * - A Blob is returned as-is unless `contentType` is given and differs from
 *   its current type, in which case it is re-wrapped with the new type.
 * - Buffer, ArrayBuffer and typed-array/DataView inputs are wrapped with `contentType`.
 * - Strings are wrapped with `contentType`, defaulting to UTF-8 plain text.
 *
 * @param input File contents in one of the supported representations.
 * @param contentType Optional MIME type to stamp on the resulting Blob.
 * @returns A Blob holding the payload.
 * @throws Error when `input` is none of the supported types.
 */
function toUploadBlob(input, contentType) {
  const isBlobInput = typeof Blob !== "undefined" && input instanceof Blob;
  if (isBlobInput) {
    const needsRetype = Boolean(contentType) && input.type !== contentType;
    return needsRetype ? new Blob([input], { type: contentType }) : input;
  }

  const isBinaryInput =
    (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) ||
    input instanceof ArrayBuffer ||
    ArrayBuffer.isView(input);
  if (isBinaryInput) {
    return new Blob([input], { type: contentType });
  }

  if (typeof input !== "string") {
    throw new Error("Unsupported parse file data type");
  }
  const textType = contentType ?? "text/plain; charset=utf-8";
  return new Blob([input], { type: textType });
}
566
+ async function parse(http, file, options) {
567
+ if (!file || !file.filename || !file.filename.trim()) {
568
+ throw new Error("filename cannot be empty");
569
+ }
570
+ if (file.data == null) {
571
+ throw new Error("file data cannot be empty");
572
+ }
573
+ const blob = toUploadBlob(file.data, file.contentType);
574
+ if (blob.size === 0) {
575
+ throw new Error("file data cannot be empty");
576
+ }
577
+ if (options) ensureValidParseOptions(options);
578
+ const version = getVersion();
579
+ const normalizedOptions = {
580
+ ...options ?? {},
581
+ origin: typeof options?.origin === "string" && options.origin.includes("mcp") ? options.origin : options?.origin ?? `js-sdk@${version}`
582
+ };
583
+ const formData = new FormData();
584
+ formData.append("options", JSON.stringify(normalizedOptions));
585
+ formData.append(
586
+ "file",
587
+ toUploadBlob(file.data, file.contentType),
588
+ file.filename.trim()
589
+ );
590
+ const requestTimeoutMs = typeof normalizedOptions.timeout === "number" ? normalizedOptions.timeout + 5e3 : void 0;
591
+ try {
592
+ const res = await http.postMultipart("/v2/parse", formData, void 0, requestTimeoutMs);
593
+ if (res.status !== 200 || !res.data?.success) {
594
+ throwForBadResponse(res, "parse");
595
+ }
596
+ return res.data.data || {};
597
+ } catch (err) {
598
+ if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
599
+ throw err;
600
+ }
601
+ }
602
+
458
603
  // src/v2/methods/search.ts
459
604
  function prepareSearchPayload(req) {
460
605
  if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
@@ -589,6 +734,7 @@ function prepareCrawlPayload(request) {
589
734
  if (request.includePaths) data.includePaths = request.includePaths;
590
735
  if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
591
736
  if (request.sitemap != null) data.sitemap = request.sitemap;
737
+ if (request.robotsUserAgent != null) data.robotsUserAgent = request.robotsUserAgent;
592
738
  if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
593
739
  if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
594
740
  if (request.limit != null) data.limit = request.limit;
@@ -1402,6 +1548,9 @@ var FirecrawlClient = class {
1402
1548
  async deleteScrapeBrowser(jobId) {
1403
1549
  return this.stopInteraction(jobId);
1404
1550
  }
1551
+ async parse(file, options) {
1552
+ return parse(this.http, file, options);
1553
+ }
1405
1554
  // Search
1406
1555
  /**
1407
1556
  * Search the web and optionally scrape each result.
package/dist/index.d.cts CHANGED
@@ -45,6 +45,11 @@ interface QueryFormat {
45
45
  prompt: string;
46
46
  }
47
47
  type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QueryFormat;
48
/** Format names usable with parse: every FormatString except 'screenshot', 'changeTracking' and 'branding'. */
+ type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
49
/** Object form of a parse format that carries only its `type`. */
+ interface ParseFormat {
50
+ type: ParseFormatString;
51
+ }
52
/** Any single entry allowed in the `formats` array of a parse request. */
+ type ParseFormatOption = ParseFormatString | ParseFormat | JsonFormat | AttributesFormat | QueryFormat;
48
53
  interface LocationConfig$1 {
49
54
  country?: string;
50
55
  languages?: string[];
@@ -126,6 +131,16 @@ interface ScrapeOptions {
126
131
  integration?: string;
127
132
  origin?: string;
128
133
  }
134
/** Representations accepted for the contents of a file passed to parse. */
+ type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;
135
/** File payload for parse: the contents, a filename, and an optional MIME type. */
+ interface ParseFile {
136
+ data: ParseFileData;
137
+ filename: string;
138
+ contentType?: string;
139
+ }
140
/**
 * Options for parse: ScrapeOptions without waitFor, mobile, actions, location,
 * maxAge, minAge and storeInCache, with `formats` narrowed to parse-compatible
 * entries and `proxy` narrowed to 'basic' | 'auto'.
 */
+ type ParseOptions = Omit<ScrapeOptions, 'formats' | 'waitFor' | 'mobile' | 'actions' | 'location' | 'maxAge' | 'minAge' | 'storeInCache' | 'proxy'> & {
141
+ formats?: ParseFormatOption[];
142
+ proxy?: 'basic' | 'auto';
143
+ };
129
144
  interface WebhookConfig {
130
145
  url: string;
131
146
  headers?: Record<string, string>;
@@ -645,6 +660,7 @@ declare class HttpClient {
645
660
  private request;
646
661
  private sleep;
647
662
  post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
663
/** POST a FormData body to `endpoint`, with optional extra headers and an optional per-request timeout in milliseconds. */
+ postMultipart<T = any>(endpoint: string, formData: FormData, headers?: Record<string, string>, timeoutMs?: number): Promise<AxiosResponse<T, any, {}>>;
648
664
  get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
649
665
  delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
650
666
  prepareHeaders(idempotencyKey?: string): Record<string, string>;
@@ -796,6 +812,18 @@ declare class FirecrawlClient {
796
812
  * @deprecated Use stopInteraction().
797
813
  */
798
814
  deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
815
+ /**
816
+ * Parse an uploaded file via the v2 parse endpoint.
817
+ * @param file File payload (data, filename, optional contentType).
818
+ * @param options Optional parse options (formats, parsers, etc.).
819
+ * Note: parse does not support changeTracking, screenshot, branding,
820
+ * actions, waitFor, location, or mobile options.
821
+ * @returns Parsed document with requested formats.
822
+ */
823
+ parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & {
824
+ json?: InferredJsonFromOptions<Opts>;
825
+ }>;
826
+ parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
799
827
  /**
800
828
  * Search the web and optionally scrape each result.
801
829
  * @param query Search query string.
@@ -1892,4 +1920,4 @@ declare class Firecrawl extends FirecrawlClient {
1892
1920
  get v1(): FirecrawlApp;
1893
1921
  }
1894
1922
 
1895
- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
1923
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.d.ts CHANGED
@@ -45,6 +45,11 @@ interface QueryFormat {
45
45
  prompt: string;
46
46
  }
47
47
  type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat | QueryFormat;
48
/** Format names usable with parse: every FormatString except 'screenshot', 'changeTracking' and 'branding'. */
+ type ParseFormatString = Exclude<FormatString, 'screenshot' | 'changeTracking' | 'branding'>;
49
/** Object form of a parse format that carries only its `type`. */
+ interface ParseFormat {
50
+ type: ParseFormatString;
51
+ }
52
/** Any single entry allowed in the `formats` array of a parse request. */
+ type ParseFormatOption = ParseFormatString | ParseFormat | JsonFormat | AttributesFormat | QueryFormat;
48
53
  interface LocationConfig$1 {
49
54
  country?: string;
50
55
  languages?: string[];
@@ -126,6 +131,16 @@ interface ScrapeOptions {
126
131
  integration?: string;
127
132
  origin?: string;
128
133
  }
134
/** Representations accepted for the contents of a file passed to parse. */
+ type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;
135
/** File payload for parse: the contents, a filename, and an optional MIME type. */
+ interface ParseFile {
136
+ data: ParseFileData;
137
+ filename: string;
138
+ contentType?: string;
139
+ }
140
/**
 * Options for parse: ScrapeOptions without waitFor, mobile, actions, location,
 * maxAge, minAge and storeInCache, with `formats` narrowed to parse-compatible
 * entries and `proxy` narrowed to 'basic' | 'auto'.
 */
+ type ParseOptions = Omit<ScrapeOptions, 'formats' | 'waitFor' | 'mobile' | 'actions' | 'location' | 'maxAge' | 'minAge' | 'storeInCache' | 'proxy'> & {
141
+ formats?: ParseFormatOption[];
142
+ proxy?: 'basic' | 'auto';
143
+ };
129
144
  interface WebhookConfig {
130
145
  url: string;
131
146
  headers?: Record<string, string>;
@@ -645,6 +660,7 @@ declare class HttpClient {
645
660
  private request;
646
661
  private sleep;
647
662
  post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
663
/** POST a FormData body to `endpoint`, with optional extra headers and an optional per-request timeout in milliseconds. */
+ postMultipart<T = any>(endpoint: string, formData: FormData, headers?: Record<string, string>, timeoutMs?: number): Promise<AxiosResponse<T, any, {}>>;
648
664
  get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
649
665
  delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
650
666
  prepareHeaders(idempotencyKey?: string): Record<string, string>;
@@ -796,6 +812,18 @@ declare class FirecrawlClient {
796
812
  * @deprecated Use stopInteraction().
797
813
  */
798
814
  deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
815
+ /**
816
+ * Parse an uploaded file via the v2 parse endpoint.
817
+ * @param file File payload (data, filename, optional contentType).
818
+ * @param options Optional parse options (formats, parsers, etc.).
819
+ * Note: parse does not support changeTracking, screenshot, branding,
820
+ * actions, waitFor, location, or mobile options.
821
+ * @returns Parsed document with requested formats.
822
+ */
823
+ parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & {
824
+ json?: InferredJsonFromOptions<Opts>;
825
+ }>;
826
+ parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
799
827
  /**
800
828
  * Search the web and optionally scrape each result.
801
829
  * @param query Search query string.
@@ -1892,4 +1920,4 @@ declare class Firecrawl extends FirecrawlClient {
1892
1920
  get v1(): FirecrawlApp;
1893
1921
  }
1894
1922
 
1895
- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
1923
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type PressAction, type QueryFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-XC6YCBFX.js";
3
+ } from "./chunk-SEIHZPTI.js";
4
4
 
5
5
  // src/v2/utils/httpClient.ts
6
6
  import axios from "axios";
@@ -34,7 +34,6 @@ var HttpClient = class {
34
34
  baseURL: this.apiUrl,
35
35
  timeout: options.timeoutMs ?? 3e5,
36
36
  headers: {
37
- "Content-Type": "application/json",
38
37
  Authorization: `Bearer ${this.apiKey}`
39
38
  },
40
39
  transitional: { clarifyTimeoutError: true }
@@ -55,13 +54,20 @@ var HttpClient = class {
55
54
  for (let attempt = 0; attempt < this.maxRetries; attempt++) {
56
55
  try {
57
56
  const cfg = { ...config };
58
- if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
57
+ const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
58
+ const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
59
+ if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
59
60
  const data = cfg.data ?? {};
60
61
  cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
61
62
  if (typeof data.timeout === "number") {
62
63
  cfg.timeout = data.timeout + 5e3;
63
64
  }
64
65
  }
66
+ if (isFormDataBody) {
67
+ cfg.headers = { ...cfg.headers || {} };
68
+ delete cfg.headers["Content-Type"];
69
+ delete cfg.headers["content-type"];
70
+ }
65
71
  const res = await this.instance.request(cfg);
66
72
  if (res.status === 502 && attempt < this.maxRetries - 1) {
67
73
  await this.sleep(this.backoffFactor * Math.pow(2, attempt));
@@ -86,6 +92,15 @@ var HttpClient = class {
86
92
  post(endpoint, body, headers) {
87
93
  return this.request({ method: "post", url: endpoint, data: body, headers });
88
94
  }
95
+ postMultipart(endpoint, formData, headers, timeoutMs) {
96
+ return this.request({
97
+ method: "post",
98
+ url: endpoint,
99
+ data: formData,
100
+ headers,
101
+ timeout: timeoutMs
102
+ });
103
+ }
89
104
  get(endpoint, headers) {
90
105
  return this.request({ method: "get", url: endpoint, headers });
91
106
  }
@@ -232,6 +247,76 @@ function ensureValidScrapeOptions(options) {
232
247
  }
233
248
  ensureValidFormats(options.formats);
234
249
  }
250
/**
 * Validate the `formats` array of a parse request.
 *
 * Rejects the browser-only formats (`screenshot`, `changeTracking`,
 * `branding`) in both their string and object forms, requires `json` to be
 * given as an object with a prompt and/or schema, and converts a Zod schema
 * on a json format to JSON Schema in place (the caller's format object is
 * mutated so the converted schema is what gets serialized later).
 *
 * @param formats Optional list of format strings / format objects.
 * @throws Error when an entry is null/undefined, unsupported, or an invalid json format.
 */
function ensureValidParseFormats(formats) {
  if (!formats) return;
  // Formats parse cannot produce; checked for both string and object entries.
  const unsupportedTypes = ["screenshot", "changeTracking", "branding"];
  for (const fmt of formats) {
    if (typeof fmt === "string") {
      if (fmt === "json") {
        throw new Error("json format must be an object with { type: 'json', prompt, schema }");
      }
      if (unsupportedTypes.includes(fmt)) {
        throw new Error(`parse does not support ${fmt} format`);
      }
      continue;
    }
    // Fail with a clear message instead of a TypeError on `fmt.type`.
    if (fmt == null) {
      throw new Error("parse format entries cannot be null or undefined");
    }
    const type = fmt.type;
    if (unsupportedTypes.includes(type)) {
      throw new Error(`parse does not support ${type} format`);
    }
    if (type !== "json") continue;
    if (!fmt.prompt && !fmt.schema) {
      throw new Error("json format requires either 'prompt' or 'schema' (or both)");
    }
    const maybeSchema = fmt.schema;
    if (isZodSchema(maybeSchema)) {
      fmt.schema = zodSchemaToJsonSchema(maybeSchema);
    } else if (looksLikeZodShape(maybeSchema)) {
      throw new Error(
        "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
      );
    }
  }
}
294
/**
 * Validate the options object of a parse request before it is sent.
 *
 * Checks run in a fixed order: timeout, then the options parse does not
 * accept (waitFor, actions, location, mobile, cache/index settings), then
 * the proxy value, and finally the formats list via ensureValidParseFormats.
 *
 * @param options Optional parse options; nothing is validated when falsy.
 * @throws Error describing the first invalid or unsupported option found.
 */
function ensureValidParseOptions(options) {
  if (!options) return;
  const { timeout } = options;
  // NaN is not caught by `<= 0`, and would otherwise turn into a NaN request timeout.
  if (timeout != null && (timeout <= 0 || Number.isNaN(timeout))) {
    throw new Error("timeout must be positive");
  }
  // [option key, error message], in the order the checks must be reported.
  const rejectedOptions = [
    ["waitFor", "parse does not support waitFor"],
    ["actions", "parse does not support actions"],
    ["location", "parse does not support location overrides"],
    ["mobile", "parse does not support mobile rendering"],
    ["maxAge", "parse does not support cache/index options"],
    ["minAge", "parse does not support cache/index options"],
    ["storeInCache", "parse does not support cache/index options"]
  ];
  for (const [key, message] of rejectedOptions) {
    if (options[key] !== void 0) {
      throw new Error(message);
    }
  }
  const { proxy } = options;
  if (proxy !== void 0 && proxy !== "basic" && proxy !== "auto") {
    throw new Error("parse only supports proxy values of 'basic' or 'auto'");
  }
  ensureValidParseFormats(options.formats);
}
235
320
 
236
321
  // src/v2/utils/errorHandler.ts
237
322
  import "axios";
@@ -332,6 +417,65 @@ async function stopInteraction(http, jobId) {
332
417
  }
333
418
  }
334
419
 
420
+ // src/v2/methods/parse.ts
421
/**
 * Normalize supported file payloads into a Blob suitable for a form upload.
 *
 * - A Blob is returned as-is unless `contentType` is given and differs from
 *   its current type, in which case it is re-wrapped with the new type.
 * - Buffer, ArrayBuffer and typed-array/DataView inputs are wrapped with `contentType`.
 * - Strings are wrapped with `contentType`, defaulting to UTF-8 plain text.
 *
 * @param input File contents in one of the supported representations.
 * @param contentType Optional MIME type to stamp on the resulting Blob.
 * @returns A Blob holding the payload.
 * @throws Error when `input` is none of the supported types.
 */
function toUploadBlob(input, contentType) {
  const isBlobInput = typeof Blob !== "undefined" && input instanceof Blob;
  if (isBlobInput) {
    const needsRetype = Boolean(contentType) && input.type !== contentType;
    return needsRetype ? new Blob([input], { type: contentType }) : input;
  }

  const isBinaryInput =
    (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) ||
    input instanceof ArrayBuffer ||
    ArrayBuffer.isView(input);
  if (isBinaryInput) {
    return new Blob([input], { type: contentType });
  }

  if (typeof input !== "string") {
    throw new Error("Unsupported parse file data type");
  }
  const textType = contentType ?? "text/plain; charset=utf-8";
  return new Blob([input], { type: textType });
}
442
+ async function parse(http, file, options) {
443
+ if (!file || !file.filename || !file.filename.trim()) {
444
+ throw new Error("filename cannot be empty");
445
+ }
446
+ if (file.data == null) {
447
+ throw new Error("file data cannot be empty");
448
+ }
449
+ const blob = toUploadBlob(file.data, file.contentType);
450
+ if (blob.size === 0) {
451
+ throw new Error("file data cannot be empty");
452
+ }
453
+ if (options) ensureValidParseOptions(options);
454
+ const version = getVersion();
455
+ const normalizedOptions = {
456
+ ...options ?? {},
457
+ origin: typeof options?.origin === "string" && options.origin.includes("mcp") ? options.origin : options?.origin ?? `js-sdk@${version}`
458
+ };
459
+ const formData = new FormData();
460
+ formData.append("options", JSON.stringify(normalizedOptions));
461
+ formData.append(
462
+ "file",
463
+ toUploadBlob(file.data, file.contentType),
464
+ file.filename.trim()
465
+ );
466
+ const requestTimeoutMs = typeof normalizedOptions.timeout === "number" ? normalizedOptions.timeout + 5e3 : void 0;
467
+ try {
468
+ const res = await http.postMultipart("/v2/parse", formData, void 0, requestTimeoutMs);
469
+ if (res.status !== 200 || !res.data?.success) {
470
+ throwForBadResponse(res, "parse");
471
+ }
472
+ return res.data.data || {};
473
+ } catch (err) {
474
+ if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
475
+ throw err;
476
+ }
477
+ }
478
+
335
479
  // src/v2/methods/search.ts
336
480
  function prepareSearchPayload(req) {
337
481
  if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
@@ -466,6 +610,7 @@ function prepareCrawlPayload(request) {
466
610
  if (request.includePaths) data.includePaths = request.includePaths;
467
611
  if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
468
612
  if (request.sitemap != null) data.sitemap = request.sitemap;
613
+ if (request.robotsUserAgent != null) data.robotsUserAgent = request.robotsUserAgent;
469
614
  if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
470
615
  if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
471
616
  if (request.limit != null) data.limit = request.limit;
@@ -1279,6 +1424,9 @@ var FirecrawlClient = class {
1279
1424
  // Thin alias: forwards `jobId` to stopInteraction() and returns its result.
  async deleteScrapeBrowser(jobId) {
1280
1425
  return this.stopInteraction(jobId);
1281
1426
  }
1427
// Client entry point for file parsing: delegates to the module-level
// parse(http, file, options) function, passing this client's `http` instance.
+ async parse(file, options) {
1428
+ return parse(this.http, file, options);
1429
+ }
1282
1430
  // Search
1283
1431
  /**
1284
1432
  * Search the web and optionally scrape each result.
@@ -1559,7 +1707,7 @@ var FirecrawlApp = class {
1559
1707
  if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
1560
1708
  return process.env.npm_package_version;
1561
1709
  }
1562
- const packageJson = await import("./package-PW6FMSAE.js");
1710
+ const packageJson = await import("./package-ASKBBK6V.js");
1563
1711
  return packageJson.default.version;
1564
1712
  } catch (error) {
1565
1713
  const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
@@ -1,4 +1,4 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-XC6YCBFX.js";
3
+ } from "./chunk-SEIHZPTI.js";
4
4
  export default require_package();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "4.18.3",
3
+ "version": "4.18.5",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,67 @@
1
+ import Firecrawl from "../../../index";
2
+ import { config } from "dotenv";
3
+ import { getIdentity, getApiUrl } from "./utils/idmux";
4
+ import { describe, test, expect, beforeAll } from "@jest/globals";
5
+
6
// Load environment variables before resolving the API URL and identity.
config();

const API_URL = getApiUrl();
let client: Firecrawl;

beforeAll(async () => {
  const { apiKey } = await getIdentity({ name: "js-e2e-parse" });
  client = new Firecrawl({ apiKey, apiUrl: API_URL });
});

/**
 * Returns the shared client, failing with a descriptive message when
 * `beforeAll` did not manage to create it.
 */
function requireClient(): Firecrawl {
  if (!client) {
    throw new Error("Firecrawl client was not initialised in beforeAll");
  }
  return client;
}

describe("v2.parse e2e", () => {
  test(
    "parses uploaded HTML files",
    async () => {
      const doc = await requireClient().parse(
        {
          data: `
            <!DOCTYPE html>
            <html>
              <body>
                <h1>JS SDK Parse E2E</h1>
                <p>multipart upload body</p>
              </body>
            </html>
          `,
          filename: "parse-e2e.html",
          contentType: "text/html",
        },
        {
          formats: ["markdown"],
        },
      );

      expect(doc.markdown).toContain("JS SDK Parse E2E");
      expect(doc.metadata?.creditsUsed).toBe(1);
    },
    60_000,
  );

  test(
    "returns errors for unsupported file types",
    async () => {
      await expect(
        requireClient().parse(
          {
            data: Buffer.from("image-data"),
            filename: "parse-e2e.png",
            contentType: "image/png",
          },
          {
            formats: ["markdown"],
          },
        ),
      ).rejects.toThrow();
    },
    60_000,
  );
});
@@ -0,0 +1,40 @@
1
+ import { describe, test, expect } from "@jest/globals";
2
+ import { FirecrawlClient } from "../../../v2/client";
3
+
4
describe("v2.parse unit", () => {
  // Each case builds its own client; both are expected to reject before any request is made.
  const makeClient = () =>
    new FirecrawlClient({
      apiKey: "test-key",
      apiUrl: "https://localhost:3002",
    });

  // Minimal HTML upload whose only varying part is the filename.
  const htmlUpload = (filename: string) => ({
    data: "<html><body>test</body></html>",
    filename,
    contentType: "text/html",
  });

  test("rejects empty filenames before making requests", async () => {
    const pending = makeClient().parse(htmlUpload(" "), { formats: ["markdown"] });

    await expect(pending).rejects.toThrow("filename cannot be empty");
  });

  test("rejects changeTracking format before making requests", async () => {
    const pending = makeClient().parse(htmlUpload("upload.html"), {
      formats: ["markdown", { type: "changeTracking" } as any],
    });

    await expect(pending).rejects.toThrow("parse does not support changeTracking format");
  });
});
+ });
package/src/v2/client.ts CHANGED
@@ -4,6 +4,7 @@ import {
4
4
  interact as interactMethod,
5
5
  stopInteraction as stopInteractionMethod,
6
6
  } from "./methods/scrape";
7
+ import { parse as parseMethod } from "./methods/parse";
7
8
  import { search } from "./methods/search";
8
9
  import { map as mapMethod } from "./methods/map";
9
10
  import {
@@ -33,6 +34,8 @@ import {
33
34
  import { getConcurrency, getCreditUsage, getQueueStatus, getTokenUsage, getCreditUsageHistorical, getTokenUsageHistorical } from "./methods/usage";
34
35
  import type {
35
36
  Document,
37
+ ParseFile,
38
+ ParseOptions,
36
39
  ScrapeOptions,
37
40
  SearchData,
38
41
  SearchRequest,
@@ -177,6 +180,24 @@ export class FirecrawlClient {
177
180
  return this.stopInteraction(jobId);
178
181
  }
179
182
 
183
+ // Parse
184
+ /**
185
+ * Parse an uploaded file via the v2 parse endpoint.
186
+ * @param file File payload (data, filename, optional contentType).
187
+ * @param options Optional parse options (formats, parsers, etc.).
188
+ * Note: parse does not support changeTracking, screenshot, branding,
189
+ * actions, waitFor, location, or mobile options.
+ * The cache options maxAge, minAge and storeInCache are rejected as well,
+ * and proxy may only be 'basic' or 'auto'.
190
+ * @returns Parsed document with requested formats.
+ * When called through the generic overload, the `json` field of the result
+ * is typed as `InferredJsonFromOptions<Opts>`; the non-generic overload
+ * returns a plain `Document`.
+ * @throws Error if the filename is blank, the file data is missing or empty,
+ * or the options contain an unsupported option or format.
191
+ */
192
+ async parse<Opts extends ParseOptions>(
193
+ file: ParseFile,
194
+ options: Opts
195
+ ): Promise<Omit<Document, "json"> & { json?: InferredJsonFromOptions<Opts> }>;
196
+ async parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
197
+ async parse(file: ParseFile, options?: ParseOptions): Promise<Document> {
198
+ return parseMethod(this.http, file, options);
199
+ }
200
+
180
201
  // Search
181
202
  /**
182
203
  * Search the web and optionally scrape each result.
@@ -27,6 +27,7 @@ function prepareCrawlPayload(request: CrawlRequest): Record<string, unknown> {
27
27
  if (request.includePaths) data.includePaths = request.includePaths;
28
28
  if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
29
29
  if (request.sitemap != null) data.sitemap = request.sitemap;
30
+ if (request.robotsUserAgent != null) data.robotsUserAgent = request.robotsUserAgent;
30
31
  if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
31
32
  if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs;
32
33
  if (request.limit != null) data.limit = request.limit;
@@ -0,0 +1,90 @@
1
+ import { type Document, type ParseFile, type ParseOptions } from "../types";
2
+ import { HttpClient } from "../utils/httpClient";
3
+ import { ensureValidParseOptions } from "../utils/validation";
4
+ import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";
5
+ import { getVersion } from "../utils/getVersion";
6
+
7
/**
 * Normalise any supported upload payload into a `Blob`.
 *
 * - An existing `Blob` is returned as-is unless `contentType` is given and
 *   differs from the blob's own type, in which case it is re-wrapped.
 * - `Buffer`, `ArrayBuffer` and array-buffer views are wrapped with `contentType`.
 * - Strings are wrapped with `contentType`, defaulting to UTF-8 plain text.
 *
 * @param input Raw file contents.
 * @param contentType Optional MIME type to stamp on the resulting blob.
 * @returns A blob holding the payload.
 * @throws Error when `input` is none of the supported types.
 */
function toUploadBlob(input: ParseFile["data"], contentType?: string): Blob {
  if (typeof Blob !== "undefined" && input instanceof Blob) {
    const typeMismatch = contentType && input.type !== contentType;
    return typeMismatch ? new Blob([input], { type: contentType }) : input;
  }

  // Every binary representation is wrapped the same way.
  if (
    (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) ||
    input instanceof ArrayBuffer ||
    ArrayBuffer.isView(input)
  ) {
    return new Blob([input], { type: contentType });
  }

  if (typeof input !== "string") {
    throw new Error("Unsupported parse file data type");
  }

  return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" });
}
33
+
34
/**
 * Upload a file to the v2 parse endpoint as multipart form data and return
 * the parsed document.
 *
 * @param http HTTP client used to send the multipart POST.
 * @param file Payload with `data`, a non-blank `filename`, and an optional `contentType`.
 * @param options Optional parse options. When `timeout` is a number, the request
 *   timeout is set to `timeout + 5000` ms.
 * @returns The `data` document of the response, or an empty object when the
 *   response carries none.
 * @throws Error when the filename is blank, the data is missing or zero-length,
 *   or the options are rejected by `ensureValidParseOptions`.
 */
export async function parse(
  http: HttpClient,
  file: ParseFile,
  options?: ParseOptions,
): Promise<Document> {
  if (!file || !file.filename || !file.filename.trim()) {
    throw new Error("filename cannot be empty");
  }

  if (file.data == null) {
    throw new Error("file data cannot be empty");
  }

  // Convert once and reuse the result for both the size check and the upload;
  // a second conversion would copy the whole payload again.
  const blob = toUploadBlob(file.data, file.contentType);
  if (blob.size === 0) {
    throw new Error("file data cannot be empty");
  }

  if (options) ensureValidParseOptions(options);

  const version = getVersion();
  const normalizedOptions: ParseOptions = {
    ...(options ?? {}),
    // A caller-supplied origin always wins; otherwise tag the request with the SDK version.
    origin: options?.origin ?? `js-sdk@${version}`,
  };

  const formData = new FormData();
  formData.append("options", JSON.stringify(normalizedOptions));
  formData.append("file", blob, file.filename.trim());

  const requestTimeoutMs =
    typeof normalizedOptions.timeout === "number"
      ? normalizedOptions.timeout + 5000
      : undefined;

  try {
    const res = await http.postMultipart<{
      success: boolean;
      data?: Document;
      error?: string;
    }>("/v2/parse", formData, undefined, requestTimeoutMs);
    if (res.status !== 200 || !res.data?.success) {
      throwForBadResponse(res, "parse");
    }
    return (res.data.data || {}) as Document;
  } catch (err: any) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
    throw err;
  }
}
package/src/v2/types.ts CHANGED
@@ -66,6 +66,22 @@ export type FormatOption =
66
66
  | AttributesFormat
67
67
  | QueryFormat;
68
68
 
69
/** Plain format names accepted by parse: every `FormatString` except 'screenshot', 'changeTracking' and 'branding'. */
+ export type ParseFormatString = Exclude<
70
+ FormatString,
71
+ 'screenshot' | 'changeTracking' | 'branding'
72
+ >;
73
+
74
/** Object form of a plain parse format, e.g. `{ type: 'markdown' }`. */
+ export interface ParseFormat {
75
+ type: ParseFormatString;
76
+ }
77
+
78
/**
 * One entry of a parse request's `formats` list: a plain format name, its
 * object form, or a JSON / attributes / query format object.
 */
+ export type ParseFormatOption =
79
+ | ParseFormatString
80
+ | ParseFormat
81
+ | JsonFormat
82
+ | AttributesFormat
83
+ | QueryFormat;
84
+
69
85
  export interface LocationConfig {
70
86
  country?: string;
71
87
  languages?: string[];
@@ -172,6 +188,36 @@ export interface ScrapeOptions {
172
188
  origin?: string;
173
189
  }
174
190
 
191
/** Value types accepted as the contents of a file uploaded for parsing. */
+ export type ParseFileData =
192
+ | Blob
193
+ | File
194
+ | Buffer
195
+ | Uint8Array
196
+ | ArrayBuffer
197
+ | string;
198
+
199
/** File payload handed to `parse`. */
+ export interface ParseFile {
200
// Contents to upload; parse rejects missing or zero-length data.
+ data: ParseFileData;
201
// Name sent with the upload; parse trims it and rejects blank names.
+ filename: string;
202
// Optional MIME type for the upload. String data without one is sent as
// "text/plain; charset=utf-8".
+ contentType?: string;
203
+ }
204
+
205
/**
 * Options for a parse request: `ScrapeOptions` with the keys listed in the
 * `Omit` removed, then `formats` re-declared with parse-specific entries and
 * `proxy` re-declared as 'basic' | 'auto'.
 */
+ export type ParseOptions = Omit<
206
+ ScrapeOptions,
207
+ | 'formats'
208
+ | 'waitFor'
209
+ | 'mobile'
210
+ | 'actions'
211
+ | 'location'
212
+ | 'maxAge'
213
+ | 'minAge'
214
+ | 'storeInCache'
215
+ | 'proxy'
216
+ > & {
217
+ formats?: ParseFormatOption[];
218
+ proxy?: 'basic' | 'auto';
219
+ };
220
+
175
221
  export interface WebhookConfig {
176
222
  url: string;
177
223
  headers?: Record<string, string>;
@@ -25,7 +25,6 @@ export class HttpClient {
25
25
  baseURL: this.apiUrl,
26
26
  timeout: options.timeoutMs ?? 300000,
27
27
  headers: {
28
- "Content-Type": "application/json",
29
28
  Authorization: `Bearer ${this.apiKey}`,
30
29
  },
31
30
  transitional: { clarifyTimeoutError: true },
@@ -50,16 +49,35 @@ export class HttpClient {
50
49
  for (let attempt = 0; attempt < this.maxRetries; attempt++) {
51
50
  try {
52
51
  const cfg: AxiosRequestConfig = { ...config };
53
- // For POST/PUT, ensure origin is present in JSON body too
54
- if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
52
+ const isFormDataBody =
53
+ typeof FormData !== "undefined" && cfg.data instanceof FormData;
54
+ const isPlainObjectBody =
55
+ !isFormDataBody &&
56
+ cfg.data != null &&
57
+ typeof cfg.data === "object" &&
58
+ !Array.isArray(cfg.data);
59
+
60
+ // For JSON POST/PUT/PATCH, ensure origin is present in body
61
+ if (
62
+ isPlainObjectBody &&
63
+ cfg.method &&
64
+ ["post", "put", "patch"].includes(cfg.method.toLowerCase())
65
+ ) {
55
66
  const data = (cfg.data ?? {}) as Record<string, unknown>;
56
67
  cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
57
-
68
+
58
69
  // If timeout is specified in the body, use it to override the request timeout
59
70
  if (typeof data.timeout === "number") {
60
71
  cfg.timeout = data.timeout + 5000;
61
72
  }
62
73
  }
74
+
75
+ if (isFormDataBody) {
76
+ cfg.headers = { ...(cfg.headers || {}) };
77
+ delete (cfg.headers as Record<string, unknown>)["Content-Type"];
78
+ delete (cfg.headers as Record<string, unknown>)["content-type"];
79
+ }
80
+
63
81
  const res = await this.instance.request<T>(cfg);
64
82
  if (res.status === 502 && attempt < this.maxRetries - 1) {
65
83
  await this.sleep(this.backoffFactor * Math.pow(2, attempt));
@@ -87,6 +105,21 @@ export class HttpClient {
87
105
  return this.request<T>({ method: "post", url: endpoint, data: body, headers });
88
106
  }
89
107
 
108
/**
 * POST a `FormData` body to `endpoint` through the shared `request` pipeline.
 *
 * @param endpoint Path or URL to post to.
 * @param formData Multipart body to send.
 * @param headers Optional extra request headers.
 * @param timeoutMs Optional per-request timeout in milliseconds.
 * @returns Whatever `request` resolves to for this call.
 */
postMultipart<T = any>(
  endpoint: string,
  formData: FormData,
  headers?: Record<string, string>,
  timeoutMs?: number,
) {
  const multipartRequest: AxiosRequestConfig = {
    method: "post",
    url: endpoint,
    data: formData,
    headers,
    timeout: timeoutMs,
  };
  return this.request<T>(multipartRequest);
}
122
+
90
123
  // Issues a GET to `endpoint` through request(), forwarding the optional headers.
  get<T = any>(endpoint: string, headers?: Record<string, string>) {
91
124
  return this.request<T>({ method: "get", url: endpoint, headers });
92
125
  }
@@ -1,4 +1,12 @@
1
- import { type FormatOption, type JsonFormat, type ScrapeOptions, type ScreenshotFormat, type ChangeTrackingFormat } from "../types";
1
+ import {
2
+ type ChangeTrackingFormat,
3
+ type FormatOption,
4
+ type JsonFormat,
5
+ type ParseFormatOption,
6
+ type ParseOptions,
7
+ type ScrapeOptions,
8
+ type ScreenshotFormat,
9
+ } from "../types";
2
10
  import { isZodSchema, zodSchemaToJsonSchema, looksLikeZodShape } from "../../utils/zodSchemaToJson";
3
11
 
4
12
  export function ensureValidFormats(formats?: FormatOption[]): void {
@@ -62,3 +70,82 @@ export function ensureValidScrapeOptions(options?: ScrapeOptions): void {
62
70
  ensureValidFormats(options.formats);
63
71
  }
64
72
 
73
/**
 * Validate the `formats` list of a parse request.
 *
 * Rejects the bare string "json" (it must be given as an object), rejects the
 * screenshot, changeTracking and branding formats in both string and object
 * form, and checks that a JSON format object carries a prompt or a schema.
 * A Zod schema found on a JSON format is converted to JSON Schema in place.
 *
 * @param formats Formats to validate; `undefined` is accepted.
 * @throws Error describing the first invalid entry.
 */
export function ensureValidParseFormats(formats?: ParseFormatOption[]): void {
  if (!formats) return;

  // Formats parse cannot produce, with the exact error reported for each.
  const rejected = new Map<unknown, string>([
    ["screenshot", "parse does not support screenshot format"],
    ["changeTracking", "parse does not support changeTracking format"],
    ["branding", "parse does not support branding format"],
  ]);

  for (const entry of formats) {
    const givenAsName = typeof entry === "string";
    if (givenAsName && (entry as string) === "json") {
      throw new Error("json format must be an object with { type: 'json', prompt, schema }");
    }

    // A string entry is its own kind; an object entry is identified by `type`.
    const kind: unknown = givenAsName ? entry : (entry as any).type;
    const rejection = rejected.get(kind);
    if (rejection !== undefined) {
      throw new Error(rejection);
    }

    if (givenAsName || kind !== "json") continue;

    const jsonFormat = entry as JsonFormat;
    if (!jsonFormat.prompt && !jsonFormat.schema) {
      throw new Error("json format requires either 'prompt' or 'schema' (or both)");
    }

    const candidate = jsonFormat.schema;
    if (isZodSchema(candidate)) {
      (jsonFormat as any).schema = zodSchemaToJsonSchema(candidate);
      continue;
    }
    if (looksLikeZodShape(candidate)) {
      throw new Error(
        "json format schema appears to be a Zod schema's .shape property. " +
          "Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. " +
          "The SDK will automatically convert Zod schemas to JSON Schema format."
      );
    }
  }
}
122
+
123
/**
 * Validate the options of a parse request before it is sent.
 *
 * Checks, in order: a positive `timeout`; absence of `waitFor`, `actions`,
 * `location`, `mobile` and the cache options (`maxAge`, `minAge`,
 * `storeInCache`); a `proxy` of 'basic' or 'auto' when present; and finally
 * the `formats` list via `ensureValidParseFormats`.
 *
 * @param options Options to validate; `undefined` is accepted.
 * @throws Error describing the first violated rule.
 */
export function ensureValidParseOptions(options?: ParseOptions): void {
  if (!options) return;

  const { timeout } = options;
  if (timeout != null && timeout <= 0) {
    throw new Error("timeout must be positive");
  }

  // Looked up by key because these options are not part of the ParseOptions type.
  const raw = options as Record<string, unknown>;
  const forbidden: Array<[string[], string]> = [
    [["waitFor"], "parse does not support waitFor"],
    [["actions"], "parse does not support actions"],
    [["location"], "parse does not support location overrides"],
    [["mobile"], "parse does not support mobile rendering"],
    [["maxAge", "minAge", "storeInCache"], "parse does not support cache/index options"],
  ];
  for (const [keys, message] of forbidden) {
    if (keys.some((key) => raw[key] !== undefined)) {
      throw new Error(message);
    }
  }

  const proxy = raw.proxy;
  const proxyAllowed = proxy === undefined || proxy === "basic" || proxy === "auto";
  if (!proxyAllowed) {
    throw new Error("parse only supports proxy values of 'basic' or 'auto'");
  }

  ensureValidParseFormats(options.formats);
}
151
+