firecrawl 4.28.0 → 4.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -58,6 +58,18 @@ const doc = await app.scrape('https://www.youtube.com/watch?v=dQw4w9WgXcQ', {
58
58
  console.log(doc.video);
59
59
  ```
60
60
 
61
+ ### Product extraction
62
+
63
+ Use the `product` format to deterministically pull a product (title, price, availability, variants) from product pages — the deterministic counterpart to the LLM-based `json` format.
64
+
65
+ ```js
66
+ const doc = await app.scrape('https://example.com/product/123', {
67
+ formats: ['product'],
68
+ });
69
+
70
+ console.log(doc.product);
71
+ ```
72
+
61
73
  ### Parsing uploaded files
62
74
 
63
75
  Use `parse` to upload a file (`html`, `pdf`, `docx`, etc.) as multipart form data and process it through the same parsing pipeline.
@@ -12,7 +12,7 @@ var require_package = __commonJS({
12
12
  "package.json"(exports, module) {
13
13
  module.exports = {
14
14
  name: "@mendable/firecrawl-js",
15
- version: "4.28.0",
15
+ version: "4.28.1",
16
16
  description: "JavaScript SDK for Firecrawl API",
17
17
  main: "dist/index.js",
18
18
  types: "dist/index.d.ts",
package/dist/index.cjs CHANGED
@@ -39,7 +39,7 @@ var require_package = __commonJS({
39
39
  "package.json"(exports2, module2) {
40
40
  module2.exports = {
41
41
  name: "@mendable/firecrawl-js",
42
- version: "4.28.0",
42
+ version: "4.28.1",
43
43
  description: "JavaScript SDK for Firecrawl API",
44
44
  main: "dist/index.js",
45
45
  types: "dist/index.d.ts",
package/dist/index.d.cts CHANGED
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
4
4
  import { EventEmitter } from 'events';
5
5
  import { TypedEventTarget } from 'typescript-event-target';
6
6
 
7
- type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "audio" | "video";
7
+ type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "audio" | "video";
8
8
  interface Viewport {
9
9
  width: number;
10
10
  height: number;
@@ -312,6 +312,40 @@ interface BrandingProfile {
312
312
  };
313
313
  [key: string]: unknown;
314
314
  }
315
+ interface ProductPrice {
316
+ amount: number;
317
+ currency?: string;
318
+ formatted?: string;
319
+ }
320
+ interface ProductAvailability {
321
+ inStock: boolean;
322
+ text?: string;
323
+ }
324
+ interface ProductImage {
325
+ url: string;
326
+ alt?: string;
327
+ }
328
+ interface ProductSale {
329
+ originalPrice: ProductPrice;
330
+ }
331
+ interface ProductVariant {
332
+ id?: string;
333
+ sku?: string;
334
+ title?: string;
335
+ values?: Record<string, unknown>;
336
+ price?: ProductPrice;
337
+ sale?: ProductSale;
338
+ availability: ProductAvailability;
339
+ images?: ProductImage[];
340
+ }
341
+ interface ProductProfile {
342
+ title: string;
343
+ brand?: string;
344
+ category?: string;
345
+ url: string;
346
+ description?: string;
347
+ variants: ProductVariant[];
348
+ }
315
349
  interface DocumentMetadata {
316
350
  title?: string;
317
351
  description?: string;
@@ -382,6 +416,7 @@ interface Document {
382
416
  warning?: string;
383
417
  changeTracking?: Record<string, unknown>;
384
418
  branding?: BrandingProfile;
419
+ product?: ProductProfile;
385
420
  }
386
421
  interface PaginationConfig {
387
422
  /** When true (default), automatically follow `next` links and aggregate all documents. */
@@ -2503,4 +2538,4 @@ declare class Firecrawl extends FirecrawlClient {
2503
2538
  get v1(): FirecrawlApp;
2504
2539
  }
2505
2540
 
2506
- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
2541
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.d.ts CHANGED
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
4
4
  import { EventEmitter } from 'events';
5
5
  import { TypedEventTarget } from 'typescript-event-target';
6
6
 
7
- type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "audio" | "video";
7
+ type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "audio" | "video";
8
8
  interface Viewport {
9
9
  width: number;
10
10
  height: number;
@@ -312,6 +312,40 @@ interface BrandingProfile {
312
312
  };
313
313
  [key: string]: unknown;
314
314
  }
315
+ interface ProductPrice {
316
+ amount: number;
317
+ currency?: string;
318
+ formatted?: string;
319
+ }
320
+ interface ProductAvailability {
321
+ inStock: boolean;
322
+ text?: string;
323
+ }
324
+ interface ProductImage {
325
+ url: string;
326
+ alt?: string;
327
+ }
328
+ interface ProductSale {
329
+ originalPrice: ProductPrice;
330
+ }
331
+ interface ProductVariant {
332
+ id?: string;
333
+ sku?: string;
334
+ title?: string;
335
+ values?: Record<string, unknown>;
336
+ price?: ProductPrice;
337
+ sale?: ProductSale;
338
+ availability: ProductAvailability;
339
+ images?: ProductImage[];
340
+ }
341
+ interface ProductProfile {
342
+ title: string;
343
+ brand?: string;
344
+ category?: string;
345
+ url: string;
346
+ description?: string;
347
+ variants: ProductVariant[];
348
+ }
315
349
  interface DocumentMetadata {
316
350
  title?: string;
317
351
  description?: string;
@@ -382,6 +416,7 @@ interface Document {
382
416
  warning?: string;
383
417
  changeTracking?: Record<string, unknown>;
384
418
  branding?: BrandingProfile;
419
+ product?: ProductProfile;
385
420
  }
386
421
  interface PaginationConfig {
387
422
  /** When true (default), automatically follow `next` links and aggregate all documents. */
@@ -2503,4 +2538,4 @@ declare class Firecrawl extends FirecrawlClient {
2503
2538
  get v1(): FirecrawlApp;
2504
2539
  }
2505
2540
 
2506
- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
2541
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-WZCSDWB2.js";
3
+ } from "./chunk-626CN3G5.js";
4
4
 
5
5
  // src/v2/utils/httpClient.ts
6
6
  import axios from "axios";
@@ -2300,7 +2300,7 @@ var FirecrawlApp = class {
2300
2300
  if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
2301
2301
  return process.env.npm_package_version;
2302
2302
  }
2303
- const packageJson = await import("./package-ASYTUOW4.js");
2303
+ const packageJson = await import("./package-FMOR3ELU.js");
2304
2304
  return packageJson.default.version;
2305
2305
  } catch (error) {
2306
2306
  const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
@@ -1,4 +1,4 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-WZCSDWB2.js";
3
+ } from "./chunk-626CN3G5.js";
4
4
  export default require_package();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "4.28.0",
3
+ "version": "4.28.1",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,162 @@
1
+ import { describe, test, expect, jest } from "@jest/globals";
2
+ import { scrape } from "../../../v2/methods/scrape";
3
+
4
+ describe("JS SDK v2 product format", () => {
5
+ function makeHttp(postImpl: (url: string, data: any) => any) {
6
+ return { post: jest.fn(async (u: string, d: any) => postImpl(u, d)) } as any;
7
+ }
8
+
9
+ test("scrape with product format returns product data", async () => {
10
+ const mockResponse = {
11
+ status: 200,
12
+ data: {
13
+ success: true,
14
+ data: {
15
+ markdown: "# Example Product",
16
+ product: {
17
+ title: "Acme Running Shoe",
18
+ brand: "Acme",
19
+ category: "Footwear",
20
+ url: "https://example.com/shoe",
21
+ description: "A lightweight running shoe.",
22
+ variants: [
23
+ {
24
+ id: "default",
25
+ images: [{ url: "https://example.com/shoe.jpg", alt: "Acme shoe" }],
26
+ price: { amount: 89.99, currency: "USD", formatted: "$89.99" },
27
+ sale: { originalPrice: { amount: 129.99, currency: "USD", formatted: "$129.99" } },
28
+ availability: { inStock: true, text: "In stock" }
29
+ }
30
+ ]
31
+ }
32
+ }
33
+ }
34
+ };
35
+
36
+ const http = makeHttp(() => mockResponse);
37
+ const result = await scrape(http, "https://example.com", { formats: ["product"] });
38
+
39
+ expect(result.product).toBeDefined();
40
+ expect(result.product?.title).toBe("Acme Running Shoe");
41
+ expect(result.product?.brand).toBe("Acme");
42
+ expect(result.product?.variants?.[0]?.price?.amount).toBe(89.99);
43
+ expect(result.product?.variants?.[0]?.price?.currency).toBe("USD");
44
+ expect(result.product?.variants?.[0]?.sale?.originalPrice?.amount).toBe(129.99);
45
+ expect(result.product?.variants?.[0]?.availability?.inStock).toBe(true);
46
+ expect(result.product?.variants?.[0]?.images?.[0]?.url).toBe("https://example.com/shoe.jpg");
47
+ });
48
+
49
+ test("scrape with product and markdown formats returns both", async () => {
50
+ const mockResponse = {
51
+ status: 200,
52
+ data: {
53
+ success: true,
54
+ data: {
55
+ markdown: "# Example Content",
56
+ product: {
57
+ title: "Acme Mug",
58
+ url: "https://example.com/mug",
59
+ variants: [
60
+ {
61
+ price: { amount: 12.5, currency: "USD" },
62
+ availability: { inStock: true }
63
+ }
64
+ ]
65
+ }
66
+ }
67
+ }
68
+ };
69
+
70
+ const http = makeHttp(() => mockResponse);
71
+ const result = await scrape(http, "https://example.com", { formats: ["markdown", "product"] });
72
+
73
+ expect(result.markdown).toBe("# Example Content");
74
+ expect(result.product).toBeDefined();
75
+ expect(result.product?.title).toBe("Acme Mug");
76
+ expect(result.product?.variants?.[0]?.price?.amount).toBe(12.5);
77
+ });
78
+
79
+ test("scrape without product format does not return product", async () => {
80
+ const mockResponse = {
81
+ status: 200,
82
+ data: {
83
+ success: true,
84
+ data: {
85
+ markdown: "# Example"
86
+ }
87
+ }
88
+ };
89
+
90
+ const http = makeHttp(() => mockResponse);
91
+ const result = await scrape(http, "https://example.com", { formats: ["markdown"] });
92
+
93
+ expect(result.markdown).toBe("# Example");
94
+ expect(result.product).toBeUndefined();
95
+ });
96
+
97
+ test("non-product page scraped with product format yields a warning and no product", async () => {
98
+ const mockResponse = {
99
+ status: 200,
100
+ data: {
101
+ success: true,
102
+ data: {
103
+ markdown: "# Blog Post",
104
+ warning: "No product found on this page."
105
+ }
106
+ }
107
+ };
108
+
109
+ const http = makeHttp(() => mockResponse);
110
+ const result = await scrape(http, "https://example.com", { formats: ["product"] });
111
+
112
+ expect(result.product).toBeUndefined();
113
+ expect(result.warning).toContain("No product found");
114
+ });
115
+
116
+ test("product format with variants populated", async () => {
117
+ const mockResponse = {
118
+ status: 200,
119
+ data: {
120
+ success: true,
121
+ data: {
122
+ product: {
123
+ title: "Acme T-Shirt",
124
+ brand: "Acme",
125
+ url: "https://example.com/tshirt",
126
+ variants: [
127
+ {
128
+ id: "v1",
129
+ sku: "TSHIRT-S-RED",
130
+ title: "Small / Red",
131
+ values: { size: "S", color: "Red" },
132
+ price: { amount: 19.0, currency: "USD" },
133
+ sale: { originalPrice: { amount: 24.0, currency: "USD" } },
134
+ availability: { inStock: true },
135
+ images: [{ url: "https://example.com/tshirt-red.jpg" }]
136
+ },
137
+ {
138
+ id: "v2",
139
+ sku: "TSHIRT-L-BLUE",
140
+ title: "Large / Blue",
141
+ values: { size: "L", color: "Blue" },
142
+ availability: { inStock: false, text: "Sold out" }
143
+ }
144
+ ]
145
+ }
146
+ }
147
+ }
148
+ };
149
+
150
+ const http = makeHttp(() => mockResponse);
151
+ const result = await scrape(http, "https://example.com", { formats: ["product"] });
152
+
153
+ expect(result.product).toBeDefined();
154
+ expect(result.product?.variants).toHaveLength(2);
155
+ expect(result.product?.variants?.[0]?.sku).toBe("TSHIRT-S-RED");
156
+ expect(result.product?.variants?.[0]?.values?.color).toBe("Red");
157
+ expect(result.product?.variants?.[0]?.images?.[0]?.url).toBe("https://example.com/tshirt-red.jpg");
158
+ expect(result.product?.variants?.[0]?.sale?.originalPrice?.amount).toBe(24.0);
159
+ expect(result.product?.variants?.[1]?.availability?.inStock).toBe(false);
160
+ expect(result.product?.variants?.[1]?.availability?.text).toBe("Sold out");
161
+ });
162
+ });
package/src/v2/types.ts CHANGED
@@ -13,6 +13,7 @@ export type FormatString =
13
13
  | "json"
14
14
  | "attributes"
15
15
  | "branding"
16
+ | "product"
16
17
  | "audio"
17
18
  | "video";
18
19
 
@@ -427,6 +428,46 @@ export interface BrandingProfile {
427
428
  [key: string]: unknown;
428
429
  }
429
430
 
431
+ export interface ProductPrice {
432
+ amount: number;
433
+ currency?: string;
434
+ formatted?: string;
435
+ }
436
+
437
+ export interface ProductAvailability {
438
+ inStock: boolean;
439
+ text?: string;
440
+ }
441
+
442
+ export interface ProductImage {
443
+ url: string;
444
+ alt?: string;
445
+ }
446
+
447
+ export interface ProductSale {
448
+ originalPrice: ProductPrice;
449
+ }
450
+
451
+ export interface ProductVariant {
452
+ id?: string;
453
+ sku?: string;
454
+ title?: string;
455
+ values?: Record<string, unknown>;
456
+ price?: ProductPrice;
457
+ sale?: ProductSale;
458
+ availability: ProductAvailability;
459
+ images?: ProductImage[];
460
+ }
461
+
462
+ export interface ProductProfile {
463
+ title: string;
464
+ brand?: string;
465
+ category?: string;
466
+ url: string;
467
+ description?: string;
468
+ variants: ProductVariant[];
469
+ }
470
+
430
471
  export interface DocumentMetadata {
431
472
  // Common metadata fields
432
473
  title?: string;
@@ -509,6 +550,7 @@ export interface Document {
509
550
  warning?: string;
510
551
  changeTracking?: Record<string, unknown>;
511
552
  branding?: BrandingProfile;
553
+ product?: ProductProfile;
512
554
  }
513
555
 
514
556
  // Pagination configuration for auto-fetching pages from v2 endpoints that return a `next` URL