firecrawl 4.27.0 → 4.28.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/dist/{chunk-TVFWAIIZ.js → chunk-626CN3G5.js} +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.d.cts +38 -2
- package/dist/index.d.ts +38 -2
- package/dist/index.js +2 -2
- package/dist/{package-JE2ZVGM4.js → package-FMOR3ELU.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/unit/v2/product.test.ts +162 -0
- package/src/__tests__/unit/v2/scrape-browser.unit.test.ts +2 -0
- package/src/v2/types.ts +43 -0
package/README.md
CHANGED
|
@@ -58,6 +58,18 @@ const doc = await app.scrape('https://www.youtube.com/watch?v=dQw4w9WgXcQ', {
|
|
|
58
58
|
console.log(doc.video);
|
|
59
59
|
```
|
|
60
60
|
|
|
61
|
+
### Product extraction
|
|
62
|
+
|
|
63
|
+
Use the `product` format to deterministically pull a product (title, price, availability, variants) from product pages — the deterministic counterpart to the LLM-based `json` format.
|
|
64
|
+
|
|
65
|
+
```js
|
|
66
|
+
const doc = await app.scrape('https://example.com/product/123', {
|
|
67
|
+
formats: ['product'],
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
console.log(doc.product);
|
|
71
|
+
```
|
|
72
|
+
|
|
61
73
|
### Parsing uploaded files
|
|
62
74
|
|
|
63
75
|
Use `parse` to upload a file (`html`, `pdf`, `docx`, etc.) as multipart form data and process it through the same parsing pipeline.
|
|
@@ -12,7 +12,7 @@ var require_package = __commonJS({
|
|
|
12
12
|
"package.json"(exports, module) {
|
|
13
13
|
module.exports = {
|
|
14
14
|
name: "@mendable/firecrawl-js",
|
|
15
|
-
version: "4.27.0",
|
|
15
|
+
version: "4.28.1",
|
|
16
16
|
description: "JavaScript SDK for Firecrawl API",
|
|
17
17
|
main: "dist/index.js",
|
|
18
18
|
types: "dist/index.d.ts",
|
package/dist/index.cjs
CHANGED
|
@@ -39,7 +39,7 @@ var require_package = __commonJS({
|
|
|
39
39
|
"package.json"(exports2, module2) {
|
|
40
40
|
module2.exports = {
|
|
41
41
|
name: "@mendable/firecrawl-js",
|
|
42
|
-
version: "4.27.0",
|
|
42
|
+
version: "4.28.1",
|
|
43
43
|
description: "JavaScript SDK for Firecrawl API",
|
|
44
44
|
main: "dist/index.js",
|
|
45
45
|
types: "dist/index.d.ts",
|
package/dist/index.d.cts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "audio" | "video";
|
|
7
|
+
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "audio" | "video";
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -312,6 +312,40 @@ interface BrandingProfile {
|
|
|
312
312
|
};
|
|
313
313
|
[key: string]: unknown;
|
|
314
314
|
}
|
|
315
|
+
interface ProductPrice {
|
|
316
|
+
amount: number;
|
|
317
|
+
currency?: string;
|
|
318
|
+
formatted?: string;
|
|
319
|
+
}
|
|
320
|
+
interface ProductAvailability {
|
|
321
|
+
inStock: boolean;
|
|
322
|
+
text?: string;
|
|
323
|
+
}
|
|
324
|
+
interface ProductImage {
|
|
325
|
+
url: string;
|
|
326
|
+
alt?: string;
|
|
327
|
+
}
|
|
328
|
+
interface ProductSale {
|
|
329
|
+
originalPrice: ProductPrice;
|
|
330
|
+
}
|
|
331
|
+
interface ProductVariant {
|
|
332
|
+
id?: string;
|
|
333
|
+
sku?: string;
|
|
334
|
+
title?: string;
|
|
335
|
+
values?: Record<string, unknown>;
|
|
336
|
+
price?: ProductPrice;
|
|
337
|
+
sale?: ProductSale;
|
|
338
|
+
availability: ProductAvailability;
|
|
339
|
+
images?: ProductImage[];
|
|
340
|
+
}
|
|
341
|
+
interface ProductProfile {
|
|
342
|
+
title: string;
|
|
343
|
+
brand?: string;
|
|
344
|
+
category?: string;
|
|
345
|
+
url: string;
|
|
346
|
+
description?: string;
|
|
347
|
+
variants: ProductVariant[];
|
|
348
|
+
}
|
|
315
349
|
interface DocumentMetadata {
|
|
316
350
|
title?: string;
|
|
317
351
|
description?: string;
|
|
@@ -382,6 +416,7 @@ interface Document {
|
|
|
382
416
|
warning?: string;
|
|
383
417
|
changeTracking?: Record<string, unknown>;
|
|
384
418
|
branding?: BrandingProfile;
|
|
419
|
+
product?: ProductProfile;
|
|
385
420
|
}
|
|
386
421
|
interface PaginationConfig {
|
|
387
422
|
/** When true (default), automatically follow `next` links and aggregate all documents. */
|
|
@@ -889,6 +924,7 @@ interface BrowserCreateResponse {
|
|
|
889
924
|
}
|
|
890
925
|
interface BrowserExecuteResponse {
|
|
891
926
|
success: boolean;
|
|
927
|
+
cdpUrl?: string;
|
|
892
928
|
liveViewUrl?: string;
|
|
893
929
|
interactiveLiveViewUrl?: string;
|
|
894
930
|
output?: string;
|
|
@@ -2502,4 +2538,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
2502
2538
|
get v1(): FirecrawlApp;
|
|
2503
2539
|
}
|
|
2504
2540
|
|
|
2505
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type 
MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
2541
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type 
MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "audio" | "video";
|
|
7
|
+
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "audio" | "video";
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -312,6 +312,40 @@ interface BrandingProfile {
|
|
|
312
312
|
};
|
|
313
313
|
[key: string]: unknown;
|
|
314
314
|
}
|
|
315
|
+
interface ProductPrice {
|
|
316
|
+
amount: number;
|
|
317
|
+
currency?: string;
|
|
318
|
+
formatted?: string;
|
|
319
|
+
}
|
|
320
|
+
interface ProductAvailability {
|
|
321
|
+
inStock: boolean;
|
|
322
|
+
text?: string;
|
|
323
|
+
}
|
|
324
|
+
interface ProductImage {
|
|
325
|
+
url: string;
|
|
326
|
+
alt?: string;
|
|
327
|
+
}
|
|
328
|
+
interface ProductSale {
|
|
329
|
+
originalPrice: ProductPrice;
|
|
330
|
+
}
|
|
331
|
+
interface ProductVariant {
|
|
332
|
+
id?: string;
|
|
333
|
+
sku?: string;
|
|
334
|
+
title?: string;
|
|
335
|
+
values?: Record<string, unknown>;
|
|
336
|
+
price?: ProductPrice;
|
|
337
|
+
sale?: ProductSale;
|
|
338
|
+
availability: ProductAvailability;
|
|
339
|
+
images?: ProductImage[];
|
|
340
|
+
}
|
|
341
|
+
interface ProductProfile {
|
|
342
|
+
title: string;
|
|
343
|
+
brand?: string;
|
|
344
|
+
category?: string;
|
|
345
|
+
url: string;
|
|
346
|
+
description?: string;
|
|
347
|
+
variants: ProductVariant[];
|
|
348
|
+
}
|
|
315
349
|
interface DocumentMetadata {
|
|
316
350
|
title?: string;
|
|
317
351
|
description?: string;
|
|
@@ -382,6 +416,7 @@ interface Document {
|
|
|
382
416
|
warning?: string;
|
|
383
417
|
changeTracking?: Record<string, unknown>;
|
|
384
418
|
branding?: BrandingProfile;
|
|
419
|
+
product?: ProductProfile;
|
|
385
420
|
}
|
|
386
421
|
interface PaginationConfig {
|
|
387
422
|
/** When true (default), automatically follow `next` links and aggregate all documents. */
|
|
@@ -889,6 +924,7 @@ interface BrowserCreateResponse {
|
|
|
889
924
|
}
|
|
890
925
|
interface BrowserExecuteResponse {
|
|
891
926
|
success: boolean;
|
|
927
|
+
cdpUrl?: string;
|
|
892
928
|
liveViewUrl?: string;
|
|
893
929
|
interactiveLiveViewUrl?: string;
|
|
894
930
|
output?: string;
|
|
@@ -2502,4 +2538,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
2502
2538
|
get v1(): FirecrawlApp;
|
|
2503
2539
|
}
|
|
2504
2540
|
|
|
2505
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type 
MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
2541
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type 
MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-TVFWAIIZ.js";
|
|
3
|
+
} from "./chunk-626CN3G5.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -2300,7 +2300,7 @@ var FirecrawlApp = class {
|
|
|
2300
2300
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
2301
2301
|
return process.env.npm_package_version;
|
|
2302
2302
|
}
|
|
2303
|
-
const packageJson = await import("./package-JE2ZVGM4.js");
|
|
2303
|
+
const packageJson = await import("./package-FMOR3ELU.js");
|
|
2304
2304
|
return packageJson.default.version;
|
|
2305
2305
|
} catch (error) {
|
|
2306
2306
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
package/package.json
CHANGED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { describe, test, expect, jest } from "@jest/globals";
|
|
2
|
+
import { scrape } from "../../../v2/methods/scrape";
|
|
3
|
+
|
|
4
|
+
describe("JS SDK v2 product format", () => {
|
|
5
|
+
function makeHttp(postImpl: (url: string, data: any) => any) {
|
|
6
|
+
return { post: jest.fn(async (u: string, d: any) => postImpl(u, d)) } as any;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
test("scrape with product format returns product data", async () => {
|
|
10
|
+
const mockResponse = {
|
|
11
|
+
status: 200,
|
|
12
|
+
data: {
|
|
13
|
+
success: true,
|
|
14
|
+
data: {
|
|
15
|
+
markdown: "# Example Product",
|
|
16
|
+
product: {
|
|
17
|
+
title: "Acme Running Shoe",
|
|
18
|
+
brand: "Acme",
|
|
19
|
+
category: "Footwear",
|
|
20
|
+
url: "https://example.com/shoe",
|
|
21
|
+
description: "A lightweight running shoe.",
|
|
22
|
+
variants: [
|
|
23
|
+
{
|
|
24
|
+
id: "default",
|
|
25
|
+
images: [{ url: "https://example.com/shoe.jpg", alt: "Acme shoe" }],
|
|
26
|
+
price: { amount: 89.99, currency: "USD", formatted: "$89.99" },
|
|
27
|
+
sale: { originalPrice: { amount: 129.99, currency: "USD", formatted: "$129.99" } },
|
|
28
|
+
availability: { inStock: true, text: "In stock" }
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
const http = makeHttp(() => mockResponse);
|
|
37
|
+
const result = await scrape(http, "https://example.com", { formats: ["product"] });
|
|
38
|
+
|
|
39
|
+
expect(result.product).toBeDefined();
|
|
40
|
+
expect(result.product?.title).toBe("Acme Running Shoe");
|
|
41
|
+
expect(result.product?.brand).toBe("Acme");
|
|
42
|
+
expect(result.product?.variants?.[0]?.price?.amount).toBe(89.99);
|
|
43
|
+
expect(result.product?.variants?.[0]?.price?.currency).toBe("USD");
|
|
44
|
+
expect(result.product?.variants?.[0]?.sale?.originalPrice?.amount).toBe(129.99);
|
|
45
|
+
expect(result.product?.variants?.[0]?.availability?.inStock).toBe(true);
|
|
46
|
+
expect(result.product?.variants?.[0]?.images?.[0]?.url).toBe("https://example.com/shoe.jpg");
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
test("scrape with product and markdown formats returns both", async () => {
|
|
50
|
+
const mockResponse = {
|
|
51
|
+
status: 200,
|
|
52
|
+
data: {
|
|
53
|
+
success: true,
|
|
54
|
+
data: {
|
|
55
|
+
markdown: "# Example Content",
|
|
56
|
+
product: {
|
|
57
|
+
title: "Acme Mug",
|
|
58
|
+
url: "https://example.com/mug",
|
|
59
|
+
variants: [
|
|
60
|
+
{
|
|
61
|
+
price: { amount: 12.5, currency: "USD" },
|
|
62
|
+
availability: { inStock: true }
|
|
63
|
+
}
|
|
64
|
+
]
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
const http = makeHttp(() => mockResponse);
|
|
71
|
+
const result = await scrape(http, "https://example.com", { formats: ["markdown", "product"] });
|
|
72
|
+
|
|
73
|
+
expect(result.markdown).toBe("# Example Content");
|
|
74
|
+
expect(result.product).toBeDefined();
|
|
75
|
+
expect(result.product?.title).toBe("Acme Mug");
|
|
76
|
+
expect(result.product?.variants?.[0]?.price?.amount).toBe(12.5);
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test("scrape without product format does not return product", async () => {
|
|
80
|
+
const mockResponse = {
|
|
81
|
+
status: 200,
|
|
82
|
+
data: {
|
|
83
|
+
success: true,
|
|
84
|
+
data: {
|
|
85
|
+
markdown: "# Example"
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
const http = makeHttp(() => mockResponse);
|
|
91
|
+
const result = await scrape(http, "https://example.com", { formats: ["markdown"] });
|
|
92
|
+
|
|
93
|
+
expect(result.markdown).toBe("# Example");
|
|
94
|
+
expect(result.product).toBeUndefined();
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
test("non-product page scraped with product format yields a warning and no product", async () => {
|
|
98
|
+
const mockResponse = {
|
|
99
|
+
status: 200,
|
|
100
|
+
data: {
|
|
101
|
+
success: true,
|
|
102
|
+
data: {
|
|
103
|
+
markdown: "# Blog Post",
|
|
104
|
+
warning: "No product found on this page."
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
const http = makeHttp(() => mockResponse);
|
|
110
|
+
const result = await scrape(http, "https://example.com", { formats: ["product"] });
|
|
111
|
+
|
|
112
|
+
expect(result.product).toBeUndefined();
|
|
113
|
+
expect(result.warning).toContain("No product found");
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
test("product format with variants populated", async () => {
|
|
117
|
+
const mockResponse = {
|
|
118
|
+
status: 200,
|
|
119
|
+
data: {
|
|
120
|
+
success: true,
|
|
121
|
+
data: {
|
|
122
|
+
product: {
|
|
123
|
+
title: "Acme T-Shirt",
|
|
124
|
+
brand: "Acme",
|
|
125
|
+
url: "https://example.com/tshirt",
|
|
126
|
+
variants: [
|
|
127
|
+
{
|
|
128
|
+
id: "v1",
|
|
129
|
+
sku: "TSHIRT-S-RED",
|
|
130
|
+
title: "Small / Red",
|
|
131
|
+
values: { size: "S", color: "Red" },
|
|
132
|
+
price: { amount: 19.0, currency: "USD" },
|
|
133
|
+
sale: { originalPrice: { amount: 24.0, currency: "USD" } },
|
|
134
|
+
availability: { inStock: true },
|
|
135
|
+
images: [{ url: "https://example.com/tshirt-red.jpg" }]
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
id: "v2",
|
|
139
|
+
sku: "TSHIRT-L-BLUE",
|
|
140
|
+
title: "Large / Blue",
|
|
141
|
+
values: { size: "L", color: "Blue" },
|
|
142
|
+
availability: { inStock: false, text: "Sold out" }
|
|
143
|
+
}
|
|
144
|
+
]
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
const http = makeHttp(() => mockResponse);
|
|
151
|
+
const result = await scrape(http, "https://example.com", { formats: ["product"] });
|
|
152
|
+
|
|
153
|
+
expect(result.product).toBeDefined();
|
|
154
|
+
expect(result.product?.variants).toHaveLength(2);
|
|
155
|
+
expect(result.product?.variants?.[0]?.sku).toBe("TSHIRT-S-RED");
|
|
156
|
+
expect(result.product?.variants?.[0]?.values?.color).toBe("Red");
|
|
157
|
+
expect(result.product?.variants?.[0]?.images?.[0]?.url).toBe("https://example.com/tshirt-red.jpg");
|
|
158
|
+
expect(result.product?.variants?.[0]?.sale?.originalPrice?.amount).toBe(24.0);
|
|
159
|
+
expect(result.product?.variants?.[1]?.availability?.inStock).toBe(false);
|
|
160
|
+
expect(result.product?.variants?.[1]?.availability?.text).toBe("Sold out");
|
|
161
|
+
});
|
|
162
|
+
});
|
|
@@ -33,6 +33,7 @@ describe("JS SDK v2 scrape-browser methods", () => {
|
|
|
33
33
|
data: {
|
|
34
34
|
success: true,
|
|
35
35
|
output: "Clicked the button",
|
|
36
|
+
cdpUrl: "wss://browser.example.com/cdp",
|
|
36
37
|
liveViewUrl: "https://live.example.com/view",
|
|
37
38
|
interactiveLiveViewUrl: "https://live.example.com/interactive",
|
|
38
39
|
stdout: "",
|
|
@@ -52,6 +53,7 @@ describe("JS SDK v2 scrape-browser methods", () => {
|
|
|
52
53
|
);
|
|
53
54
|
expect(response.success).toBe(true);
|
|
54
55
|
expect(response.output).toBe("Clicked the button");
|
|
56
|
+
expect(response.cdpUrl).toBe("wss://browser.example.com/cdp");
|
|
55
57
|
expect(response.liveViewUrl).toBe("https://live.example.com/view");
|
|
56
58
|
expect(response.interactiveLiveViewUrl).toBe(
|
|
57
59
|
"https://live.example.com/interactive",
|
package/src/v2/types.ts
CHANGED
|
@@ -13,6 +13,7 @@ export type FormatString =
|
|
|
13
13
|
| "json"
|
|
14
14
|
| "attributes"
|
|
15
15
|
| "branding"
|
|
16
|
+
| "product"
|
|
16
17
|
| "audio"
|
|
17
18
|
| "video";
|
|
18
19
|
|
|
@@ -427,6 +428,46 @@ export interface BrandingProfile {
|
|
|
427
428
|
[key: string]: unknown;
|
|
428
429
|
}
|
|
429
430
|
|
|
431
|
+
export interface ProductPrice {
|
|
432
|
+
amount: number;
|
|
433
|
+
currency?: string;
|
|
434
|
+
formatted?: string;
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
export interface ProductAvailability {
|
|
438
|
+
inStock: boolean;
|
|
439
|
+
text?: string;
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
export interface ProductImage {
|
|
443
|
+
url: string;
|
|
444
|
+
alt?: string;
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
export interface ProductSale {
|
|
448
|
+
originalPrice: ProductPrice;
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
export interface ProductVariant {
|
|
452
|
+
id?: string;
|
|
453
|
+
sku?: string;
|
|
454
|
+
title?: string;
|
|
455
|
+
values?: Record<string, unknown>;
|
|
456
|
+
price?: ProductPrice;
|
|
457
|
+
sale?: ProductSale;
|
|
458
|
+
availability: ProductAvailability;
|
|
459
|
+
images?: ProductImage[];
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
export interface ProductProfile {
|
|
463
|
+
title: string;
|
|
464
|
+
brand?: string;
|
|
465
|
+
category?: string;
|
|
466
|
+
url: string;
|
|
467
|
+
description?: string;
|
|
468
|
+
variants: ProductVariant[];
|
|
469
|
+
}
|
|
470
|
+
|
|
430
471
|
export interface DocumentMetadata {
|
|
431
472
|
// Common metadata fields
|
|
432
473
|
title?: string;
|
|
@@ -509,6 +550,7 @@ export interface Document {
|
|
|
509
550
|
warning?: string;
|
|
510
551
|
changeTracking?: Record<string, unknown>;
|
|
511
552
|
branding?: BrandingProfile;
|
|
553
|
+
product?: ProductProfile;
|
|
512
554
|
}
|
|
513
555
|
|
|
514
556
|
// Pagination configuration for auto-fetching pages from v2 endpoints that return a `next` URL
|
|
@@ -1115,6 +1157,7 @@ export interface BrowserCreateResponse {
|
|
|
1115
1157
|
|
|
1116
1158
|
export interface BrowserExecuteResponse {
|
|
1117
1159
|
success: boolean;
|
|
1160
|
+
cdpUrl?: string;
|
|
1118
1161
|
liveViewUrl?: string;
|
|
1119
1162
|
interactiveLiveViewUrl?: string;
|
|
1120
1163
|
output?: string;
|