firecrawl 4.28.1 → 4.28.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/dist/{chunk-626CN3G5.js → chunk-XZBMU524.js} +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.d.cts +52 -2
- package/dist/index.d.ts +52 -2
- package/dist/index.js +2 -2
- package/dist/{package-FMOR3ELU.js → package-7QIHFAN4.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/unit/v2/menu.test.ts +213 -0
- package/src/v2/types.ts +59 -0
package/README.md
CHANGED
|
@@ -70,6 +70,18 @@ const doc = await app.scrape('https://example.com/product/123', {
|
|
|
70
70
|
console.log(doc.product);
|
|
71
71
|
```
|
|
72
72
|
|
|
73
|
+
### Menu extraction
|
|
74
|
+
|
|
75
|
+
Use the `menu` format to deterministically pull a merchant's menu (sections, items, prices, availability) from menu pages — the deterministic counterpart to the LLM-based `json` format.
|
|
76
|
+
|
|
77
|
+
```js
|
|
78
|
+
const doc = await app.scrape('https://example.com/restaurant/menu', {
|
|
79
|
+
formats: ['menu'],
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
console.log(doc.menu);
|
|
83
|
+
```
|
|
84
|
+
|
|
73
85
|
### Parsing uploaded files
|
|
74
86
|
|
|
75
87
|
Use `parse` to upload a file (`html`, `pdf`, `docx`, etc.) as multipart form data and process it through the same parsing pipeline.
|
|
@@ -12,7 +12,7 @@ var require_package = __commonJS({
|
|
|
12
12
|
"package.json"(exports, module) {
|
|
13
13
|
module.exports = {
|
|
14
14
|
name: "@mendable/firecrawl-js",
|
|
15
|
-
version: "4.28.1",
|
|
15
|
+
version: "4.28.2",
|
|
16
16
|
description: "JavaScript SDK for Firecrawl API",
|
|
17
17
|
main: "dist/index.js",
|
|
18
18
|
types: "dist/index.d.ts",
|
package/dist/index.cjs
CHANGED
|
@@ -39,7 +39,7 @@ var require_package = __commonJS({
|
|
|
39
39
|
"package.json"(exports2, module2) {
|
|
40
40
|
module2.exports = {
|
|
41
41
|
name: "@mendable/firecrawl-js",
|
|
42
|
-
version: "4.28.1",
|
|
42
|
+
version: "4.28.2",
|
|
43
43
|
description: "JavaScript SDK for Firecrawl API",
|
|
44
44
|
main: "dist/index.js",
|
|
45
45
|
types: "dist/index.d.ts",
|
package/dist/index.d.cts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "audio" | "video";
|
|
7
|
+
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "menu" | "audio" | "video";
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -346,6 +346,55 @@ interface ProductProfile {
|
|
|
346
346
|
description?: string;
|
|
347
347
|
variants: ProductVariant[];
|
|
348
348
|
}
|
|
349
|
+
interface MenuPrice {
|
|
350
|
+
amount: number;
|
|
351
|
+
currency?: string;
|
|
352
|
+
formatted?: string;
|
|
353
|
+
}
|
|
354
|
+
interface MenuAvailability {
|
|
355
|
+
inStock: boolean;
|
|
356
|
+
text?: string;
|
|
357
|
+
}
|
|
358
|
+
interface MenuImage {
|
|
359
|
+
url: string;
|
|
360
|
+
alt?: string;
|
|
361
|
+
}
|
|
362
|
+
interface MenuItemIdentifiers {
|
|
363
|
+
merchantItemId?: string;
|
|
364
|
+
}
|
|
365
|
+
interface MenuItem {
|
|
366
|
+
id: string;
|
|
367
|
+
name: string;
|
|
368
|
+
description?: string;
|
|
369
|
+
images: MenuImage[];
|
|
370
|
+
price?: MenuPrice;
|
|
371
|
+
availability: MenuAvailability;
|
|
372
|
+
dietary: string[];
|
|
373
|
+
calories?: number;
|
|
374
|
+
optionGroups: unknown[];
|
|
375
|
+
identifiers: MenuItemIdentifiers;
|
|
376
|
+
url?: string;
|
|
377
|
+
sourceUrl: string;
|
|
378
|
+
}
|
|
379
|
+
interface MenuSection {
|
|
380
|
+
id: string;
|
|
381
|
+
name: string;
|
|
382
|
+
description?: string;
|
|
383
|
+
items: MenuItem[];
|
|
384
|
+
}
|
|
385
|
+
interface MenuMerchant {
|
|
386
|
+
name: string;
|
|
387
|
+
type?: string | null;
|
|
388
|
+
location?: unknown;
|
|
389
|
+
}
|
|
390
|
+
interface MenuProfile {
|
|
391
|
+
isMenu: boolean;
|
|
392
|
+
confidence: number;
|
|
393
|
+
merchant: MenuMerchant;
|
|
394
|
+
currency?: string | null;
|
|
395
|
+
sections: MenuSection[];
|
|
396
|
+
sourceUrl: string;
|
|
397
|
+
}
|
|
349
398
|
interface DocumentMetadata {
|
|
350
399
|
title?: string;
|
|
351
400
|
description?: string;
|
|
@@ -417,6 +466,7 @@ interface Document {
|
|
|
417
466
|
changeTracking?: Record<string, unknown>;
|
|
418
467
|
branding?: BrandingProfile;
|
|
419
468
|
product?: ProductProfile;
|
|
469
|
+
menu?: MenuProfile;
|
|
420
470
|
}
|
|
421
471
|
interface PaginationConfig {
|
|
422
472
|
/** When true (default), automatically follow `next` links and aggregate all documents. */
|
|
@@ -2538,4 +2588,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
2538
2588
|
get v1(): FirecrawlApp;
|
|
2539
2589
|
}
|
|
2540
2590
|
|
|
2541
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type 
MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
2591
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type MenuAvailability, type MenuImage, type MenuItem, type MenuItemIdentifiers, type MenuMerchant, type MenuPrice, type MenuProfile, type MenuSection, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, 
type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "audio" | "video";
|
|
7
|
+
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "product" | "menu" | "audio" | "video";
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -346,6 +346,55 @@ interface ProductProfile {
|
|
|
346
346
|
description?: string;
|
|
347
347
|
variants: ProductVariant[];
|
|
348
348
|
}
|
|
349
|
+
interface MenuPrice {
|
|
350
|
+
amount: number;
|
|
351
|
+
currency?: string;
|
|
352
|
+
formatted?: string;
|
|
353
|
+
}
|
|
354
|
+
interface MenuAvailability {
|
|
355
|
+
inStock: boolean;
|
|
356
|
+
text?: string;
|
|
357
|
+
}
|
|
358
|
+
interface MenuImage {
|
|
359
|
+
url: string;
|
|
360
|
+
alt?: string;
|
|
361
|
+
}
|
|
362
|
+
interface MenuItemIdentifiers {
|
|
363
|
+
merchantItemId?: string;
|
|
364
|
+
}
|
|
365
|
+
interface MenuItem {
|
|
366
|
+
id: string;
|
|
367
|
+
name: string;
|
|
368
|
+
description?: string;
|
|
369
|
+
images: MenuImage[];
|
|
370
|
+
price?: MenuPrice;
|
|
371
|
+
availability: MenuAvailability;
|
|
372
|
+
dietary: string[];
|
|
373
|
+
calories?: number;
|
|
374
|
+
optionGroups: unknown[];
|
|
375
|
+
identifiers: MenuItemIdentifiers;
|
|
376
|
+
url?: string;
|
|
377
|
+
sourceUrl: string;
|
|
378
|
+
}
|
|
379
|
+
interface MenuSection {
|
|
380
|
+
id: string;
|
|
381
|
+
name: string;
|
|
382
|
+
description?: string;
|
|
383
|
+
items: MenuItem[];
|
|
384
|
+
}
|
|
385
|
+
interface MenuMerchant {
|
|
386
|
+
name: string;
|
|
387
|
+
type?: string | null;
|
|
388
|
+
location?: unknown;
|
|
389
|
+
}
|
|
390
|
+
interface MenuProfile {
|
|
391
|
+
isMenu: boolean;
|
|
392
|
+
confidence: number;
|
|
393
|
+
merchant: MenuMerchant;
|
|
394
|
+
currency?: string | null;
|
|
395
|
+
sections: MenuSection[];
|
|
396
|
+
sourceUrl: string;
|
|
397
|
+
}
|
|
349
398
|
interface DocumentMetadata {
|
|
350
399
|
title?: string;
|
|
351
400
|
description?: string;
|
|
@@ -417,6 +466,7 @@ interface Document {
|
|
|
417
466
|
changeTracking?: Record<string, unknown>;
|
|
418
467
|
branding?: BrandingProfile;
|
|
419
468
|
product?: ProductProfile;
|
|
469
|
+
menu?: MenuProfile;
|
|
420
470
|
}
|
|
421
471
|
interface PaginationConfig {
|
|
422
472
|
/** When true (default), automatically follow `next` links and aggregate all documents. */
|
|
@@ -2538,4 +2588,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
2538
2588
|
get v1(): FirecrawlApp;
|
|
2539
2589
|
}
|
|
2540
2590
|
|
|
2541
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type 
MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
2591
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type EndpointFeedbackEndpoint, type EndpointFeedbackRequest, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, type FeedbackMissingContent, type FeedbackRating, type FeedbackResponse, type FeedbackValuableSource, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type MenuAvailability, type MenuImage, type MenuItem, type MenuItemIdentifiers, type MenuMerchant, type MenuPrice, type MenuProfile, type MenuSection, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, 
type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type ProductAvailability, type ProductImage, type ProductPrice, type ProductProfile, type ProductSale, type ProductVariant, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchFeedbackRequest, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-626CN3G5.js";
|
|
3
|
+
} from "./chunk-XZBMU524.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -2300,7 +2300,7 @@ var FirecrawlApp = class {
|
|
|
2300
2300
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
2301
2301
|
return process.env.npm_package_version;
|
|
2302
2302
|
}
|
|
2303
|
-
const packageJson = await import("./package-FMOR3ELU.js");
|
|
2303
|
+
const packageJson = await import("./package-7QIHFAN4.js");
|
|
2304
2304
|
return packageJson.default.version;
|
|
2305
2305
|
} catch (error) {
|
|
2306
2306
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
package/package.json
CHANGED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { describe, test, expect, jest } from "@jest/globals";
|
|
2
|
+
import { scrape } from "../../../v2/methods/scrape";
|
|
3
|
+
|
|
4
|
+
describe("JS SDK v2 menu format", () => {
|
|
5
|
+
function makeHttp(postImpl: (url: string, data: any) => any) {
|
|
6
|
+
return { post: jest.fn(async (u: string, d: any) => postImpl(u, d)) } as any;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
test("scrape with menu format returns menu data", async () => {
|
|
10
|
+
const mockResponse = {
|
|
11
|
+
status: 200,
|
|
12
|
+
data: {
|
|
13
|
+
success: true,
|
|
14
|
+
data: {
|
|
15
|
+
markdown: "# Example Menu",
|
|
16
|
+
menu: {
|
|
17
|
+
isMenu: true,
|
|
18
|
+
confidence: 0.95,
|
|
19
|
+
currency: "USD",
|
|
20
|
+
sourceUrl: "https://example.com/menu",
|
|
21
|
+
merchant: { name: "Acme Diner", type: "restaurant" },
|
|
22
|
+
sections: [
|
|
23
|
+
{
|
|
24
|
+
id: "mains",
|
|
25
|
+
name: "Mains",
|
|
26
|
+
description: "Hearty plates",
|
|
27
|
+
items: [
|
|
28
|
+
{
|
|
29
|
+
id: "burger",
|
|
30
|
+
name: "Classic Burger",
|
|
31
|
+
description: "Beef patty with cheese",
|
|
32
|
+
images: [{ url: "https://example.com/burger.jpg", alt: "Burger" }],
|
|
33
|
+
price: { amount: 12.5, currency: "USD", formatted: "$12.50" },
|
|
34
|
+
availability: { inStock: true, text: "Available" },
|
|
35
|
+
dietary: ["contains-gluten"],
|
|
36
|
+
calories: 800,
|
|
37
|
+
optionGroups: [],
|
|
38
|
+
identifiers: { merchantItemId: "ITEM-1" },
|
|
39
|
+
url: "https://example.com/menu#burger",
|
|
40
|
+
sourceUrl: "https://example.com/menu"
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
}
|
|
44
|
+
]
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
const http = makeHttp(() => mockResponse);
|
|
51
|
+
const result = await scrape(http, "https://example.com", { formats: ["menu"] });
|
|
52
|
+
|
|
53
|
+
expect(result.menu).toBeDefined();
|
|
54
|
+
expect(result.menu?.isMenu).toBe(true);
|
|
55
|
+
expect(result.menu?.confidence).toBe(0.95);
|
|
56
|
+
expect(result.menu?.currency).toBe("USD");
|
|
57
|
+
expect(result.menu?.merchant?.name).toBe("Acme Diner");
|
|
58
|
+
expect(result.menu?.merchant?.type).toBe("restaurant");
|
|
59
|
+
expect(result.menu?.sections?.[0]?.name).toBe("Mains");
|
|
60
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.name).toBe("Classic Burger");
|
|
61
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.price?.amount).toBe(12.5);
|
|
62
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.price?.currency).toBe("USD");
|
|
63
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.availability?.inStock).toBe(true);
|
|
64
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.images?.[0]?.url).toBe("https://example.com/burger.jpg");
|
|
65
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.dietary?.[0]).toBe("contains-gluten");
|
|
66
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.identifiers?.merchantItemId).toBe("ITEM-1");
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
test("scrape with menu and markdown formats returns both", async () => {
|
|
70
|
+
const mockResponse = {
|
|
71
|
+
status: 200,
|
|
72
|
+
data: {
|
|
73
|
+
success: true,
|
|
74
|
+
data: {
|
|
75
|
+
markdown: "# Example Content",
|
|
76
|
+
menu: {
|
|
77
|
+
isMenu: true,
|
|
78
|
+
confidence: 0.8,
|
|
79
|
+
sourceUrl: "https://example.com/cafe",
|
|
80
|
+
merchant: { name: "Cafe Acme" },
|
|
81
|
+
sections: [
|
|
82
|
+
{
|
|
83
|
+
id: "drinks",
|
|
84
|
+
name: "Drinks",
|
|
85
|
+
items: [
|
|
86
|
+
{
|
|
87
|
+
id: "coffee",
|
|
88
|
+
name: "Coffee",
|
|
89
|
+
images: [],
|
|
90
|
+
price: { amount: 3.5, currency: "USD" },
|
|
91
|
+
availability: { inStock: true },
|
|
92
|
+
dietary: [],
|
|
93
|
+
optionGroups: [],
|
|
94
|
+
identifiers: {},
|
|
95
|
+
sourceUrl: "https://example.com/cafe"
|
|
96
|
+
}
|
|
97
|
+
]
|
|
98
|
+
}
|
|
99
|
+
]
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
const http = makeHttp(() => mockResponse);
|
|
106
|
+
const result = await scrape(http, "https://example.com", { formats: ["markdown", "menu"] });
|
|
107
|
+
|
|
108
|
+
expect(result.markdown).toBe("# Example Content");
|
|
109
|
+
expect(result.menu).toBeDefined();
|
|
110
|
+
expect(result.menu?.merchant?.name).toBe("Cafe Acme");
|
|
111
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.price?.amount).toBe(3.5);
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
test("scrape without menu format does not return menu", async () => {
|
|
115
|
+
const mockResponse = {
|
|
116
|
+
status: 200,
|
|
117
|
+
data: {
|
|
118
|
+
success: true,
|
|
119
|
+
data: {
|
|
120
|
+
markdown: "# Example"
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
const http = makeHttp(() => mockResponse);
|
|
126
|
+
const result = await scrape(http, "https://example.com", { formats: ["markdown"] });
|
|
127
|
+
|
|
128
|
+
expect(result.markdown).toBe("# Example");
|
|
129
|
+
expect(result.menu).toBeUndefined();
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
test("non-menu page scraped with menu format yields a warning and no menu", async () => {
|
|
133
|
+
const mockResponse = {
|
|
134
|
+
status: 200,
|
|
135
|
+
data: {
|
|
136
|
+
success: true,
|
|
137
|
+
data: {
|
|
138
|
+
markdown: "# Blog Post",
|
|
139
|
+
warning: "No menu found on this page."
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
const http = makeHttp(() => mockResponse);
|
|
145
|
+
const result = await scrape(http, "https://example.com", { formats: ["menu"] });
|
|
146
|
+
|
|
147
|
+
expect(result.menu).toBeUndefined();
|
|
148
|
+
expect(result.warning).toContain("No menu found");
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
test("menu format with multiple sections and items", async () => {
|
|
152
|
+
const mockResponse = {
|
|
153
|
+
status: 200,
|
|
154
|
+
data: {
|
|
155
|
+
success: true,
|
|
156
|
+
data: {
|
|
157
|
+
menu: {
|
|
158
|
+
isMenu: true,
|
|
159
|
+
confidence: 0.9,
|
|
160
|
+
sourceUrl: "https://example.com/menu",
|
|
161
|
+
merchant: { name: "Acme Bistro", type: "restaurant", location: { city: "Springfield" } },
|
|
162
|
+
sections: [
|
|
163
|
+
{
|
|
164
|
+
id: "starters",
|
|
165
|
+
name: "Starters",
|
|
166
|
+
items: [
|
|
167
|
+
{
|
|
168
|
+
id: "soup",
|
|
169
|
+
name: "Tomato Soup",
|
|
170
|
+
images: [],
|
|
171
|
+
price: { amount: 6.0, currency: "USD" },
|
|
172
|
+
availability: { inStock: true },
|
|
173
|
+
dietary: ["vegetarian"],
|
|
174
|
+
optionGroups: [],
|
|
175
|
+
identifiers: {},
|
|
176
|
+
sourceUrl: "https://example.com/menu"
|
|
177
|
+
}
|
|
178
|
+
]
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
id: "desserts",
|
|
182
|
+
name: "Desserts",
|
|
183
|
+
items: [
|
|
184
|
+
{
|
|
185
|
+
id: "cake",
|
|
186
|
+
name: "Chocolate Cake",
|
|
187
|
+
images: [{ url: "https://example.com/cake.jpg" }],
|
|
188
|
+
availability: { inStock: false, text: "Sold out" },
|
|
189
|
+
dietary: [],
|
|
190
|
+
optionGroups: [],
|
|
191
|
+
identifiers: {},
|
|
192
|
+
sourceUrl: "https://example.com/menu"
|
|
193
|
+
}
|
|
194
|
+
]
|
|
195
|
+
}
|
|
196
|
+
]
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
};
|
|
201
|
+
|
|
202
|
+
const http = makeHttp(() => mockResponse);
|
|
203
|
+
const result = await scrape(http, "https://example.com", { formats: ["menu"] });
|
|
204
|
+
|
|
205
|
+
expect(result.menu).toBeDefined();
|
|
206
|
+
expect(result.menu?.sections).toHaveLength(2);
|
|
207
|
+
expect(result.menu?.sections?.[0]?.items?.[0]?.dietary?.[0]).toBe("vegetarian");
|
|
208
|
+
expect(result.menu?.sections?.[1]?.name).toBe("Desserts");
|
|
209
|
+
expect(result.menu?.sections?.[1]?.items?.[0]?.images?.[0]?.url).toBe("https://example.com/cake.jpg");
|
|
210
|
+
expect(result.menu?.sections?.[1]?.items?.[0]?.availability?.inStock).toBe(false);
|
|
211
|
+
expect(result.menu?.sections?.[1]?.items?.[0]?.availability?.text).toBe("Sold out");
|
|
212
|
+
});
|
|
213
|
+
});
|
package/src/v2/types.ts
CHANGED
|
@@ -14,6 +14,7 @@ export type FormatString =
|
|
|
14
14
|
| "attributes"
|
|
15
15
|
| "branding"
|
|
16
16
|
| "product"
|
|
17
|
+
| "menu"
|
|
17
18
|
| "audio"
|
|
18
19
|
| "video";
|
|
19
20
|
|
|
@@ -468,6 +469,63 @@ export interface ProductProfile {
|
|
|
468
469
|
variants: ProductVariant[];
|
|
469
470
|
}
|
|
470
471
|
|
|
472
|
+
export interface MenuPrice {
|
|
473
|
+
amount: number;
|
|
474
|
+
currency?: string;
|
|
475
|
+
formatted?: string;
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
export interface MenuAvailability {
|
|
479
|
+
inStock: boolean;
|
|
480
|
+
text?: string;
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
export interface MenuImage {
|
|
484
|
+
url: string;
|
|
485
|
+
alt?: string;
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
export interface MenuItemIdentifiers {
|
|
489
|
+
merchantItemId?: string;
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
export interface MenuItem {
|
|
493
|
+
id: string;
|
|
494
|
+
name: string;
|
|
495
|
+
description?: string;
|
|
496
|
+
images: MenuImage[];
|
|
497
|
+
price?: MenuPrice;
|
|
498
|
+
availability: MenuAvailability;
|
|
499
|
+
dietary: string[];
|
|
500
|
+
calories?: number;
|
|
501
|
+
optionGroups: unknown[];
|
|
502
|
+
identifiers: MenuItemIdentifiers;
|
|
503
|
+
url?: string;
|
|
504
|
+
sourceUrl: string;
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
export interface MenuSection {
|
|
508
|
+
id: string;
|
|
509
|
+
name: string;
|
|
510
|
+
description?: string;
|
|
511
|
+
items: MenuItem[];
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
export interface MenuMerchant {
|
|
515
|
+
name: string;
|
|
516
|
+
type?: string | null;
|
|
517
|
+
location?: unknown;
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
export interface MenuProfile {
|
|
521
|
+
isMenu: boolean;
|
|
522
|
+
confidence: number;
|
|
523
|
+
merchant: MenuMerchant;
|
|
524
|
+
currency?: string | null;
|
|
525
|
+
sections: MenuSection[];
|
|
526
|
+
sourceUrl: string;
|
|
527
|
+
}
|
|
528
|
+
|
|
471
529
|
export interface DocumentMetadata {
|
|
472
530
|
// Common metadata fields
|
|
473
531
|
title?: string;
|
|
@@ -551,6 +609,7 @@ export interface Document {
|
|
|
551
609
|
changeTracking?: Record<string, unknown>;
|
|
552
610
|
branding?: BrandingProfile;
|
|
553
611
|
product?: ProductProfile;
|
|
612
|
+
menu?: MenuProfile;
|
|
554
613
|
}
|
|
555
614
|
|
|
556
615
|
// Pagination configuration for auto-fetching pages from v2 endpoints that return a `next` URL
|