firecrawl-sdk 1.9.0 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +30 -1
- data/lib/firecrawl/models/document.rb +3 -1
- data/lib/firecrawl/models/menu_profile.rb +114 -0
- data/lib/firecrawl/models/parse_options.rb +2 -2
- data/lib/firecrawl/models/product_profile.rb +83 -0
- data/lib/firecrawl/version.rb +1 -1
- data/lib/firecrawl.rb +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f17ef4d354c427ea550adb739310cd6b7d03a148c72febb6c69133f0f39a10c0
|
|
4
|
+
data.tar.gz: e1907bd8120fc8bb893233d85ec9f63cae511e65ce8db7dfee848ed0592de1d5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 93523ad1fe6bae7e66a9dc51b0c40d1cd580cdb6c9310a6709baa52142bf52bbed036c7c1b1f0361041d6672b2ac74cd45e974e3a0d251b27add73cae5176d34
|
|
7
|
+
data.tar.gz: ca1db36ccabd4060775c2e91f4284e3cf0f2ac393ee2b983a5437a66aa01b74fc477a7bce79fbb9468c51ca696d744e38893bb98729343b8c7c58fd0de6f318c
|
data/README.md
CHANGED
|
@@ -74,11 +74,40 @@ doc = client.scrape("https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
|
|
74
74
|
puts doc.video
|
|
75
75
|
```
|
|
76
76
|
|
|
77
|
+
### Product Extraction
|
|
78
|
+
|
|
79
|
+
Use the `product` format on product pages to get structured product data
|
|
80
|
+
(title, brand, category, and per-variant price, availability, and images).
|
|
81
|
+
It is the deterministic counterpart to the LLM-based `json` format. The
|
|
82
|
+
returned `product` field contains the extracted fields.
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
doc = client.scrape("https://example.com/products/widget",
|
|
86
|
+
Firecrawl::Models::ScrapeOptions.new(formats: ["product"]))
|
|
87
|
+
|
|
88
|
+
puts doc.product
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Menu Extraction
|
|
92
|
+
|
|
93
|
+
Use the `menu` format on restaurant/merchant menu pages to get structured
|
|
94
|
+
menu data (merchant profile plus ordered sections, each holding items with
|
|
95
|
+
per-item price, availability, images, and dietary information). It is the
|
|
96
|
+
deterministic counterpart to the LLM-based `json` format. The returned `menu`
|
|
97
|
+
field contains the extracted fields.
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
doc = client.scrape("https://example.com/menu",
|
|
101
|
+
Firecrawl::Models::ScrapeOptions.new(formats: ["menu"]))
|
|
102
|
+
|
|
103
|
+
puts doc.menu
|
|
104
|
+
```
|
|
105
|
+
|
|
77
106
|
### Parse
|
|
78
107
|
|
|
79
108
|
Upload a local file (`html`, `pdf`, `docx`, etc.) via multipart form data and
|
|
80
109
|
parse it synchronously. Parse options intentionally exclude browser-only
|
|
81
|
-
features such as change tracking, screenshot, branding, audio, video, actions,
|
|
110
|
+
features such as change tracking, screenshot, branding, audio, video, product, menu, actions,
|
|
82
111
|
wait_for, location, and mobile. The `proxy` option only accepts `"auto"` or `"basic"`.
|
|
83
112
|
|
|
84
113
|
```ruby
|
data/lib/firecrawl/models/document.rb
CHANGED
|
@@ -7,7 +7,7 @@ module Firecrawl
|
|
|
7
7
|
attr_reader :markdown, :html, :raw_html, :json, :summary,
|
|
8
8
|
:metadata, :links, :images, :screenshot, :audio,
|
|
9
9
|
:video, :attributes, :actions, :answer, :highlights,
|
|
10
|
-
:warning, :change_tracking, :branding
|
|
10
|
+
:warning, :change_tracking, :branding, :product, :menu
|
|
11
11
|
|
|
12
12
|
def initialize(data)
|
|
13
13
|
@markdown = data["markdown"]
|
|
@@ -28,6 +28,8 @@ module Firecrawl
|
|
|
28
28
|
@warning = data["warning"]
|
|
29
29
|
@change_tracking = data["changeTracking"]
|
|
30
30
|
@branding = data["branding"]
|
|
31
|
+
@product = data["product"] && ProductProfile.new(data["product"])
|
|
32
|
+
@menu = data["menu"] && MenuProfile.new(data["menu"])
|
|
31
33
|
end
|
|
32
34
|
|
|
33
35
|
def to_s
|
data/lib/firecrawl/models/menu_profile.rb
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Firecrawl
|
|
4
|
+
module Models
|
|
5
|
+
# Structured menu information extracted from a restaurant/merchant page
|
|
6
|
+
# via the `menu` scrape format.
|
|
7
|
+
class MenuProfile
|
|
8
|
+
# An image associated with a menu item.
|
|
9
|
+
class Image
|
|
10
|
+
attr_reader :url, :alt
|
|
11
|
+
|
|
12
|
+
def initialize(data)
|
|
13
|
+
@url = data["url"]
|
|
14
|
+
@alt = data["alt"]
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# A monetary value with an optional currency and formatted string.
|
|
19
|
+
class Price
|
|
20
|
+
attr_reader :amount, :currency, :formatted
|
|
21
|
+
|
|
22
|
+
def initialize(data)
|
|
23
|
+
@amount = data["amount"]
|
|
24
|
+
@currency = data["currency"]
|
|
25
|
+
@formatted = data["formatted"]
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Stock availability information for a menu item. Always present.
|
|
30
|
+
class Availability
|
|
31
|
+
attr_reader :in_stock, :text
|
|
32
|
+
|
|
33
|
+
def initialize(data)
|
|
34
|
+
data ||= {}
|
|
35
|
+
@in_stock = data["inStock"] || false
|
|
36
|
+
@text = data["text"]
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Merchant (restaurant/business) profile for the menu.
|
|
41
|
+
class Merchant
|
|
42
|
+
attr_reader :name, :type, :location
|
|
43
|
+
|
|
44
|
+
def initialize(data)
|
|
45
|
+
data ||= {}
|
|
46
|
+
@name = data["name"]
|
|
47
|
+
@type = data["type"]
|
|
48
|
+
@location = data["location"]
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Identifiers carried on a menu item.
|
|
53
|
+
class Identifiers
|
|
54
|
+
attr_reader :merchant_item_id
|
|
55
|
+
|
|
56
|
+
def initialize(data)
|
|
57
|
+
data ||= {}
|
|
58
|
+
@merchant_item_id = data["merchantItemId"]
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# A single item on the menu. Pricing, availability, images, and dietary
|
|
63
|
+
# information live here rather than on the section or profile.
|
|
64
|
+
class Item
|
|
65
|
+
attr_reader :id, :name, :description, :images, :price, :availability,
|
|
66
|
+
:dietary, :calories, :option_groups, :identifiers, :url,
|
|
67
|
+
:source_url
|
|
68
|
+
|
|
69
|
+
def initialize(data)
|
|
70
|
+
@id = data["id"]
|
|
71
|
+
@name = data["name"]
|
|
72
|
+
@description = data["description"]
|
|
73
|
+
@images = (data["images"] || []).map { |img| Image.new(img) }
|
|
74
|
+
@price = data["price"] && Price.new(data["price"])
|
|
75
|
+
@availability = Availability.new(data["availability"])
|
|
76
|
+
@dietary = data["dietary"] || []
|
|
77
|
+
@calories = data["calories"]
|
|
78
|
+
@option_groups = data["optionGroups"] || []
|
|
79
|
+
@identifiers = Identifiers.new(data["identifiers"])
|
|
80
|
+
@url = data["url"]
|
|
81
|
+
@source_url = data["sourceUrl"]
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# An ordered group of menu items.
|
|
86
|
+
class Section
|
|
87
|
+
attr_reader :id, :name, :description, :items
|
|
88
|
+
|
|
89
|
+
def initialize(data)
|
|
90
|
+
@id = data["id"]
|
|
91
|
+
@name = data["name"]
|
|
92
|
+
@description = data["description"]
|
|
93
|
+
@items = (data["items"] || []).map { |item| Item.new(item) }
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
attr_reader :is_menu, :confidence, :merchant, :currency, :sections,
|
|
98
|
+
:source_url
|
|
99
|
+
|
|
100
|
+
def initialize(data)
|
|
101
|
+
@is_menu = data["isMenu"] || false
|
|
102
|
+
@confidence = data["confidence"]
|
|
103
|
+
@merchant = Merchant.new(data["merchant"])
|
|
104
|
+
@currency = data["currency"]
|
|
105
|
+
@sections = (data["sections"] || []).map { |section| Section.new(section) }
|
|
106
|
+
@source_url = data["sourceUrl"]
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def to_s
|
|
110
|
+
"MenuProfile{merchant=#{merchant&.name || 'unknown'}, sourceUrl=#{source_url || 'unknown'}}"
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
data/lib/firecrawl/models/parse_options.rb
CHANGED
|
@@ -5,10 +5,10 @@ module Firecrawl
|
|
|
5
5
|
# Options for parsing uploaded files via `/v2/parse`.
|
|
6
6
|
#
|
|
7
7
|
# Parse does not support browser-rendering features (actions, waitFor,
|
|
8
|
-
# location, mobile) nor screenshot/branding/audio/video/changeTracking formats. The
|
|
8
|
+
# location, mobile) nor screenshot/branding/product/menu/audio/video/changeTracking formats. The
|
|
9
9
|
# proxy field only accepts "auto" or "basic".
|
|
10
10
|
class ParseOptions
|
|
11
|
-
UNSUPPORTED_FORMATS = %w[changeTracking screenshot screenshot@fullPage branding audio video].freeze
|
|
11
|
+
UNSUPPORTED_FORMATS = %w[changeTracking screenshot screenshot@fullPage branding product menu audio video].freeze
|
|
12
12
|
|
|
13
13
|
FIELDS = %i[
|
|
14
14
|
formats headers include_tags exclude_tags only_main_content
|
data/lib/firecrawl/models/product_profile.rb
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Firecrawl
|
|
4
|
+
module Models
|
|
5
|
+
# Structured product information extracted from a product page via the
|
|
6
|
+
# `product` scrape format.
|
|
7
|
+
class ProductProfile
|
|
8
|
+
# An image associated with a product or variant.
|
|
9
|
+
class Image
|
|
10
|
+
attr_reader :url, :alt
|
|
11
|
+
|
|
12
|
+
def initialize(data)
|
|
13
|
+
@url = data["url"]
|
|
14
|
+
@alt = data["alt"]
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# A monetary value with an optional currency and formatted string.
|
|
19
|
+
class Price
|
|
20
|
+
attr_reader :amount, :currency, :formatted
|
|
21
|
+
|
|
22
|
+
def initialize(data)
|
|
23
|
+
@amount = data["amount"]
|
|
24
|
+
@currency = data["currency"]
|
|
25
|
+
@formatted = data["formatted"]
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Stock availability information for a variant. Always present.
|
|
30
|
+
class Availability
|
|
31
|
+
attr_reader :in_stock, :text
|
|
32
|
+
|
|
33
|
+
def initialize(data)
|
|
34
|
+
data ||= {}
|
|
35
|
+
@in_stock = data["inStock"] || false
|
|
36
|
+
@text = data["text"]
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Sale pricing for a variant, carrying the pre-sale original price.
|
|
41
|
+
class Sale
|
|
42
|
+
attr_reader :original_price
|
|
43
|
+
|
|
44
|
+
def initialize(data)
|
|
45
|
+
@original_price = data["originalPrice"] && Price.new(data["originalPrice"])
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# A purchasable variant of a product. Pricing, availability, and images
|
|
50
|
+
# live here rather than on the top-level product.
|
|
51
|
+
class Variant
|
|
52
|
+
attr_reader :id, :sku, :title, :values, :price, :sale,
|
|
53
|
+
:availability, :images
|
|
54
|
+
|
|
55
|
+
def initialize(data)
|
|
56
|
+
@id = data["id"]
|
|
57
|
+
@sku = data["sku"]
|
|
58
|
+
@title = data["title"]
|
|
59
|
+
@values = data["values"]
|
|
60
|
+
@price = data["price"] && Price.new(data["price"])
|
|
61
|
+
@sale = data["sale"] && Sale.new(data["sale"])
|
|
62
|
+
@availability = Availability.new(data["availability"])
|
|
63
|
+
@images = (data["images"] || []).map { |img| Image.new(img) }
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
attr_reader :title, :brand, :category, :url, :description, :variants
|
|
68
|
+
|
|
69
|
+
def initialize(data)
|
|
70
|
+
@title = data["title"]
|
|
71
|
+
@brand = data["brand"]
|
|
72
|
+
@category = data["category"]
|
|
73
|
+
@url = data["url"]
|
|
74
|
+
@description = data["description"]
|
|
75
|
+
@variants = (data["variants"] || []).map { |variant| Variant.new(variant) }
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def to_s
|
|
79
|
+
"ProductProfile{title=#{title || 'untitled'}, url=#{url || 'unknown'}}"
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
data/lib/firecrawl/version.rb
CHANGED
data/lib/firecrawl.rb
CHANGED
|
@@ -4,6 +4,8 @@ require_relative "firecrawl/version"
|
|
|
4
4
|
require_relative "firecrawl/errors"
|
|
5
5
|
require_relative "firecrawl/http_client"
|
|
6
6
|
require_relative "firecrawl/models/query_format"
|
|
7
|
+
require_relative "firecrawl/models/product_profile"
|
|
8
|
+
require_relative "firecrawl/models/menu_profile"
|
|
7
9
|
require_relative "firecrawl/models/document"
|
|
8
10
|
require_relative "firecrawl/models/scrape_options"
|
|
9
11
|
require_relative "firecrawl/models/crawl_options"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: firecrawl-sdk
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.9.0
|
|
4
|
+
version: 1.9.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Firecrawl
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: A type-safe Ruby client for the Firecrawl v2 API. Supports scraping,
|
|
14
14
|
crawling, batch scraping, URL mapping, web search, and AI agent operations.
|
|
@@ -38,9 +38,11 @@ files:
|
|
|
38
38
|
- lib/firecrawl/models/document.rb
|
|
39
39
|
- lib/firecrawl/models/map_data.rb
|
|
40
40
|
- lib/firecrawl/models/map_options.rb
|
|
41
|
+
- lib/firecrawl/models/menu_profile.rb
|
|
41
42
|
- lib/firecrawl/models/monitor.rb
|
|
42
43
|
- lib/firecrawl/models/parse_file.rb
|
|
43
44
|
- lib/firecrawl/models/parse_options.rb
|
|
45
|
+
- lib/firecrawl/models/product_profile.rb
|
|
44
46
|
- lib/firecrawl/models/query_format.rb
|
|
45
47
|
- lib/firecrawl/models/scrape_options.rb
|
|
46
48
|
- lib/firecrawl/models/search_data.rb
|