firecrawl-sdk 1.8.1 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c5ff8541498b47b08ff9b54d1c14969c734fba849e09c3fcc1d48143be53f1e
4
- data.tar.gz: 71fb5c61c9c20ec229d077b7db82eb2ba60c48581a0b96969f7d515fa91778eb
3
+ metadata.gz: 616551a9caf1736bbc849f0058ce52dca94728cf024cb91319393e5bf192cfe1
4
+ data.tar.gz: ffe33e36c76d4a69319940e148ccce30bc50b3e44b81915698c76f7e4a1cb99b
5
5
  SHA512:
6
- metadata.gz: 2a44065aa34a8bd32f691ebd249c4d11b48998582dd6ba9f5da6bca80d95370341be3b0dd0482076918b8ccad268b928034b9bd477daddd831bc042da59391a2
7
- data.tar.gz: 347d45a6810b3b66afb4a6bbb6b977f13dbf770c2c57eaedcb8bc9d7c4b071bf3be25f299c606adc74cc74819a7f6c23ae9009dbe183d789a7c36dc76a43ec8e
6
+ metadata.gz: fbbe64a2fd02c6a8214cbb77f7dfc64791e392d2192f4c9d657ed6cb9bab3c820ed6ff9a430b0fb1abfdd11076b6a64600a4f8df0f9232bda02b8c3bf27d1385
7
+ data.tar.gz: f2a722b6f10ac0254d3d8f7dda69f653751539056cce8690b703e023f35b021b9ac451a798800bcb2293a9f414014d9eb22bb54fea779971e79cde106dde4b5c
data/README.md CHANGED
@@ -74,11 +74,25 @@ doc = client.scrape("https://www.youtube.com/watch?v=dQw4w9WgXcQ",
74
74
  puts doc.video
75
75
  ```
76
76
 
77
+ ### Product Extraction
78
+
79
+ Use the `product` format on product pages to get structured product data
80
+ (title, brand, category, and per-variant price, availability, and images).
81
+ It is the deterministic counterpart to the LLM-based `json` format. The
82
+ returned `product` field contains the extracted fields.
83
+
84
+ ```ruby
85
+ doc = client.scrape("https://example.com/products/widget",
86
+ Firecrawl::Models::ScrapeOptions.new(formats: ["product"]))
87
+
88
+ puts doc.product
89
+ ```
90
+
77
91
  ### Parse
78
92
 
79
93
  Upload a local file (`html`, `pdf`, `docx`, etc.) via multipart form data and
80
94
  parse it synchronously. Parse options intentionally exclude browser-only
81
- features such as change tracking, screenshot, branding, audio, video, actions,
95
+ features such as change tracking, screenshot, branding, audio, video, product, actions,
82
96
  wait_for, location, and mobile. The `proxy` option only accepts `"auto"` or `"basic"`.
83
97
 
84
98
  ```ruby
@@ -85,6 +85,57 @@ module Firecrawl
85
85
  Models::Document.new(data)
86
86
  end
87
87
 
88
+ # Search research papers.
89
+ #
90
+ # @param query [String] research query
91
+ # @param options [Hash] optional query parameters
92
+ # @return [Hash]
93
+ def search_papers(query, options = {})
94
+ @http.get("/v2/search/research/papers#{query(options.merge("query" => query, "origin" => "ruby-sdk@#{Firecrawl::VERSION}"))}")
95
+ end
96
+
97
+ # Inspect paper metadata.
98
+ #
99
+ # @param paper_id [String] paper identifier
100
+ # @return [Hash]
101
+ def inspect_paper(paper_id)
102
+ raise ArgumentError, "Paper ID is required" if paper_id.nil?
103
+ @http.get("/v2/search/research/papers/#{URI.encode_www_form_component(paper_id)}")
104
+ end
105
+
106
+ # Read a paper with query-guided passages.
107
+ #
108
+ # @param paper_id [String] paper identifier
109
+ # @param query_text [String] passage query
110
+ # @param options [Hash] optional query parameters
111
+ # @return [Hash]
112
+ def read_paper(paper_id, query_text, options = {})
113
+ raise ArgumentError, "Paper ID is required" if paper_id.nil?
114
+ path = "/v2/search/research/papers/#{URI.encode_www_form_component(paper_id)}"
115
+ @http.get("#{path}#{query(options.merge("query" => query_text, "origin" => "ruby-sdk@#{Firecrawl::VERSION}"))}")
116
+ end
117
+
118
+ # Find papers related to a paper.
119
+ #
120
+ # @param paper_id [String] paper identifier
121
+ # @param intent [String] relatedness intent
122
+ # @param options [Hash] optional query parameters
123
+ # @return [Hash]
124
+ def related_papers(paper_id, intent, options = {})
125
+ raise ArgumentError, "Paper ID is required" if paper_id.nil?
126
+ path = "/v2/search/research/papers/#{URI.encode_www_form_component(paper_id)}/similar"
127
+ @http.get("#{path}#{query(options.merge("intent" => intent, "origin" => "ruby-sdk@#{Firecrawl::VERSION}"))}")
128
+ end
129
+
130
+ # Search GitHub research content.
131
+ #
132
+ # @param query_text [String] GitHub query
133
+ # @param options [Hash] optional query parameters
134
+ # @return [Hash]
135
+ def search_github(query_text, options = {})
136
+ @http.get("/v2/search/research/github#{query(options.merge("query" => query_text, "origin" => "ruby-sdk@#{Firecrawl::VERSION}"))}")
137
+ end
138
+
88
139
  # Interacts with the scrape-bound browser session for a scrape job.
89
140
  #
90
141
  # @param job_id [String] the scrape job ID
@@ -465,9 +516,21 @@ module Firecrawl
465
516
 
466
517
  private
467
518
 
468
- def query(**params)
469
- compact = params.compact
470
- compact.empty? ? "" : "?#{URI.encode_www_form(compact)}"
519
+ def query(params = nil, **kwargs)
520
+ params = (params || {}).merge(kwargs)
521
+ pairs = []
522
+ params.each do |key, value|
523
+ next if value.nil? || value == ""
524
+
525
+ values = value.is_a?(Array) ? value : [value]
526
+ values.each do |item|
527
+ next if item.nil? || item == ""
528
+
529
+ string_value = item == true ? "true" : item == false ? "false" : item.to_s
530
+ pairs << [key.to_s, string_value]
531
+ end
532
+ end
533
+ pairs.empty? ? "" : "?#{URI.encode_www_form(pairs)}"
471
534
  end
472
535
 
473
536
  def poll_crawl(job_id, poll_interval, timeout)
@@ -7,7 +7,7 @@ module Firecrawl
7
7
  attr_reader :markdown, :html, :raw_html, :json, :summary,
8
8
  :metadata, :links, :images, :screenshot, :audio,
9
9
  :video, :attributes, :actions, :answer, :highlights,
10
- :warning, :change_tracking, :branding
10
+ :warning, :change_tracking, :branding, :product
11
11
 
12
12
  def initialize(data)
13
13
  @markdown = data["markdown"]
@@ -28,6 +28,7 @@ module Firecrawl
28
28
  @warning = data["warning"]
29
29
  @change_tracking = data["changeTracking"]
30
30
  @branding = data["branding"]
31
+ @product = data["product"] && ProductProfile.new(data["product"])
31
32
  end
32
33
 
33
34
  def to_s
@@ -5,10 +5,10 @@ module Firecrawl
5
5
  # Options for parsing uploaded files via `/v2/parse`.
6
6
  #
7
7
  # Parse does not support browser-rendering features (actions, waitFor,
8
- # location, mobile) nor screenshot/branding/audio/video/changeTracking formats. The
8
+ # location, mobile) nor screenshot/branding/product/audio/video/changeTracking formats. The
9
9
  # proxy field only accepts "auto" or "basic".
10
10
  class ParseOptions
11
- UNSUPPORTED_FORMATS = %w[changeTracking screenshot screenshot@fullPage branding audio video].freeze
11
+ UNSUPPORTED_FORMATS = %w[changeTracking screenshot screenshot@fullPage branding product audio video].freeze
12
12
 
13
13
  FIELDS = %i[
14
14
  formats headers include_tags exclude_tags only_main_content
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Firecrawl
4
+ module Models
5
+ # Structured product information extracted from a product page via the
6
+ # `product` scrape format.
7
+ class ProductProfile
8
+ # An image associated with a product or variant.
9
+ class Image
10
+ attr_reader :url, :alt
11
+
12
+ def initialize(data)
13
+ @url = data["url"]
14
+ @alt = data["alt"]
15
+ end
16
+ end
17
+
18
+ # A monetary value with an optional currency and formatted string.
19
+ class Price
20
+ attr_reader :amount, :currency, :formatted
21
+
22
+ def initialize(data)
23
+ @amount = data["amount"]
24
+ @currency = data["currency"]
25
+ @formatted = data["formatted"]
26
+ end
27
+ end
28
+
29
+ # Stock availability information for a variant. Always present.
30
+ class Availability
31
+ attr_reader :in_stock, :text
32
+
33
+ def initialize(data)
34
+ data ||= {}
35
+ @in_stock = data["inStock"] || false
36
+ @text = data["text"]
37
+ end
38
+ end
39
+
40
+ # Sale pricing for a variant, carrying the pre-sale original price.
41
+ class Sale
42
+ attr_reader :original_price
43
+
44
+ def initialize(data)
45
+ @original_price = data["originalPrice"] && Price.new(data["originalPrice"])
46
+ end
47
+ end
48
+
49
+ # A purchasable variant of a product. Pricing, availability, and images
50
+ # live here rather than on the top-level product.
51
+ class Variant
52
+ attr_reader :id, :sku, :title, :values, :price, :sale,
53
+ :availability, :images
54
+
55
+ def initialize(data)
56
+ @id = data["id"]
57
+ @sku = data["sku"]
58
+ @title = data["title"]
59
+ @values = data["values"]
60
+ @price = data["price"] && Price.new(data["price"])
61
+ @sale = data["sale"] && Sale.new(data["sale"])
62
+ @availability = Availability.new(data["availability"])
63
+ @images = (data["images"] || []).map { |img| Image.new(img) }
64
+ end
65
+ end
66
+
67
+ attr_reader :title, :brand, :category, :url, :description, :variants
68
+
69
+ def initialize(data)
70
+ @title = data["title"]
71
+ @brand = data["brand"]
72
+ @category = data["category"]
73
+ @url = data["url"]
74
+ @description = data["description"]
75
+ @variants = (data["variants"] || []).map { |variant| Variant.new(variant) }
76
+ end
77
+
78
+ def to_s
79
+ "ProductProfile{title=#{title || 'untitled'}, url=#{url || 'unknown'}}"
80
+ end
81
+ end
82
+ end
83
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Firecrawl
4
- VERSION = "1.8.1"
4
+ VERSION = "1.9.1"
5
5
  end
data/lib/firecrawl.rb CHANGED
@@ -4,6 +4,7 @@ require_relative "firecrawl/version"
4
4
  require_relative "firecrawl/errors"
5
5
  require_relative "firecrawl/http_client"
6
6
  require_relative "firecrawl/models/query_format"
7
+ require_relative "firecrawl/models/product_profile"
7
8
  require_relative "firecrawl/models/document"
8
9
  require_relative "firecrawl/models/scrape_options"
9
10
  require_relative "firecrawl/models/crawl_options"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: firecrawl-sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.1
4
+ version: 1.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Firecrawl
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-06-15 00:00:00.000000000 Z
11
+ date: 2026-06-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A type-safe Ruby client for the Firecrawl v2 API. Supports scraping,
14
14
  crawling, batch scraping, URL mapping, web search, and AI agent operations.
@@ -41,6 +41,7 @@ files:
41
41
  - lib/firecrawl/models/monitor.rb
42
42
  - lib/firecrawl/models/parse_file.rb
43
43
  - lib/firecrawl/models/parse_options.rb
44
+ - lib/firecrawl/models/product_profile.rb
44
45
  - lib/firecrawl/models/query_format.rb
45
46
  - lib/firecrawl/models/scrape_options.rb
46
47
  - lib/firecrawl/models/search_data.rb