firecrawl-sdk 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/firecrawl/client.rb +73 -6
- data/lib/firecrawl/http_client.rb +6 -6
- data/lib/firecrawl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 027be403af5ba321ba2d4dbeaa774433666ece0a98cca4ac03ffbce52ac6cae2
|
|
4
|
+
data.tar.gz: d0ee19ba03f949234d00482c6e0c451986de84c4e4dfde7860c970845a23e3b4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fb9869b378f9d8252cd45ad2b5be1d7f5e9c30f3c67d4752177e3d205a689b75f3da972f389e5d70260bbde576e1677068c4b538ea82c92c25643a239debd879
|
|
7
|
+
data.tar.gz: f8ad0a882856666ef4662d6356608759513305babc5392100c1cba5ddcaffb89392c67c447aa181cb3ab97230cd0bec4f7f957c11e4585068ef58a2eadcbacda
|
data/lib/firecrawl/client.rb
CHANGED
|
@@ -39,9 +39,10 @@ module Firecrawl
|
|
|
39
39
|
backoff_factor: DEFAULT_BACKOFF_FACTOR
|
|
40
40
|
)
|
|
41
41
|
resolved_key = api_key || ENV["FIRECRAWL_API_KEY"]
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
# A nil/empty key is allowed: scrape, search, and interact fall back to the
|
|
43
|
+
# keyless free tier (rate-limited per IP). Other methods return 401 from the
|
|
44
|
+
# API until a key is provided.
|
|
45
|
+
resolved_key = nil if resolved_key.nil? || resolved_key.strip.empty?
|
|
45
46
|
|
|
46
47
|
resolved_url = api_url || ENV["FIRECRAWL_API_URL"] || DEFAULT_API_URL
|
|
47
48
|
unless resolved_url.match?(%r{\Ahttps?://}i)
|
|
@@ -78,11 +79,63 @@ module Firecrawl
|
|
|
78
79
|
|
|
79
80
|
body = { "url" => url }
|
|
80
81
|
body.merge!(options.to_h) if options
|
|
82
|
+
body["origin"] ||= "ruby-sdk@#{Firecrawl::VERSION}"
|
|
81
83
|
raw = @http.post("/v2/scrape", body)
|
|
82
84
|
data = raw["data"] || raw
|
|
83
85
|
Models::Document.new(data)
|
|
84
86
|
end
|
|
85
87
|
|
|
88
|
+
# Searches research papers via GET /v2/search/research/papers.
#
# The SDK origin tag is applied only when the caller has not supplied an
# origin option, the same way the POST endpoints default it with `||=`.
#
# @param query [String] research query
# @param options [Hash, #to_h, nil] optional query parameters; nil means none
# @return [Hash]
def search_papers(query, options = {})
  # `query` without parentheses is the search text (the parameter);
  # `query(...)` with parentheses is the private query-string builder.
  params = options.to_h.merge("query" => query)
  params["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless params["origin"] || params[:origin]
  @http.get("/v2/search/research/papers#{query(params)}")
end
|
|
96
|
+
|
|
97
|
+
# Fetches metadata for one paper via GET /v2/search/research/papers/:paper_id.
#
# @param paper_id [String] paper identifier
# @return [Hash]
# @raise [ArgumentError] if paper_id is nil, empty, or whitespace-only
def inspect_paper(paper_id)
  # A blank ID would collapse the path onto the paper search endpoint.
  raise ArgumentError, "Paper ID is required" if paper_id.to_s.strip.empty?

  # Form encoding writes a space as "+", which is a literal plus sign inside a
  # URL path; a real "+" is already emitted as "%2B", so every remaining "+"
  # stands for a space and is rewritten to "%20".
  segment = URI.encode_www_form_component(paper_id).gsub("+", "%20")
  @http.get("/v2/search/research/papers/#{segment}")
end
|
|
105
|
+
|
|
106
|
+
# Reads a paper with query-guided passages via
# GET /v2/search/research/papers/:paper_id.
#
# The SDK origin tag is applied only when the caller has not supplied an
# origin option.
#
# @param paper_id [String] paper identifier
# @param query_text [String] passage query
# @param options [Hash, #to_h, nil] optional query parameters; nil means none
# @return [Hash]
# @raise [ArgumentError] if paper_id is nil, empty, or whitespace-only
def read_paper(paper_id, query_text, options = {})
  raise ArgumentError, "Paper ID is required" if paper_id.to_s.strip.empty?

  # Form encoding writes a space as "+", which is a literal plus sign inside a
  # URL path, so spaces are rewritten to "%20" for the path segment.
  segment = URI.encode_www_form_component(paper_id).gsub("+", "%20")
  params = options.to_h.merge("query" => query_text)
  params["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless params["origin"] || params[:origin]
  @http.get("/v2/search/research/papers/#{segment}#{query(params)}")
end
|
|
117
|
+
|
|
118
|
+
# Finds papers related to a paper via
# GET /v2/search/research/papers/:paper_id/similar.
#
# The SDK origin tag is applied only when the caller has not supplied an
# origin option.
#
# @param paper_id [String] paper identifier
# @param intent [String] relatedness intent
# @param options [Hash, #to_h, nil] optional query parameters; nil means none
# @return [Hash]
# @raise [ArgumentError] if paper_id is nil, empty, or whitespace-only
def related_papers(paper_id, intent, options = {})
  # A blank ID would produce the path ".../papers//similar".
  raise ArgumentError, "Paper ID is required" if paper_id.to_s.strip.empty?

  # Form encoding writes a space as "+", which is a literal plus sign inside a
  # URL path, so spaces are rewritten to "%20" for the path segment.
  segment = URI.encode_www_form_component(paper_id).gsub("+", "%20")
  params = options.to_h.merge("intent" => intent)
  params["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless params["origin"] || params[:origin]
  @http.get("/v2/search/research/papers/#{segment}/similar#{query(params)}")
end
|
|
129
|
+
|
|
130
|
+
# Searches GitHub research content via GET /v2/search/research/github.
#
# The SDK origin tag is applied only when the caller has not supplied an
# origin option, the same way the POST endpoints default it with `||=`.
#
# @param query_text [String] GitHub query
# @param options [Hash, #to_h, nil] optional query parameters; nil means none
# @return [Hash]
def search_github(query_text, options = {})
  params = options.to_h.merge("query" => query_text)
  params["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless params["origin"] || params[:origin]
  @http.get("/v2/search/research/github#{query(params)}")
end
|
|
138
|
+
|
|
86
139
|
# Interacts with the scrape-bound browser session for a scrape job.
|
|
87
140
|
#
|
|
88
141
|
# @param job_id [String] the scrape job ID
|
|
@@ -96,6 +149,7 @@ module Firecrawl
|
|
|
96
149
|
|
|
97
150
|
body = { "code" => code, "language" => language }
|
|
98
151
|
body["timeout"] = timeout if timeout
|
|
152
|
+
body["origin"] ||= "ruby-sdk@#{Firecrawl::VERSION}"
|
|
99
153
|
@http.post("/v2/scrape/#{job_id}/interact", body)
|
|
100
154
|
end
|
|
101
155
|
|
|
@@ -377,6 +431,7 @@ module Firecrawl
|
|
|
377
431
|
|
|
378
432
|
body = { "query" => query }
|
|
379
433
|
body.merge!(options.to_h) if options
|
|
434
|
+
body["origin"] ||= "ruby-sdk@#{Firecrawl::VERSION}"
|
|
380
435
|
raw = @http.post("/v2/search", body)
|
|
381
436
|
data = raw["data"] || raw
|
|
382
437
|
Models::SearchData.new(data)
|
|
@@ -461,9 +516,21 @@ module Firecrawl
|
|
|
461
516
|
|
|
462
517
|
private
|
|
463
518
|
|
|
464
|
-
def query(**
|
|
465
|
-
|
|
466
|
-
|
|
519
|
+
# Builds a URL query string, including the leading "?", from parameters.
#
# Keys are normalized to strings before merging, so a parameter supplied under
# both "limit" and :limit is emitted once, with keyword arguments taking
# precedence over the positional hash. nil and empty-string values are
# dropped; an Array value expands into one key=value pair per remaining
# element. Booleans are rendered as "true" / "false" by #to_s.
#
# @param params [Hash, nil] parameters keyed by String or Symbol
# @param kwargs [Hash] additional parameters given as keyword arguments
# @return [String] "" when no pairs remain, otherwise "?" plus the encoded pairs
def query(params = nil, **kwargs)
  merged = (params || {}).transform_keys(&:to_s).merge(kwargs.transform_keys(&:to_s))

  pairs = merged.flat_map do |key, value|
    # Only a real Array fans out; any other value is a single item.
    items = value.is_a?(Array) ? value : [value]
    items.reject { |item| item.nil? || item == "" }.map { |item| [key, item.to_s] }
  end

  pairs.empty? ? "" : "?#{URI.encode_www_form(pairs)}"
end
|
|
468
535
|
|
|
469
536
|
def poll_crawl(job_id, poll_interval, timeout)
|
data/lib/firecrawl/http_client.rb
CHANGED
|
@@ -23,7 +23,7 @@ module Firecrawl
|
|
|
23
23
|
def post(path, body, extra_headers: {})
|
|
24
24
|
uri = URI("#{@base_url}#{path}")
|
|
25
25
|
request = Net::HTTP::Post.new(uri)
|
|
26
|
-
request["Authorization"] = "Bearer #{@api_key}"
|
|
26
|
+
request["Authorization"] = "Bearer #{@api_key}" if @api_key
|
|
27
27
|
request["Content-Type"] = "application/json"
|
|
28
28
|
extra_headers.each { |k, v| request[k] = v }
|
|
29
29
|
request.body = JSON.generate(body)
|
|
@@ -34,7 +34,7 @@ module Firecrawl
|
|
|
34
34
|
# Sends a GET request to @base_url joined with +path+ and returns whatever
# execute_with_retry returns for it.
#
# In this hunk the "-" line is the previous unconditional Authorization header
# and the "+" line is its replacement, which sets the Bearer header only when
# @api_key is truthy.
#
# @param path [String] request path appended verbatim to @base_url
def get(path)
|
|
35
35
|
uri = URI("#{@base_url}#{path}")
|
|
36
36
|
request = Net::HTTP::Get.new(uri)
|
|
37
|
-
request["Authorization"] = "Bearer #{@api_key}"
|
|
37
|
+
request["Authorization"] = "Bearer #{@api_key}" if @api_key
|
|
38
38
|
execute_with_retry(uri, request)
|
|
39
39
|
end
|
|
40
40
|
|
|
@@ -47,7 +47,7 @@ module Firecrawl
|
|
|
47
47
|
raise FirecrawlError, "Absolute URL origin (#{uri.scheme}://#{uri.host}:#{uri.port}) does not match API base URL origin (#{base_uri.scheme}://#{base_uri.host}:#{base_uri.port}). Refusing to send credentials."
|
|
48
48
|
end
|
|
49
49
|
request = Net::HTTP::Get.new(uri)
|
|
50
|
-
request["Authorization"] = "Bearer #{@api_key}"
|
|
50
|
+
request["Authorization"] = "Bearer #{@api_key}" if @api_key
|
|
51
51
|
execute_with_retry(uri, request)
|
|
52
52
|
end
|
|
53
53
|
|
|
@@ -55,7 +55,7 @@ module Firecrawl
|
|
|
55
55
|
# Sends a DELETE request to @base_url joined with +path+ and returns whatever
# execute_with_retry returns for it.
#
# In this hunk the "-" line is the previous unconditional Authorization header
# and the "+" line is its replacement, which sets the Bearer header only when
# @api_key is truthy.
#
# @param path [String] request path appended verbatim to @base_url
def delete(path)
|
|
56
56
|
uri = URI("#{@base_url}#{path}")
|
|
57
57
|
request = Net::HTTP::Delete.new(uri)
|
|
58
|
-
request["Authorization"] = "Bearer #{@api_key}"
|
|
58
|
+
request["Authorization"] = "Bearer #{@api_key}" if @api_key
|
|
59
59
|
execute_with_retry(uri, request)
|
|
60
60
|
end
|
|
61
61
|
|
|
@@ -63,7 +63,7 @@ module Firecrawl
|
|
|
63
63
|
def patch(path, body)
|
|
64
64
|
uri = URI("#{@base_url}#{path}")
|
|
65
65
|
request = Net::HTTP::Patch.new(uri)
|
|
66
|
-
request["Authorization"] = "Bearer #{@api_key}"
|
|
66
|
+
request["Authorization"] = "Bearer #{@api_key}" if @api_key
|
|
67
67
|
request["Content-Type"] = "application/json"
|
|
68
68
|
request.body = JSON.generate(body)
|
|
69
69
|
execute_with_retry(uri, request)
|
|
@@ -84,7 +84,7 @@ module Firecrawl
|
|
|
84
84
|
|
|
85
85
|
builder = lambda do
|
|
86
86
|
request = Net::HTTP::Post.new(uri)
|
|
87
|
-
request["Authorization"] = "Bearer #{@api_key}"
|
|
87
|
+
request["Authorization"] = "Bearer #{@api_key}" if @api_key
|
|
88
88
|
request["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
|
|
89
89
|
request.body = body
|
|
90
90
|
request
|
data/lib/firecrawl/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: firecrawl-sdk
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.8.0
|
|
4
|
+
version: 1.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Firecrawl
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: A type-safe Ruby client for the Firecrawl v2 API. Supports scraping,
|
|
14
14
|
crawling, batch scraping, URL mapping, web search, and AI agent operations.
|