firecrawl-sdk 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -3
- data/lib/firecrawl/models/document.rb +5 -1
- data/lib/firecrawl/models/parse_options.rb +2 -2
- data/lib/firecrawl/models/query_format.rb +41 -1
- data/lib/firecrawl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 364810a7f7627c43f70a5d9f008f3a3fc0bcfb2f655a780190d93f8b577de1bf
|
|
4
|
+
data.tar.gz: 8191bc44441edd5d9abe7671cfc9851388058be954684cc92ddf0e37cf7e3669
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 83d1e41c97ec1abb35b8d14cb0d7003b1a8a3aadbd99ff0526c088e52b75405a80b7dda5b6cb8e6e7ea9edcaa1e519d13e8b5884dcd04880cdb24b4f7a7346d6
|
|
7
|
+
data.tar.gz: 057e47a4e06af5dc218413fd574aa7b6f22a09e3ec3878efe2d3cc4d6ef0dbdbd6b2db28ab11b48fbb7385d7b6b76d58f452b04c0698468e3597c5e08af50c58
|
data/README.md
CHANGED
|
@@ -11,7 +11,7 @@ Ruby SDK for the [Firecrawl](https://firecrawl.dev) v2 web scraping API.
|
|
|
11
11
|
Add to your `Gemfile`:
|
|
12
12
|
|
|
13
13
|
```ruby
|
|
14
|
-
gem "firecrawl-sdk", "~> 1.4"
|
|
14
|
+
gem "firecrawl-sdk", "~> 1.5"
|
|
15
15
|
```
|
|
16
16
|
|
|
17
17
|
Or install directly:
|
|
@@ -63,12 +63,23 @@ doc = client.scrape("https://example.com",
|
|
|
63
63
|
puts doc.html
|
|
64
64
|
```
|
|
65
65
|
|
|
66
|
+
### Video Extraction
|
|
67
|
+
|
|
68
|
+
Use the `video` format on supported video URLs, including YouTube and TikTok. The returned `video` field is a signed URL to the extracted video file.
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
doc = client.scrape("https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
|
72
|
+
Firecrawl::Models::ScrapeOptions.new(formats: ["video"]))
|
|
73
|
+
|
|
74
|
+
puts doc.video
|
|
75
|
+
```
|
|
76
|
+
|
|
66
77
|
### Parse
|
|
67
78
|
|
|
68
79
|
Upload a local file (`html`, `pdf`, `docx`, etc.) via multipart form data and
|
|
69
80
|
parse it synchronously. Parse options intentionally exclude browser-only
|
|
70
|
-
features such as change tracking, screenshot, branding,
|
|
71
|
-
location, and mobile. The `proxy` option only accepts `"auto"` or `"basic"`.
|
|
81
|
+
features such as change tracking, screenshot, branding, audio, video, actions,
|
|
82
|
+
wait_for, location, and mobile. The `proxy` option only accepts `"auto"` or `"basic"`.
|
|
72
83
|
|
|
73
84
|
```ruby
|
|
74
85
|
# From disk
|
|
@@ -6,7 +6,8 @@ module Firecrawl
|
|
|
6
6
|
class Document
|
|
7
7
|
attr_reader :markdown, :html, :raw_html, :json, :summary,
|
|
8
8
|
:metadata, :links, :images, :screenshot, :audio,
|
|
9
|
-
:attributes, :actions, :
|
|
9
|
+
:video, :attributes, :actions, :answer, :highlights,
|
|
10
|
+
:warning, :change_tracking, :branding
|
|
10
11
|
|
|
11
12
|
def initialize(data)
|
|
12
13
|
@markdown = data["markdown"]
|
|
@@ -19,8 +20,11 @@ module Firecrawl
|
|
|
19
20
|
@images = data["images"]
|
|
20
21
|
@screenshot = data["screenshot"]
|
|
21
22
|
@audio = data["audio"]
|
|
23
|
+
@video = data["video"]
|
|
22
24
|
@attributes = data["attributes"]
|
|
23
25
|
@actions = data["actions"]
|
|
26
|
+
@answer = data["answer"]
|
|
27
|
+
@highlights = data["highlights"]
|
|
24
28
|
@warning = data["warning"]
|
|
25
29
|
@change_tracking = data["changeTracking"]
|
|
26
30
|
@branding = data["branding"]
|
|
@@ -5,10 +5,10 @@ module Firecrawl
|
|
|
5
5
|
# Options for parsing uploaded files via `/v2/parse`.
|
|
6
6
|
#
|
|
7
7
|
# Parse does not support browser-rendering features (actions, waitFor,
|
|
8
|
-
# location, mobile) nor screenshot/branding/changeTracking formats. The
|
|
8
|
+
# location, mobile) nor screenshot/branding/audio/video/changeTracking formats. The
|
|
9
9
|
# proxy field only accepts "auto" or "basic".
|
|
10
10
|
class ParseOptions
|
|
11
|
-
UNSUPPORTED_FORMATS = %w[changeTracking screenshot screenshot@fullPage branding].freeze
|
|
11
|
+
UNSUPPORTED_FORMATS = %w[changeTracking screenshot screenshot@fullPage branding audio video].freeze
|
|
12
12
|
|
|
13
13
|
FIELDS = %i[
|
|
14
14
|
formats headers include_tags exclude_tags only_main_content
|
|
@@ -2,7 +2,47 @@
|
|
|
2
2
|
|
|
3
3
|
module Firecrawl
|
|
4
4
|
module Models
|
|
5
|
-
#
|
|
5
|
+
# Question format for asking a question about page content.
|
|
6
|
+
class QuestionFormat
|
|
7
|
+
attr_reader :question
|
|
8
|
+
|
|
9
|
+
def initialize(question:)
|
|
10
|
+
@question = question
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def to_h
|
|
14
|
+
{
|
|
15
|
+
"type" => "question",
|
|
16
|
+
"question" => question,
|
|
17
|
+
}
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def type
|
|
21
|
+
"question"
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Highlights format for extracting direct highlights from page content.
|
|
26
|
+
class HighlightsFormat
|
|
27
|
+
attr_reader :query
|
|
28
|
+
|
|
29
|
+
def initialize(query:)
|
|
30
|
+
@query = query
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def to_h
|
|
34
|
+
{
|
|
35
|
+
"type" => "highlights",
|
|
36
|
+
"query" => query,
|
|
37
|
+
}
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def type
|
|
41
|
+
"highlights"
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Deprecated query format for asking a question about page content.
|
|
6
46
|
class QueryFormat
|
|
7
47
|
MODE_FREEFORM = "freeform"
|
|
8
48
|
MODE_DIRECT_QUOTE = "directQuote"
|
data/lib/firecrawl/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: firecrawl-sdk
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.4.0
|
|
4
|
+
version: 1.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Firecrawl
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: A type-safe Ruby client for the Firecrawl v2 API. Supports scraping,
|
|
14
14
|
crawling, batch scraping, URL mapping, web search, and AI agent operations.
|