firecrawl 0.2.0 → 0.3.0

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 7e55dc5e433f0632ab0c11feda818bf82ddf77ae8ea2fdaa624c5a9af1dccf4c
- data.tar.gz: 2781378e0a6b62c2e7befb0ebd0298b0b91b2cf249fe33115dc675606aed7bfe
+ metadata.gz: 5f895548e284fae55f2284c335f3e36d3d7dfb2c0c81fea525235968c90a2c8d
+ data.tar.gz: 91a8f19d1281a37c87b12cd37f1e312e1044808e3c7b14a976d250aea51fb989
  SHA512:
- metadata.gz: 6fa88114c36df02f9cd261159132298e9a44b01464334daeb31ea2d8d5b11122321066ddf55207fddc3bef72704b353cf42d7aebaa46ac70298ea1efe19c6885
- data.tar.gz: 622fd277c01854131b4a21742c915359c97fffa54b4dc41aff47d09b06d4d7c6c485361ff6409dc1361cf561f313d42096674a426987b5bbf696dcba1f52cd96
+ metadata.gz: 21420a482873a02c56a1caa7e7e149acd3f00543d6dc9e969f817a0abd42ff7ec442ea1cb0ebb18020f21c72af0da66927085c2595915388e5e169a3bea8a93f
+ data.tar.gz: 17b793284e7480b693f980d0b5f03e8ec1e4c606cabef3b8bd9728ad91029f7279b430bb4f6ea820347f45a7eb8cac02f82f9704fdde793ccd1028b7b86e3d18
data/firecrawl.gemspec CHANGED
@@ -1,7 +1,9 @@
+ require_relative 'lib/firecrawl/version'
+
  Gem::Specification.new do | spec |

  spec.name = 'firecrawl'
- spec.version = '0.2.0'
+ spec.version = Firecrawl::VERSION
  spec.authors = [ 'Kristoph Cichocki-Romanov' ]
  spec.email = [ 'rubygems.org@kristoph.net' ]

@@ -28,8 +30,8 @@ Gem::Specification.new do | spec |
  spec.files = Dir[ "lib/**/*.rb", "LICENSE", "README.md", "firecrawl.gemspec" ]
  spec.require_paths = [ "lib" ]

- spec.add_runtime_dependency 'faraday', '~> 2.7'
- spec.add_runtime_dependency 'dynamicschema', '~> 1.0.0.beta04'
+ spec.add_runtime_dependency 'faraday', '~> 2'
+ spec.add_runtime_dependency 'dynamicschema', '~> 2'

  spec.add_development_dependency 'rspec', '~> 3.13'
  spec.add_development_dependency 'debug', '~> 1.9'
data/lib/firecrawl/batch_scrape_options.rb ADDED
@@ -0,0 +1,67 @@
+ module Firecrawl
+ class BatchScrapeOptions
+ include DynamicSchema::Definable
+ include Helpers
+
+ schema do
+ proxy Symbol, in: [ :basic, :stealth, :auto ]
+ skip_tls_verification [ TrueClass, FalseClass ], as: :skipTlsVerification
+ mobile [ TrueClass, FalseClass ]
+ location arguments: :country do
+ country String, required: true # two digit country code
+ languages String, array: true #en-US jp etc
+ end
+ max_age Integer, as: :maxAge
+ headers Hash
+
+ only_main_content [ TrueClass, FalseClass ], as: :onlyMainContent
+ include_tags String, as: :includeTags, array: true
+ exclude_tags String, as: :excludeTags, array: true
+ remove_base64_images [ TrueClass, FalseClass ], as: :removeBase64Images
+ block_ads [ TrueClass, FalseClass ], as: :blockAds
+
+ wait_for Integer, as: :waitFor
+ timeout Integer
+ parsers Symbol, array: true, in: [ :pdf ]
+
+ formats Symbol, in: ScrapeOptions::FORMATS, array: true
+ screenshot do
+ type String, default: 'screenshot'
+ full_page [ TrueClass, FalseClass ], as: :fullPage
+ quality Integer, in: 0..100
+ viewport do
+ height Integer, required: true
+ width Integer, required: true
+ end
+ end
+
+ webhook do
+ url URI, required: true
+ headers Hash
+ metadata Hash
+ events Symbol, array: true, in: [ :completed, :page, :failed, :started ]
+ end
+
+ cache [ TrueClass, FalseClass ], as: :storeInCache
+ zero_data_retention [ TrueClass, FalseClass ], as: :zeroDataRetention
+ end
+
+ def self.build( options = nil, &block )
+ new( api_options: builder.build( options, &block ) )
+ end
+
+ def self.build!( options = nil, &block )
+ new( api_options: builder.build!( options, &block ) )
+ end
+
+ def initialize( options = nil, api_options: nil )
+ @options = self.class.builder.build( options || {} )
+ @options = api_options.merge( @options ) if api_options
+ end
+
+ def to_h
+ @options.to_h
+ end
+
+ end
+ end
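The new BatchScrapeOptions class mirrors ScrapeOptions for the v2 batch endpoint. A minimal usage sketch, assuming the DynamicSchema builder DSL shown above behaves as in the single-page ScrapeOptions; the option values and webhook endpoint are illustrative:

    options = Firecrawl::BatchScrapeOptions.build do
      formats [ :markdown, :links ]
      only_main_content true
      webhook do
        url 'https://example.com/firecrawl-webhook'   # hypothetical endpoint
        events [ :completed, :failed ]
      end
    end

    options.to_h   # keys come out in the camelCase form the API expects, e.g. :onlyMainContent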
data/lib/firecrawl/batch_scrape_request.rb CHANGED
@@ -54,7 +54,7 @@ module Firecrawl
  def submit( urls, options = nil, &block )
  if options
  options = options.is_a?( ScrapeOptions ) ? options : ScrapeOptions.build( options.to_h )
- options = options.to_h
+ options = ScrapeOptions.normalize_options( options.to_h )
  else
  options = {}
  end
@@ -63,8 +63,7 @@ module Firecrawl
  result = nil
  attributes = JSON.parse( response.body, symbolize_names: true ) rescue nil
  if response.success?
- attributes ||= { success: false, status: :failed }
- result = BatchScrapeResult.new( attributes[ :success ], attributes )
+ result = BatchScrapeResult.new( attributes )
  else
  result = ErrorResult.new( response.status, attributes || {} )
  end
@@ -95,7 +94,7 @@ module Firecrawl
  attributes = JSON.parse( response.body, symbolize_names: true ) rescue nil
  if response.success?
  attributes ||= { success: false, status: :failed }
- result = batch_result.merge( attributes )
+ result = batch_result.merge_attributes( attributes )
  else
  result = ErrorResult.new( response.status, attributes || {} )
  end
@@ -131,7 +130,7 @@ module Firecrawl
  # the next url should not be set by this method so that retrieve and retrieve_all do
  # not impact each other
  attributes.delete( :next )
- result = batch_result.merge( attributes )
+ result = batch_result.merge_attributes( attributes )
  else
  result = ErrorResult.new( response.status, attributes || {} )
  end
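With these changes, submit builds the v2 request body through ScrapeOptions.normalize_options and wraps the raw response attributes in the struct-backed BatchScrapeResult, while retrieve merges follow-up responses with merge_attributes. A hedged sketch of the submit-then-retrieve flow, assuming the 0.2.x request/response pattern still applies; the API key and URLs are placeholders:

    request = Firecrawl::BatchScrapeRequest.new( api_key: ENV[ 'FIRECRAWL_API_KEY' ] )

    response = request.submit( [ 'https://example.com', 'https://example.org' ], options )
    if response.success?
      batch_result = response.result
      response = request.retrieve( batch_result )   # poll for the pages scraped so far
    end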
data/lib/firecrawl/batch_scrape_result.rb CHANGED
@@ -1,63 +1,38 @@
  module Firecrawl
- class BatchScrapeResult

- def initialize( success, attributes )
- @success = success
- @attributes = attributes || {}
- end
-
- def success?
- @success || false
- end
+ BatchScrapeResultSchema = DynamicSchema::Struct.define do
+ success [ TrueClass, FalseClass ]
+ id String
+ invalid_urls String, array: true

- def status
- # the initial Firecrawl response does not have a status so we synthesize a 'scraping'
- # status if the operation was otherwise successful
- @attributes[ :status ]&.to_sym || ( @success ? :scraping : :failed )
- end
+ total Integer
+ completed Integer
+ credits_used Integer, as: :creditsUsed

- def status?( status )
- self.status == status
- end
-
- def id
- @attributes[ :id ]
- end
+ url String
+ _value :next, type: String
+
+ expires_at Date, as: :expiresAt
+ data ScrapeResultData, array: true, default: []
+ end

- def total
- @attributes[ :total ] || 0
- end
+ class BatchScrapeResult < BatchScrapeResultSchema
+ extend Forwardable

- def completed
- @attributes[ :completed ] || 0
- end
+ def_delegators :data,
+ :[], :[]=, :<<, :push, :pop, :shift, :unshift,
+ :length, :size, :empty?, :each

- def credits_used
- @attributes[ :creditsUsed ] || 0
- end
+ def scraping?() = !!self.next_url
+
+ def next_url() = self.next || self.url

- def expires_at
- Date.parse( @attributes[ :expiresAt ] ) rescue nil
+ def merge_attributes( attributes )
+ new_attributes = self.to_h.merge( attributes )
+ data = attributes[ :data ]
+ new_attributes[ :data ] = self.data.concat( data ) if data
+ self.class.new( attributes )
  end
-
- def url
- @attributes[ :url ]
- end
-
- def next_url
- @attributes[ :next ] || @attributes[ :url ]
- end
-
- def scrape_results
- success = @attributes[ :success ]
- # note the &.compact is here because I've noted null entries in the data
- ( @attributes[ :data ]&.compact || [] ).map do | attr |
- ScrapeResult.new( success, attr )
- end
- end
-
- def merge( attributes )
- self.class.new( attributes[ :success ], @attributes.merge( attributes ) )
- end
  end
+
  end
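BatchScrapeResult is now a DynamicSchema::Struct subclass whose Array-like methods are delegated to data, so a result can be indexed and iterated directly instead of calling the removed scrape_results method. A small sketch under that assumption; the metadata key is illustrative:

    batch_result.completed     # Integer read straight from the struct
    batch_result.scraping?     # true while a next url is still present

    batch_result.each do | page |
      puts page.metadata[ 'title' ] if page.markdown
    end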
data/lib/firecrawl/crawl_options.rb CHANGED
@@ -6,12 +6,23 @@ module Firecrawl
  schema do
  exclude_paths String, as: :excludePaths, array: true
  include_paths String, as: :includePaths, array: true
- maximum_depth Integer, as: :maxDepth
- ignore_sitemap [ TrueClass, FalseClass ], as: :ignoreSitemap
+ maximum_depth Integer, as: :maxDiscoveryDepth
+ sitemap Symbol, in: [ :skip, :include ]
  limit Integer, in: (0..)
- allow_backward_links [ TrueClass, FalseClass ], as: :allowBackwardLinks
+ ignore_query_parameters [ TrueClass, FalseClass ], as: :ignoreQueryParameters
+ crawl_entire_domain [ TrueClass, FalseClass ], as: :crawlEntireDomain
  allow_external_links [ TrueClass, FalseClass ], as: :allowExternalLinks
- webhook_uri URI, as: :webhook
+ allow_subdomains String, as: :allowSubdomains
+ delay Integer, in: (0..)
+ max_concurency Integer, in: (0..), as: :maxConcurrency
+
+ webhook do
+ url URI, required: true
+ headers Hash
+ metadata Hash
+ events Symbol, array: true, in: [ :completed, :page, :failed, :started ]
+ end
+
  scrape_options as: :scrapeOptions, &ScrapeOptions.schema
  end

@@ -26,11 +37,6 @@ module Firecrawl
  def initialize( options = nil, api_options: nil )
  @options = self.class.builder.build( options || {} )
  @options = api_options.merge( @options ) if api_options
-
- scrape_options = @options[ :scrapeOptions ]
- if scrape_options
- scrape_options[ :formats ]&.map! { | format | string_camelize( format.to_s ) }
- end
  end

  def to_h
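The crawl schema now tracks the v2 parameter names (maxDiscoveryDepth, crawlEntireDomain, a sitemap mode instead of ignoreSitemap, and a structured webhook). A hedged build sketch, assuming the same builder DSL; the values are illustrative:

    options = Firecrawl::CrawlOptions.build do
      maximum_depth 2
      crawl_entire_domain true
      sitemap :include
      limit 100
      scrape_options do
        formats [ :markdown ]
      end
    end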
data/lib/firecrawl/crawl_request.rb CHANGED
@@ -57,6 +57,11 @@ module Firecrawl
  if options
  options = options.is_a?( CrawlOptions ) ? options : CrawlOptions.build( options.to_h )
  options = options.to_h
+
+ scrape_options = options[ :scrapeOptions ]
+ if scrape_options
+ options[ :scrapeOptions ] = ScrapeOptions.normalize_options( scrape_options )
+ end
  else
  options = {}
  end
@@ -65,8 +70,7 @@ module Firecrawl
  result = nil
  attributes = JSON.parse( response.body, symbolize_names: true ) rescue nil
  if response.success?
- attributes ||= { success: false, status: :failed }
- result = CrawlResult.new( attributes[ :success ], attributes )
+ result = CrawlResult.new( attributes )
  else
  result = ErrorResult.new( response.status, attributes )
  end
@@ -96,7 +100,7 @@ module Firecrawl
  result = nil
  attributes = JSON.parse( response.body, symbolize_names: true ) rescue nil
  if response.success?
- result = crawl_result.merge( attributes || { success: false, status: :failed } )
+ result = crawl_result.merge_attributes( attributes || { success: false, status: :failed } )
  else
  result = ErrorResult.new( response.status, attributes || {} )
  end
@@ -123,7 +127,7 @@ module Firecrawl
  result = nil
  attributes = JSON.parse( response.body, symbolize_names: true ) rescue nil
  if response.success?
- result = crawl_result.merge( attributes || { success: false, status: :failed } )
+ result = crawl_result.merge_attributes( attributes || { success: false, status: :failed } )
  else
  result = ErrorResult.new( response.status, attributes || {} )
  end
data/lib/firecrawl/crawl_result.rb CHANGED
@@ -1,63 +1,33 @@
  module Firecrawl
- class CrawlResult
-
- def initialize( success, attributes )
- @success = success
- @attributes = attributes || {}
- end
-
- def success?
- @success || false
- end
-
- def status
- # the initial Firecrawl response does not have a status so we synthesize a 'crawling'
- # status if the operation was otherwise successful
- @attributes[ :status ]&.to_sym || ( @success ? :scraping : :failed )
- end
-
- def status?( status )
- self.status == status
- end
+ CrawlResultSchema = DynamicSchema::Struct.define do
+ success [ TrueClass, FalseClass ]
+ id String
+ total Integer
+ completed Integer
+ credits_used Integer, as: :creditsUsed
+ url String
+ _value :next, type: String
+ expires_at Date, as: :expiresAt
+ data ScrapeResultData, array: true, default: []
+ end

- def id
- @attributes[ :id ]
- end
+ class CrawlResult < CrawlResultSchema
+ extend Forwardable

- def total
- @attributes[ :total ] || 0
- end
+ def_delegators :data,
+ :[], :[]=, :<<, :push, :pop, :shift, :unshift,
+ :length, :size, :empty?, :each

- def completed
- @attributes[ :completed ] || 0
- end
+ def success?() = self.success
+ def crawling?() = !!self.next_url

- def credits_used
- @attributes[ :creditsUsed ] || 0
- end
+ def next_url() = self.next || self.url

- def expires_at
- Date.parse( @attributes[ :expiresAt ] ) rescue nil
+ def merge_attributes( attributes )
+ new_attributes = self.to_h.merge( attributes )
+ data = attributes[ :data ]
+ new_attributes[ :data ] = self.data.concat( data ) if data
+ self.class.new( attributes )
  end
-
- def url
- @attributes[ :url ]
- end
-
- def next_url
- @attributes[ :next ] || @attributes[ :url ]
- end
-
- def scrape_results
- success = @attributes[ :success ]
- # note the &.compact is here because I've noted null entries in the data
- ( @attributes[ :data ]&.compact || [] ).map do | attr |
- ScrapeResult.new( success, attr )
- end
- end
-
- def merge( attributes )
- self.class.new( attributes[ :success ], @attributes.merge( attributes ) )
- end
  end
  end
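CrawlResult follows the same struct-plus-delegation pattern as BatchScrapeResult above. A hedged sketch of kicking off a crawl and fetching its pages, assuming CrawlRequest#submit and #retrieve follow the same response pattern as the batch scrape request; key, URL, and options are placeholders:

    request = Firecrawl::CrawlRequest.new( api_key: ENV[ 'FIRECRAWL_API_KEY' ] )
    response = request.submit( 'https://example.com', options )
    if response.success?
      crawl_result = response.result
      response = request.retrieve( crawl_result )   # fetch pages crawled so far
      response.result.each { | page | puts page.markdown } if response.success?
    end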
data/lib/firecrawl/map_options.rb CHANGED
@@ -1,30 +1,27 @@
  module Firecrawl
  class MapOptions
  include DynamicSchema::Definable
+ include DynamicSchema::Buildable

  schema do
- search String
- ignore_sitemap [ TrueClass, FalseClass ]
- ignore_subdomains [ TrueClass, FalseClass ]
- limit Integer
+ search String
+ sitemap Symbol, in: [ :skip, :include ]
+ include_subdomains [ TrueClass, FalseClass ], as: :includeSubdomains
+ ignore_query_parameters [ TrueClass, FalseClass ], as: :ignoreQueryParameters
+ limit Integer, in: (0..)
+ timeout Integer, in: (0..)
+ location arguments: :country do
+ country String, required: true # two digit country code
+ languages String, array: true #en-US jp etc
+ end
  end

- def self.build( options = nil, &block )
- new( api_options: builder.build( options, &block ) )
- end
-
- def self.build!( options = nil, &block )
- new( api_options: builder.build!( options, &block ) )
- end
-
  def initialize( options = {}, api_options: nil )
  @options = self.class.builder.build( options || {} )
  @options = api_options.merge( @options ) if api_options
  end

- def to_h
- @options.to_h
- end
+ def to_h() = @options.to_h

  end
  end
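MapOptions now picks up build and build! from DynamicSchema::Buildable instead of defining them locally, and gains the v2 sitemap, timeout, and location settings. A hedged sketch with illustrative values:

    options = Firecrawl::MapOptions.build do
      search 'docs'
      sitemap :include
      limit 50
      location 'US'
    end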
data/lib/firecrawl/map_request.rb CHANGED
@@ -45,8 +45,7 @@ module Firecrawl
  result = nil
  if response.success?
  attributes = ( JSON.parse( response.body, symbolize_names: true ) rescue nil )
- attributes ||= { success: false }
- result = MapResult.new( attributes[ :success ], attributes )
+ result = MapResult.new( attributes )
  else
  result = ErrorResult.new( response.status, attributes )
  end
data/lib/firecrawl/map_result.rb CHANGED
@@ -1,29 +1,21 @@
  module Firecrawl
- class MapResult

- def initialize( success, attributes )
- @success = success
- @attributes = attributes
- end
-
- ##
- # The +success?+ method returns +true+ if the scraping was successful.
- #
- # Note that the response +success?+ tells you if the call to the Firecrawl api was successful
- # while this +success?+ method tells you if the actual scraping operation was successful.
- #
- def success?
- @success || false
- end
-
- ##
- # The +links+ method returns an array of the links that were scraped from the the page.
- # The +links+ are empty unless the request options +formats+ included +links+.
- #
- def links
- @attributes[ :links ] || []
- end
+ MapResultSchema = DynamicSchema::Struct.define do
+ success [ TrueClass, FalseClass ]
+ links array: true do
+ url String
+ title String
+ description String
+ end
+ end

+ class MapResult < MapResultSchema
+ extend Forwardable
+ def_delegators :links,
+ :[], :[]=, :<<, :push, :pop, :shift, :unshift,
+ :length, :size, :empty?, :each
+ def success?() = success
  end
+
  end

data/lib/firecrawl/request.rb CHANGED
@@ -28,7 +28,7 @@ module Firecrawl
  #
  class Request

- BASE_URI = 'https://api.firecrawl.dev/v1'
+ BASE_URI = 'https://api.firecrawl.dev/v2'

  ##
  # The +initialize+ method initializes the +Request+ instance. You MUST pass an +api_key+ and
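Every request class builds its endpoints from this constant, so the whole gem now targets the Firecrawl v2 API:

    Firecrawl::Request::BASE_URI   # => "https://api.firecrawl.dev/v2"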
data/lib/firecrawl/scrape_options.rb CHANGED
@@ -3,24 +3,42 @@ module Firecrawl
  include DynamicSchema::Definable
  include Helpers

- FORMATS = [ :markdown, :links, :html, :raw_html, :screenshot, :"screenshot@full_page" ]
+ FORMATS = [ :summary, :markdown, :html, :raw_html, :links, :screenshot ]

  ACTIONS = [ :wait, :click, :write, :press, :screenshot, :scrape ]

  schema do
- # note: both format and formats are defined as a semantic convenience
- format String, as: :formats, array: true, in: FORMATS
- formats String, array: true, in: FORMATS
+
+ proxy Symbol, in: [ :basic, :stealth, :auto ]
+ skip_tls_verification [ TrueClass, FalseClass ], as: :skipTlsVerification
+ mobile [ TrueClass, FalseClass ]
+ location arguments: :country do
+ country String, required: true # two digit country code
+ languages String, array: true #en-US jp etc
+ end
+ max_age Integer, as: :maxAge
+
+ headers Hash
+
  only_main_content [ TrueClass, FalseClass ], as: :onlyMainContent
  include_tags String, as: :includeTags, array: true
  exclude_tags String, as: :excludeTags, array: true
- wait_for Integer
+ wait_for Integer, as: :waitFor
  timeout Integer
- extract do
- schema Hash
- system_prompt String, as: :systemPrompt
- prompt String
- end
+ parsers Symbol, array: true, in: [ :pdf ]
+
+ formats Symbol, in: FORMATS, array: true
+ screenshot do
+ type String, default: 'screenshot'
+ full_page [ TrueClass, FalseClass ], as: :fullPage
+ quality Integer, in: 0..100
+ viewport do
+ height Integer, required: true
+ width Integer, required: true
+ end
+ end
+
+
  action as: :actions, arguments: :type, array: true do
  type Symbol, required: true, in: ACTIONS
  # wait
@@ -32,6 +50,9 @@ module Firecrawl
  # press
  key String
  end
+
+ cache [ TrueClass, FalseClass ], as: :storeInCache
+ zero_retention [ TrueClass, FalseClass ], as: :zeroDataRetention
  end

  def self.build( options = nil, &block )
@@ -42,10 +63,21 @@ module Firecrawl
  new( api_options: builder.build!( options, &block ) )
  end

+ def self.normalize_options( options )
+ options = options&.dup || {}
+ screenshot = options.delete( :screenshot )
+ if screenshot
+ formats = options[ :formats ] || []
+ formats.delete( :screenshot )
+ formats << screenshot
+ options[ :formats ] = formats
+ end
+ options
+ end
+
  def initialize( options = {}, api_options: nil )
  @options = self.class.builder.build( options || {} )
  @options = api_options.merge( @options ) if api_options
- @options[ :formats ]&.map! { | format | string_camelize( format.to_s ) }
  end

  def to_h
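normalize_options exists because in the v2 request body a screenshot is expressed as an object inside formats rather than as a separate top-level key; the method folds the screenshot sub-hash built by the schema into formats. A rough sketch of the effect, assuming the builder DSL above:

    options = Firecrawl::ScrapeOptions.build do
      formats [ :markdown, :screenshot ]
      screenshot do
        full_page true
        quality 80
      end
    end

    Firecrawl::ScrapeOptions.normalize_options( options.to_h )
    # => roughly { formats: [ :markdown, { type: 'screenshot', fullPage: true, quality: 80 } ], ... }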
data/lib/firecrawl/scrape_request.rb CHANGED
@@ -36,8 +36,8 @@ module Firecrawl
  #
  def submit( url, options = nil, &block )
  if options
- options = options.is_a?( ScrapeOptions ) ? options : ScrapeOptions.build( options.to_h )
- options = options.to_h
+ options = options.is_a?( ScrapeOptions ) ? options : ScrapeOptions.build!( options.to_h )
+ options = ScrapeOptions.normalize_options( options.to_h )
  else
  options = {}
  end
@@ -47,8 +47,7 @@ module Firecrawl
  result = nil
  if response.success?
  attributes = ( JSON.parse( response.body, symbolize_names: true ) rescue nil )
- attributes ||= { success: false }
- result = ScrapeResult.new( attributes[ :success ], attributes[ :data ] )
+ result = ScrapeResult.new( attributes )
  else
  result = ErrorResult.new( response.status, attributes )
  end
@@ -56,5 +55,7 @@ module Firecrawl
  ResponseMethods.install( response, result )
  end

+ private
+
  end
  end
data/lib/firecrawl/scrape_result.rb CHANGED
@@ -1,92 +1,47 @@
  module Firecrawl
- class ScrapeResult

- def initialize( success, attributes )
- @success = success
- @attributes = attributes || {}
- end
-
- ##
- # The +success?+ method returns +true+ if the scraping was successful.
- #
- # Note that the response +success?+ tells you if the call to the Firecrawl api was successful
- # while this +success?+ method tells you if the actual scraping operation was successful.
- #
- def success?
- @success || false
- end
-
- def metadata
- unless @metadata
- metadata = @attributes[ :metadata ] || {}
- @metadata = metadata.transform_keys do | key |
- key.to_s.gsub( /([a-z])([A-Z])/, '\1_\2' ).downcase
- end
- # remove the camelCase forms injected by Firecrawl
- @metadata.delete_if do | key, _ |
- key.start_with?( 'og_' ) && @metadata.key?( key.sub( 'og_', 'og:' ) )
- end
- end
- @metadata
- end
-
- ##
- # The +markdown+ method returns scraped content that has been converted to markdown. The
- # markdown content is present only if the request options +formats+ included +markdown+.
- #
- def markdown
- @attributes[ :markdown ]
- end
+ ScrapeResultDataSchema = DynamicSchema::Struct.define do
+ metadata Hash

- ##
- # The +html+ method returns scraped html content. The html content is present only if the
- # request options +formats+ included +html+.
- #
- def html
- @attributes[ :html ]
- end
+ markdown String
+ html String
+ raw_html String, as: :rawHtml

- ##
- # The +raw_html+ method returns the full scraped html content of the page. The raw html
- # content is present only if the request options +formats+ included +raw_html+.
- #
- def raw_html
- @attributes[ :rawHtml ]
- end
+ screenshot_url String, as: :screenshot
+ links String, array: true
+ actions Hash, default: {}

- ##
- # The +screenshot_url+ method returns the url of the screenshot of the requested page. The
- # screenshot url is present only if the request options +formats+ included +screenshot+ or
- # +screenshot@full_page+.
- #
- def screenshot_url
- @attributes[ :screenshot ]
- end
-
- ##
- # The +links+ method returns an array of the links that were scraped from the the page.
- # The +links+ are empty unless the request options +formats+ included +links+.
- #
- def links
- @attributes[ :links ] || []
- end
+ warning String
+ end

- ##
- # The +actions+ method returns an object of action results ( +scrapes+ or +screenshots+ ).
- # The +actions+ are empty unless the request options included +scrape+ or +scresshot+
- # actions.
- #
- def actions
- @attributes[ :actions ] || {}
+ class ScrapeResultData < ScrapeResultDataSchema
+ def metadata() = @metadata ||= normalize_metadata( super )
+ private
+ def normalize_metadata( metadata )
+ return nil unless metadata
+ metadata = metadata.transform_keys do | key |
+ key.to_s.gsub( /([a-z])([A-Z])/, '\1_\2' ).downcase
+ end
+ # remove the camelCase forms injected by Firecrawl
+ metadata.delete_if do | key, _ |
+ key.start_with?( 'og_' ) && metadata.key?( key.sub( 'og_', 'og:' ) )
+ end
+ metadata
  end
+ end

- def llm_extraction
- @attributes[ :llm_extraction ] || {}
- end
+ ScrapeResultSchema = DynamicSchema::Struct.define do
+ success [ TrueClass, FalseClass ]
+ data ScrapeResultData
+ end

- def warning
- @attributes[ :warning ]
- end
+ class ScrapeResult < ScrapeResultSchema
+ extend Forwardable
+ def success?() = self.success
+ def_delegators :data,
+ :metadata, :warning,
+ :markdown, :html, :raw_html,
+ :screenshot_url, :links, :actions

  end
  end
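ScrapeResult now wraps the whole v2 response body ({ success:, data: }) and forwards the content readers to the nested ScrapeResultData, so existing call sites such as result.markdown keep working. A hedged end-to-end sketch; the API key and URL are placeholders:

    request = Firecrawl::ScrapeRequest.new( api_key: ENV[ 'FIRECRAWL_API_KEY' ] )
    response = request.submit( 'https://example.com', formats: [ :markdown, :links ] )
    if response.success?
      result = response.result
      puts result.markdown if result.success?
      result.links&.each { | link | puts link }
    end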
data/lib/firecrawl/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Firecrawl
+ VERSION = '0.3.0'
+ end
data/lib/firecrawl.rb CHANGED
@@ -5,6 +5,8 @@ require 'uri'
  require 'faraday'
  require 'dynamic_schema'

+ require_relative 'firecrawl/version'
+
  require_relative 'firecrawl/helpers'
  require_relative 'firecrawl/error_result'
  require_relative 'firecrawl/request'
@@ -14,6 +16,7 @@ require_relative 'firecrawl/scrape_options'
  require_relative 'firecrawl/scrape_result'
  require_relative 'firecrawl/scrape_request'
  require_relative 'firecrawl/batch_scrape_result'
+ require_relative 'firecrawl/batch_scrape_options'
  require_relative 'firecrawl/batch_scrape_request'
  require_relative 'firecrawl/map_options'
  require_relative 'firecrawl/map_result'
@@ -28,4 +31,3 @@ module Firecrawl
  extend ModuleMethods
  end

-
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: firecrawl
  version: !ruby/object:Gem::Version
- version: 0.2.0
+ version: 0.3.0
  platform: ruby
  authors:
  - Kristoph Cichocki-Romanov
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-11-29 00:00:00.000000000 Z
+ date: 2025-09-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: faraday
@@ -16,28 +16,28 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '2.7'
+ version: '2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '2.7'
+ version: '2'
  - !ruby/object:Gem::Dependency
  name: dynamicschema
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.0.0.beta04
+ version: '2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.0.0.beta04
+ version: '2'
  - !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
@@ -94,6 +94,7 @@ files:
  - README.md
  - firecrawl.gemspec
  - lib/firecrawl.rb
+ - lib/firecrawl/batch_scrape_options.rb
  - lib/firecrawl/batch_scrape_request.rb
  - lib/firecrawl/batch_scrape_result.rb
  - lib/firecrawl/crawl_options.rb
@@ -110,6 +111,7 @@ files:
  - lib/firecrawl/scrape_options.rb
  - lib/firecrawl/scrape_request.rb
  - lib/firecrawl/scrape_result.rb
+ - lib/firecrawl/version.rb
  homepage: https://github.com/EndlessInternational/firecrawl
  licenses:
  - MIT
@@ -131,7 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.5.19
+ rubygems_version: 3.5.22
  signing_key:
  specification_version: 4
  summary: The Firecrawl gem implements a lightweight interface to the Firecrawl.dev