crawlbase 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 50c42144b472e240907828a2656215a7f1e5004f07a3288d3eafca184b4dc16c
4
- data.tar.gz: fdbbd3ebe2a64ecde61e34b94bd5716574d04d5f1bffb5e1a9225f6500b0a793
3
+ metadata.gz: 6a1ccd286be2a6973c23cc8c9dc84a292af4d49910995054312a68e9beca6761
4
+ data.tar.gz: 0da3a67ba36a2151d6b982f7a9dc961adae4a1a58f0aac311d089e2dfd99cce9
5
5
  SHA512:
6
- metadata.gz: c29927980f6cf82b431c7385e78429802916ebe188a40e789ca17e1c5a63d7183f4f8b5b30d8b9b7bd42c84fa6f1223eda48703cf317395e359a422a88cddfee
7
- data.tar.gz: db7cf49a5dc174920a76bb35fecb92097b61025e3b2f7fb372a7143282fba93edd748763020001b0cdd928da4500cfe1a948e8123a2232a614cbfb399f3a4843
6
+ metadata.gz: 4d4b9ce783a5fa43cb0dab14550f01f2fcaed958b31bceeab91a9540456361a226f18fb41e11ae874f7d20862ba49d62088ab492b44c8a9b73c96c5812b8d4f3
7
+ data.tar.gz: 39ce7ce6996726bb7a3043277b1170fbc06b05f4cebf2903dc1db242bb4ae865de7a5a7a5eaa5af9f7861fcbccbfff92cce8227f5b2ed7df284c47d4c9f7cbe0
data/README.md CHANGED
@@ -34,6 +34,12 @@ You can get a token for free by [creating a Crawlbase account](https://crawlbase
34
34
  api = Crawlbase::API.new(token: 'YOUR_TOKEN')
35
35
  ```
36
36
 
37
+ By default, the timeout for API requests is set to 90 seconds. You can configure a custom timeout (in seconds) by passing a `timeout` option during initialization; the value is applied to both opening the connection and reading the response.
38
+
39
+ ```ruby
40
+ api = Crawlbase::API.new(token: 'YOUR_TOKEN', timeout: 120)
41
+ ```
42
+
37
43
  ### GET requests
38
44
 
39
45
  Pass the URL that you want to scrape, plus any of the options listed in the [API documentation](https://crawlbase.com/dashboard/docs).
@@ -103,7 +109,7 @@ puts response.body
103
109
  If you need to scrape a website built with JavaScript (React, Angular, Vue, etc.), you just need to pass your JavaScript token and use the same calls. Note that only `.get` is available for JavaScript and not `.post`.
104
110
 
105
111
  ```ruby
106
- api = Crawlbase::API.new(token: 'YOUR_JAVASCRIPT_TOKEN' })
112
+ api = Crawlbase::API.new(token: 'YOUR_JAVASCRIPT_TOKEN')
107
113
  ```
108
114
 
109
115
  ```ruby
@@ -357,8 +363,8 @@ The gem is available as open source under the terms of the [MIT License](http://
357
363
 
358
364
  ## Code of Conduct
359
365
 
360
- Everyone interacting in the Crawlbase projects codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/crawlbase-source/crawlbase-ruby/blob/master/CODE_OF_CONDUCT.md).
366
+ Everyone interacting in the Crawlbase project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/crawlbase-source/crawlbase-ruby/blob/master/CODE_OF_CONDUCT.md).
361
367
 
362
368
  ---
363
369
 
364
- Copyright 2023 Crawlbase
370
+ Copyright 2025 Crawlbase
data/crawlbase.gemspec CHANGED
@@ -28,4 +28,5 @@ Gem::Specification.new do |spec|
28
28
  spec.add_development_dependency "webmock", "~> 3.4"
29
29
  spec.add_development_dependency "bundler", "~> 2.0"
30
30
  spec.add_development_dependency "rake", "~> 12.3.3"
31
+ spec.add_development_dependency "rexml", "~> 3.2"
31
32
  end
data/lib/crawlbase/api.rb CHANGED
@@ -6,23 +6,26 @@ require 'uri'
6
6
 
7
7
  module Crawlbase
8
8
  class API
9
- attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
9
+ attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url, :timeout
10
10
 
11
11
  INVALID_TOKEN = 'Token is required'
12
12
  INVALID_URL = 'URL is required'
13
+ DEFAULT_TIMEOUT = 90
13
14
 
14
15
  def initialize(options = {})
15
16
  raise INVALID_TOKEN if options[:token].nil?
16
17
 
17
18
  @token = options[:token]
19
+ @timeout = options.fetch(:timeout, DEFAULT_TIMEOUT)
18
20
  end
19
21
 
20
22
  def get(url, options = {})
21
23
  raise INVALID_URL if url.empty?
22
24
 
23
25
  uri = prepare_uri(url, options)
24
-
25
- response = Net::HTTP.get_response(uri)
26
+ http = build_http(uri)
27
+ request = Net::HTTP::Get.new(uri.request_uri)
28
+ response = http.request(request)
26
29
 
27
30
  prepare_response(response, options[:format])
28
31
 
@@ -33,10 +36,7 @@ module Crawlbase
33
36
  raise INVALID_URL if url.empty?
34
37
 
35
38
  uri = prepare_uri(url, options)
36
-
37
- http = Net::HTTP.new(uri.host, uri.port)
38
-
39
- http.use_ssl = true
39
+ http = build_http(uri)
40
40
 
41
41
  content_type = options[:post_content_type].to_s.include?('json') ? { 'Content-Type': 'text/json' } : nil
42
42
 
@@ -57,6 +57,14 @@ module Crawlbase
57
57
 
58
58
  private
59
59
 
60
+ def build_http(uri)
61
+ http = Net::HTTP.new(uri.host, uri.port)
62
+ http.use_ssl = true
63
+ http.open_timeout = @timeout
64
+ http.read_timeout = @timeout
65
+ http
66
+ end
67
+
60
68
  def base_url
61
69
  'https://api.crawlbase.com'
62
70
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Crawlbase
4
- VERSION = '1.0.0'
4
+ VERSION = '1.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlbase
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - crawlbase
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-29 00:00:00.000000000 Z
11
+ date: 2025-06-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 12.3.3
69
+ - !ruby/object:Gem::Dependency
70
+ name: rexml
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.2'
69
83
  description: Ruby based client for the Crawlbase API that helps developers crawl or
70
84
  scrape thousands of web pages anonymously
71
85
  email:
@@ -94,7 +108,7 @@ homepage: https://github.com/crawlbase-source/crawlbase-ruby
94
108
  licenses:
95
109
  - MIT
96
110
  metadata: {}
97
- post_install_message:
111
+ post_install_message:
98
112
  rdoc_options: []
99
113
  require_paths:
100
114
  - lib
@@ -109,8 +123,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
123
  - !ruby/object:Gem::Version
110
124
  version: '0'
111
125
  requirements: []
112
- rubygems_version: 3.1.2
113
- signing_key:
126
+ rubygems_version: 3.6.0.dev
127
+ signing_key:
114
128
  specification_version: 4
115
129
  summary: Crawlbase API client for web scraping and crawling
116
130
  test_files: []