scraper-central-ruby 1.0.0 → 2.0.0
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.github/pull-request-template.md +24 -0
- data/Gemfile.lock +32 -1
- data/README.md +23 -2
- data/lib/auth.rb +46 -0
- data/lib/cache.rb +13 -0
- data/lib/cache_server.rb +38 -13
- data/lib/proxy/base.rb +28 -2
- data/lib/proxy/crawl_base.rb +3 -11
- data/lib/proxy/scraper_api.rb +3 -12
- data/lib/scraper_central/version.rb +1 -1
- data/lib/scraper_central.rb +21 -10
- data/scraper-central-ruby.gemspec +6 -0
- metadata +47 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4a17d4043d0b53c2a19fe4629d316b3fcc3cb04c4bfd640555449121d13c034b
+  data.tar.gz: 8c98a99a53644ebf9e19e337b15b25c2e5887c85243f9b3c4fd2a949bf2acafa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e3426ffc94e2fd97fa80b055a0a6c2db8ee326dbcb163c33d3007278a1aaf71def15806126bc8ce0e44ae0562075d04f273cc73fe1ceec6bcc1d71d285387088
+  data.tar.gz: 888fc81bf4dadbc686c7819833eb46c64f390d11474afb5ea36dcd1a4ce9dfdabd7a56f0684c26d7c0a0b39a3b2d399c57d0637b3383e6d3f1bd5f46cd96966e
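For reference, these checksums cover the metadata.gz and data.tar.gz entries inside the published .gem archive (a .gem file is a plain tar). A minimal Ruby sketch that recomputes the SHA256 values from a locally downloaded copy; the filename here is hypothetical:

```ruby
require 'rubygems/package'
require 'digest'

# Hypothetical local copy of the released gem.
File.open('scraper-central-ruby-2.0.0.gem', 'rb') do |io|
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      # These digests should match the SHA256 values recorded above.
      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end
  end
end
```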
data/.github/pull-request-template.md
ADDED
@@ -0,0 +1,24 @@
+## PR Details
+Clickup Link -
+
+### Description
+
+#### Types of changes
+
+<!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
+
+- [ ] Docs change / refactoring / dependency upgrade
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to change)
+
+#### Checklist
+
+<!--- Go over all the following points, and put an `x` in all the boxes that apply. -->
+<!--- If you're unsure about any of these, don't hesitate to ask. We're here to help! -->
+
+- [ ] My code follows the code style of this project.
+<!--- Ruby style guide https://github.com/rubocop/ruby-style-guide -->
+<!--- Go Style Guide https://github.com/uber-go/guide/blob/master/style.md -->
+- [ ] My change requires a change to the documentation and I have updated the documentation accordingly.
+- [ ] I have added tests to cover my changes.
data/Gemfile.lock
CHANGED
@@ -1,17 +1,48 @@
 PATH
   remote: .
   specs:
-    scraper-central-ruby (1.0.0)
+    scraper-central-ruby (2.0.0)
+      activesupport (~> 7.0)
+      brotli (~> 0.5.0)

 GEM
   remote: https://rubygems.org/
   specs:
+    activesupport (7.1.5.1)
+      base64
+      benchmark (>= 0.3)
+      bigdecimal
+      concurrent-ruby (~> 1.0, >= 1.0.2)
+      connection_pool (>= 2.2.5)
+      drb
+      i18n (>= 1.6, < 2)
+      logger (>= 1.4.2)
+      minitest (>= 5.1)
+      mutex_m
+      securerandom (>= 0.3)
+      tzinfo (~> 2.0)
+    base64 (0.2.0)
+    benchmark (0.4.0)
+    bigdecimal (3.1.9)
+    brotli (0.5.0)
+    concurrent-ruby (1.3.5)
+    connection_pool (2.5.0)
+    drb (2.2.1)
+    i18n (1.14.7)
+      concurrent-ruby (~> 1.0)
+    logger (1.6.5)
+    minitest (5.25.4)
+    mutex_m (0.3.0)
     rake (13.2.1)
+    securerandom (0.3.2)
+    tzinfo (2.0.6)
+      concurrent-ruby (~> 1.0)

 PLATFORMS
   arm64-darwin-23

 DEPENDENCIES
+  brotli (~> 0.5.0)
   bundler (~> 2.4.22)
   rake (~> 13.0)
   scraper-central-ruby!
data/README.md
CHANGED
@@ -7,7 +7,7 @@ Ruby library to scrape and cache the data
 Add gem `scraper-central-ruby` into Gemfile:

 ```bash
-gem 'scraper-central-ruby', git: 'git@github.com:patterninc/scraper-central-ruby.git', tag: 'v1.0.0'
+gem 'scraper-central-ruby', git: 'git@github.com:patterninc/scraper-central-ruby.git', tag: 'v2.0.0'
 ```

 ```bash
@@ -117,6 +117,22 @@ Fetches the URL through the proxy:

 ```ruby
 scraper_central = ScraperCentral.new
+scraper_central.proxy_name = "BrightData"
+scraper_central.cache_duration = 360
+scraper_central.retry_attr = {
+  count: 5
+}
+scraper_central.s3_key = {
+  country: "US",
+  marketplace: "Amazon",
+  page_type: "product-question",
+  identifier: "B0BQZBPS4G",
+  page_number: 1
+}
+scraper_central.auth_config = {
+  client_id: 'client_id token',
+  client_secret: 'client_secret token'
+}

 response = scraper_central.fetch("https://example.com")

@@ -136,13 +152,18 @@ puts "Headers: ", response.headers
 - `scraper_central.retry_attr=`: Configures retry logic, including the number of attempts and the wait time between attempts.
 - `scraper_central.timeout=`: Sets the request timeout in seconds.
 - `scraper_central.tls_verify=`: Configures TLS verification.
-
+- `scraper_central.enable_image_cache=`: Enables/disables image caching (true/false).
 ### Proxy Methods

 - `scraper_central.query_params=`: Sets query parameters to be appended to each request URL.
 - `scraper_central.headers=`: Adds custom headers to requests (Accept, Accept-Encoding or Content-Type).
 - `scraper_central.cookies=`: Parses a JSON string of cookies and sets them for subsequent requests.

+### Auth configuration method
+
+- `scraper_central.auth_config=`: Sets the client_id and client_secret used to generate an auth token inside the gem; the token is sent in the headers on requests to the Scraper Central APIs.
+
+
 
 ### Proxy Methods For S3 Key

 - `scraper_central.s3_key=`: Updates the proxy's target country and the S3 key structure: country, marketplace, a predefined page-view name (e.g. `detail-page`), the page identifier (e.g. ASIN or product ID), and the page number if the page is paginated.
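One practical consequence of the new `enable_image_cache` flag: as `lib/cache_server.rb` below shows, an image served from cache comes back Base64-decoded and wrapped in a StringIO rather than a plain String. A hedged usage sketch; the URL and output filename are made up:

```ruby
require 'scraper_central'

scraper_central = ScraperCentral.new
scraper_central.enable_image_cache = true

response = scraper_central.fetch("https://example.com/product.jpg")
body = response.body
body = body.read if body.respond_to?(:read) # cache hits arrive as a StringIO
File.binwrite("product.jpg", body) if response.code == 200
```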
data/lib/auth.rb
ADDED
@@ -0,0 +1,46 @@
+require 'net/http'
+require 'uri'
+require 'json'
+require 'openssl'
+require 'cache'
+
+class Auth
+  AUTH0_TOKEN_ENDPOINT = "https://dev-pattern.auth0.com/oauth/token".freeze
+  AUTH0_DEFAULT_AUDIENCE = "https://dev-pattern.auth0.com/api/v2/".freeze
+  AUTH0_DEFAULT_GRANT_TYPE = "client_credentials".freeze
+  AUTH0_CACHE_KEY = 'AUTH0_CACHE_KEY'.freeze
+
+  def initialize(auth_config)
+    @auth_config = auth_config
+  end
+
+  def token_params
+    {
+      grant_type: AUTH0_DEFAULT_GRANT_TYPE,
+      client_id: @auth_config[:client_id],
+      client_secret: @auth_config[:client_secret],
+      audience: AUTH0_DEFAULT_AUDIENCE
+    }
+  end
+
+  def get_token
+    Cache.read(AUTH0_CACHE_KEY) || generate_token
+  end
+
+  def generate_token
+    url = URI(AUTH0_TOKEN_ENDPOINT)
+
+    http = Net::HTTP.new(url.host, url.port)
+    http.use_ssl = true
+    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+
+    request = Net::HTTP::Post.new(url)
+    request["content-type"] = 'application/json'
+    request.body = token_params.to_json
+
+    response = http.request(request)
+    token = JSON.parse(response.read_body)['access_token']
+    Cache.write(AUTH0_CACHE_KEY, token, expires_in: 1.day)
+    token
+  end
+end
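A minimal sketch of how `CacheServer` exercises this class; the credential values are placeholders. Note that `expires_in: 1.day` relies on ActiveSupport's duration helpers, which is one reason `activesupport` becomes a runtime dependency in this release:

```ruby
require 'auth' # assumes lib/ is on the load path

auth = Auth.new(client_id: 'my-client-id', client_secret: 'my-client-secret')
token = auth.get_token # first call hits Auth0; later calls reuse the cached token
```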
data/lib/cache.rb
ADDED
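The 13 added lines of `lib/cache.rb` are not shown in this diff. Judging from how `lib/auth.rb` calls it (`Cache.read(key)` and `Cache.write(key, value, expires_in:)`) and from the new `activesupport` dependency, a plausible reconstruction — hypothetical; the real file may differ — is a thin wrapper over an in-memory ActiveSupport cache store:

```ruby
require 'active_support'
require 'active_support/cache'

# Hypothetical reconstruction of lib/cache.rb based on its call sites.
class Cache
  STORE = ActiveSupport::Cache::MemoryStore.new

  def self.read(key)
    STORE.read(key)
  end

  def self.write(key, value, expires_in: nil)
    STORE.write(key, value, expires_in: expires_in)
  end
end
```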
data/lib/cache_server.rb
CHANGED
@@ -4,34 +4,46 @@ require 'net/http'
 require 'uri'
 require 'json'
 require 'logger'
+require 'auth'

 class CacheServer
-
-
-
-
-  @
+  AUTH0_BEARER_HEADER = 'Authorization'.freeze
+  CONTENT_TYPE_HEADER = 'Content-Type'.freeze
+
+  def initialize(args)
+    @proxy_name = args[:proxy_name]
+    @enable_js = args[:enable_js]
+    @cache_duration = args[:cache_duration]
+    @s3_key = args[:s3_key]
+    @enable_image_cache = args[:enable_image_cache]
+    @auth_config = args[:auth_config]
     @logger = Logger.new($stdout)
   end

   def get_cache(url)
     payload = prepare_get_cache_payload(url)

-    uri = URI.parse("#{ENV['SERVER_URL_GET_CACHE']}/get-cache")
+    uri = URI.parse("#{ENV['SERVER_URL_GET_CACHE']}/v1/get-cache")
     http = Net::HTTP.new(uri.host, uri.port)
     if uri.scheme == 'https'
       http.use_ssl = true
       http.verify_mode = OpenSSL::SSL::VERIFY_NONE
     end
-    request = Net::HTTP::
+    request = Net::HTTP::Get.new(uri.request_uri, headers)
     request.body = payload.to_json

     begin
       response = http.request(request)
+
       if response.content_type.include?('application/json')
         response_body = JSON.parse(response.body)
         return '', nil, proxy_from_server(response_body) if response_body.key?('proxyUrl')
-
+        page_from_server = response_body['body']
+        if @enable_image_cache
+          decoded_data = Base64.decode64(page_from_server)
+          page_from_server = StringIO.new(decoded_data)
+        end
+        return page_from_server, headers_from_server(response_body), nil
       else
         @logger.error "Unexpected response type: #{response.content_type}, body: #{response.body}, code: #{response.code}"
       end
@@ -41,22 +53,23 @@ class CacheServer
     ['', nil, nil]
   end

-  def put_cache(cache_key, page, headers, cookies)
+  def put_cache(cache_key, page, headers, cookies, enable_image_cache)
     payload = {
       cacheKey: cache_key,
       page: page,
       headers: headers,
-      cookies: cookies
+      cookies: cookies,
+      enableImageCache: enable_image_cache
     }

-    uri = URI.parse("#{ENV['SERVER_URL_PUT_CACHE']}/put-cache")
+    uri = URI.parse("#{ENV['SERVER_URL_PUT_CACHE']}/v1/put-cache")
     http = Net::HTTP.new(uri.host, uri.port)
     if uri.scheme == 'https'
       http.use_ssl = true
       http.verify_mode = OpenSSL::SSL::VERIFY_NONE
     end

-    request = Net::HTTP::Post.new(uri.request_uri,
+    request = Net::HTTP::Post.new(uri.request_uri, headers)
     request.body = payload.to_json

     begin
@@ -74,6 +87,17 @@ class CacheServer

   private

+  def auth_token
+    Auth.new(@auth_config).get_token
+  end
+
+  def headers
+    {
+      CONTENT_TYPE_HEADER => 'application/json',
+      AUTH0_BEARER_HEADER => "Bearer #{auth_token}"
+    }
+  end
+
   def headers_from_server(response_body)
     headers = {}
     if response_body['headers'].is_a?(Hash)
@@ -102,7 +126,8 @@ class CacheServer
       marketplace: @s3_key[:marketplace],
       pageType: @s3_key[:page_type],
       identifier: @s3_key[:identifier],
-      pageNumber: @s3_key[:page_number]
+      pageNumber: @s3_key[:page_number],
+      enableImageCache: @enable_image_cache
     }
   end
 end
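`get_cache` returns a three-element array — page, headers, and proxy settings — with `['', nil, nil]` as the error fallback. A sketch of how a caller branches on it (`ScraperCentral#fetch` is the real caller; the constructor values below mirror the README example and are otherwise hypothetical):

```ruby
require 'cache_server' # assumes lib/ is on the load path

cache_server = CacheServer.new(
  proxy_name: 'BrightData',
  enable_js: false,
  cache_duration: 360,
  s3_key: { country: 'US', marketplace: 'Amazon', page_type: 'detail-page',
            identifier: 'B0BQZBPS4G', page_number: 1 },
  enable_image_cache: false,
  auth_config: { client_id: 'id', client_secret: 'secret' }
)

page, headers, proxy_from_server = cache_server.get_cache('https://example.com')

if proxy_from_server
  # Cache miss: the server handed back proxy settings; fetch through the proxy.
elsif page != ''
  # Cache hit: page is a String (or a StringIO when image caching is enabled).
  puts headers.inspect
else
  # ['', nil, nil]: the request failed and the error was logged.
end
```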
data/lib/proxy/base.rb
CHANGED
@@ -18,6 +18,7 @@ module Proxy
       @tls_verify = params.fetch(:tls_verify, true)
       @retry = params[:retry_attr] || {}
       @enable_js = params.fetch(:enable_js, false)
+      @enable_image_cache = params[:enable_image_cache] || false
       @logger = Logger.new($stdout)
     end

@@ -35,14 +36,35 @@ module Proxy
     end

     def format_response(response)
+      if success_response?(response)
+        if response.header['Content-Encoding'].eql?('gzip')
+          sio = StringIO.new(response.body)
+          gz = Zlib::GzipReader.new(sio)
+          page_content = gz.read
+        elsif response.header['Content-Encoding'].eql?('br')
+          # Decode the Brotli-compressed response and keep it as HTML
+          page_content = Brotli.inflate(response.body)
+        else
+          page_content = response.body
+        end
+      end
+
       Response.new(
         code: response.code.to_i,
-        body:
-        headers: response
+        body: page_content,
+        headers: get_headers(response),
         cookies: response.get_fields('set-cookie')
       )
     end

+    def get_headers(response)
+      headers = {}
+      response.each_header do |key, value|
+        headers[key] = value
+      end
+      headers
+    end
+
     def prepare_request(uri, proxy_uri = nil)
       http = if proxy_uri.nil?
                Net::HTTP.new(uri.host, uri.port)
@@ -58,5 +80,9 @@ module Proxy
       http.open_timeout = @timeout
       http
     end
+
+    def success_response?(response)
+      response.kind_of?(Net::HTTPSuccess) || response.code.to_i == 200
+    end
   end
 end
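`format_response` now picks a decoder based on the response's `Content-Encoding` header. A standalone round-trip check of the two compressed branches, using the same `Zlib::GzipReader` and `Brotli.inflate` calls as the code above:

```ruby
require 'zlib'
require 'brotli'
require 'stringio'

html = '<html><body>ok</body></html>'

# gzip branch: a GzipReader over a StringIO, as in format_response
gzipped = Zlib.gzip(html)
raise 'gzip mismatch' unless Zlib::GzipReader.new(StringIO.new(gzipped)).read == html

# br branch: Brotli.inflate undoes Brotli.deflate
raise 'brotli mismatch' unless Brotli.inflate(Brotli.deflate(html)) == html
```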
data/lib/proxy/crawl_base.rb
CHANGED
@@ -5,22 +5,14 @@ require 'proxy/base'
 module Proxy
   class CrawlBase < Proxy::Base
     def fetch(url, proxy_from_server)
-
-      uri.query = URI.encode_www_form({
+      req_options = {
         'token' => proxy_from_server['proxyToken'],
         proxy_from_server['proxyCountryKey'] => @country,
         'url' => url
-      }.merge(@query_params))
-
-      request = Net::HTTP::Get.new(uri, @headers)
-      http = prepare_request(uri)
-
-      @cookies.each do |cookie|
-        request.add_field('Cookie', "#{cookie[:name]}=#{cookie[:value]}")
-      end
+      }.merge(@query_params)

       response = with_retry do
-
+        HTTParty.get(proxy_from_server['proxyHost'], query: req_options, headers: @headers)
       end

       format_response(response)
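Both proxy classes now delegate the HTTP call to HTTParty instead of hand-building a Net::HTTP request (note that `httparty` does not appear in the Gemfile.lock hunk above, so it is presumably expected to come from the host application). The request reduces to roughly this shape; the host, token, and country key are hypothetical values that the cache server supplies at runtime:

```ruby
require 'httparty'

response = HTTParty.get(
  'https://proxy-provider.example.com/',
  query: {
    'token' => 'proxy-token',        # proxy_from_server['proxyToken']
    'country_code' => 'us',          # proxy_from_server['proxyCountryKey'] => @country
    'url' => 'https://example.com'   # the page being scraped
  },
  headers: { 'Accept' => 'text/html' }
)
puts response.code
```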
data/lib/proxy/scraper_api.rb
CHANGED
@@ -5,24 +5,15 @@ require 'proxy/base'
 module Proxy
   class ScraperApi < Proxy::Base
     def fetch(url, proxy_from_server)
-
-
-      uri.query = URI.encode_www_form({
+      req_options = {
         'api_key' => proxy_from_server['proxyToken'],
         'render' => @enable_js.to_s,
         proxy_from_server['proxyCountryKey'] => @country,
         'url' => url
-      }.merge(@query_params))
-
-      request = Net::HTTP::Get.new(uri, @headers)
-      http = prepare_request(uri)
-
-      @cookies.each do |cookie|
-        request.add_field('Cookie', "#{cookie[:name]}=#{cookie[:value]}")
-      end
+      }.merge(@query_params)

       response = with_retry do
-
+        HTTParty.get(proxy_from_server['proxyHost'], query: req_options, headers: @headers)
       end

       format_response(response)
data/lib/scraper_central.rb
CHANGED
@@ -9,7 +9,7 @@ require 'proxy/scraper_api'

 class ScraperCentral
   attr_accessor :cache_duration, :proxy_name, :s3_key, :enable_js, :tls_verify, :headers, :query_params, :cookies,
-                :timeout, :retry_attr
+                :timeout, :retry_attr, :enable_image_cache, :auth_config

   def initialize
     @lock = Mutex.new
@@ -25,6 +25,7 @@ class ScraperCentral
       return Response.new(code: 200, body: page_from_server, headers: headers_from_server)
     else
       proxy_response = nil
+
       params = {
         country: s3_key[:country],
         headers: headers,
@@ -33,7 +34,8 @@ class ScraperCentral
         timeout: timeout,
         tls_verify: tls_verify,
         retry_attr: retry_attr,
-        enable_js: enable_js
+        enable_js: enable_js,
+        enable_image_cache: enable_image_cache
       }

       case proxy_from_server['proxyName']
@@ -45,18 +47,18 @@ class ScraperCentral
         proxy_response = Proxy::ScraperApi.new(params).fetch(@url, proxy_from_server)
       end

-      if proxy_response.nil?
-
-
-
+      if proxy_response.nil? || proxy_response&.code != 200
+        status_code = proxy_response&.code || 500
+        @logger.error("Error fetching content from proxy: #{proxy_from_server['proxyName']}, error code: #{status_code}, params: #{s3_key}")
+        return Response.new(code: status_code)
       end

       Thread.new do
         cache_server.put_cache(proxy_from_server['cacheKey'], proxy_response.body, proxy_response.headers,
-                               proxy_response.cookies)
-        @logger.info(
+                               proxy_response.cookies, enable_image_cache)
+        @logger.info("Cache successfully sent to server key: #{proxy_from_server['cacheKey']}")
       rescue StandardError => e
-        @logger.error("Error uploading cache to server: #{e.message}")
+        @logger.error("Error uploading cache to server key: #{proxy_from_server['cacheKey']}, error: #{e.message}")
       end

       print_proxy_values
@@ -86,10 +88,19 @@ class ScraperCentral
     @logger.info("cookies: #{cookies}") if cookies
     @logger.info("timeout: #{timeout}") if timeout
     @logger.info("retry_attr: #{retry_attr}") if retry_attr
+    @logger.info("enable_image_cache: #{enable_image_cache}") if enable_image_cache
   end

   def cache_server
-
+    args = {
+      proxy_name: proxy_name,
+      enable_js: enable_js,
+      cache_duration: cache_duration,
+      s3_key: s3_key,
+      enable_image_cache: enable_image_cache,
+      auth_config: auth_config
+    }
+    CacheServer.new(args)
   end
 end
data/scraper-central-ruby.gemspec
CHANGED
@@ -14,6 +14,10 @@ Gem::Specification.new do |spec|
  spec.homepage = 'https://github.com/patterninc/scraper-central-ruby'
  spec.license = 'MIT'

+  # Add the brotli gem dependency here
+  spec.add_dependency 'brotli', '~> 0.5.0'
+  spec.add_dependency 'activesupport', '~> 7.0'
+
  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
@@ -28,5 +32,7 @@ Gem::Specification.new do |spec|
  # bundle _2.4.22_
  spec.add_development_dependency 'bundler', '~> 2.4.22'
  spec.add_development_dependency 'rake', '~> 13.0'
+  spec.add_development_dependency 'brotli', '~> 0.5.0'
+
  # ... (other development dependencies)
 end
metadata
CHANGED
@@ -1,15 +1,43 @@
 --- !ruby/object:Gem::Specification
 name: scraper-central-ruby
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 2.0.0
 platform: ruby
 authors:
 - Patterninc
 autorequire:
 bindir: exe
 cert_chain: []
-date:
+date: 2025-02-04 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: brotli
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.5.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.5.0
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '7.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '7.0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -38,6 +66,20 @@ dependencies:
   - - "~>"
     - !ruby/object:Gem::Version
       version: '13.0'
+- !ruby/object:Gem::Dependency
+  name: brotli
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.5.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.5.0
 description:
 email:
 - amol.udage@pattern.com
@@ -45,9 +87,12 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/pull-request-template.md"
 - Gemfile
 - Gemfile.lock
 - README.md
+- lib/auth.rb
+- lib/cache.rb
 - lib/cache_server.rb
 - lib/proxy/base.rb
 - lib/proxy/bright_data.rb