scraper-central-ruby 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3a80d1ba8b02b21d51a048f7b153f04e23176ffcd5fc258f004ac903776fe831
4
- data.tar.gz: 9982839e9a725452a3f26022ccbd46eabea0aa8ac3b94fd1e4bdcda172ae58cc
3
+ metadata.gz: 812034013aa0b9d1fbec3961dad038a59b3c01c26e5738b5da8ee31d0af8acd1
4
+ data.tar.gz: 2da443589638a347f9ee81a73141b43d4073bb9dd148371421b9843906886e36
5
5
  SHA512:
6
- metadata.gz: 20f9efb238092af60d758971d8a84752a1cf24b9f927795352be2d66fc47784b3050b629300e0c7ed9b8baa04521970600851ce44d3a2b592f556a489a35ae5f
7
- data.tar.gz: c464c0c5a20b58366f2e738aee1cabb1b66555c3d9217b277fe9d9ab3433968f596be12dad4717738017b725b9044e3db0e24797aeec146d93d3abe1527119ce
6
+ metadata.gz: 8070123f403672e8b2f32fa6f26297447eb1f3b0bf8fcef4452b52491f82bf27fdb3e0f40a0f5e411c88372653335a94c4ed075bffa4ee6bdde589bcd1a3a4a1
7
+ data.tar.gz: 1112f3744c8c62db525394d4091e786640126a1b4a6bcaa1efa65f1ea850d5213ffcd0538df513197901d70c7f4167bf16821e1fa956e3e18be3d7bae50828ed
data/Gemfile.lock CHANGED
@@ -1,11 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scraper-central-ruby (1.0.0)
4
+ scraper-central-ruby (1.0.2)
5
+ brotli (~> 0.5.0)
5
6
 
6
7
  GEM
7
8
  remote: https://rubygems.org/
8
9
  specs:
10
+ brotli (0.5.0)
9
11
  rake (13.2.1)
10
12
 
11
13
  PLATFORMS
data/README.md CHANGED
@@ -117,6 +117,18 @@ Fetches the URL through the proxy:
117
117
 
118
118
  ```ruby
119
119
  scraper_central = ScraperCentral.new
120
+ scraper_central.proxy_name = "BrightData"
121
+ scraper_central.cache_duration = 360
122
+ scraper_central.retry_attr = {
123
+ count: 5
124
+ }
125
+ scraper_central.s3_key = {
126
+ country: "US",
127
+ marketplace: "Amazon",
128
+ page_type: "product-question",
129
+ identifier: "B0BQZBPS4G",
130
+ page_number: 1
131
+ }
120
132
 
121
133
  response = scraper_central.fetch("https://example.com")
122
134
 
data/lib/proxy/base.rb CHANGED
@@ -6,6 +6,7 @@ require 'json'
6
6
  require 'openssl'
7
7
  require 'response'
8
8
  require 'logger'
9
+ require 'brotli'
9
10
 
10
11
  module Proxy
11
12
  class Base
@@ -35,14 +36,33 @@ module Proxy
35
36
  end
36
37
 
37
38
  def format_response(response)
39
+ if response.kind_of?(Net::HTTPSuccess) && response.header['Content-Encoding'].eql?('gzip')
40
+ sio = StringIO.new(response.body)
41
+ gz = Zlib::GzipReader.new(sio)
42
+ page_content = gz.read()
43
+ elsif response.kind_of?(Net::HTTPSuccess) && response.header['Content-Encoding'].eql?('br')
44
+ # Decompress the Brotli-compressed response body (Content-Encoding: br)
45
+ page_content = Brotli.inflate(response.body)
46
+ else
47
+ page_content = response.body
48
+ end
49
+
38
50
  Response.new(
39
51
  code: response.code.to_i,
40
- body: response.body,
41
- headers: response.to_hash,
52
+ body: page_content,
53
+ headers: get_headers(response),
42
54
  cookies: response.get_fields('set-cookie')
43
55
  )
44
56
  end
45
57
 
58
+ def get_headers(response)
59
+ headers = {}
60
+ response.each_header do |key, value|
61
+ headers[key] = value
62
+ end
63
+ headers
64
+ end
65
+
46
66
  def prepare_request(uri, proxy_uri = nil)
47
67
  http = if proxy_uri.nil?
48
68
  Net::HTTP.new(uri.host, uri.port)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ScraperCentral
4
- VERSION = '1.0.0'
4
+ VERSION = '1.0.2'
5
5
  end
@@ -14,6 +14,10 @@ Gem::Specification.new do |spec|
14
14
  spec.homepage = 'https://github.com/patterninc/scraper-central-ruby'
15
15
  spec.license = 'MIT'
16
16
 
17
+
18
+ # Runtime dependency: brotli, used to decompress Brotli-encoded responses
19
+ spec.add_dependency 'brotli', '~> 0.5.0'
20
+
17
21
  # Specify which files should be added to the gem when it is released.
18
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
19
23
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraper-central-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Patterninc
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-11 00:00:00.000000000 Z
11
+ date: 2024-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: brotli
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.5.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.5.0
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: bundler
15
29
  requirement: !ruby/object:Gem::Requirement